// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU

// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR

// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH

// expected-no-diagnostics
// foo() is the only function under test. The HEADER guard keeps it from being defined twice when this file is compiled with its own PCH pre-included.
#ifndef HEADER
#define HEADER
int foo() {  // always returns 0; every command above stops at IR or PCH emission, so the body is compiled but never executed
  int i;  // outer loop variable of the collapsed nest
  int j;  // inner loop variable; also the lastprivate(j) operand
  int sum[10][10];  // deliberately left uninitialized; operand of reduction(+) and map(tofrom)
  // The generated assertions name the kernel with an _l22 suffix, which appears to encode the pragma's line number; keep the pragma on line 22 or regenerate the assertions.
  #pragma omp target teams loop reduction(+:sum) collapse(2) \
      bind(parallel) order(concurrent) lastprivate(j) map(tofrom:sum)
    for(i=0; i<10; i++)
      for(j=0; j<10; j++)
        sum[i][j] += i;

  return 0;
}
#endif
// IR-PCH-HOST-LABEL: define {{[^@]+}}@_Z3foov
// IR-PCH-HOST-SAME: () #[[ATTR0:[0-9]+]] {
// IR-PCH-HOST-NEXT: entry:
// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-HOST-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-PCH-HOST-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8
// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4
// IR-PCH-HOST-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4
// IR-PCH-HOST-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 41 // IR-PCH-HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 42 // IR-PCH-HOST-NEXT: ret i32 0 43 // IR-PCH-HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 44 // IR-PCH-HOST-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 45 // IR-PCH-HOST-NEXT: entry: 46 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 47 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 48 // IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 49 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 50 // IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 51 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 52 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 53 // IR-PCH-HOST-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 54 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 55 // IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP0]]) 56 // IR-PCH-HOST-NEXT: ret void 57 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined. 
58 // IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 59 // IR-PCH-HOST-NEXT: entry: 60 // IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 61 // IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 62 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 63 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 64 // IR-PCH-HOST-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 65 // IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 66 // IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 67 // IR-PCH-HOST-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 68 // IR-PCH-HOST-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 69 // IR-PCH-HOST-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 70 // IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 71 // IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 72 // IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 73 // IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 74 // IR-PCH-HOST-NEXT: [[J4:%.*]] = alloca i32, align 4 75 // IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 76 // IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 77 // IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 78 // IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 79 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 80 // IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 81 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 82 // IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 83 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 84 // IR-PCH-HOST-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 85 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 86 // IR-PCH-HOST: omp.arrayinit.body: 87 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 88 // IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 89 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 90 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 91 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 92 // IR-PCH-HOST: omp.arrayinit.done: 93 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 94 // IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 95 // IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 96 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 97 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 98 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 99 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 100 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 101 // IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 102 // IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 103 // IR-PCH-HOST: cond.true: 104 // IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] 105 // IR-PCH-HOST: cond.false: 106 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 107 // IR-PCH-HOST-NEXT: br label [[COND_END]] 108 // 
IR-PCH-HOST: cond.end: 109 // IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 110 // IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 111 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 112 // IR-PCH-HOST-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 113 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 114 // IR-PCH-HOST: omp.inner.for.cond: 115 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 116 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 117 // IR-PCH-HOST-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 118 // IR-PCH-HOST-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 119 // IR-PCH-HOST: omp.inner.for.body: 120 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 121 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 122 // IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 123 // IR-PCH-HOST-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 124 // IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 125 // IR-PCH-HOST-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 126 // IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 127 // IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 128 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 129 // IR-PCH-HOST: omp.inner.for.inc: 130 // IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 131 // IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 132 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 133 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 134 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]] 135 // IR-PCH-HOST: omp.inner.for.end: 136 // IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 137 // IR-PCH-HOST: omp.loop.exit: 138 // IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 139 // IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 140 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 141 // IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 142 // IR-PCH-HOST-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 143 // IR-PCH-HOST-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 144 // IR-PCH-HOST: .omp.lastprivate.then: 145 // IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 146 // IR-PCH-HOST-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 147 // IR-PCH-HOST-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 148 // IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 149 // IR-PCH-HOST: .omp.lastprivate.done: 150 // IR-PCH-HOST-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 151 // IR-PCH-HOST-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 152 // IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 153 // IR-PCH-HOST-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 154 // IR-PCH-HOST-NEXT: [[TMP25:%.*]] = call i32 
@__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) 155 // IR-PCH-HOST-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 156 // IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 157 // IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 158 // IR-PCH-HOST-NEXT: ] 159 // IR-PCH-HOST: .omp.reduction.case1: 160 // IR-PCH-HOST-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 161 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 162 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 163 // IR-PCH-HOST: omp.arraycpy.body: 164 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 165 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 166 // IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 167 // IR-PCH-HOST-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 168 // IR-PCH-HOST-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 169 // IR-PCH-HOST-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 170 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 171 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 172 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 173 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 174 // IR-PCH-HOST: 
omp.arraycpy.done10: 175 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 176 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 177 // IR-PCH-HOST: .omp.reduction.case2: 178 // IR-PCH-HOST-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 179 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 180 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 181 // IR-PCH-HOST: omp.arraycpy.body12: 182 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 183 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 184 // IR-PCH-HOST-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 185 // IR-PCH-HOST-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 186 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 187 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 188 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 189 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 190 // IR-PCH-HOST: omp.arraycpy.done18: 191 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 192 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 193 // IR-PCH-HOST: .omp.reduction.default: 194 // IR-PCH-HOST-NEXT: ret void 195 // IR-PCH-HOST-LABEL: define 
{{[^@]+}}@.omp_outlined..1 196 // IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 197 // IR-PCH-HOST-NEXT: entry: 198 // IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 199 // IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 200 // IR-PCH-HOST-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 201 // IR-PCH-HOST-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 202 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 203 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 204 // IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 205 // IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 206 // IR-PCH-HOST-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 207 // IR-PCH-HOST-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 208 // IR-PCH-HOST-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 209 // IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 210 // IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 211 // IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 212 // IR-PCH-HOST-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 213 // IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 214 // IR-PCH-HOST-NEXT: [[J5:%.*]] = alloca i32, align 4 215 // IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 216 // IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 217 // IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 218 // IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 219 // IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 220 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 221 
// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 222 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 223 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 224 // IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 225 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 226 // IR-PCH-HOST-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 227 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 228 // IR-PCH-HOST-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 229 // IR-PCH-HOST-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 230 // IR-PCH-HOST-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 231 // IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 232 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 233 // IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 234 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 235 // IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 236 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 237 // IR-PCH-HOST: omp.arrayinit.body: 238 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 239 // IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 240 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 241 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 242 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 243 // IR-PCH-HOST: omp.arrayinit.done: 244 // IR-PCH-HOST-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 245 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 246 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 247 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 248 // IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 249 // IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 250 // IR-PCH-HOST: cond.true: 251 // IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] 252 // IR-PCH-HOST: cond.false: 253 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 254 // IR-PCH-HOST-NEXT: br label [[COND_END]] 255 // IR-PCH-HOST: cond.end: 256 // IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 257 // IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 258 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 259 // IR-PCH-HOST-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 260 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 261 // IR-PCH-HOST: omp.inner.for.cond: 262 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 263 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 264 // IR-PCH-HOST-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 265 // IR-PCH-HOST-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 266 // IR-PCH-HOST: omp.inner.for.body: 267 // IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 268 // IR-PCH-HOST-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 269 // IR-PCH-HOST-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 270 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
271 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 272 // IR-PCH-HOST-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 273 // IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 274 // IR-PCH-HOST-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 275 // IR-PCH-HOST-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 276 // IR-PCH-HOST-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 277 // IR-PCH-HOST-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 278 // IR-PCH-HOST-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 279 // IR-PCH-HOST-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 280 // IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 281 // IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 282 // IR-PCH-HOST-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 283 // IR-PCH-HOST-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 284 // IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 285 // IR-PCH-HOST-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 286 // IR-PCH-HOST-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 287 // IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 288 // IR-PCH-HOST-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 289 // IR-PCH-HOST-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 290 // IR-PCH-HOST-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 291 // IR-PCH-HOST: omp.body.continue: 292 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 293 // IR-PCH-HOST: omp.inner.for.inc: 294 // IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 295 // IR-PCH-HOST-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 296 // IR-PCH-HOST-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 297 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 298 // IR-PCH-HOST: omp.inner.for.end: 299 // IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 300 // IR-PCH-HOST: omp.loop.exit: 301 // IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 302 // IR-PCH-HOST-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 303 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 304 // IR-PCH-HOST-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 305 // IR-PCH-HOST-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 306 // IR-PCH-HOST-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 307 // IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 308 // IR-PCH-HOST-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 309 // IR-PCH-HOST-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 310 // IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 311 // IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 312 // IR-PCH-HOST-NEXT: ] 313 // IR-PCH-HOST: .omp.reduction.case1: 314 // IR-PCH-HOST-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 315 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 316 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 317 // IR-PCH-HOST: omp.arraycpy.body: 318 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], 
[[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 319 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 320 // IR-PCH-HOST-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 321 // IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 322 // IR-PCH-HOST-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 323 // IR-PCH-HOST-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 324 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 325 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 326 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 327 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 328 // IR-PCH-HOST: omp.arraycpy.done19: 329 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 330 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 331 // IR-PCH-HOST: .omp.reduction.case2: 332 // IR-PCH-HOST-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 333 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 334 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 335 // IR-PCH-HOST: omp.arraycpy.body21: 336 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 337 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 338 // IR-PCH-HOST-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 339 // IR-PCH-HOST-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 340 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 341 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 342 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 343 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 344 // IR-PCH-HOST: omp.arraycpy.done27: 345 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 346 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 347 // IR-PCH-HOST: .omp.reduction.default: 348 // IR-PCH-HOST-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 349 // IR-PCH-HOST-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 350 // IR-PCH-HOST-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 351 // IR-PCH-HOST: .omp.lastprivate.then: 352 // IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 353 // IR-PCH-HOST-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 354 // IR-PCH-HOST-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 355 // IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 356 // IR-PCH-HOST: .omp.lastprivate.done: 357 // IR-PCH-HOST-NEXT: ret void 358 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func 359 // IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 360 // IR-PCH-HOST-NEXT: entry: 361 // IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 362 // 
IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 363 // IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 364 // IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 365 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 366 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 367 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 368 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 369 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 370 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 371 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 372 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 373 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 374 // IR-PCH-HOST: omp.arraycpy.body: 375 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 376 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 377 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 378 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 379 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 380 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 381 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 382 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 383 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 384 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 385 // IR-PCH-HOST: omp.arraycpy.done2: 386 // IR-PCH-HOST-NEXT: ret void 387 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2 388 // IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 389 // IR-PCH-HOST-NEXT: entry: 390 // IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 391 // IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 392 // IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 393 // IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 394 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 395 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 396 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 397 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 398 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 399 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 400 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 401 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 402 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 403 // IR-PCH-HOST: omp.arraycpy.body: 404 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 405 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 406 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 407 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 408 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 409 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 410 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 411 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 412 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 413 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 414 // IR-PCH-HOST: omp.arraycpy.done2: 415 // IR-PCH-HOST-NEXT: ret void 416 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l23 417 // CHECK-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { 418 // CHECK-NEXT: entry: 419 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 420 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 421 // CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 422 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 423 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) 424 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 425 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 426 // CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 427 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr 428 // CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr 429 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 430 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 431 // 
CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 432 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) 433 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 434 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 435 // CHECK: user_code.entry: 436 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 437 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 438 // CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 439 // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 440 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 441 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 442 // CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] 443 // CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) 444 // CHECK-NEXT: ret void 445 // CHECK: worker.exit: 446 // CHECK-NEXT: ret void 447 // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ 448 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 449 // CHECK-NEXT: entry: 450 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 451 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 452 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 453 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 454 // CHECK-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 455 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4, addrspace(5) 456 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 457 // CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) 458 // CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) 459 // CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) 460 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 461 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 462 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 463 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 464 // CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) 465 // CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 466 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) 467 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 468 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 469 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 470 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 471 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 472 // CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr 473 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 474 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 475 // CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr 476 // CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr 477 // CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr 478 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTOMP_STRIDE]] to ptr 479 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 480 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 481 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 482 // CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr 483 // CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 484 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr 485 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 486 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 487 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 488 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 489 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 490 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 491 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 492 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 493 // CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 494 // CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 495 // CHECK: omp.arrayinit.body: 496 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 497 // CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 498 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 499 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq 
ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 500 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 501 // CHECK: omp.arrayinit.done: 502 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 503 // CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 504 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 505 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 506 // CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 507 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 508 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 509 // CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 510 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 511 // CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 512 // CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 513 // CHECK: cond.true: 514 // CHECK-NEXT: br label [[COND_END:%.*]] 515 // CHECK: cond.false: 516 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 517 // CHECK-NEXT: br label [[COND_END]] 518 // CHECK: cond.end: 519 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 520 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 521 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 522 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 523 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 524 // CHECK: omp.inner.for.cond: 525 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 526 // 
CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 527 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 528 // CHECK: omp.inner.for.body: 529 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 530 // CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 531 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 532 // CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 533 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 534 // CHECK-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 535 // CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 536 // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 537 // CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr 538 // CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 539 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 540 // CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr 541 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 542 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 543 // CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr 544 // CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 545 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 546 // CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 547 // CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 548 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 549 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 
4) 550 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 551 // CHECK: omp.inner.for.inc: 552 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 553 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 554 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] 555 // CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 556 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 557 // CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 558 // CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] 559 // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 560 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 561 // CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 562 // CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 563 // CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 564 // CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 565 // CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 566 // CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] 567 // CHECK: cond.true9: 568 // CHECK-NEXT: br label [[COND_END11:%.*]] 569 // CHECK: cond.false10: 570 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 571 // CHECK-NEXT: br label [[COND_END11]] 572 // CHECK: cond.end11: 573 // CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] 574 // CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 575 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 576 // CHECK-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 577 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] 578 // CHECK: omp.inner.for.end: 579 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 580 // CHECK: 
omp.loop.exit: 581 // CHECK-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 582 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 583 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) 584 // CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 585 // CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 586 // CHECK-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 587 // CHECK: .omp.lastprivate.then: 588 // CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 589 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 590 // CHECK-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 591 // CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 592 // CHECK: .omp.lastprivate.done: 593 // CHECK-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 594 // CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 595 // CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 596 // CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 597 // CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 598 // CHECK-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.3, ptr @_omp_reduction_inter_warp_copy_func.4, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) 599 // CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 600 // CHECK-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label 
[[DOTOMP_REDUCTION_DONE:%.*]] 601 // CHECK: .omp.reduction.then: 602 // CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 603 // CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] 604 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 605 // CHECK: omp.arraycpy.body: 606 // CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 607 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] 608 // CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 609 // CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 610 // CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] 611 // CHECK-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 612 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 613 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 614 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] 615 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] 616 // CHECK: omp.arraycpy.done17: 617 // CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) 618 // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 619 // CHECK: .omp.reduction.done: 620 // CHECK-NEXT: ret void 621 // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__.1 622 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr 
noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 623 // CHECK-NEXT: entry: 624 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 625 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 626 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 627 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 628 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 629 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 630 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 631 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 632 // CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) 633 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) 634 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) 635 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 636 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 637 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 638 // CHECK-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 639 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 640 // CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) 641 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 642 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 643 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 644 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr 645 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr 646 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[J_ADDR]] to ptr 647 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 648 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 649 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 650 // CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr 651 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr 652 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr 653 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr 654 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 655 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 656 // CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr 657 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 658 // CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr 659 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 660 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 661 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 662 // CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 663 // CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 664 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 665 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 666 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 667 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 668 // CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 
669 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 670 // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 671 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 672 // CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 673 // CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 674 // CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 675 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 676 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 677 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 678 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 679 // CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 680 // CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 681 // CHECK: omp.arrayinit.body: 682 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 683 // CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 684 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 685 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 686 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 687 // CHECK: omp.arrayinit.done: 688 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 689 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 690 // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr 
[[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) 691 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 692 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 693 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 694 // CHECK: omp.inner.for.cond: 695 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] 696 // CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 697 // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] 698 // CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] 699 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 700 // CHECK: omp.inner.for.body: 701 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 702 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 703 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 704 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 705 // CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 706 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 707 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 708 // CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 709 // CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 710 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] 711 // CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 712 // CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 713 // CHECK-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 714 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 715 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group 
[[ACC_GRP7]] 716 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 717 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] 718 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 719 // CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 720 // CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 721 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 722 // CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] 723 // CHECK-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 724 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 725 // CHECK: omp.body.continue: 726 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 727 // CHECK: omp.inner.for.inc: 728 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 729 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 730 // CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] 731 // CHECK-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 732 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] 733 // CHECK: omp.inner.for.end: 734 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 735 // CHECK: omp.loop.exit: 736 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 737 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 738 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) 739 // CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 740 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 741 // CHECK-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 742 // CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 743 // CHECK-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) 744 // CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 745 // CHECK-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 746 // CHECK: .omp.reduction.then: 747 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 748 // CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 749 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 750 // CHECK: omp.arraycpy.body: 751 // CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 752 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 753 // CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 754 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 755 // CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 756 // CHECK-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 757 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 758 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 759 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], 
[[TMP25]] 760 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 761 // CHECK: omp.arraycpy.done19: 762 // CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP21]]) 763 // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 764 // CHECK: .omp.reduction.done: 765 // CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 766 // CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 767 // CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 768 // CHECK: .omp.lastprivate.then: 769 // CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 770 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 771 // CHECK-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 772 // CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 773 // CHECK: .omp.lastprivate.done: 774 // CHECK-NEXT: ret void 775 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 776 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { 777 // CHECK-NEXT: entry: 778 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 779 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 780 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 781 // CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 782 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 783 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 784 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 785 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 786 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 787 // 
CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 788 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 789 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 790 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 791 // CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 792 // CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 793 // CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 794 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 795 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 796 // CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 797 // CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 798 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 799 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 800 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 801 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 802 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 803 // CHECK: .shuffle.pre_cond: 804 // CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 805 // CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 806 // CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 807 // CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 808 // CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 809 // CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 810 
// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 811 // CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 812 // CHECK: .shuffle.then: 813 // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 814 // CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 815 // CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 816 // CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 817 // CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 818 // CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 819 // CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 820 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 821 // CHECK: .shuffle.exit: 822 // CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 823 // CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 824 // CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 825 // CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 826 // CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 827 // CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 828 // CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 829 // CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 830 // CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 831 // CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 832 // CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 833 // CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 834 // CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 835 // CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 836 // CHECK: then: 837 // CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 838 // CHECK-NEXT: br label [[IFCONT:%.*]] 839 // CHECK: else: 840 // CHECK-NEXT: br label [[IFCONT]] 841 // CHECK: ifcont: 842 // CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 
[[TMP7]], 1 843 // CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 844 // CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 845 // CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 846 // CHECK: then4: 847 // CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 848 // CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 849 // CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 850 // CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 851 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 852 // CHECK-NEXT: br label [[IFCONT6:%.*]] 853 // CHECK: else5: 854 // CHECK-NEXT: br label [[IFCONT6]] 855 // CHECK: ifcont6: 856 // CHECK-NEXT: ret void 857 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 858 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 859 // CHECK-NEXT: entry: 860 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 861 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 862 // CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 863 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 864 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 865 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 866 // CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 867 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 868 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 869 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 870 // CHECK-NEXT: [[TMP4:%.*]] = call i32 
@__kmpc_get_hardware_thread_id_in_block() 871 // CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 872 // CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 873 // CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 874 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 875 // CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 876 // CHECK-NEXT: br label [[PRECOND:%.*]] 877 // CHECK: precond: 878 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 879 // CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 880 // CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 881 // CHECK: body: 882 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) 883 // CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 884 // CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 885 // CHECK: then: 886 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 887 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 888 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 889 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 890 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 891 // CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 892 // CHECK-NEXT: br label [[IFCONT:%.*]] 893 // CHECK: else: 894 // CHECK-NEXT: br label [[IFCONT]] 895 // CHECK: ifcont: 896 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 897 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 898 // CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 899 // CHECK-NEXT: br i1 
[[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 900 // CHECK: then2: 901 // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 902 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 903 // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 904 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 905 // CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 906 // CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 907 // CHECK-NEXT: br label [[IFCONT4:%.*]] 908 // CHECK: else3: 909 // CHECK-NEXT: br label [[IFCONT4]] 910 // CHECK: ifcont4: 911 // CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 912 // CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 913 // CHECK-NEXT: br label [[PRECOND]] 914 // CHECK: exit: 915 // CHECK-NEXT: ret void 916 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.3 917 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { 918 // CHECK-NEXT: entry: 919 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 920 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 921 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 922 // CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 923 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 924 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 925 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 926 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 927 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[DOTADDR2]] to ptr 928 // CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 929 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 930 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 931 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 932 // CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 933 // CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 934 // CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 935 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 936 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 937 // CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 938 // CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 939 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 940 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 941 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 942 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 943 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 944 // CHECK: .shuffle.pre_cond: 945 // CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 946 // CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 947 // CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 948 // CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 949 // CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 950 // CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr 
getelementptr (i8, ptr null, i32 1) to i64) 951 // CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 952 // CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 953 // CHECK: .shuffle.then: 954 // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 955 // CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 956 // CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 957 // CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 958 // CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 959 // CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 960 // CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 961 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 962 // CHECK: .shuffle.exit: 963 // CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 964 // CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 965 // CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 966 // CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 967 // CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 968 // CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 969 // CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 970 // CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 971 // CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 972 // CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 973 // CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 974 // CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 975 // CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 976 // CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 977 // CHECK: then: 978 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 979 // CHECK-NEXT: br label [[IFCONT:%.*]] 980 // CHECK: else: 981 // CHECK-NEXT: br label [[IFCONT]] 982 // CHECK: 
ifcont: 983 // CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 984 // CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 985 // CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 986 // CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 987 // CHECK: then4: 988 // CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 989 // CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 990 // CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 991 // CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 992 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 993 // CHECK-NEXT: br label [[IFCONT6:%.*]] 994 // CHECK: else5: 995 // CHECK-NEXT: br label [[IFCONT6]] 996 // CHECK: ifcont6: 997 // CHECK-NEXT: ret void 998 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.4 999 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 1000 // CHECK-NEXT: entry: 1001 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1002 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1003 // CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1004 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1005 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1006 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1007 // CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1008 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1009 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1010 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1011 
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1012 // CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 1013 // CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1014 // CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1015 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1016 // CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1017 // CHECK-NEXT: br label [[PRECOND:%.*]] 1018 // CHECK: precond: 1019 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1020 // CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1021 // CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1022 // CHECK: body: 1023 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1024 // CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1025 // CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1026 // CHECK: then: 1027 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1028 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1029 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 1030 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1031 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1032 // CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1033 // CHECK-NEXT: br label [[IFCONT:%.*]] 1034 // CHECK: else: 1035 // CHECK-NEXT: br label [[IFCONT]] 1036 // CHECK: ifcont: 1037 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1038 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1039 // CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 
[[TMP3]], [[TMP14]] 1040 // CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1041 // CHECK: then2: 1042 // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1043 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1044 // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1045 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 1046 // CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1047 // CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1048 // CHECK-NEXT: br label [[IFCONT4:%.*]] 1049 // CHECK: else3: 1050 // CHECK-NEXT: br label [[IFCONT4]] 1051 // CHECK: ifcont4: 1052 // CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 1053 // CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1054 // CHECK-NEXT: br label [[PRECOND]] 1055 // CHECK: exit: 1056 // CHECK-NEXT: ret void 1057 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func 1058 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1059 // CHECK-NEXT: entry: 1060 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1061 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1062 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1063 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1064 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1065 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1066 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1067 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1068 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1069 
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1070 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1071 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1072 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1073 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1074 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1075 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1076 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) 1077 // CHECK-NEXT: ret void 1078 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func 1079 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1080 // CHECK-NEXT: entry: 1081 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1082 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1083 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1084 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1085 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1086 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1087 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1088 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1089 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1090 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1091 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1092 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTADDR_ASCAST]], align 8 1093 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1094 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1095 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1096 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1097 // CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1098 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1099 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] 1100 // CHECK-NEXT: ret void 1101 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func 1102 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1103 // CHECK-NEXT: entry: 1104 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1105 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1106 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1107 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1108 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1109 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1110 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1111 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1112 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1113 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1114 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1115 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1116 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1117 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1118 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1119 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1120 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) 1121 // CHECK-NEXT: ret void 1122 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func 1123 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1124 // CHECK-NEXT: entry: 1125 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1126 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1127 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1128 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1129 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1130 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1131 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1132 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1133 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1134 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1135 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1136 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1137 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1138 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1139 // CHECK-NEXT: [[SUM:%.*]] = getelementptr 
inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1140 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1141 // CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1142 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1143 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] 1144 // CHECK-NEXT: ret void 1145 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 1146 // IR-GPU-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { 1147 // IR-GPU-NEXT: entry: 1148 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1149 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1150 // IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 1151 // IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1152 // IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) 1153 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1154 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1155 // IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 1156 // IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr 1157 // IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr 1158 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1159 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1160 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1161 // IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) 
1162 // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 1163 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1164 // IR-GPU: user_code.entry: 1165 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1166 // IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 1167 // IR-GPU-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 1168 // IR-GPU-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 1169 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 1170 // IR-GPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 1171 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] 1172 // IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) 1173 // IR-GPU-NEXT: ret void 1174 // IR-GPU: worker.exit: 1175 // IR-GPU-NEXT: ret void 1176 // 1177 // 1178 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined 1179 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 1180 // IR-GPU-NEXT: entry: 1181 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1182 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1183 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1184 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1185 // IR-GPU-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1186 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 1187 // IR-GPU-NEXT: [[TMP:%.*]] 
= alloca i32, align 4, addrspace(5) 1188 // IR-GPU-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) 1189 // IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) 1190 // IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) 1191 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 1192 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 1193 // IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 1194 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 1195 // IR-GPU-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) 1196 // IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 1197 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) 1198 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1199 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 1200 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 1201 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1202 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1203 // IR-GPU-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr 1204 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 1205 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 1206 // IR-GPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr 1207 // IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr 1208 // IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr 1209 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DOTOMP_STRIDE]] to ptr 1210 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 1211 // IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 1212 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 1213 // IR-GPU-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr 1214 // IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 1215 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr 1216 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1217 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1218 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 1219 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1220 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1221 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1222 // IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 1223 // IR-GPU-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1224 // IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 1225 // IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1226 // IR-GPU: omp.arrayinit.body: 1227 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1228 // IR-GPU-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1229 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1230 // IR-GPU-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 1231 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1232 // IR-GPU: omp.arrayinit.done: 1233 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1234 // IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1235 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1236 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1237 // IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1238 // IR-GPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1239 // IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 1240 // IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 1241 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1242 // IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 1243 // IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1244 // IR-GPU: cond.true: 1245 // IR-GPU-NEXT: br label [[COND_END:%.*]] 1246 // IR-GPU: cond.false: 1247 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1248 // IR-GPU-NEXT: br label [[COND_END]] 1249 // IR-GPU: cond.end: 1250 // IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 1251 // IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1252 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1253 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 1254 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1255 // IR-GPU: omp.inner.for.cond: 
1256 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 1257 // IR-GPU-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 1258 // IR-GPU-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1259 // IR-GPU: omp.inner.for.body: 1260 // IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1261 // IR-GPU-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 1262 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1263 // IR-GPU-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 1264 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1265 // IR-GPU-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 1266 // IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 1267 // IR-GPU-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 1268 // IR-GPU-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr 1269 // IR-GPU-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 1270 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 1271 // IR-GPU-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr 1272 // IR-GPU-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 1273 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 1274 // IR-GPU-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr 1275 // IR-GPU-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 1276 // IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 1277 // IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 1278 // IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1279 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 1280 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) 
@[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) 1281 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1282 // IR-GPU: omp.inner.for.inc: 1283 // IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 1284 // IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1285 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] 1286 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 1287 // IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1288 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1289 // IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] 1290 // IR-GPU-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1291 // IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1292 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1293 // IR-GPU-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 1294 // IR-GPU-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1295 // IR-GPU-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1296 // IR-GPU-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 1297 // IR-GPU-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] 1298 // IR-GPU: cond.true9: 1299 // IR-GPU-NEXT: br label [[COND_END11:%.*]] 1300 // IR-GPU: cond.false10: 1301 // IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1302 // IR-GPU-NEXT: br label [[COND_END11]] 1303 // IR-GPU: cond.end11: 1304 // IR-GPU-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] 1305 // IR-GPU-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1306 // IR-GPU-NEXT: [[TMP31:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB_ASCAST]], align 4 1307 // IR-GPU-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 1308 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] 1309 // IR-GPU: omp.inner.for.end: 1310 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1311 // IR-GPU: omp.loop.exit: 1312 // IR-GPU-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1313 // IR-GPU-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 1314 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) 1315 // IR-GPU-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1316 // IR-GPU-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 1317 // IR-GPU-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 1318 // IR-GPU: .omp.lastprivate.then: 1319 // IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 1320 // IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1321 // IR-GPU-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 1322 // IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 1323 // IR-GPU: .omp.lastprivate.done: 1324 // IR-GPU-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1325 // IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 1326 // IR-GPU-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1327 // IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 1328 // IR-GPU-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 1329 // IR-GPU-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr @_omp_reduction_inter_warp_copy_func.2, ptr 
@_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) 1330 // IR-GPU-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 1331 // IR-GPU-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 1332 // IR-GPU: .omp.reduction.then: 1333 // IR-GPU-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 1334 // IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] 1335 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 1336 // IR-GPU: omp.arraycpy.body: 1337 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1338 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1339 // IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 1340 // IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 1341 // IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] 1342 // IR-GPU-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 1343 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 1344 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 1345 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] 1346 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] 1347 // IR-GPU: omp.arraycpy.done17: 1348 // IR-GPU-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) 1349 // IR-GPU-NEXT: br 
label [[DOTOMP_REDUCTION_DONE]] 1350 // IR-GPU: .omp.reduction.done: 1351 // IR-GPU-NEXT: ret void 1352 // 1353 // 1354 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined 1355 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 1356 // IR-GPU-NEXT: entry: 1357 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1358 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1359 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1360 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1361 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1362 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1363 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 1364 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 1365 // IR-GPU-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) 1366 // IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) 1367 // IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) 1368 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 1369 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 1370 // IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 1371 // IR-GPU-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1372 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 1373 // IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) 1374 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1375 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 1376 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 1377 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr 1378 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr 1379 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1380 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1381 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 1382 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 1383 // IR-GPU-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr 1384 // IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr 1385 // IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr 1386 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr 1387 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 1388 // IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 1389 // IR-GPU-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr 1390 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 1391 // IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr 1392 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1393 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1394 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 1395 // IR-GPU-NEXT: 
store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 1396 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 1397 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1398 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1399 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1400 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 1401 // IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 1402 // IR-GPU-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 1403 // IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 1404 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 1405 // IR-GPU-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 1406 // IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 1407 // IR-GPU-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 1408 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1409 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1410 // IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 1411 // IR-GPU-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1412 // IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 1413 // IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1414 // IR-GPU: omp.arrayinit.body: 1415 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1416 // IR-GPU-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1417 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1418 // IR-GPU-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 1419 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1420 // IR-GPU: omp.arrayinit.done: 1421 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1422 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 1423 // IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) 1424 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 1425 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 1426 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1427 // IR-GPU: omp.inner.for.cond: 1428 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] 1429 // IR-GPU-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 1430 // IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] 1431 // IR-GPU-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] 1432 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1433 // IR-GPU: omp.inner.for.body: 1434 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1435 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 1436 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 1437 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1438 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1439 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1440 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, 
!llvm.access.group [[ACC_GRP7]] 1441 // IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 1442 // IR-GPU-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 1443 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] 1444 // IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 1445 // IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 1446 // IR-GPU-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1447 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1448 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1449 // IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 1450 // IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] 1451 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1452 // IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 1453 // IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 1454 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 1455 // IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] 1456 // IR-GPU-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 1457 // IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1458 // IR-GPU: omp.body.continue: 1459 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1460 // IR-GPU: omp.inner.for.inc: 1461 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1462 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1463 // IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] 1464 // IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group 
[[ACC_GRP7]] 1465 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] 1466 // IR-GPU: omp.inner.for.end: 1467 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1468 // IR-GPU: omp.loop.exit: 1469 // IR-GPU-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1470 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 1471 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) 1472 // IR-GPU-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1473 // IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 1474 // IR-GPU-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1475 // IR-GPU-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 1476 // IR-GPU-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) 1477 // IR-GPU-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 1478 // IR-GPU-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 1479 // IR-GPU: .omp.reduction.then: 1480 // IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 1481 // IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 1482 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 1483 // IR-GPU: omp.arraycpy.body: 1484 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1485 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1486 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 1487 // IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 1488 // IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 1489 // IR-GPU-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 1490 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 1491 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 1492 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 1493 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 1494 // IR-GPU: omp.arraycpy.done19: 1495 // IR-GPU-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP21]]) 1496 // IR-GPU-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 1497 // IR-GPU: .omp.reduction.done: 1498 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1499 // IR-GPU-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 1500 // IR-GPU-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 1501 // IR-GPU: .omp.lastprivate.then: 1502 // IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 1503 // IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1504 // IR-GPU-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 1505 // IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 1506 // IR-GPU: .omp.lastprivate.done: 1507 // IR-GPU-NEXT: ret void 1508 // 1509 // 1510 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 1511 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { 1512 // IR-GPU-NEXT: entry: 
1513 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1514 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 1515 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 1516 // IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 1517 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1518 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1519 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1520 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1521 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1522 // IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 1523 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 1524 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 1525 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1526 // IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 1527 // IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 1528 // IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 1529 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1530 // IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 1531 // IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 1532 // IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 1533 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1534 // IR-GPU-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 1535 // IR-GPU-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1536 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 1537 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1538 // IR-GPU: .shuffle.pre_cond: 1539 // IR-GPU-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1540 // IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 1541 // IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 1542 // IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 1543 // IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 1544 // IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 1545 // IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 1546 // IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1547 // IR-GPU: .shuffle.then: 1548 // IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 1549 // IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 1550 // IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1551 // IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 1552 // IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 1553 // IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 1554 // IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 1555 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1556 // IR-GPU: .shuffle.exit: 1557 // IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 1558 // IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 1559 // IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 1560 // IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 1561 // IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], 
[[TMP27]] 1562 // IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 1563 // IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 1564 // IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 1565 // IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 1566 // IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 1567 // IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 1568 // IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 1569 // IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 1570 // IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 1571 // IR-GPU: then: 1572 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 1573 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1574 // IR-GPU: else: 1575 // IR-GPU-NEXT: br label [[IFCONT]] 1576 // IR-GPU: ifcont: 1577 // IR-GPU-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 1578 // IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 1579 // IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 1580 // IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1581 // IR-GPU: then4: 1582 // IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1583 // IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 1584 // IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1585 // IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 1586 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 1587 // IR-GPU-NEXT: br label [[IFCONT6:%.*]] 1588 // IR-GPU: else5: 1589 // IR-GPU-NEXT: br label [[IFCONT6]] 1590 // IR-GPU: ifcont6: 1591 // IR-GPU-NEXT: ret void 1592 // 1593 // 1594 // IR-GPU-LABEL: define 
{{[^@]+}}@_omp_reduction_inter_warp_copy_func 1595 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 1596 // IR-GPU-NEXT: entry: 1597 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1598 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1599 // IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1600 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1601 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1602 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1603 // IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1604 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1605 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1606 // IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1607 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1608 // IR-GPU-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 1609 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1610 // IR-GPU-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1611 // IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1612 // IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1613 // IR-GPU-NEXT: br label [[PRECOND:%.*]] 1614 // IR-GPU: precond: 1615 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1616 // IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1617 // IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1618 // IR-GPU: body: 1619 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) 1620 // IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 
[[NVPTX_LANE_ID]], 0 1621 // IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1622 // IR-GPU: then: 1623 // IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1624 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1625 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 1626 // IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1627 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1628 // IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1629 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1630 // IR-GPU: else: 1631 // IR-GPU-NEXT: br label [[IFCONT]] 1632 // IR-GPU: ifcont: 1633 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1634 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1635 // IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 1636 // IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1637 // IR-GPU: then2: 1638 // IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1639 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1640 // IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1641 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 1642 // IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1643 // IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1644 // IR-GPU-NEXT: br label [[IFCONT4:%.*]] 1645 // IR-GPU: else3: 1646 // IR-GPU-NEXT: br label [[IFCONT4]] 1647 // IR-GPU: ifcont4: 1648 // IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 1649 // 
IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1650 // IR-GPU-NEXT: br label [[PRECOND]] 1651 // IR-GPU: exit: 1652 // IR-GPU-NEXT: ret void 1653 // 1654 // 1655 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.1 1656 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { 1657 // IR-GPU-NEXT: entry: 1658 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1659 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 1660 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 1661 // IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 1662 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1663 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1664 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1665 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1666 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1667 // IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 1668 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 1669 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 1670 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1671 // IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 1672 // IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 1673 // IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 1674 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 
8 1675 // IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 1676 // IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 1677 // IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 1678 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1679 // IR-GPU-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 1680 // IR-GPU-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1681 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 1682 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1683 // IR-GPU: .shuffle.pre_cond: 1684 // IR-GPU-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1685 // IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 1686 // IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 1687 // IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 1688 // IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 1689 // IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 1690 // IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 1691 // IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1692 // IR-GPU: .shuffle.then: 1693 // IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 1694 // IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 1695 // IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1696 // IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 1697 // IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 1698 // IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 1699 // IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr 
[[TMP13]], i64 1 1700 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1701 // IR-GPU: .shuffle.exit: 1702 // IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 1703 // IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 1704 // IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 1705 // IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 1706 // IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 1707 // IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 1708 // IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 1709 // IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 1710 // IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 1711 // IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 1712 // IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 1713 // IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 1714 // IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 1715 // IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 1716 // IR-GPU: then: 1717 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 1718 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1719 // IR-GPU: else: 1720 // IR-GPU-NEXT: br label [[IFCONT]] 1721 // IR-GPU: ifcont: 1722 // IR-GPU-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 1723 // IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 1724 // IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 1725 // IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1726 // IR-GPU: then4: 1727 // IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1728 // IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 1729 // IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1730 // 
IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 1731 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 1732 // IR-GPU-NEXT: br label [[IFCONT6:%.*]] 1733 // IR-GPU: else5: 1734 // IR-GPU-NEXT: br label [[IFCONT6]] 1735 // IR-GPU: ifcont6: 1736 // IR-GPU-NEXT: ret void 1737 // 1738 // 1739 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.2 1740 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 1741 // IR-GPU-NEXT: entry: 1742 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1743 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1744 // IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1745 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1746 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1747 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1748 // IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1749 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1750 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1751 // IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1752 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1753 // IR-GPU-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 1754 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1755 // IR-GPU-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1756 // IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1757 // IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1758 // IR-GPU-NEXT: br label [[PRECOND:%.*]] 1759 // IR-GPU: precond: 1760 // IR-GPU-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1761 // IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1762 // IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1763 // IR-GPU: body: 1764 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1765 // IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1766 // IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1767 // IR-GPU: then: 1768 // IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1769 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1770 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 1771 // IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1772 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1773 // IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1774 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1775 // IR-GPU: else: 1776 // IR-GPU-NEXT: br label [[IFCONT]] 1777 // IR-GPU: ifcont: 1778 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1779 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1780 // IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 1781 // IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1782 // IR-GPU: then2: 1783 // IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1784 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1785 // IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1786 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], 
i32 [[TMP7]] 1787 // IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1788 // IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1789 // IR-GPU-NEXT: br label [[IFCONT4:%.*]] 1790 // IR-GPU: else3: 1791 // IR-GPU-NEXT: br label [[IFCONT4]] 1792 // IR-GPU: ifcont4: 1793 // IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 1794 // IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1795 // IR-GPU-NEXT: br label [[PRECOND]] 1796 // IR-GPU: exit: 1797 // IR-GPU-NEXT: ret void 1798 // 1799 // 1800 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func 1801 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1802 // IR-GPU-NEXT: entry: 1803 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1804 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1805 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1806 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1807 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1808 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1809 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1810 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1811 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1812 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1813 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1814 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1815 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1816 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1817 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1818 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1819 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) 1820 // IR-GPU-NEXT: ret void 1821 // 1822 // 1823 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func 1824 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1825 // IR-GPU-NEXT: entry: 1826 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1827 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1828 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1829 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1830 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1831 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1832 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1833 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1834 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1835 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1836 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1837 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1838 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1839 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1840 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1841 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1842 // IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1843 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1844 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] 1845 // IR-GPU-NEXT: ret void 1846 // 1847 // 1848 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func 1849 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1850 // IR-GPU-NEXT: entry: 1851 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1852 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1853 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1854 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1855 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1856 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1857 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1858 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1859 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1860 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1861 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1862 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1863 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1864 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1865 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1866 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 
x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1867 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) 1868 // IR-GPU-NEXT: ret void 1869 // 1870 // 1871 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func 1872 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1873 // IR-GPU-NEXT: entry: 1874 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1875 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1876 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1877 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1878 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1879 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1880 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1881 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1882 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1883 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1884 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1885 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1886 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1887 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1888 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1889 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1890 // IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1891 // 
IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1892 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] 1893 // IR-GPU-NEXT: ret void 1894 // 1895 // 1896 // IR-LABEL: define {{[^@]+}}@_Z3foov 1897 // IR-SAME: () #[[ATTR0:[0-9]+]] { 1898 // IR-NEXT: entry: 1899 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 1900 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 1901 // IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 1902 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1903 // IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 1904 // IR-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 1905 // IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 1906 // IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 1907 // IR-NEXT: ret i32 0 1908 // 1909 // 1910 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 1911 // IR-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 1912 // IR-NEXT: entry: 1913 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 1914 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 1915 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1916 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 1917 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 1918 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 1919 // IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 1920 // IR-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 1921 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 1922 // IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) 1923 // IR-NEXT: ret void 1924 // 1925 // 1926 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined 1927 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 1928 // IR-NEXT: entry: 1929 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1930 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1931 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 1932 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 1933 // IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 1934 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1935 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 1936 // IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 1937 // IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 1938 // IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 1939 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1940 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1941 // IR-NEXT: [[J3:%.*]] = alloca i32, align 4 1942 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 1943 // IR-NEXT: [[J4:%.*]] = alloca i32, align 4 1944 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1945 // IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 1946 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 1947 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 1948 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 1949 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 1950 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 1951 // IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 1952 // 
IR-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1953 // IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 1954 // IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1955 // IR: omp.arrayinit.body: 1956 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1957 // IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1958 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1959 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 1960 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1961 // IR: omp.arrayinit.done: 1962 // IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 1963 // IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 1964 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 1965 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 1966 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 1967 // IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 1968 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 1969 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1970 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 1971 // IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1972 // IR: cond.true: 1973 // IR-NEXT: br label [[COND_END:%.*]] 1974 // IR: cond.false: 1975 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1976 // IR-NEXT: br label [[COND_END]] 1977 // IR: cond.end: 1978 // IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] 1979 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 1980 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 1981 // IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 1982 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1983 // IR: omp.inner.for.cond: 1984 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 1985 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1986 // IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 1987 // IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1988 // IR: omp.inner.for.body: 1989 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 1990 // IR-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 1991 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1992 // IR-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 1993 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 1994 // IR-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 1995 // IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 1996 // IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 1997 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1998 // IR: omp.inner.for.inc: 1999 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 2000 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 2001 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 2002 // IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 2003 // IR-NEXT: br label [[OMP_INNER_FOR_COND]] 2004 // IR: omp.inner.for.end: 2005 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2006 // IR: omp.loop.exit: 2007 // IR-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2008 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 2009 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 2010 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2011 // IR-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 2012 // IR-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2013 // IR: .omp.lastprivate.then: 2014 // IR-NEXT: store i32 10, ptr [[J3]], align 4 2015 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 2016 // IR-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 2017 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2018 // IR: .omp.lastprivate.done: 2019 // IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2020 // IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 2021 // IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2022 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 2023 // IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2024 // IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2025 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2026 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2027 // IR-NEXT: ] 2028 // IR: .omp.reduction.case1: 2029 // IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2030 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 2031 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2032 // IR: omp.arraycpy.body: 2033 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2034 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2035 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2036 // IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2037 // IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 2038 // IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2039 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 2040 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2041 // IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 2042 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 2043 // IR: omp.arraycpy.done10: 2044 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2045 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2046 // 
IR: .omp.reduction.case2: 2047 // IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2048 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 2049 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 2050 // IR: omp.arraycpy.body12: 2051 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2052 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2053 // IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 2054 // IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 2055 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 2056 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 2057 // IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 2058 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 2059 // IR: omp.arraycpy.done18: 2060 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2061 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2062 // IR: .omp.reduction.default: 2063 // IR-NEXT: ret void 2064 // 2065 // 2066 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined 2067 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) 
#[[ATTR1]] { 2068 // IR-NEXT: entry: 2069 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2070 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2071 // IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 2072 // IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 2073 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2074 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2075 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2076 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 2077 // IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 2078 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2079 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2080 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2081 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2082 // IR-NEXT: [[J3:%.*]] = alloca i32, align 4 2083 // IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 2084 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 2085 // IR-NEXT: [[J5:%.*]] = alloca i32, align 4 2086 // IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2087 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2088 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2089 // IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2090 // IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2091 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2092 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2093 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2094 // IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 2095 // IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 2096 // IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2097 // IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 2098 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2099 // IR-NEXT: [[CONV2:%.*]] 
= trunc i64 [[TMP2]] to i32 2100 // IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 2101 // IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 2102 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2103 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2104 // IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 2105 // IR-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2106 // IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 2107 // IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2108 // IR: omp.arrayinit.body: 2109 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2110 // IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2111 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2112 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 2113 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2114 // IR: omp.arrayinit.done: 2115 // IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2116 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 2117 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2118 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2119 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 2120 // IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2121 // IR: cond.true: 2122 // IR-NEXT: br label [[COND_END:%.*]] 2123 // IR: cond.false: 2124 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2125 
// IR-NEXT: br label [[COND_END]] 2126 // IR: cond.end: 2127 // IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 2128 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 2129 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 2130 // IR-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 2131 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2132 // IR: omp.inner.for.cond: 2133 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 2134 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 2135 // IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 2136 // IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2137 // IR: omp.inner.for.body: 2138 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2139 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 2140 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 2141 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2142 // IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2143 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2144 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2145 // IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 2146 // IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 2147 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 2148 // IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 2149 // IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 2150 // IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2151 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2152 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2153 // IR-NEXT: [[IDXPROM:%.*]] 
= sext i32 [[TMP15]] to i64 2154 // IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 2155 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2156 // IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 2157 // IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 2158 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2159 // IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 2160 // IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2161 // IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2162 // IR: omp.body.continue: 2163 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2164 // IR: omp.inner.for.inc: 2165 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2166 // IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 2167 // IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2168 // IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 2169 // IR: omp.inner.for.end: 2170 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2171 // IR: omp.loop.exit: 2172 // IR-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2173 // IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 2174 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 2175 // IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2176 // IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 2177 // IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2178 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 2179 // IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2180 // IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2181 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2182 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2183 // IR-NEXT: ] 2184 // IR: .omp.reduction.case1: 2185 // IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2186 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 2187 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2188 // IR: omp.arraycpy.body: 2189 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2190 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2191 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2192 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2193 // IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 2194 // IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2195 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 2196 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2197 // IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 2198 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 2199 // IR: omp.arraycpy.done19: 2200 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2201 // IR-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] 2202 // IR: .omp.reduction.case2: 2203 // IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2204 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 2205 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 2206 // IR: omp.arraycpy.body21: 2207 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2208 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2209 // IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 2210 // IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 2211 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 2212 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 2213 // IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 2214 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 2215 // IR: omp.arraycpy.done27: 2216 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2217 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2218 // IR: .omp.reduction.default: 2219 // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2220 // IR-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 2221 // IR-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2222 // IR: .omp.lastprivate.then: 2223 // IR-NEXT: store i32 10, ptr [[J3]], align 4 2224 // IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 
4 2225 // IR-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 2226 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2227 // IR: .omp.lastprivate.done: 2228 // IR-NEXT: ret void 2229 // 2230 // 2231 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func 2232 // IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 2233 // IR-NEXT: entry: 2234 // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2235 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2236 // IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2237 // IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2238 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2239 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2240 // IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2241 // IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2242 // IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2243 // IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2244 // IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2245 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2246 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2247 // IR: omp.arraycpy.body: 2248 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2249 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2250 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2251 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2252 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2253 // IR-NEXT: store 
i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2254 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2255 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2256 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2257 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2258 // IR: omp.arraycpy.done2: 2259 // IR-NEXT: ret void 2260 // 2261 // 2262 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func 2263 // IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 2264 // IR-NEXT: entry: 2265 // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2266 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2267 // IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2268 // IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2269 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2270 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2271 // IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2272 // IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2273 // IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2274 // IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2275 // IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2276 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2277 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2278 // IR: omp.arraycpy.body: 2279 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2280 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ 
[[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2281 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2282 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2283 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2284 // IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2285 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2286 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2287 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2288 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2289 // IR: omp.arraycpy.done2: 2290 // IR-NEXT: ret void 2291 // 2292 // 2293 // IR-PCH-LABEL: define {{[^@]+}}@_Z3foov 2294 // IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { 2295 // IR-PCH-NEXT: entry: 2296 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2297 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 2298 // IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 2299 // IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2300 // IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 2301 // IR-PCH-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 2302 // IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 2303 // IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 2304 // IR-PCH-NEXT: ret i32 0 2305 // 2306 // 2307 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 2308 // IR-PCH-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 2309 // IR-PCH-NEXT: entry: 2310 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2311 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, 
align 8 2312 // IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2313 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2314 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2315 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2316 // IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 2317 // IR-PCH-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 2318 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 2319 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) 2320 // IR-PCH-NEXT: ret void 2321 // 2322 // 2323 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined 2324 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 2325 // IR-PCH-NEXT: entry: 2326 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2327 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2328 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2329 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2330 // IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 2331 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2332 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 2333 // IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 2334 // IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2335 // IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2336 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2337 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2338 // IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 2339 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2340 // IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4 2341 // 
IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2342 // IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2343 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2344 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2345 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2346 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2347 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2348 // IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 2349 // IR-PCH-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2350 // IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 2351 // IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2352 // IR-PCH: omp.arrayinit.body: 2353 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2354 // IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2355 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2356 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 2357 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2358 // IR-PCH: omp.arrayinit.done: 2359 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 2360 // IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 2361 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2362 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2363 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2364 // IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 2365 // 
IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2366 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2367 // IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 2368 // IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2369 // IR-PCH: cond.true: 2370 // IR-PCH-NEXT: br label [[COND_END:%.*]] 2371 // IR-PCH: cond.false: 2372 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2373 // IR-PCH-NEXT: br label [[COND_END]] 2374 // IR-PCH: cond.end: 2375 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 2376 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 2377 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 2378 // IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 2379 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2380 // IR-PCH: omp.inner.for.cond: 2381 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 2382 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2383 // IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 2384 // IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2385 // IR-PCH: omp.inner.for.body: 2386 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 2387 // IR-PCH-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 2388 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2389 // IR-PCH-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 2390 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 2391 // IR-PCH-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 2392 // IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 2393 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 2394 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2395 // IR-PCH: omp.inner.for.inc: 2396 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 2397 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 2398 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 2399 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 2400 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] 2401 // IR-PCH: omp.inner.for.end: 2402 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2403 // IR-PCH: omp.loop.exit: 2404 // IR-PCH-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2405 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 2406 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 2407 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2408 // IR-PCH-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 2409 // IR-PCH-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2410 // IR-PCH: .omp.lastprivate.then: 2411 // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 2412 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 2413 // IR-PCH-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 2414 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2415 // IR-PCH: .omp.lastprivate.done: 2416 // IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2417 // IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 2418 // IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2419 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 2420 // IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], 
i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2421 // IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2422 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2423 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2424 // IR-PCH-NEXT: ] 2425 // IR-PCH: .omp.reduction.case1: 2426 // IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2427 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 2428 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2429 // IR-PCH: omp.arraycpy.body: 2430 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2431 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2432 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2433 // IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2434 // IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 2435 // IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2436 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 2437 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2438 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 2439 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 2440 // IR-PCH: omp.arraycpy.done10: 2441 // IR-PCH-NEXT: call void 
@__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2442 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2443 // IR-PCH: .omp.reduction.case2: 2444 // IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2445 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 2446 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 2447 // IR-PCH: omp.arraycpy.body12: 2448 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2449 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2450 // IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 2451 // IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 2452 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 2453 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 2454 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 2455 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 2456 // IR-PCH: omp.arraycpy.done18: 2457 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2458 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2459 // IR-PCH: .omp.reduction.default: 2460 // IR-PCH-NEXT: ret void 2461 // 2462 // 2463 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined 2464 // IR-PCH-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 2465 // IR-PCH-NEXT: entry: 2466 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2467 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2468 // IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 2469 // IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 2470 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2471 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2472 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2473 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 2474 // IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 2475 // IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2476 // IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2477 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2478 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2479 // IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 2480 // IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 2481 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2482 // IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 2483 // IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2484 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2485 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2486 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2487 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2488 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2489 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2490 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2491 // IR-PCH-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 2492 // IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 2493 // IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2494 // IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 2495 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2496 // IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 2497 // IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 2498 // IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 2499 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2500 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2501 // IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 2502 // IR-PCH-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2503 // IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 2504 // IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2505 // IR-PCH: omp.arrayinit.body: 2506 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2507 // IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2508 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2509 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 2510 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2511 // IR-PCH: omp.arrayinit.done: 2512 // IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2513 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 2514 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2515 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2516 // IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 2517 // IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2518 // IR-PCH: cond.true: 2519 // IR-PCH-NEXT: br label [[COND_END:%.*]] 2520 // IR-PCH: cond.false: 2521 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2522 // IR-PCH-NEXT: br label [[COND_END]] 2523 // IR-PCH: cond.end: 2524 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 2525 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 2526 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 2527 // IR-PCH-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 2528 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2529 // IR-PCH: omp.inner.for.cond: 2530 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 2531 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 2532 // IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 2533 // IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2534 // IR-PCH: omp.inner.for.body: 2535 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2536 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 2537 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 2538 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2539 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2540 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2541 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2542 // IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 
[[TMP13]], 10 2543 // IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 2544 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 2545 // IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 2546 // IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 2547 // IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2548 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2549 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2550 // IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 2551 // IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 2552 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2553 // IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 2554 // IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 2555 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2556 // IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 2557 // IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2558 // IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2559 // IR-PCH: omp.body.continue: 2560 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2561 // IR-PCH: omp.inner.for.inc: 2562 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2563 // IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 2564 // IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2565 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 2566 // IR-PCH: omp.inner.for.end: 2567 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2568 // IR-PCH: omp.loop.exit: 2569 // IR-PCH-NEXT: [[TMP19:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2570 // IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 2571 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 2572 // IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2573 // IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 2574 // IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2575 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 2576 // IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2577 // IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2578 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2579 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2580 // IR-PCH-NEXT: ] 2581 // IR-PCH: .omp.reduction.case1: 2582 // IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2583 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 2584 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2585 // IR-PCH: omp.arraycpy.body: 2586 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2587 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2588 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2589 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2590 // IR-PCH-NEXT: [[ADD16:%.*]] = add 
nsw i32 [[TMP26]], [[TMP27]] 2591 // IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2592 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 2593 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2594 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 2595 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 2596 // IR-PCH: omp.arraycpy.done19: 2597 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2598 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2599 // IR-PCH: .omp.reduction.case2: 2600 // IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2601 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 2602 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 2603 // IR-PCH: omp.arraycpy.body21: 2604 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2605 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2606 // IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 2607 // IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 2608 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 2609 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 2610 // IR-PCH-NEXT: 
[[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 2611 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 2612 // IR-PCH: omp.arraycpy.done27: 2613 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2614 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2615 // IR-PCH: .omp.reduction.default: 2616 // IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2617 // IR-PCH-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 2618 // IR-PCH-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2619 // IR-PCH: .omp.lastprivate.then: 2620 // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 2621 // IR-PCH-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 2622 // IR-PCH-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 2623 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2624 // IR-PCH: .omp.lastprivate.done: 2625 // IR-PCH-NEXT: ret void 2626 // 2627 // 2628 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func 2629 // IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 2630 // IR-PCH-NEXT: entry: 2631 // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2632 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2633 // IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2634 // IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2635 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2636 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2637 // IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2638 // IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2639 // IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 
0, i64 0 2640 // IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2641 // IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2642 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2643 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2644 // IR-PCH: omp.arraycpy.body: 2645 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2646 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2647 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2648 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2649 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2650 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2651 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2652 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2653 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2654 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2655 // IR-PCH: omp.arraycpy.done2: 2656 // IR-PCH-NEXT: ret void 2657 // 2658 // 2659 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func 2660 // IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 2661 // IR-PCH-NEXT: entry: 2662 // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2663 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2664 // IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2665 
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2666 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2667 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2668 // IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2669 // IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2670 // IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2671 // IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2672 // IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2673 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2674 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2675 // IR-PCH: omp.arraycpy.body: 2676 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2677 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2678 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2679 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2680 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2681 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2682 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2683 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2684 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2685 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2686 // IR-PCH: omp.arraycpy.done2: 2687 // IR-PCH-NEXT: ret 
void 2688 // 2689