1 // Only test codegen on target side, as private clause does not require any action on the host side 2 // Test target codegen - host bc file has to be created first. 3 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc 4 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s 6 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 7 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 8 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 9 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s 10 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 11 12 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc 13 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s 14 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s 15 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 16 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 17 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s 18 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s 19 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 20 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} 21 22 // expected-no-diagnostics 23 #ifndef HEADER 24 #define HEADER 25 26 template<typename tx, typename ty> 27 struct TT{ 28 tx X; 29 ty Y; 30 }; 31 32 // TCHECK: [[TT:%.+]] = type { i64, i8 } 33 // TCHECK: [[S1:%.+]] = type { double } 34 35 int foo(int n) { 36 int a = 0; 37 short aa = 0; 38 float b[10]; 39 float bn[n]; 40 double c[5][10]; 41 double cn[5][n]; 42 TT<long long, char> d; 43 44 #pragma omp target private(a) 45 { 46 } 47 48 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}) 49 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 50 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 51 // TCHECK-NOT: store {{.+}}, {{.+}} [[A]], 52 // TCHECK: ret void 53 54 #pragma omp target private(a) 55 { 56 a = 1; 57 } 58 59 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}) 60 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 61 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 62 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]], 63 // TCHECK: ret void 64 65 #pragma omp target private(a, aa) 66 { 67 a = 1; 68 aa = 1; 69 } 70 71 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}) 72 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 73 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 74 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, 75 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]], 76 // TCHECK: store i{{[0-9]+}} 1, ptr [[A2]], 77 // TCHECK: ret void 78 79 #pragma omp target private(a, b, bn, c, cn, d) 80 { 81 a = 1; 82 b[2] = 1.0; 83 bn[3] = 1.0; 84 c[1][2] = 1.0; 85 cn[1][3] = 1.0; 86 d.X = 1; 87 d.Y = 1; 88 } 89 // make sure that private variables are generated in all cases and that we use those instances for operations inside the 90 // target region 91 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]]) 92 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 93 // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, 94 // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, 95 // TCHECK: [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}}, 96 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 97 // TCHECK: [[B:%.+]] = alloca [10 x float], 98 // TCHECK: [[SSTACK:%.+]] = alloca ptr, 99 // TCHECK: [[C:%.+]] = alloca [5 x [10 x double]], 100 // TCHECK: [[D:%.+]] = alloca [[TT]], 101 // TCHECK: store i{{[0-9]+}} [[VLA]], ptr [[VLA_ADDR]], 102 // TCHECK: store i{{[0-9]+}} [[VLA1]], ptr [[VLA_ADDR2]], 103 // TCHECK: store i{{[0-9]+}} [[VLA3]], ptr [[VLA_ADDR4]], 104 // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR]], 105 // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR2]], 106 // TCHECK: [[VLA_ADDR_REF4:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR4]], 107 // TCHECK: [[RET_STACK:%.+]] = call ptr @llvm.stacksave.p0() 108 // TCHECK: store ptr [[RET_STACK]], ptr [[SSTACK]], 109 // TCHECK: [[VLA5:%.+]] = alloca float, i{{[0-9]+}} [[VLA_ADDR_REF]], 110 // TCHECK: [[VLA6_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF2]], [[VLA_ADDR_REF4]] 111 // TCHECK: [[VLA6:%.+]] = alloca double, i{{[0-9]+}} [[VLA6_SIZE]], 112 113 // a = 1 114 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]], 115 116 // b[2] = 1.0 117 // TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x float], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 118 // TCHECK: store float 1.0{{.*}}, ptr [[B_GEP]], 119 120 // bn[3] = 1.0 121 // TCHECK: [[BN_GEP:%.+]] = getelementptr inbounds float, ptr [[VLA5]], i{{[0-9]+}} 3 122 // TCHECK: store float 1.0{{.*}}, ptr [[BN_GEP]], 123 124 // c[1][2] = 1.0 125 // TCHECK: [[C_GEP1:%.+]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 126 // TCHECK: [[C_GEP2:%.+]] = getelementptr inbounds [10 x double], ptr [[C_GEP1]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 127 // TCHECK: store double 1.0{{.*}}, ptr [[C_GEP2]], 128 129 // cn[1][3] = 1.0 130 // TCHECK: [[CN_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF4]] 131 // TCHECK: [[CN_GEP_IND:%.+]] = getelementptr inbounds double, ptr [[VLA6]], i{{[0-9]+}} [[CN_IND]] 132 // TCHECK: [[CN_GEP_3:%.+]] = getelementptr inbounds double, ptr [[CN_GEP_IND]], i{{[0-9]+}} 3 133 // TCHECK: store double 1.0{{.*}}, ptr [[CN_GEP_3]], 134 135 // d.X = 1 136 // [[X_FIELD:%.+]] = getelementptr inbounds nuw [[TT]] ptr [[D]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 137 // store i{{[0-9]+}} 1, ptr [[X_FIELD]], 138 139 // d.Y = 1 140 // [[Y_FIELD:%.+]] = getelementptr inbounds nuw [[TT]] ptr [[D]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 141 // store i{{[0-9]+}} 1, ptr [[Y_FIELD]], 142 143 // finish 144 // [[RELOAD_SSTACK:%.+]] = load ptr, ptr [[SSTACK]], 145 // call ovid @llvm.stackrestore.p0(ptr [[RELOAD_SSTACK]]) 146 // ret void 147 148 return a; 149 } 150 151 152 template<typename tx> 153 tx ftemplate(int n) { 154 tx a = 0; 155 short aa = 0; 156 tx b[10]; 157 158 #pragma omp target private(a,aa,b) 159 { 160 a = 1; 161 aa = 1; 162 b[2] = 1; 163 } 164 165 return a; 166 } 167 168 static 169 int fstatic(int n) { 170 int a = 0; 171 short aa = 0; 172 char aaa = 0; 173 int b[10]; 174 175 #pragma omp target private(a,aa,aaa,b) 176 { 177 a = 1; 178 aa = 1; 179 aaa = 1; 180 b[2] = 1; 181 } 182 183 return a; 184 } 185 186 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}) 187 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 188 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 189 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, 190 // TCHECK: [[A3:%.+]] = alloca i{{[0-9]+}}, 191 // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}], 192 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]], 193 // TCHECK: store i{{[0-9]+}} 1, ptr [[A2]], 194 // TCHECK: store i{{[0-9]+}} 1, ptr [[A3]], 195 // TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 196 // TCHECK: store i{{[0-9]+}} 1, ptr [[B_GEP]], 197 // TCHECK: ret void 198 199 struct S1 { 200 double a; 201 202 int r1(int n){ 203 int b = n+1; 204 short int c[2][n]; 205 206 #pragma omp target private(b,c) 207 { 208 this->a = (double)b + 1.5; 209 c[1][1] = ++a; 210 } 211 212 return c[1][1] + (int)b; 213 } 214 215 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]]) 216 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 217 // TCHECK: [[TH_ADDR:%.+]] = alloca ptr, 218 // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, 219 // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, 220 // TCHECK: [[B:%.+]] = alloca i{{[0-9]+}}, 221 // TCHECK: [[SSTACK:%.+]] = alloca ptr, 222 // TCHECK: store ptr [[TH]], ptr [[TH_ADDR]], 223 // TCHECK: store i{{[0-9]+}} [[VLA]], ptr [[VLA_ADDR]], 224 // TCHECK: store i{{[0-9]+}} [[VLA1]], ptr [[VLA_ADDR2]], 225 // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr [[TH_ADDR]], 226 // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR]], 227 // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR2]], 228 // TCHECK: [[RET_STACK:%.+]] = call ptr @llvm.stacksave.p0() 229 // TCHECK: store ptr [[RET_STACK:%.+]], ptr [[SSTACK]], 230 231 // this->a = (double)b + 1.5; 232 // TCHECK: [[VLA_IND:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]] 233 // TCHECK: [[VLA3:%.+]] = alloca i{{[0-9]+}}, i{{[0-9]+}} [[VLA_IND]], 234 // TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, ptr [[B]], 235 // TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double 236 // TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00 237 // TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds nuw [[S1]], ptr [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 238 // TCHECK: store double [[NEW_A_VAL]], ptr [[A_FIELD]], 239 240 // c[1][1] = ++a; 241 // TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds nuw [[S1]], ptr [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 242 // TCHECK: [[A_FIELD4_VAL:%.+]] = load double, ptr [[A_FIELD4]], 243 // TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00 244 // TCHECK: store double [[A_FIELD_INC]], ptr [[A_FIELD4]], 245 // TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}} 246 // TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]] 247 // TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, ptr [[VLA3]], i{{[0-9]+}} [[C_IND]] 248 // TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, ptr [[C_1_REF]], i{{[0-9]+}} 1 249 // TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], ptr [[C_1_1_REF]], 250 251 // finish 252 // TCHECK: [[RELOAD_SSTACK:%.+]] = load ptr, ptr [[SSTACK]], 253 // TCHECK: call void @llvm.stackrestore.p0(ptr [[RELOAD_SSTACK]]) 254 // TCHECK: ret void 255 }; 256 257 258 int bar(int n){ 259 int a = 0; 260 a += foo(n); 261 S1 S; 262 a += S.r1(n); 263 a += fstatic(n); 264 a += ftemplate<int>(n); 265 266 return a; 267 } 268 269 // template 270 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}) 271 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr 272 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, 273 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, 274 // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}], 275 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]], 276 // TCHECK: store i{{[0-9]+}} 1, ptr [[A2]], 277 // TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 278 // TCHECK: store i{{[0-9]+}} 1, ptr [[B_GEP]], 279 // TCHECK: ret void 280 281 #endif 282