1 // Test target codegen - host bc file has to be created first. 2 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 3 // RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 4 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 5 // RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 6 // expected-no-diagnostics 7 #ifndef HEADER 8 #define HEADER 9 10 template <typename tx, typename ty> 11 struct TT { 12 tx X; 13 ty Y; 14 }; 15 16 // TCHECK-DAG: [[TTII:%.+]] = type { i32, i32 } 17 // TCHECK-DAG: [[TT:%.+]] = type { i64, i8 } 18 // TCHECK-DAG: [[S1:%.+]] = type { double } 19 20 int foo(int n, double *ptr) { 21 int a = 0; 22 short aa = 0; 23 float b[10]; 24 double c[5][10]; 25 TT<long long, char> d; 26 const TT<int, int> e = {n, n}; 27 28 #pragma omp target firstprivate(a, e) map(tofrom \ 29 : b) 30 { 31 b[a] = a; 32 b[a] += e.X; 33 } 34 35 // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr addrspace(1) noalias noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[A_IN:%.+]], ptr noalias noundef [[E_IN:%.+]]) 36 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr 37 // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, 38 // TCHECK-NOT: alloca [[TTII]], 39 // TCHECK: alloca i{{[0-9]+}}, 40 // TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], 41 // TCHECK: ret void 42 43 #pragma omp target firstprivate(aa, b, c, d) 44 { 45 aa += 1; 46 b[2] = 1.0; 47 c[1][2] = 1.0; 48 d.X = 1; 49 d.Y = 1; 50 } 51 52 // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the 53 // target region 54 // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], ptr{{.*}} [[B_IN:%.+]], ptr{{.*}} [[C_IN:%.+]], ptr{{.*}} [[D_IN:%.+]]) 55 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr 56 // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, 57 // TCHECK: [[B_ADDR:%.+]] = alloca ptr, 58 // TCHECK: [[C_ADDR:%.+]] = alloca ptr, 59 // TCHECK: [[D_ADDR:%.+]] = alloca ptr, 60 // TCHECK-NOT: alloca i{{[0-9]+}}, 61 // TCHECK: [[B_PRIV:%.+]] = alloca [10 x float], 62 // TCHECK: [[C_PRIV:%.+]] = alloca [5 x [10 x double]], 63 // TCHECK: [[D_PRIV:%.+]] = alloca [[TT]], 64 // TCHECK: store i{{[0-9]+}} [[A2_IN]], ptr [[A2_ADDR]], 65 // TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], 66 // TCHECK: store ptr [[C_IN]], ptr [[C_ADDR]], 67 // TCHECK: store ptr [[D_IN]], ptr [[D_ADDR]], 68 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], 69 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % 70 // TCHECK: [[C_ADDR_REF:%.+]] = load ptr, ptr [[C_ADDR]], 71 // TCHECK: [[C_ADDR_REF:%.+]] = load ptr, ptr % 72 // TCHECK: [[D_ADDR_REF:%.+]] = load ptr, ptr [[D_ADDR]], 73 // TCHECK: [[D_ADDR_REF:%.+]] = load ptr, ptr % 74 75 // firstprivate(aa): a_priv = a_in 76 77 // firstprivate(b): memcpy(b_priv,b_in) 78 // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]], {{.+}}) 79 80 // firstprivate(c) 81 // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[C_PRIV]], ptr align {{[0-9]+}} [[C_ADDR_REF]],{{.+}}) 82 83 // firstprivate(d) 84 // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[D_PRIV]], ptr align {{[0-9]+}} [[D_ADDR_REF]],{{.+}}) 85 86 // TCHECK: load i16, ptr [[A2_ADDR]], 87 88 #pragma omp target firstprivate(ptr) 89 { 90 ptr[0]++; 91 } 92 93 // TCHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]]) 94 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr, 95 // TCHECK: [[PTR_ADDR:%.+]] = alloca ptr, 96 // TCHECK-NOT: alloca ptr, 97 // TCHECK: store ptr [[PTR_IN]], ptr [[PTR_ADDR]], 98 // TCHECK: [[PTR_IN_REF:%.+]] = load ptr, ptr [[PTR_ADDR]], 99 // TCHECK-NOT: store ptr [[PTR_IN_REF]], ptr {{%.+}}, 100 101 return a; 102 } 103 104 template <typename tx> 105 tx ftemplate(int n) { 106 tx a = 0; 107 tx b[10]; 108 109 #pragma omp target firstprivate(a, b) 110 { 111 a += 1; 112 b[2] += 1; 113 } 114 115 return a; 116 } 117 118 static int fstatic(int n) { 119 int a = 0; 120 char aaa = 0; 121 int b[10]; 122 123 #pragma omp target firstprivate(a, aaa, b) 124 { 125 a += 1; 126 aaa += 1; 127 b[2] += 1; 128 } 129 130 return a; 131 } 132 133 template <typename tx> 134 void fconst(const tx t) { 135 #pragma omp target firstprivate(t) 136 { } 137 } 138 139 // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], ptr {{.+}} [[B_IN:%.+]]) 140 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr 141 // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, 142 // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, 143 // TCHECK: [[B_ADDR:%.+]] = alloca ptr, 144 // TCHECK-NOT: alloca i{{[0-9]+}}, 145 // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], 146 // TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], 147 // TCHECK: store i{{[0-9]+}} [[A3_IN]], ptr [[A3_ADDR]], 148 // TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], 149 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], 150 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % 151 152 // firstprivate(a): a_priv = a_in 153 154 // firstprivate(aaa) 155 156 // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr 157 158 // firstprivate(b) 159 // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]],{{.+}}) 160 // TCHECK: ret void 161 162 struct S1 { 163 double a; 164 165 int r1(int n) { 166 int b = n + 1; 167 168 #pragma omp target firstprivate(b) 169 { 170 this->a = (double)b + 1.5; 171 } 172 173 return (int)b; 174 } 175 176 // TCHECK: define internal void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]]) 177 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr 178 // TCHECK: [[TH_ADDR:%.+]] = alloca ptr, 179 // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, 180 // TCHECK-NOT: alloca i{{[0-9]+}}, 181 182 // TCHECK: store ptr [[TH]], ptr [[TH_ADDR]], 183 // TCHECK: store i{{[0-9]+}} [[B_IN]], ptr [[B_ADDR]], 184 // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr [[TH_ADDR]], 185 186 // firstprivate(b) 187 // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr 188 189 // TCHECK: ret void 190 }; 191 192 int bar(int n, double *ptr) { 193 int a = 0; 194 a += foo(n, ptr); 195 S1 S; 196 a += S.r1(n); 197 a += fstatic(n); 198 a += ftemplate<int>(n); 199 200 fconst(TT<int, int>{0, 0}); 201 fconst(TT<char, char>{0, 0}); 202 203 return a; 204 } 205 206 // template 207 208 // TCHECK: define internal void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, i{{[0-9]+}} noundef [[A_IN:%.+]], ptr{{.+}} noundef [[B_IN:%.+]]) 209 // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr 210 // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, 211 // TCHECK: [[B_ADDR:%.+]] = alloca ptr, 212 // TCHECK-NOT: alloca i{{[0-9]+}}, 213 // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], 214 // TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], 215 // TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], 216 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], 217 // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % 218 219 // firstprivate(a) 220 // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr 221 222 // firstprivate(b) 223 // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]],{{.+}}) 224 225 // TCHECK: ret void 226 227 #endif 228