xref: /llvm-project/clang/test/OpenMP/target_private_codegen.cpp (revision 94473f4db6a6f5f12d7c4081455b5b596094eac5)
1 // Only test codegen on target side, as private clause does not require any action on the host side
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
6 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
7 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
8 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
9 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
10 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
11 
12 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
13 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
14 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
15 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
16 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
17 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
18 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
19 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
20 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
21 
22 // expected-no-diagnostics
23 #ifndef HEADER
24 #define HEADER
25 
26 template<typename tx, typename ty>
27 struct TT{
28   tx X;
29   ty Y;
30 };
31 
32 // TCHECK: [[TT:%.+]] = type { i64, i8 }
33 // TCHECK: [[S1:%.+]] = type { double }
34 
35 int foo(int n) {
36   int a = 0;
37   short aa = 0;
38   float b[10];
39   float bn[n];
40   double c[5][10];
41   double cn[5][n];
42   TT<long long, char> d;
43 
44   #pragma omp target private(a)
45   {
46   }
47 
48   // TCHECK:  define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}})
49   // TCHECK:  [[DYN_PTR:%.+]] = alloca ptr
50   // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
51   // TCHECK-NOT: store {{.+}}, {{.+}} [[A]],
52   // TCHECK:  ret void
53 
54 #pragma omp target private(a)
55   {
56     a = 1;
57   }
58 
59   // TCHECK:  define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}})
60   // TCHECK:  [[DYN_PTR:%.+]] = alloca ptr
61   // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
62   // TCHECK:  store i{{[0-9]+}} 1, ptr [[A]],
63   // TCHECK:  ret void
64 
65 #pragma omp target private(a, aa)
66   {
67     a = 1;
68     aa = 1;
69   }
70 
71   // TCHECK:  define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}})
72   // TCHECK:   [[DYN_PTR:%.+]] = alloca ptr
73   // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
74   // TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}},
75   // TCHECK:  store i{{[0-9]+}} 1, ptr [[A]],
76   // TCHECK:  store i{{[0-9]+}} 1, ptr [[A2]],
77   // TCHECK:  ret void
78 
79   #pragma omp target private(a, b, bn, c, cn, d)
80   {
81     a = 1;
82     b[2] = 1.0;
83     bn[3] = 1.0;
84     c[1][2] = 1.0;
85     cn[1][3] = 1.0;
86     d.X = 1;
87     d.Y = 1;
88   }
89   // make sure that private variables are generated in all cases and that we use those instances for operations inside the
90   // target region
91   // TCHECK:  define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]])
92   // TCHECK:  [[DYN_PTR:%.+]] = alloca ptr
93   // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
94   // TCHECK:  [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
95   // TCHECK:  [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}},
96   // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
97   // TCHECK:  [[B:%.+]] = alloca [10 x float],
98   // TCHECK:  [[SSTACK:%.+]] = alloca ptr,
99   // TCHECK:  [[C:%.+]] = alloca [5 x [10 x double]],
100   // TCHECK:  [[D:%.+]] = alloca [[TT]],
101   // TCHECK:  store i{{[0-9]+}} [[VLA]], ptr [[VLA_ADDR]],
102   // TCHECK:  store i{{[0-9]+}} [[VLA1]], ptr [[VLA_ADDR2]],
103   // TCHECK:  store i{{[0-9]+}} [[VLA3]], ptr [[VLA_ADDR4]],
104   // TCHECK:  [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR]],
105   // TCHECK:  [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR2]],
106   // TCHECK:  [[VLA_ADDR_REF4:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR4]],
107   // TCHECK:  [[RET_STACK:%.+]] = call ptr @llvm.stacksave.p0()
108   // TCHECK:  store ptr [[RET_STACK]], ptr [[SSTACK]],
109   // TCHECK:  [[VLA5:%.+]] = alloca float, i{{[0-9]+}} [[VLA_ADDR_REF]],
110   // TCHECK:  [[VLA6_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF2]], [[VLA_ADDR_REF4]]
111   // TCHECK:  [[VLA6:%.+]] = alloca double, i{{[0-9]+}} [[VLA6_SIZE]],
112 
113   // a = 1
114   // TCHECK:  store i{{[0-9]+}} 1, ptr [[A]],
115 
116   // b[2] = 1.0
117   // TCHECK:  [[B_GEP:%.+]] = getelementptr inbounds [10 x float], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
118   // TCHECK:  store float 1.0{{.*}}, ptr [[B_GEP]],
119 
120   // bn[3] = 1.0
121   // TCHECK:  [[BN_GEP:%.+]] = getelementptr inbounds float, ptr [[VLA5]], i{{[0-9]+}} 3
122   // TCHECK:  store float 1.0{{.*}}, ptr [[BN_GEP]],
123 
124   // c[1][2] = 1.0
125   // TCHECK:  [[C_GEP1:%.+]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
126   // TCHECK:  [[C_GEP2:%.+]] = getelementptr inbounds [10 x double], ptr [[C_GEP1]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
127   // TCHECK:  store double 1.0{{.*}}, ptr [[C_GEP2]],
128 
129   // cn[1][3] = 1.0
130   // TCHECK:  [[CN_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF4]]
131   // TCHECK:  [[CN_GEP_IND:%.+]] = getelementptr inbounds double, ptr [[VLA6]], i{{[0-9]+}} [[CN_IND]]
132   // TCHECK:  [[CN_GEP_3:%.+]] = getelementptr inbounds double, ptr [[CN_GEP_IND]], i{{[0-9]+}} 3
133   // TCHECK:  store double 1.0{{.*}}, ptr [[CN_GEP_3]],
134 
135   // d.X = 1
136   // [[X_FIELD:%.+]] = getelementptr inbounds nuw [[TT]] ptr [[D]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
137   // store i{{[0-9]+}} 1, ptr [[X_FIELD]],
138 
139   // d.Y = 1
140   // [[Y_FIELD:%.+]] = getelementptr inbounds nuw [[TT]] ptr [[D]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
141   // store i{{[0-9]+}} 1, ptr [[Y_FIELD]],
142 
143   // finish
144   // [[RELOAD_SSTACK:%.+]] = load ptr, ptr [[SSTACK]],
145   // call ovid @llvm.stackrestore.p0(ptr [[RELOAD_SSTACK]])
146   // ret void
147 
148   return a;
149 }
150 
151 
152 template<typename tx>
153 tx ftemplate(int n) {
154   tx a = 0;
155   short aa = 0;
156   tx b[10];
157 
158 #pragma omp target private(a,aa,b)
159   {
160     a = 1;
161     aa = 1;
162     b[2] = 1;
163   }
164 
165   return a;
166 }
167 
168 static
169 int fstatic(int n) {
170   int a = 0;
171   short aa = 0;
172   char aaa = 0;
173   int b[10];
174 
175 #pragma omp target private(a,aa,aaa,b)
176   {
177     a = 1;
178     aa = 1;
179     aaa = 1;
180     b[2] = 1;
181   }
182 
183   return a;
184 }
185 
186 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}})
187 // TCHECK:  [[DYN_PTR:%.+]] = alloca ptr
188 // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
189 // TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}},
190 // TCHECK:  [[A3:%.+]] = alloca i{{[0-9]+}},
191 // TCHECK:  [[B:%.+]] = alloca [10 x i{{[0-9]+}}],
192 // TCHECK:  store i{{[0-9]+}} 1, ptr [[A]],
193 // TCHECK:  store i{{[0-9]+}} 1, ptr [[A2]],
194 // TCHECK:  store i{{[0-9]+}} 1, ptr [[A3]],
195 // TCHECK:  [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
196 // TCHECK:  store i{{[0-9]+}} 1, ptr [[B_GEP]],
197 // TCHECK:  ret void
198 
199 struct S1 {
200   double a;
201 
202   int r1(int n){
203     int b = n+1;
204     short int c[2][n];
205 
206 #pragma omp target private(b,c)
207     {
208       this->a = (double)b + 1.5;
209       c[1][1] = ++a;
210     }
211 
212     return c[1][1] + (int)b;
213   }
214 
215   // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]])
216   // TCHECK:  [[DYN_PTR:%.+]] = alloca ptr
217   // TCHECK: [[TH_ADDR:%.+]] = alloca ptr,
218   // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
219   // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
220   // TCHECK: [[B:%.+]] = alloca i{{[0-9]+}},
221   // TCHECK: [[SSTACK:%.+]] = alloca ptr,
222   // TCHECK: store ptr [[TH]], ptr [[TH_ADDR]],
223   // TCHECK: store i{{[0-9]+}} [[VLA]], ptr [[VLA_ADDR]],
224   // TCHECK: store i{{[0-9]+}} [[VLA1]], ptr [[VLA_ADDR2]],
225   // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr [[TH_ADDR]],
226   // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR]],
227   // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, ptr [[VLA_ADDR2]],
228   // TCHECK: [[RET_STACK:%.+]] = call ptr @llvm.stacksave.p0()
229   // TCHECK: store ptr [[RET_STACK:%.+]], ptr [[SSTACK]],
230 
231   // this->a = (double)b + 1.5;
232   // TCHECK: [[VLA_IND:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]]
233   // TCHECK: [[VLA3:%.+]] = alloca i{{[0-9]+}}, i{{[0-9]+}} [[VLA_IND]],
234   // TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, ptr [[B]],
235   // TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double
236   // TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00
237   // TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds nuw [[S1]], ptr [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
238   // TCHECK: store double [[NEW_A_VAL]], ptr [[A_FIELD]],
239 
240   // c[1][1] = ++a;
241   // TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds nuw [[S1]], ptr [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
242   // TCHECK: [[A_FIELD4_VAL:%.+]] = load double, ptr [[A_FIELD4]],
243   // TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00
244   // TCHECK: store double [[A_FIELD_INC]], ptr [[A_FIELD4]],
245   // TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}}
246   // TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]]
247   // TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, ptr [[VLA3]], i{{[0-9]+}} [[C_IND]]
248   // TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, ptr [[C_1_REF]], i{{[0-9]+}} 1
249   // TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], ptr [[C_1_1_REF]],
250 
251   // finish
252   // TCHECK: [[RELOAD_SSTACK:%.+]] = load ptr, ptr [[SSTACK]],
253   // TCHECK: call void @llvm.stackrestore.p0(ptr [[RELOAD_SSTACK]])
254   // TCHECK: ret void
255 };
256 
257 
258 int bar(int n){
259   int a = 0;
260   a += foo(n);
261   S1 S;
262   a += S.r1(n);
263   a += fstatic(n);
264   a += ftemplate<int>(n);
265 
266   return a;
267 }
268 
269 // template
270 // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}})
271 // TCHECK: [[DYN_PTR:%.+]] = alloca ptr
272 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}},
273 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}},
274 // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}],
275 // TCHECK: store i{{[0-9]+}} 1, ptr [[A]],
276 // TCHECK: store i{{[0-9]+}} 1, ptr [[A2]],
277 // TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], ptr [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
278 // TCHECK: store i{{[0-9]+}} 1, ptr [[B_GEP]],
279 // TCHECK: ret void
280 
281 #endif
282