xref: /llvm-project/clang/test/OpenMP/declare_variant_construct_codegen_1.c (revision 0c6f2f629cc0017361310fa4c132090413a874db)
1 // expected-no-diagnostics
2 
3 #ifndef HEADER
4 #define HEADER
5 
6 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
7 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
8 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1
9 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
10 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
11 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
12 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1
13 
14 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
15 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
16 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
17 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
18 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
19 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
20 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
21 
22 #ifdef CK1
23 
24 #define N 100
25 
26 void p_vxv(int *v1, int *v2, int *v3, int n);
27 void t_vxv(int *v1, int *v2, int *v3, int n);
28 
29 #pragma omp declare variant(t_vxv) match(construct={target})
30 #pragma omp declare variant(p_vxv) match(construct={parallel})
// Base function of the CK1 case: stores the element-wise product of v1 and v2
// into v3 for indices 0 .. n-1. The declare-variant pragmas placed directly
// above attach t_vxv and p_vxv as alternatives for this function.
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int idx = 0; idx < n; ++idx) {
    v3[idx] = v1[idx] * v2[idx];
  }
}
34 // CK1: define dso_local void @vxv
35 
// Variant of vxv registered for the `parallel` construct context: writes
// v1[i] * v2[i] * 3 into v3[i] for i in [0, n). The loop carries an OpenMP
// worksharing-loop directive.
void p_vxv(int *v1, int *v2, int *v3, int n) {
#pragma omp for
  for (int idx = 0; idx < n; ++idx) {
    v3[idx] = v1[idx] * v2[idx] * 3;
  }
}
40 // CK1: define dso_local void @p_vxv
41 
42 #pragma omp declare target
// Variant of vxv registered for the `target` construct context: writes
// v1[i] * v2[i] * 2 into v3[i] for i in [0, n). It is defined between
// `declare target` / `end declare target` so it is available for offloading.
void t_vxv(int *v1, int *v2, int *v3, int n) {
  // The directive must be spelled with the `omp` namespace. Without it the
  // line is an unknown pragma that the compiler ignores, so the loop below
  // would be compiled as an ordinary sequential loop.
#pragma omp distribute simd
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2;
}
47 #pragma omp end declare target
48 // CK1: define dso_local void @t_vxv
49 
50 
51 // CK1-LABEL: define {{[^@]+}}@test
// Driver for the CK1 case. Fills v1 with 1..N, v2 with -1..-N and v3 with
// zeros, then calls vxv() from three contexts: inside a `target teams` region,
// directly from the function body, and inside a `parallel` region. The
// FileCheck directives after the function expect the outlined target region to
// call t_vxv and the outlined parallel region to call p_vxv, while the direct
// call remains a call to vxv. Always returns 0.
test(void)52 int test(void) {
53   int v1[N], v2[N], v3[N];
54 
55   // init
56   for (int i = 0; i < N; i++) {
57     v1[i] = (i + 1);
58     v2[i] = -(i + 1);
59     v3[i] = 0;
60   }
61 
  // Call site in a target+teams context; the directive below only matches the
  // host-side call to the offloading entry point.
62 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
63   {
64     vxv(v1, v2, v3, N);
65   }
66 // CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}})
67 
  // Call site outside any OpenMP construct: the base function is expected.
68   vxv(v1, v2, v3, N);
69 // CK1: call void @vxv
70 
  // Call site in a parallel context; the outlined function name is captured
  // as PARALLEL_REGION for the directives that follow the function.
71 #pragma omp parallel
72   {
73     vxv(v1, v2, v3, N);
74   }
75 // CK1: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
76 
77   return 0;
78 }
79 
80 // CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}})
81 // CK1: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION:@[^,]+]]
82 // CK1: define internal void [[TARGET_REGION]](
83 // CK1: call void @t_vxv
84 
85 // CK1: define internal void [[PARALLEL_REGION]](
86 // CK1: call void @p_vxv
87 #endif // CK1
88 
89 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
90 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
91 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK2
92 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
93 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
94 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
95 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK2
96 
97 // RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
98 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
99 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
100 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
101 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
102 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
103 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
104 
105 #ifdef CK2
106 
107 void test_teams(int ***v1, int ***v2, int ***v3, int n);
108 void test_target(int ***v1, int ***v2, int ***v3, int n);
109 void test_parallel(int ***v1, int ***v2, int ***v3, int n);
110 
111 #pragma omp declare variant(test_teams) match(construct = {teams})
112 #pragma omp declare variant(test_target) match(construct = {target})
113 #pragma omp declare variant(test_parallel) match(construct = {parallel})
// Base function of the CK2 case: for every (i, j, k) with each index in
// [0, n), stores v1[i][j][k] * v2[i][j][k] into v3[i][j][k]. The
// declare-variant pragmas directly above attach test_teams, test_target and
// test_parallel as alternatives.
void test_base(int ***v1, int ***v2, int ***v3, int n) {
  for (int plane = 0; plane < n; ++plane) {
    for (int row = 0; row < n; ++row) {
      for (int col = 0; col < n; ++col) {
        const int product = v1[plane][row][col] * v2[plane][row][col];
        v3[plane][row][col] = product;
      }
    }
  }
}
120 
121 #pragma omp declare target
// Variant of test_base registered for the `teams` construct context. Computes
// the same element-wise product over indices in [0, n) on each of the three
// levels; the two outer loops are collapsed under a combined
// `distribute parallel for simd` directive.
void test_teams(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp distribute parallel for simd collapse(2)
  for (int plane = 0; plane < n; plane++)
    for (int row = 0; row < n; row++)
      for (int col = 0; col < n; col++) {
        const int product = v1[plane][row][col] * v2[plane][row][col];
        v3[plane][row][col] = product;
      }
}
129 #pragma omp end declare target
130 
131 #pragma omp declare target
// Variant of test_base registered for the `target` construct context. Computes
// the same element-wise product over indices in [0, n) on each of the three
// levels; all three loops are collapsed under `parallel for simd`.
void test_target(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp parallel for simd collapse(3)
  for (int plane = 0; plane < n; plane++)
    for (int row = 0; row < n; row++)
      for (int col = 0; col < n; col++) {
        const int product = v1[plane][row][col] * v2[plane][row][col];
        v3[plane][row][col] = product;
      }
}
139 #pragma omp end declare target
140 
// Variant of test_base registered for the `parallel` construct context.
// Computes the same element-wise product over indices in [0, n) on each of the
// three levels; all three loops are collapsed under a worksharing-loop
// directive.
void test_parallel(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp for collapse(3)
  for (int plane = 0; plane < n; plane++)
    for (int row = 0; row < n; row++)
      for (int col = 0; col < n; col++) {
        const int product = v1[plane][row][col] * v2[plane][row][col];
        v3[plane][row][col] = product;
      }
}
148 
149 // CK2-LABEL: define {{[^@]+}}@test
// Driver for the CK2 case. Calls test_base() with n == 0 from three contexts:
// a `target` region containing `teams`, a bare `target` region, and a
// `parallel` region. The FileCheck directives after the function expect the
// three outlined regions to call test_teams, test_target and test_parallel
// respectively. The local `i` is declared but not used here.
test(int *** v1,int *** v2,int *** v3,int n)150 void test(int ***v1, int ***v2, int ***v3, int n) {
151   int i;
152 
  // target + teams context; the offload entry name is captured as OFFLOAD_1.
153 #pragma omp target
154 #pragma omp teams
155   {
156     test_base(v1, v2, v3, 0);
157   }
158 // CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})
159 
  // target-only context; the offload entry name is captured as OFFLOAD_2.
160 #pragma omp target
161   {
162     test_base(v1, v2, v3, 0);
163   }
164 // CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
165 
  // parallel context; the outlined function is captured as PARALLEL_REGION.
166 #pragma omp parallel
167   {
168     test_base(v1, v2, v3, 0);
169   }
170 // CK2: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
171 }
172 
173 // CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}})
174 // CK2: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION_1:@[^,]+]]
175 // CK2: define internal void [[TARGET_REGION_1]](
176 // CK2: call void @test_teams
177 
178 // CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}})
179 // CK2: call void @test_target
180 
181 // CK2: define internal void [[PARALLEL_REGION]](
182 // CK2: call void @test_parallel
183 
184 #endif // CK2
185 
186 // RUN: %clang_cc1 -DCK3 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
187 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
188 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK3
189 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
190 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
191 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
192 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK3
193 
194 // RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
195 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
196 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
197 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
198 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
199 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
200 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
201 
202 #ifdef CK3
203 
204 #define N 100
205 
206 int t_for(int *v1, int *v2, int *v3, int n);
207 int t_simd(int *v1, int *v2, int *v3, int n);
208 
209 #pragma omp declare variant(t_simd) match(construct = {simd})
210 #pragma omp declare variant(t_for) match(construct = {for})
// Base function of the CK3 case: returns v1[idx] * v2[idx]. v3 is accepted
// but neither read nor written. The declare-variant pragmas directly above
// attach t_simd and t_for as alternatives.
int t(int *v1, int *v2, int *v3, int idx) {
  const int lhs = v1[idx];
  const int rhs = v2[idx];
  return lhs * rhs;
}
214 
// Variant of t registered for the `for` construct context: returns
// v1[idx] * v2[idx]. v3 is not touched.
int t_for(int *v1, int *v2, int *v3, int idx) {
  int product = v1[idx];
  product *= v2[idx];
  return product;
}
218 
219 #pragma omp declare simd
// Variant of t registered for the `simd` construct context: returns
// v1[idx] * v2[idx]. v3 is not touched. A `declare simd` pragma sits directly
// above this definition.
int t_simd(int *v1, int *v2, int *v3, int idx) {
  const int *lhs = v1 + idx;
  const int *rhs = v2 + idx;
  return *lhs * *rhs;
}
223 
224 // CK3-LABEL: define {{[^@]+}}@test
// Driver for the CK3 case. Fills v1 with 1..N, v2 with -1..-N and v3 with
// zeros, then calls t() once per iteration from a `simd` loop and from a
// worksharing `for` loop. The FileCheck directives expect the first call to
// be emitted as a call to t_simd and the second as a call to t_for.
test(void)225 void test(void) {
226   int v1[N], v2[N], v3[N];
227 
228   // init
229   for (int i = 0; i < N; i++) {
230     v1[i] = (i + 1);
231     v2[i] = -(i + 1);
232     v3[i] = 0;
233   }
234 
  // Call site in a simd context.
235 #pragma omp simd
236   for (int i = 0; i < N; i++) {
237     v3[i] = t(v1, v2, v3, i);
238   }
239 // CK3: call = call i32 @t_simd
240 
241 
  // Call site in a worksharing-loop context.
242 #pragma omp for
243   for (int i = 0; i < N; i++) {
244     v3[i] = t(v1, v2, v3, i);
245   }
246 // CK3: call{{.+}} = call i32 @t_for
247 }
248 
249 #endif // CK3
250 
251 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
252 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
253 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK4
254 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
255 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
256 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
257 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK4
258 
259 // RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
260 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
261 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
262 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
263 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
264 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
265 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
266 
267 #ifdef CK4
268 
269 #define N 100
270 
271 void not_selected_vxv(int *v1, int *v2, int *v3, int n);
272 void combined_vxv(int *v1, int *v2, int *v3, int n);
273 void all_vxv(int *v1, int *v2, int *v3, int n);
274 
275 #pragma omp declare variant(all_vxv) match(construct={target,teams,parallel,for,simd})
276 #pragma omp declare variant(combined_vxv) match(construct={target,teams,parallel,for})
277 #pragma omp declare variant(not_selected_vxv) match(construct={parallel,for})
// Base function of the CK4 case: stores v1[i] * v2[i] into v3[i] for i in
// [0, n). The declare-variant pragmas directly above attach all_vxv,
// combined_vxv and not_selected_vxv, each with a multi-construct selector.
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int idx = 0; idx < n; ++idx) {
    const int product = v1[idx] * v2[idx];
    v3[idx] = product;
  }
}
281 
// Variant of vxv registered with the selector construct={parallel,for}:
// stores v1[i] * v2[i] * 3 into v3[i] for i in [0, n).
void not_selected_vxv(int *v1, int *v2, int *v3, int n) {
  int idx = 0;
  while (idx < n) {
    v3[idx] = v1[idx] * v2[idx] * 3;
    ++idx;
  }
}
285 
286 #pragma omp declare target
// Variant of vxv registered with the selector
// construct={target,teams,parallel,for}: stores v1[i] * v2[i] * 2 into v3[i]
// for i in [0, n). Defined inside a `declare target` region.
void combined_vxv(int *v1, int *v2, int *v3, int n) {
  for (int idx = 0; idx < n; ++idx) {
    v3[idx] = v1[idx] * v2[idx] * 2;
  }
}
290 #pragma omp end declare target
291 
292 #pragma omp declare target
// Variant of vxv registered with the selector
// construct={target,teams,parallel,for,simd}: stores v1[i] * v2[i] * 4 into
// v3[i] for i in [0, n). Defined inside a `declare target` region.
void all_vxv(int *v1, int *v2, int *v3, int n) {
  for (int idx = 0; idx < n; ++idx) {
    const int scaled = v1[idx] * v2[idx] * 4;
    v3[idx] = scaled;
  }
}
296 #pragma omp end declare target
297 
298 // CK4-LABEL: define {{[^@]+}}@test
// Driver for the CK4 case. Fills v1 with 1..N, v2 with -1..-N and v3 with
// zeros, then calls vxv() from three contexts: a `parallel for` nested in
// `target teams`, a standalone `simd` loop, and a combined
// `target teams distribute parallel for simd` loop nest. The directives inside
// the function match the two host-side offload calls and a plain call to vxv
// for the simd-only loop; the DAG directives after the function require calls
// to both all_vxv and combined_vxv to appear. Presumably the first region
// yields combined_vxv and the last yields all_vxv, but the directives do not
// tie either call to a specific region.
test(void)299 void test(void) {
300     int v1[N], v2[N], v3[N];
301 
302     //init
303     for (int i = 0; i < N; i++) {
304       v1[i] = (i + 1);
305       v2[i] = -(i + 1);
306       v3[i] = 0;
307     }
308 
    // Construct context here is target, teams, parallel, for (no simd).
309 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
310     {
311 #pragma omp parallel for
312       for (int i = 0; i < N; i++)
313         vxv(v1, v2, v3, N);
314     }
315 // CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})
316 
    // simd alone: the directive below expects the base function to be called.
317 #pragma omp simd
318     for (int i = 0; i < N; i++)
319       vxv(v1, v2, v3, N);
320 // CK4: call void @vxv
321 
    // Combined construct with a three-deep loop nest; each inner loop declares
    // its own `i`, shadowing the enclosing one.
322 #pragma omp target teams distribute parallel for simd map(from: v3[:N])
323     for (int i = 0; i < N; i++)
324       for (int i = 0; i < N; i++)
325         for (int i = 0; i < N; i++)
326           vxv(v1, v2, v3, N);
327 // CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
328 }
329 // CK4-DAG: call void @all_vxv
330 // CK4-DAG: call void @combined_vxv
331 
332 #endif // CK4
333 
334 #endif // HEADER
335