xref: /llvm-project/offload/test/offloading/workshare_chunk.c (revision 8823448807f3b1a1362d1417e062d763734e02f5)
1 // RUN: %libomptarget-compile-run-and-check-generic
2 // RUN: %libomptarget-compileopt-run-and-check-generic
3 
4 // REQUIRES: gpu
5 
6 // clang-format off
7 
8 #include <omp.h>
9 #include <stdio.h>
10 
11 #define N 100
12 #define BLOCK_SHIFT 8
13 
print(int * A,int size)14 void print(int *A, int size) {
15   for (int i = 0; i < size; ++i) {
16     printf("B%dT%d ", A[i] >> BLOCK_SHIFT, A[i] % (1 << BLOCK_SHIFT));
17   }
18   printf("\n");
19 }
20 
main()21 int main() {
22   int A[N];
23 
24 #pragma omp target parallel for map(from:A) num_threads(10) schedule(static, 2)
25   for (int i = 0; i < N; ++i) {
26      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
27   }
28   printf("omp target parallel for thread chunk size %d\n", 2);
29   print(A, N);
30 
31 #pragma omp target teams distribute map(from:A) num_teams(10) \
32         dist_schedule(static, 2)
33   for (int i = 0; i < N; ++i) {
34      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
35   }
36   printf("omp target teams distribute block chunk size %d\n", 2);
37   print(A, N);
38 
39 #pragma omp target teams distribute parallel for map(from:A) \
40         num_teams(10) dist_schedule(static, 2)
41   for (int i = 0; i < N; ++i) {
42      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
43   }
44   printf("omp target teams distribute parallel for block chunk size %d ", 2);
45   printf("thread chunk size default\n");
46   print(A, N);
47 
48 #pragma omp target teams distribute parallel for map(from:A) \
49         num_teams(10) dist_schedule(static, 2) schedule(static, 3)
50   for (int i = 0; i < N; ++i) {
51      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
52   }
53   printf("omp target teams distribute parallel for block chunk size %d ", 2);
54   printf("thread chunk size %d\n", 3);
55   print(A, N);
56 
57 #pragma omp target teams distribute parallel for map(from:A) \
58         num_teams(10) dist_schedule(static, 3) schedule(static, 2)
59   for (int i = 0; i < N; ++i) {
60      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
61   }
62   printf("omp target teams distribute parallel for block chunk size %d ", 3);
63   printf("thread chunk size %d\n", 2);
64   print(A, N);
65 
66 #pragma omp target teams distribute parallel for map(from:A) \
67         num_teams(10) dist_schedule(static, 5) schedule(static, 2)
68   for (int i = 0; i < N; ++i) {
69      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
70   }
71   printf("omp target teams distribute parallel for block chunk size %d ", 5);
72   printf("thread chunk size %d\n", 2);
73   print(A, N);
74 
75 #pragma omp target teams distribute parallel for map(from:A) num_teams(10) \
76         dist_schedule(static, 49) schedule(static, 2)
77   for (int i = 0; i < N; ++i) {
78      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
79   }
80   printf("omp target teams distribute parallel for block chunk size %d ", 49);
81   printf("thread chunk size %d\n", 2);
82   print(A, N);
83 
84 #pragma omp target teams distribute parallel for map(from:A) \
85         num_teams(10) num_threads(10) dist_schedule(static, 29)
86   for (int i = 0; i < N; ++i) {
87      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
88   }
89   printf("omp target teams distribute parallel for block chunk size %d ", 29);
90   printf("thread chunk size default\n");
91   print(A, N);
92 
93 #pragma omp target teams distribute parallel for map(from:A) \
94         num_teams(10) num_threads(10) dist_schedule(static, 101)
95   for (int i = 0; i < N; ++i) {
96      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
97   }
98   printf("omp target teams distribute parallel for block chunk size %d ", 101);
99   printf("thread chunk size default\n");
100   print(A, N);
101 
102 #pragma omp target teams distribute parallel for map(from:A) \
103         num_teams(9) num_threads(10) schedule(static, 101)
104   for (int i = 0; i < N; ++i) {
105      A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
106   }
107   printf("omp target teams distribute parallel for default block chunk size ");
108   printf("thread chunk size %d\n", 101);
109   print(A, N);
110   return 0;
111 }
112 //CHECK:      omp target parallel for thread chunk size 2
113 
114 //CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
115 //CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
116 //CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
117 //CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
118 //CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
119 //CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
120 //CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
121 //CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
122 //CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
123 //CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
124 
125 //CHECK:      omp target teams distribute block chunk size 2
126 
127 //CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
128 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
129 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
130 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
131 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
132 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
133 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
134 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
135 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
136 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
137 
138 //CHECK:      omp target teams distribute parallel for
139 //CHECK-SAME: block chunk size 2 thread chunk size default
140 
141 //CHECK-NEXT: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
142 //CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
143 //CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
144 //CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
145 //CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
146 //CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
147 
148 //CHECK:      omp target teams distribute parallel for
149 //CHECK-SAME: block chunk size 2 thread chunk size 3
150 
151 //CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
152 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
153 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
154 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
155 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
156 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
157 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
158 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
159 //CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
160 //CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
161 
162 //CHECK:      omp target teams distribute parallel for
163 //CHECK-SAME: block chunk size 3 thread chunk size 2
164 
165 //CHECK-NEXT: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
166 //CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
167 //CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
168 //CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
169 //CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
170 //CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
171 //CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
172 //CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
173 //CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
174 //CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
175 //CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
176 //CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
177 //CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 B3T0
178 
179 //CHECK:      omp target teams distribute parallel for
180 //CHECK-SAME: block chunk size 5 thread chunk size 2
181 
182 //CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2
183 //CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2
184 //CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2
185 //CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2
186 //CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2
187 //CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2
188 //CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2
189 //CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2
190 //CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2
191 //CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2
192 
193 //CHECK:      omp target teams distribute parallel for
194 //CHECK-SAME: block chunk size 49 thread chunk size 2
195 
196 //CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 B0T5 B0T5
197 //CHECK-SAME: B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 B0T10 B0T10 B0T11 B0T11
198 //CHECK-SAME: B0T12 B0T12 B0T13 B0T13 B0T14 B0T14 B0T15 B0T15 B0T16 B0T16
199 //CHECK-SAME: B0T17 B0T17 B0T18 B0T18 B0T19 B0T19 B0T20 B0T20 B0T21 B0T21
200 //CHECK-SAME: B0T22 B0T22 B0T23 B0T23 B0T24
201 //CHECK-SAME: B1T0 B1T0 B1T1 B1T1 B1T2 B1T2 B1T3 B1T3 B1T4 B1T4 B1T5 B1T5
202 //CHECK-SAME: B1T6 B1T6 B1T7 B1T7 B1T8 B1T8 B1T9 B1T9 B1T10 B1T10 B1T11 B1T11
203 //CHECK-SAME: B1T12 B1T12 B1T13 B1T13 B1T14 B1T14 B1T15 B1T15 B1T16 B1T16
204 //CHECK-SAME: B1T17 B1T17 B1T18 B1T18 B1T19 B1T19 B1T20 B1T20 B1T21 B1T21
205 //CHECK-SAME: B1T22 B1T22 B1T23 B1T23 B1T24
206 //CHECK-SAME: B2T0 B2T0
207 
208 //CHECK:      omp target teams distribute parallel for
209 //CHECK-SAME: block chunk size 29 thread chunk size default
210 
211 //CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
212 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
213 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8
214 //CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9
215 //CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9
216 //CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8
217 //CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9
218 //CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9
219 //CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8
220 //CHECK-SAME: B3T0 B3T1 B3T2 B3T3 B3T4 B3T5 B3T6 B3T7 B3T8 B3T9
221 //CHECK-SAME: B3T0 B3T1 B3T2
222 
223 //CHECK:      omp target teams distribute parallel for
224 //CHECK-SAME: block chunk size 101 thread chunk size default
225 
226 //CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
227 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
228 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
229 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
230 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
231 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
232 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
233 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
234 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
235 //CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
236 
237 //CHECK:      omp target teams distribute parallel for
238 //CHECK-SAME: default block chunk size thread chunk size 101
239 
240 //CHECK-NEXT: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0
241 //CHECK-SAME: B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0
242 //CHECK-SAME: B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0
243 //CHECK-SAME: B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0
244 //CHECK-SAME: B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0
245 //CHECK-SAME: B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0
246 //CHECK-SAME: B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0
247 //CHECK-SAME: B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0
248 //CHECK-SAME: B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0
249 //CHECK-SAME: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0
250