xref: /llvm-project/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c (revision ce0911b3e9199a3a2a96dd3d1dec2309ea22b248)
1 // RUN: %libomp-compile-and-run
2 
3 // The test checks schedule(simd:runtime)
4 // in combination with omp_set_schedule()
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <omp.h>
8 
// Platform shims.
//   delay()      - pause the calling thread briefly (Sleep(1) on Windows,
//                  usleep(10) elsewhere).
//   seten(a,b,c) - set environment variable a to b; the third argument is
//                  forwarded to setenv() and dropped on Windows, where
//                  _putenv_s() takes no such argument.
// The expansions deliberately carry no trailing semicolon so that
// `if (x) delay(); else ...` parses as a single statement.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1)
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
#define delay() usleep(10)
#define seten(a,b,c) setenv((a),(b),(c))
#endif
18 
// Value passed as the last argument of __kmpc_dispatch_init_4(); run_loop()
// also multiplies the requested chunk size by it to get the expected chunk
// granularity.
enum { SIMD_LEN = 4 };

// Number of failed checks; incremented by run_loop(), reported by main().
int err = 0;
21 
22 // ---------------------------------------------------------------------------
23 // Various definitions copied from OpenMP RTL.
// Schedule kinds accepted by the dispatch entry points. The numeric values
// are copied from the OpenMP runtime and must not be changed here.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47, // The kind this test hands to the runtime.
};

// Short integer aliases used by the prototypes and checks below.
typedef unsigned int u32;
typedef long long int i64;
typedef unsigned long long int u64;

// Descriptor passed as the first argument of every __kmpc_* call.
// Field order and types mirror the runtime's own definition.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource; // Location string, e.g. ";file;func;0;0;;".
} id;
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43   int __kmpc_global_thread_num(id*);
44   void __kmpc_barrier(id*, int gtid);
45   void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
46   void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
47   int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
48   int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
49 #ifdef __cplusplus
50 } // extern "C"
51 #endif
52 // End of definitions copied from OpenMP RTL.
53 // ---------------------------------------------------------------------------
54 static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
55 
56 // ---------------------------------------------------------------------------
57 void
run_loop(int loop_lb,int loop_ub,int loop_st,int lchunk)58 run_loop(
59     int loop_lb,   // Loop lower bound.
60     int loop_ub,   // Loop upper bound.
61     int loop_st,   // Loop stride.
62     int lchunk
63 ) {
64   static int volatile loop_sync = 0;
65   int lb;   // Chunk lower bound.
66   int ub;   // Chunk upper bound.
67   int st;   // Chunk stride.
68   int rc;
69   int nthreads = omp_get_num_threads();
70   int tid = omp_get_thread_num();
71   int gtid = __kmpc_global_thread_num(&loc);
72   int last;
73   int tc = (loop_ub - loop_lb) / loop_st + 1;
74   int ch;
75   int no_chunk = 0;
76   if (lchunk == 0) {
77     no_chunk = 1;
78     lchunk = 1;
79   }
80   ch = lchunk * SIMD_LEN;
81 #if _DEBUG > 1
82   printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
83          gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
84 #endif
85   // Don't test degenerate cases that should have been discovered by codegen.
86   if (loop_st == 0)
87     return;
88   if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
89     return;
90   __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
91                          loop_lb, loop_ub, loop_st, SIMD_LEN);
92   {
93     // Let the master thread handle the chunks alone.
94     int chunk;      // No of current chunk.
95     int last_ub;    // Upper bound of the last processed chunk.
96     u64 cur;        // Number of interations in  current chunk.
97     u64 max;        // Max allowed iterations for current chunk.
98     int undersized = 0;
99     last_ub = loop_ub;
100     chunk = 0;
101     max = (loop_ub - loop_lb) / loop_st + 1;
102     // The first chunk can consume all iterations.
103     while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
104       ++ chunk;
105 #if _DEBUG
106       printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
107              tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
108 #endif
109       // Check if previous chunk (it is not the final chunk) is undersized.
110       if (undersized)
111         printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
112       if (loop_st > 0) {
113         if (!(ub <= loop_ub))
114           printf("Error with ub %d, %d, ch %d, err %d\n",
115                  (int)ub, (int)loop_ub, chunk, ++err);
116         if (!(lb <= ub))
117           printf("Error with bounds %d, %d, %d, err %d\n",
118                  (int)lb, (int)ub, chunk, ++err);
119       } else {
120         if (!(ub >= loop_ub))
121           printf("Error with ub %d, %d, %d, err %d\n",
122                  (int)ub, (int)loop_ub, chunk, ++err);
123         if (!(lb >= ub))
124           printf("Error with bounds %d, %d, %d, err %d\n",
125                  (int)lb, (int)ub, chunk, ++err);
126       }; // if
127       // Stride should not change.
128       if (!(st == loop_st))
129         printf("Error with st %d, %d, ch %d, err %d\n",
130                (int)st, (int)loop_st, chunk, ++err);
131       cur = ( ub - lb ) / loop_st + 1;
132       // Guided scheduling uses FP computations, so current chunk may
133       // be a bit bigger (+1) than allowed maximum.
134       if (!( cur <= max + 1))
135         printf("Error with iter %llu, %llu, err %d\n", cur, max, ++err);
136       // Update maximum for the next chunk.
137       if (last) {
138         if (!no_chunk && cur > ch && nthreads > 1)
139           printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
140                  (int)cur, ch, tid, ++err);
141       } else {
142         if (cur % ch)
143           printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
144                  chunk, (int)cur, ch, tid, ++err);
145       }
146       if (cur < max)
147         max = cur;
148       last_ub = ub;
149       undersized = (cur < ch);
150 #if _DEBUG > 1
151       if (last)
152         printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
153                undersized,cur,ch,tid,ub,lb,loop_st);
154 #endif
155     } // while
156     // Must have the right last iteration index.
157     if (loop_st > 0) {
158       if (!(last_ub <= loop_ub))
159         printf("Error with last1 %d, %d, ch %d, err %d\n",
160                (int)last_ub, (int)loop_ub, chunk, ++err);
161       if (last && !(last_ub + loop_st > loop_ub))
162         printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
163                (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
164     } else {
165       if (!(last_ub >= loop_ub))
166         printf("Error with last1 %d, %d, ch %d, err %d\n",
167                (int)last_ub, (int)loop_ub, chunk, ++err);
168       if (last && !(last_ub + loop_st < loop_ub))
169         printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
170                (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
171     } // if
172   }
173   __kmpc_barrier(&loc, gtid);
174 } // run_loop
175 
main(int argc,char * argv[])176 int main(int argc, char *argv[])
177 {
178   int chunk = 0;
179 // static (no chunk)
180   omp_set_schedule(omp_sched_static,0);
181 #pragma omp parallel// num_threads(num_th)
182   run_loop(0, 26, 1, chunk);
183 
184 // auto (chunk should be ignorted)
185   omp_set_schedule(omp_sched_auto,0);
186 #pragma omp parallel// num_threads(num_th)
187   run_loop(0, 26, 1, chunk);
188 
189 // static,1
190   chunk = 1;
191   omp_set_schedule(omp_sched_static,1);
192 #pragma omp parallel// num_threads(num_th)
193   run_loop(0, 26, 1, chunk);
194 
195 // dynamic,1
196   omp_set_schedule(omp_sched_dynamic,1);
197 #pragma omp parallel// num_threads(num_th)
198   run_loop(0, 26, 1, chunk);
199 
200 // guided,1
201   omp_set_schedule(omp_sched_guided,1);
202 #pragma omp parallel// num_threads(num_th)
203   run_loop(0, 26, 1, chunk);
204 
205 // dynamic,0 - use default chunk size 1
206   omp_set_schedule(omp_sched_dynamic,0);
207 #pragma omp parallel// num_threads(num_th)
208   run_loop(0, 26, 1, chunk);
209 
210 // guided,0 - use default chunk size 1
211   omp_set_schedule(omp_sched_guided,0);
212 #pragma omp parallel// num_threads(num_th)
213   run_loop(0, 26, 1, chunk);
214 
215   if (err) {
216     printf("failed, err = %d\n", err);
217     return 1;
218   } else {
219     printf("passed\n");
220     return 0;
221   }
222 }
223