1 // RUN: %libomp-compile-and-run
2
3 // The test checks schedule(simd:runtime)
4 // in combination with omp_set_schedule()
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <omp.h>
8
// Platform shims: delay() pauses the calling thread briefly and seten()
// sets an environment variable.
//
// Neither expansion ends in a semicolon, so `delay();` is exactly one
// statement and stays legal as the body of an if/else.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1)
// The third argument is accepted for signature parity with the POSIX
// variant but is not forwarded to _putenv_s.
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
#define delay() usleep(10)
#define seten(a,b,c) setenv((a),(b),(c))
#endif
18
// Chunk value handed to __kmpc_dispatch_init_4 by run_loop(), and the factor
// by which run_loop() scales its expected chunk size.
#define SIMD_LEN 4
// Number of failed checks so far; main() reports failure when it is non-zero.
int err = 0;
21
22 // ---------------------------------------------------------------------------
23 // Various definitions copied from OpenMP RTL.
// Schedule kinds accepted by the __kmpc_dispatch_init_* entry points.
// The numeric values are copied from the OpenMP runtime and must stay in
// sync with it; only kmp_sch_runtime_simd is used by this test.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
// Short integer aliases used by the runtime prototypes and by run_loop().
// NOTE(review): u32 assumes `unsigned` is 32 bits wide on the target.
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Source-location descriptor passed by address to every __kmpc_* call.
// The layout is copied from the OpenMP runtime, so field order and types
// must not change.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  // Points at a string literal in this test, hence const: assigning a
  // literal to a plain `char *` is rejected when the file is built as C++.
  const char *psource;
} id;
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 int __kmpc_global_thread_num(id*);
44 void __kmpc_barrier(id*, int gtid);
45 void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
46 void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
47 int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
48 int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
49 #ifdef __cplusplus
50 } // extern "C"
51 #endif
52 // End of definitions copied from OpenMP RTL.
53 // ---------------------------------------------------------------------------
54 static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
55
56 // ---------------------------------------------------------------------------
57 void
run_loop(int loop_lb,int loop_ub,int loop_st,int lchunk)58 run_loop(
59 int loop_lb, // Loop lower bound.
60 int loop_ub, // Loop upper bound.
61 int loop_st, // Loop stride.
62 int lchunk
63 ) {
64 static int volatile loop_sync = 0;
65 int lb; // Chunk lower bound.
66 int ub; // Chunk upper bound.
67 int st; // Chunk stride.
68 int rc;
69 int nthreads = omp_get_num_threads();
70 int tid = omp_get_thread_num();
71 int gtid = __kmpc_global_thread_num(&loc);
72 int last;
73 int tc = (loop_ub - loop_lb) / loop_st + 1;
74 int ch;
75 int no_chunk = 0;
76 if (lchunk == 0) {
77 no_chunk = 1;
78 lchunk = 1;
79 }
80 ch = lchunk * SIMD_LEN;
81 #if _DEBUG > 1
82 printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
83 gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
84 #endif
85 // Don't test degenerate cases that should have been discovered by codegen.
86 if (loop_st == 0)
87 return;
88 if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
89 return;
90 __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
91 loop_lb, loop_ub, loop_st, SIMD_LEN);
92 {
93 // Let the master thread handle the chunks alone.
94 int chunk; // No of current chunk.
95 int last_ub; // Upper bound of the last processed chunk.
96 u64 cur; // Number of interations in current chunk.
97 u64 max; // Max allowed iterations for current chunk.
98 int undersized = 0;
99 last_ub = loop_ub;
100 chunk = 0;
101 max = (loop_ub - loop_lb) / loop_st + 1;
102 // The first chunk can consume all iterations.
103 while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
104 ++ chunk;
105 #if _DEBUG
106 printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
107 tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
108 #endif
109 // Check if previous chunk (it is not the final chunk) is undersized.
110 if (undersized)
111 printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
112 if (loop_st > 0) {
113 if (!(ub <= loop_ub))
114 printf("Error with ub %d, %d, ch %d, err %d\n",
115 (int)ub, (int)loop_ub, chunk, ++err);
116 if (!(lb <= ub))
117 printf("Error with bounds %d, %d, %d, err %d\n",
118 (int)lb, (int)ub, chunk, ++err);
119 } else {
120 if (!(ub >= loop_ub))
121 printf("Error with ub %d, %d, %d, err %d\n",
122 (int)ub, (int)loop_ub, chunk, ++err);
123 if (!(lb >= ub))
124 printf("Error with bounds %d, %d, %d, err %d\n",
125 (int)lb, (int)ub, chunk, ++err);
126 }; // if
127 // Stride should not change.
128 if (!(st == loop_st))
129 printf("Error with st %d, %d, ch %d, err %d\n",
130 (int)st, (int)loop_st, chunk, ++err);
131 cur = ( ub - lb ) / loop_st + 1;
132 // Guided scheduling uses FP computations, so current chunk may
133 // be a bit bigger (+1) than allowed maximum.
134 if (!( cur <= max + 1))
135 printf("Error with iter %llu, %llu, err %d\n", cur, max, ++err);
136 // Update maximum for the next chunk.
137 if (last) {
138 if (!no_chunk && cur > ch && nthreads > 1)
139 printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
140 (int)cur, ch, tid, ++err);
141 } else {
142 if (cur % ch)
143 printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
144 chunk, (int)cur, ch, tid, ++err);
145 }
146 if (cur < max)
147 max = cur;
148 last_ub = ub;
149 undersized = (cur < ch);
150 #if _DEBUG > 1
151 if (last)
152 printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
153 undersized,cur,ch,tid,ub,lb,loop_st);
154 #endif
155 } // while
156 // Must have the right last iteration index.
157 if (loop_st > 0) {
158 if (!(last_ub <= loop_ub))
159 printf("Error with last1 %d, %d, ch %d, err %d\n",
160 (int)last_ub, (int)loop_ub, chunk, ++err);
161 if (last && !(last_ub + loop_st > loop_ub))
162 printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
163 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
164 } else {
165 if (!(last_ub >= loop_ub))
166 printf("Error with last1 %d, %d, ch %d, err %d\n",
167 (int)last_ub, (int)loop_ub, chunk, ++err);
168 if (last && !(last_ub + loop_st < loop_ub))
169 printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
170 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
171 } // if
172 }
173 __kmpc_barrier(&loc, gtid);
174 } // run_loop
175
main(int argc,char * argv[])176 int main(int argc, char *argv[])
177 {
178 int chunk = 0;
179 // static (no chunk)
180 omp_set_schedule(omp_sched_static,0);
181 #pragma omp parallel// num_threads(num_th)
182 run_loop(0, 26, 1, chunk);
183
184 // auto (chunk should be ignorted)
185 omp_set_schedule(omp_sched_auto,0);
186 #pragma omp parallel// num_threads(num_th)
187 run_loop(0, 26, 1, chunk);
188
189 // static,1
190 chunk = 1;
191 omp_set_schedule(omp_sched_static,1);
192 #pragma omp parallel// num_threads(num_th)
193 run_loop(0, 26, 1, chunk);
194
195 // dynamic,1
196 omp_set_schedule(omp_sched_dynamic,1);
197 #pragma omp parallel// num_threads(num_th)
198 run_loop(0, 26, 1, chunk);
199
200 // guided,1
201 omp_set_schedule(omp_sched_guided,1);
202 #pragma omp parallel// num_threads(num_th)
203 run_loop(0, 26, 1, chunk);
204
205 // dynamic,0 - use default chunk size 1
206 omp_set_schedule(omp_sched_dynamic,0);
207 #pragma omp parallel// num_threads(num_th)
208 run_loop(0, 26, 1, chunk);
209
210 // guided,0 - use default chunk size 1
211 omp_set_schedule(omp_sched_guided,0);
212 #pragma omp parallel// num_threads(num_th)
213 run_loop(0, 26, 1, chunk);
214
215 if (err) {
216 printf("failed, err = %d\n", err);
217 return 1;
218 } else {
219 printf("passed\n");
220 return 0;
221 }
222 }
223