/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
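      /* Pre-scale the chunk size by the increment so that it is measured
	 in loop-variable units rather than iterations; a successor block
	 can then be claimed by adding it directly to ws->next (see the
	 gomp_iter_dynamic_* routines in iter.c).  */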
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

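	/* ws->mode selects between the two block-allocation strategies in
	   gomp_iter_dynamic_next (iter.c): a nonzero mode asserts that
	   ws->end plus (nthreads + 1) chunks cannot overflow a long, so a
	   block can be claimed with a single atomic fetch-and-add on
	   ws->next; mode zero falls back to a compare-and-swap loop with
	   explicit overflow checks.  */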
	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
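
/* As an illustrative sketch (not part of the library itself), a loop
   such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   is lowered by the compiler into approximately

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart, &iend))
       do
	 for (i = istart; i < iend; i++)
	   body (i);
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   where n, body, istart and iend are placeholder names and the exact
   expansion depends on the compiler version.  */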

static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
			 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
				     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
				      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
				     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
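
/* Note that for an ordered loop the compiler additionally brackets the
   body of each ORDERED region with GOMP_ordered_start and GOMP_ordered_end
   (see ordered.c).  gomp_ordered_first below enters the calling thread,
   which is not yet on the ordering queue, into the queue when it is
   allocated its first iteration block.  */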

static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
				 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
					     icv->run_sched_modifier,
					     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
					      icv->run_sched_modifier,
					      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
					     icv->run_sched_modifier,
					     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
					     0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration block allocated to the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

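  /* As implemented in iter.c, gomp_iter_static_next returns an int rather
     than a bool: 0 when a block was assigned, a positive value when this
     thread's share is exhausted, and -1 once the thread has already taken
     its final block, in which case the ordered position below must not be
     advanced again.  */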
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */
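
/* Illustrative sketch: for a combined construct such as

     #pragma omp parallel for schedule(guided, 4)

   the compiler outlines the loop body into a subfunction FN and emits
   approximately

     GOMP_parallel_loop_guided (fn, data, 0, start, end, 1, 4, 0);

   with FN itself calling GOMP_loop_guided_next (not a *_start routine),
   since the work share is already initialized here.  The names above are
   placeholders; the exact expansion depends on the compiler version.  */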

static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
			  long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier, 0);
}

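/* The *_start entry points above implement the older split expansion, in
   which the compiler emits GOMP_parallel_loop_*_start, the outlined body
   and GOMP_parallel_end as separate calls; the GOMP_parallel_loop_*
   functions below are the newer combined entry points that invoke the
   outlined body and end the parallel themselves.  ialias_redirect makes
   the GOMP_parallel_end calls below resolve to libgomp's internal alias
   rather than the (possibly interposed) exported symbol.  */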
ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier,
			    flags);
  fn (data);
  GOMP_parallel_end ();
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

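/* Like GOMP_loop_end, but also checks for cancellation: returns true if
   this loop or its enclosing parallel has been cancelled, in which case
   the generated code branches out of the construct.  */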
bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif