xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/parallel/omp_loop_static.h (revision b1e838363e3c6fc78a55519254d99869742dd33c)
// -*- C++ -*-

// Copyright (C) 2007-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/omp_loop_static.h
 *  @brief Parallelization of embarrassingly parallel execution by
 *  means of an OpenMP for loop with static scheduling.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.
324fee23f9Smrg 
334fee23f9Smrg #ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H
344fee23f9Smrg #define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1
354fee23f9Smrg 
364fee23f9Smrg #include <omp.h>
374fee23f9Smrg 
384fee23f9Smrg #include <parallel/settings.h>
394fee23f9Smrg #include <parallel/basic_iterator.h>
404fee23f9Smrg 
414fee23f9Smrg namespace __gnu_parallel
424fee23f9Smrg {
434fee23f9Smrg   /** @brief Embarrassingly parallel algorithm for random access
444fee23f9Smrg    * iterators, using an OpenMP for loop with static scheduling.
454fee23f9Smrg    *
464fee23f9Smrg    *  @param __begin Begin iterator of element sequence.
474fee23f9Smrg    *  @param __end End iterator of element sequence.
484fee23f9Smrg    *  @param __o User-supplied functor (comparator, predicate, adding
494fee23f9Smrg    *  functor, ...).
504fee23f9Smrg    *  @param __f Functor to @a process an element with __op (depends on
514fee23f9Smrg    *  desired functionality, e. g. for std::for_each(), ...).
524fee23f9Smrg    *  @param __r Functor to @a add a single __result to the already processed
534fee23f9Smrg    *  __elements (depends on functionality).
544fee23f9Smrg    *  @param __base Base value for reduction.
554fee23f9Smrg    *  @param __output Pointer to position where final result is written to
564fee23f9Smrg    *  @param __bound Maximum number of elements processed (e. g. for
574fee23f9Smrg    *  std::count_n()).
584fee23f9Smrg    *  @return User-supplied functor (that may contain a part of the result).
594fee23f9Smrg    */
604fee23f9Smrg   template<typename _RAIter,
614fee23f9Smrg 	   typename _Op,
624fee23f9Smrg 	   typename _Fu,
634fee23f9Smrg 	   typename _Red,
644fee23f9Smrg 	   typename _Result>
654fee23f9Smrg     _Op
__for_each_template_random_access_omp_loop_static(_RAIter __begin,_RAIter __end,_Op __o,_Fu & __f,_Red __r,_Result __base,_Result & __output,typename std::iterator_traits<_RAIter>::difference_type __bound)664fee23f9Smrg     __for_each_template_random_access_omp_loop_static(_RAIter __begin,
674fee23f9Smrg 						      _RAIter __end, _Op __o,
684fee23f9Smrg 						      _Fu& __f, _Red __r,
694fee23f9Smrg 						      _Result __base,
704fee23f9Smrg 						      _Result& __output,
714fee23f9Smrg       typename std::iterator_traits<_RAIter>::difference_type __bound)
724fee23f9Smrg     {
734fee23f9Smrg       typedef typename std::iterator_traits<_RAIter>::difference_type
744fee23f9Smrg 	_DifferenceType;
754fee23f9Smrg 
764fee23f9Smrg       _DifferenceType __length = __end - __begin;
774fee23f9Smrg       _ThreadIndex __num_threads = std::min<_DifferenceType>
784fee23f9Smrg 	(__get_max_threads(), __length);
794fee23f9Smrg 
804fee23f9Smrg       _Result *__thread_results;
814fee23f9Smrg 
824fee23f9Smrg #     pragma omp parallel num_threads(__num_threads)
834fee23f9Smrg       {
844fee23f9Smrg #       pragma omp single
854fee23f9Smrg 	{
864fee23f9Smrg 	  __num_threads = omp_get_num_threads();
874fee23f9Smrg 	  __thread_results = new _Result[__num_threads];
884fee23f9Smrg 
894fee23f9Smrg 	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
904fee23f9Smrg 	    __thread_results[__i] = _Result();
914fee23f9Smrg 	}
924fee23f9Smrg 
934fee23f9Smrg         _ThreadIndex __iam = omp_get_thread_num();
944fee23f9Smrg 
954fee23f9Smrg #pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
964fee23f9Smrg         for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
974fee23f9Smrg           __thread_results[__iam] = __r(__thread_results[__iam],
984fee23f9Smrg                                         __f(__o, __begin+__pos));
994fee23f9Smrg       } //parallel
1004fee23f9Smrg 
1014fee23f9Smrg       for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
1024fee23f9Smrg 	__output = __r(__output, __thread_results[__i]);
1034fee23f9Smrg 
1044fee23f9Smrg       delete [] __thread_results;
1054fee23f9Smrg 
1064fee23f9Smrg       // Points to last element processed (needed as return value for
1074fee23f9Smrg       // some algorithms like transform).
1084fee23f9Smrg       __f.finish_iterator = __begin + __length;
1094fee23f9Smrg 
1104fee23f9Smrg       return __o;
1114fee23f9Smrg     }
1124fee23f9Smrg 
1134fee23f9Smrg } // end namespace
1144fee23f9Smrg 
1154fee23f9Smrg #endif /* _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H */
116