1*e4b17023SJohn Marino // -*- C++ -*- 2*e4b17023SJohn Marino 3*e4b17023SJohn Marino // Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4*e4b17023SJohn Marino // 5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free 6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the terms 7*e4b17023SJohn Marino // of the GNU General Public License as published by the Free Software 8*e4b17023SJohn Marino // Foundation; either version 3, or (at your option) any later 9*e4b17023SJohn Marino // version. 10*e4b17023SJohn Marino 11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, but 12*e4b17023SJohn Marino // WITHOUT ANY WARRANTY; without even the implied warranty of 13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*e4b17023SJohn Marino // General Public License for more details. 15*e4b17023SJohn Marino 16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional 17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version 18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and 21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program; 22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>. 24*e4b17023SJohn Marino 25*e4b17023SJohn Marino /** @file parallel/par_loop.h 26*e4b17023SJohn Marino * @brief Parallelization of embarrassingly parallel execution by 27*e4b17023SJohn Marino * means of equal splitting. 28*e4b17023SJohn Marino * This file is a GNU parallel extension to the Standard C++ Library. 29*e4b17023SJohn Marino */ 30*e4b17023SJohn Marino 31*e4b17023SJohn Marino // Written by Felix Putze. 32*e4b17023SJohn Marino 33*e4b17023SJohn Marino #ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H 34*e4b17023SJohn Marino #define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 35*e4b17023SJohn Marino 36*e4b17023SJohn Marino #include <omp.h> 37*e4b17023SJohn Marino #include <parallel/settings.h> 38*e4b17023SJohn Marino #include <parallel/base.h> 39*e4b17023SJohn Marino #include <parallel/equally_split.h> 40*e4b17023SJohn Marino 41*e4b17023SJohn Marino namespace __gnu_parallel 42*e4b17023SJohn Marino { 43*e4b17023SJohn Marino /** @brief Embarrassingly parallel algorithm for random access 44*e4b17023SJohn Marino * iterators, using hand-crafted parallelization by equal splitting 45*e4b17023SJohn Marino * the work. 46*e4b17023SJohn Marino * 47*e4b17023SJohn Marino * @param __begin Begin iterator of element sequence. 48*e4b17023SJohn Marino * @param __end End iterator of element sequence. 49*e4b17023SJohn Marino * @param __o User-supplied functor (comparator, predicate, adding 50*e4b17023SJohn Marino * functor, ...) 51*e4b17023SJohn Marino * @param __f Functor to "process" an element with __op (depends on 52*e4b17023SJohn Marino * desired functionality, e. g. for std::for_each(), ...). 53*e4b17023SJohn Marino * @param __r Functor to "add" a single __result to the already 54*e4b17023SJohn Marino * processed elements (depends on functionality). 55*e4b17023SJohn Marino * @param __base Base value for reduction. 56*e4b17023SJohn Marino * @param __output Pointer to position where final result is written to 57*e4b17023SJohn Marino * @param __bound Maximum number of elements processed (e. g. for 58*e4b17023SJohn Marino * std::count_n()). 59*e4b17023SJohn Marino * @return User-supplied functor (that may contain a part of the result). 60*e4b17023SJohn Marino */ 61*e4b17023SJohn Marino template<typename _RAIter, 62*e4b17023SJohn Marino typename _Op, 63*e4b17023SJohn Marino typename _Fu, 64*e4b17023SJohn Marino typename _Red, 65*e4b17023SJohn Marino typename _Result> 66*e4b17023SJohn Marino _Op __for_each_template_random_access_ed(_RAIter __begin,_RAIter __end,_Op __o,_Fu & __f,_Red __r,_Result __base,_Result & __output,typename std::iterator_traits<_RAIter>::difference_type __bound)67*e4b17023SJohn Marino __for_each_template_random_access_ed(_RAIter __begin, _RAIter __end, 68*e4b17023SJohn Marino _Op __o, _Fu& __f, _Red __r, 69*e4b17023SJohn Marino _Result __base, _Result& __output, 70*e4b17023SJohn Marino typename std::iterator_traits<_RAIter>::difference_type __bound) 71*e4b17023SJohn Marino { 72*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 73*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 74*e4b17023SJohn Marino const _DifferenceType __length = __end - __begin; 75*e4b17023SJohn Marino _Result *__thread_results; 76*e4b17023SJohn Marino bool* __constructed; 77*e4b17023SJohn Marino 78*e4b17023SJohn Marino _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType> 79*e4b17023SJohn Marino (__get_max_threads(), __length); 80*e4b17023SJohn Marino 81*e4b17023SJohn Marino # pragma omp parallel num_threads(__num_threads) 82*e4b17023SJohn Marino { 83*e4b17023SJohn Marino # pragma omp single 84*e4b17023SJohn Marino { 85*e4b17023SJohn Marino __num_threads = omp_get_num_threads(); 86*e4b17023SJohn Marino __thread_results = static_cast<_Result*> 87*e4b17023SJohn Marino (::operator new(__num_threads * sizeof(_Result))); 88*e4b17023SJohn Marino __constructed = new bool[__num_threads]; 89*e4b17023SJohn Marino } 90*e4b17023SJohn Marino 91*e4b17023SJohn Marino _ThreadIndex __iam = omp_get_thread_num(); 92*e4b17023SJohn Marino 93*e4b17023SJohn Marino // Neutral element. 94*e4b17023SJohn Marino _Result* __reduct; 95*e4b17023SJohn Marino 96*e4b17023SJohn Marino _DifferenceType 97*e4b17023SJohn Marino __start = __equally_split_point(__length, __num_threads, __iam), 98*e4b17023SJohn Marino __stop = __equally_split_point(__length, __num_threads, __iam + 1); 99*e4b17023SJohn Marino 100*e4b17023SJohn Marino if (__start < __stop) 101*e4b17023SJohn Marino { 102*e4b17023SJohn Marino __reduct = new _Result(__f(__o, __begin + __start)); 103*e4b17023SJohn Marino ++__start; 104*e4b17023SJohn Marino __constructed[__iam] = true; 105*e4b17023SJohn Marino } 106*e4b17023SJohn Marino else 107*e4b17023SJohn Marino __constructed[__iam] = false; 108*e4b17023SJohn Marino 109*e4b17023SJohn Marino for (; __start < __stop; ++__start) 110*e4b17023SJohn Marino *__reduct = __r(*__reduct, __f(__o, __begin + __start)); 111*e4b17023SJohn Marino 112*e4b17023SJohn Marino if (__constructed[__iam]) 113*e4b17023SJohn Marino { 114*e4b17023SJohn Marino ::new(&__thread_results[__iam]) _Result(*__reduct); 115*e4b17023SJohn Marino delete __reduct; 116*e4b17023SJohn Marino } 117*e4b17023SJohn Marino } //parallel 118*e4b17023SJohn Marino 119*e4b17023SJohn Marino for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 120*e4b17023SJohn Marino if (__constructed[__i]) 121*e4b17023SJohn Marino { 122*e4b17023SJohn Marino __output = __r(__output, __thread_results[__i]); 123*e4b17023SJohn Marino __thread_results[__i].~_Result(); 124*e4b17023SJohn Marino } 125*e4b17023SJohn Marino 126*e4b17023SJohn Marino // Points to last element processed (needed as return value for 127*e4b17023SJohn Marino // some algorithms like transform). 128*e4b17023SJohn Marino __f._M_finish_iterator = __begin + __length; 129*e4b17023SJohn Marino 130*e4b17023SJohn Marino ::operator delete(__thread_results); 131*e4b17023SJohn Marino 132*e4b17023SJohn Marino delete[] __constructed; 133*e4b17023SJohn Marino 134*e4b17023SJohn Marino return __o; 135*e4b17023SJohn Marino } 136*e4b17023SJohn Marino 137*e4b17023SJohn Marino } // end namespace 138*e4b17023SJohn Marino 139*e4b17023SJohn Marino #endif /* _GLIBCXX_PARALLEL_PAR_LOOP_H */ 140