1*e4b17023SJohn Marino // -*- C++ -*- 2*e4b17023SJohn Marino 3*e4b17023SJohn Marino // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 4*e4b17023SJohn Marino // 5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free 6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the terms 7*e4b17023SJohn Marino // of the GNU General Public License as published by the Free Software 8*e4b17023SJohn Marino // Foundation; either version 3, or (at your option) any later 9*e4b17023SJohn Marino // version. 10*e4b17023SJohn Marino 11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, but 12*e4b17023SJohn Marino // WITHOUT ANY WARRANTY; without even the implied warranty of 13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*e4b17023SJohn Marino // General Public License for more details. 15*e4b17023SJohn Marino 16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional 17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version 18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and 21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program; 22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>. 24*e4b17023SJohn Marino 25*e4b17023SJohn Marino /** @file parallel/balanced_quicksort.h 26*e4b17023SJohn Marino * @brief Implementation of a dynamically load-balanced parallel quicksort. 27*e4b17023SJohn Marino * 28*e4b17023SJohn Marino * It works in-place and needs only logarithmic extra memory. 29*e4b17023SJohn Marino * The algorithm is similar to the one proposed in 30*e4b17023SJohn Marino * 31*e4b17023SJohn Marino * P. Tsigas and Y. Zhang. 32*e4b17023SJohn Marino * A simple, fast parallel implementation of quicksort and 33*e4b17023SJohn Marino * its performance evaluation on SUN enterprise 10000. 34*e4b17023SJohn Marino * In 11th Euromicro Conference on Parallel, Distributed and 35*e4b17023SJohn Marino * Network-Based Processing, page 372, 2003. 36*e4b17023SJohn Marino * 37*e4b17023SJohn Marino * This file is a GNU parallel extension to the Standard C++ Library. 38*e4b17023SJohn Marino */ 39*e4b17023SJohn Marino 40*e4b17023SJohn Marino // Written by Johannes Singler. 41*e4b17023SJohn Marino 42*e4b17023SJohn Marino #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 43*e4b17023SJohn Marino #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1 44*e4b17023SJohn Marino 45*e4b17023SJohn Marino #include <parallel/basic_iterator.h> 46*e4b17023SJohn Marino #include <bits/stl_algo.h> 47*e4b17023SJohn Marino #include <bits/stl_function.h> 48*e4b17023SJohn Marino 49*e4b17023SJohn Marino #include <parallel/settings.h> 50*e4b17023SJohn Marino #include <parallel/partition.h> 51*e4b17023SJohn Marino #include <parallel/random_number.h> 52*e4b17023SJohn Marino #include <parallel/queue.h> 53*e4b17023SJohn Marino 54*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 55*e4b17023SJohn Marino #include <parallel/checkers.h> 56*e4b17023SJohn Marino #endif 57*e4b17023SJohn Marino 58*e4b17023SJohn Marino namespace __gnu_parallel 59*e4b17023SJohn Marino { 60*e4b17023SJohn Marino /** @brief Information local to one thread in the parallel quicksort run. */ 61*e4b17023SJohn Marino template<typename _RAIter> 62*e4b17023SJohn Marino struct _QSBThreadLocal 63*e4b17023SJohn Marino { 64*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 65*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 66*e4b17023SJohn Marino 67*e4b17023SJohn Marino /** @brief Continuous part of the sequence, described by an 68*e4b17023SJohn Marino iterator pair. */ 69*e4b17023SJohn Marino typedef std::pair<_RAIter, _RAIter> _Piece; 70*e4b17023SJohn Marino 71*e4b17023SJohn Marino /** @brief Initial piece to work on. */ 72*e4b17023SJohn Marino _Piece _M_initial; 73*e4b17023SJohn Marino 74*e4b17023SJohn Marino /** @brief Work-stealing queue. */ 75*e4b17023SJohn Marino _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; 76*e4b17023SJohn Marino 77*e4b17023SJohn Marino /** @brief Number of threads involved in this algorithm. */ 78*e4b17023SJohn Marino _ThreadIndex _M_num_threads; 79*e4b17023SJohn Marino 80*e4b17023SJohn Marino /** @brief Pointer to a counter of elements left over to sort. */ 81*e4b17023SJohn Marino volatile _DifferenceType* _M_elements_leftover; 82*e4b17023SJohn Marino 83*e4b17023SJohn Marino /** @brief The complete sequence to sort. */ 84*e4b17023SJohn Marino _Piece _M_global; 85*e4b17023SJohn Marino 86*e4b17023SJohn Marino /** @brief Constructor. 87*e4b17023SJohn Marino * @param __queue_size size of the work-stealing queue. */ _QSBThreadLocal_QSBThreadLocal88*e4b17023SJohn Marino _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } 89*e4b17023SJohn Marino }; 90*e4b17023SJohn Marino 91*e4b17023SJohn Marino /** @brief Balanced quicksort divide step. 92*e4b17023SJohn Marino * @param __begin Begin iterator of subsequence. 93*e4b17023SJohn Marino * @param __end End iterator of subsequence. 94*e4b17023SJohn Marino * @param __comp Comparator. 95*e4b17023SJohn Marino * @param __num_threads Number of threads that are allowed to work on 96*e4b17023SJohn Marino * this part. 97*e4b17023SJohn Marino * @pre @c (__end-__begin)>=1 */ 98*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 99*e4b17023SJohn Marino typename std::iterator_traits<_RAIter>::difference_type __qsb_divide(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)100*e4b17023SJohn Marino __qsb_divide(_RAIter __begin, _RAIter __end, 101*e4b17023SJohn Marino _Compare __comp, _ThreadIndex __num_threads) 102*e4b17023SJohn Marino { 103*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); 104*e4b17023SJohn Marino 105*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 106*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 107*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 108*e4b17023SJohn Marino 109*e4b17023SJohn Marino _RAIter __pivot_pos = 110*e4b17023SJohn Marino __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, 111*e4b17023SJohn Marino __end - 1, __comp); 112*e4b17023SJohn Marino 113*e4b17023SJohn Marino #if defined(_GLIBCXX_ASSERTIONS) 114*e4b17023SJohn Marino // Must be in between somewhere. 115*e4b17023SJohn Marino _DifferenceType __n = __end - __begin; 116*e4b17023SJohn Marino 117*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin) 118*e4b17023SJohn Marino && !__comp(*(__begin + __n / 2), 119*e4b17023SJohn Marino *__pivot_pos)) 120*e4b17023SJohn Marino || (!__comp(*__pivot_pos, *__begin) 121*e4b17023SJohn Marino && !__comp(*(__end - 1), *__pivot_pos)) 122*e4b17023SJohn Marino || (!__comp(*__pivot_pos, *(__begin + __n / 2)) 123*e4b17023SJohn Marino && !__comp(*__begin, *__pivot_pos)) 124*e4b17023SJohn Marino || (!__comp(*__pivot_pos, *(__begin + __n / 2)) 125*e4b17023SJohn Marino && !__comp(*(__end - 1), *__pivot_pos)) 126*e4b17023SJohn Marino || (!__comp(*__pivot_pos, *(__end - 1)) 127*e4b17023SJohn Marino && !__comp(*__begin, *__pivot_pos)) 128*e4b17023SJohn Marino || (!__comp(*__pivot_pos, *(__end - 1)) 129*e4b17023SJohn Marino && !__comp(*(__begin + __n / 2), 130*e4b17023SJohn Marino *__pivot_pos))); 131*e4b17023SJohn Marino #endif 132*e4b17023SJohn Marino 133*e4b17023SJohn Marino // Swap pivot value to end. 134*e4b17023SJohn Marino if (__pivot_pos != (__end - 1)) 135*e4b17023SJohn Marino std::iter_swap(__pivot_pos, __end - 1); 136*e4b17023SJohn Marino __pivot_pos = __end - 1; 137*e4b17023SJohn Marino 138*e4b17023SJohn Marino __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> 139*e4b17023SJohn Marino __pred(__comp, *__pivot_pos); 140*e4b17023SJohn Marino 141*e4b17023SJohn Marino // Divide, returning __end - __begin - 1 in the worst case. 142*e4b17023SJohn Marino _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1, 143*e4b17023SJohn Marino __pred, 144*e4b17023SJohn Marino __num_threads); 145*e4b17023SJohn Marino 146*e4b17023SJohn Marino // Swap back pivot to middle. 147*e4b17023SJohn Marino std::iter_swap(__begin + __split_pos, __pivot_pos); 148*e4b17023SJohn Marino __pivot_pos = __begin + __split_pos; 149*e4b17023SJohn Marino 150*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 151*e4b17023SJohn Marino _RAIter __r; 152*e4b17023SJohn Marino for (__r = __begin; __r != __pivot_pos; ++__r) 153*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); 154*e4b17023SJohn Marino for (; __r != __end; ++__r) 155*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); 156*e4b17023SJohn Marino #endif 157*e4b17023SJohn Marino 158*e4b17023SJohn Marino return __split_pos; 159*e4b17023SJohn Marino } 160*e4b17023SJohn Marino 161*e4b17023SJohn Marino /** @brief Quicksort conquer step. 162*e4b17023SJohn Marino * @param __tls Array of thread-local storages. 163*e4b17023SJohn Marino * @param __begin Begin iterator of subsequence. 164*e4b17023SJohn Marino * @param __end End iterator of subsequence. 165*e4b17023SJohn Marino * @param __comp Comparator. 166*e4b17023SJohn Marino * @param __iam Number of the thread processing this function. 167*e4b17023SJohn Marino * @param __num_threads 168*e4b17023SJohn Marino * Number of threads that are allowed to work on this part. */ 169*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 170*e4b17023SJohn Marino void __qsb_conquer(_QSBThreadLocal<_RAIter> ** __tls,_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __iam,_ThreadIndex __num_threads,bool __parent_wait)171*e4b17023SJohn Marino __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls, 172*e4b17023SJohn Marino _RAIter __begin, _RAIter __end, 173*e4b17023SJohn Marino _Compare __comp, 174*e4b17023SJohn Marino _ThreadIndex __iam, _ThreadIndex __num_threads, 175*e4b17023SJohn Marino bool __parent_wait) 176*e4b17023SJohn Marino { 177*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 178*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 179*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 180*e4b17023SJohn Marino 181*e4b17023SJohn Marino _DifferenceType __n = __end - __begin; 182*e4b17023SJohn Marino 183*e4b17023SJohn Marino if (__num_threads <= 1 || __n <= 1) 184*e4b17023SJohn Marino { 185*e4b17023SJohn Marino __tls[__iam]->_M_initial.first = __begin; 186*e4b17023SJohn Marino __tls[__iam]->_M_initial.second = __end; 187*e4b17023SJohn Marino 188*e4b17023SJohn Marino __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); 189*e4b17023SJohn Marino 190*e4b17023SJohn Marino return; 191*e4b17023SJohn Marino } 192*e4b17023SJohn Marino 193*e4b17023SJohn Marino // Divide step. 194*e4b17023SJohn Marino _DifferenceType __split_pos = 195*e4b17023SJohn Marino __qsb_divide(__begin, __end, __comp, __num_threads); 196*e4b17023SJohn Marino 197*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 198*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && 199*e4b17023SJohn Marino __split_pos < (__end - __begin)); 200*e4b17023SJohn Marino #endif 201*e4b17023SJohn Marino 202*e4b17023SJohn Marino _ThreadIndex 203*e4b17023SJohn Marino __num_threads_leftside = std::max<_ThreadIndex> 204*e4b17023SJohn Marino (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos 205*e4b17023SJohn Marino * __num_threads / __n)); 206*e4b17023SJohn Marino 207*e4b17023SJohn Marino # pragma omp atomic 208*e4b17023SJohn Marino *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; 209*e4b17023SJohn Marino 210*e4b17023SJohn Marino // Conquer step. 211*e4b17023SJohn Marino # pragma omp parallel num_threads(2) 212*e4b17023SJohn Marino { 213*e4b17023SJohn Marino bool __wait; 214*e4b17023SJohn Marino if(omp_get_num_threads() < 2) 215*e4b17023SJohn Marino __wait = false; 216*e4b17023SJohn Marino else 217*e4b17023SJohn Marino __wait = __parent_wait; 218*e4b17023SJohn Marino 219*e4b17023SJohn Marino # pragma omp sections 220*e4b17023SJohn Marino { 221*e4b17023SJohn Marino # pragma omp section 222*e4b17023SJohn Marino { 223*e4b17023SJohn Marino __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, 224*e4b17023SJohn Marino __iam, __num_threads_leftside, __wait); 225*e4b17023SJohn Marino __wait = __parent_wait; 226*e4b17023SJohn Marino } 227*e4b17023SJohn Marino // The pivot_pos is left in place, to ensure termination. 228*e4b17023SJohn Marino # pragma omp section 229*e4b17023SJohn Marino { 230*e4b17023SJohn Marino __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, 231*e4b17023SJohn Marino __iam + __num_threads_leftside, 232*e4b17023SJohn Marino __num_threads - __num_threads_leftside, __wait); 233*e4b17023SJohn Marino __wait = __parent_wait; 234*e4b17023SJohn Marino } 235*e4b17023SJohn Marino } 236*e4b17023SJohn Marino } 237*e4b17023SJohn Marino } 238*e4b17023SJohn Marino 239*e4b17023SJohn Marino /** 240*e4b17023SJohn Marino * @brief Quicksort step doing load-balanced local sort. 241*e4b17023SJohn Marino * @param __tls Array of thread-local storages. 242*e4b17023SJohn Marino * @param __comp Comparator. 243*e4b17023SJohn Marino * @param __iam Number of the thread processing this function. 244*e4b17023SJohn Marino */ 245*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 246*e4b17023SJohn Marino void __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter> ** __tls,_Compare & __comp,_ThreadIndex __iam,bool __wait)247*e4b17023SJohn Marino __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, 248*e4b17023SJohn Marino _Compare& __comp, _ThreadIndex __iam, 249*e4b17023SJohn Marino bool __wait) 250*e4b17023SJohn Marino { 251*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 252*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 253*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 254*e4b17023SJohn Marino typedef std::pair<_RAIter, _RAIter> _Piece; 255*e4b17023SJohn Marino 256*e4b17023SJohn Marino _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam]; 257*e4b17023SJohn Marino 258*e4b17023SJohn Marino _DifferenceType 259*e4b17023SJohn Marino __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n; 260*e4b17023SJohn Marino if (__base_case_n < 2) 261*e4b17023SJohn Marino __base_case_n = 2; 262*e4b17023SJohn Marino _ThreadIndex __num_threads = __tl._M_num_threads; 263*e4b17023SJohn Marino 264*e4b17023SJohn Marino // Every thread has its own random number generator. 265*e4b17023SJohn Marino _RandomNumber __rng(__iam + 1); 266*e4b17023SJohn Marino 267*e4b17023SJohn Marino _Piece __current = __tl._M_initial; 268*e4b17023SJohn Marino 269*e4b17023SJohn Marino _DifferenceType __elements_done = 0; 270*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 271*e4b17023SJohn Marino _DifferenceType __total_elements_done = 0; 272*e4b17023SJohn Marino #endif 273*e4b17023SJohn Marino 274*e4b17023SJohn Marino for (;;) 275*e4b17023SJohn Marino { 276*e4b17023SJohn Marino // Invariant: __current must be a valid (maybe empty) range. 277*e4b17023SJohn Marino _RAIter __begin = __current.first, __end = __current.second; 278*e4b17023SJohn Marino _DifferenceType __n = __end - __begin; 279*e4b17023SJohn Marino 280*e4b17023SJohn Marino if (__n > __base_case_n) 281*e4b17023SJohn Marino { 282*e4b17023SJohn Marino // Divide. 283*e4b17023SJohn Marino _RAIter __pivot_pos = __begin + __rng(__n); 284*e4b17023SJohn Marino 285*e4b17023SJohn Marino // Swap __pivot_pos value to end. 286*e4b17023SJohn Marino if (__pivot_pos != (__end - 1)) 287*e4b17023SJohn Marino std::iter_swap(__pivot_pos, __end - 1); 288*e4b17023SJohn Marino __pivot_pos = __end - 1; 289*e4b17023SJohn Marino 290*e4b17023SJohn Marino __gnu_parallel::__binder2nd 291*e4b17023SJohn Marino <_Compare, _ValueType, _ValueType, bool> 292*e4b17023SJohn Marino __pred(__comp, *__pivot_pos); 293*e4b17023SJohn Marino 294*e4b17023SJohn Marino // Divide, leave pivot unchanged in last place. 295*e4b17023SJohn Marino _RAIter __split_pos1, __split_pos2; 296*e4b17023SJohn Marino __split_pos1 = __gnu_sequential::partition(__begin, __end - 1, 297*e4b17023SJohn Marino __pred); 298*e4b17023SJohn Marino 299*e4b17023SJohn Marino // Left side: < __pivot_pos; __right side: >= __pivot_pos. 300*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 301*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 302*e4b17023SJohn Marino && __split_pos1 < __end); 303*e4b17023SJohn Marino #endif 304*e4b17023SJohn Marino // Swap pivot back to middle. 305*e4b17023SJohn Marino if (__split_pos1 != __pivot_pos) 306*e4b17023SJohn Marino std::iter_swap(__split_pos1, __pivot_pos); 307*e4b17023SJohn Marino __pivot_pos = __split_pos1; 308*e4b17023SJohn Marino 309*e4b17023SJohn Marino // In case all elements are equal, __split_pos1 == 0. 310*e4b17023SJohn Marino if ((__split_pos1 + 1 - __begin) < (__n >> 7) 311*e4b17023SJohn Marino || (__end - __split_pos1) < (__n >> 7)) 312*e4b17023SJohn Marino { 313*e4b17023SJohn Marino // Very unequal split, one part smaller than one 128th 314*e4b17023SJohn Marino // elements not strictly larger than the pivot. 315*e4b17023SJohn Marino __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st 316*e4b17023SJohn Marino <_Compare, _ValueType, _ValueType, bool>, _ValueType> 317*e4b17023SJohn Marino __pred(__gnu_parallel::__binder1st 318*e4b17023SJohn Marino <_Compare, _ValueType, _ValueType, bool> 319*e4b17023SJohn Marino (__comp, *__pivot_pos)); 320*e4b17023SJohn Marino 321*e4b17023SJohn Marino // Find other end of pivot-equal range. 322*e4b17023SJohn Marino __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, 323*e4b17023SJohn Marino __end, __pred); 324*e4b17023SJohn Marino } 325*e4b17023SJohn Marino else 326*e4b17023SJohn Marino // Only skip the pivot. 327*e4b17023SJohn Marino __split_pos2 = __split_pos1 + 1; 328*e4b17023SJohn Marino 329*e4b17023SJohn Marino // Elements equal to pivot are done. 330*e4b17023SJohn Marino __elements_done += (__split_pos2 - __split_pos1); 331*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 332*e4b17023SJohn Marino __total_elements_done += (__split_pos2 - __split_pos1); 333*e4b17023SJohn Marino #endif 334*e4b17023SJohn Marino // Always push larger part onto stack. 335*e4b17023SJohn Marino if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) 336*e4b17023SJohn Marino { 337*e4b17023SJohn Marino // Right side larger. 338*e4b17023SJohn Marino if ((__split_pos2) != __end) 339*e4b17023SJohn Marino __tl._M_leftover_parts.push_front 340*e4b17023SJohn Marino (std::make_pair(__split_pos2, __end)); 341*e4b17023SJohn Marino 342*e4b17023SJohn Marino //__current.first = __begin; //already set anyway 343*e4b17023SJohn Marino __current.second = __split_pos1; 344*e4b17023SJohn Marino continue; 345*e4b17023SJohn Marino } 346*e4b17023SJohn Marino else 347*e4b17023SJohn Marino { 348*e4b17023SJohn Marino // Left side larger. 349*e4b17023SJohn Marino if (__begin != __split_pos1) 350*e4b17023SJohn Marino __tl._M_leftover_parts.push_front(std::make_pair 351*e4b17023SJohn Marino (__begin, __split_pos1)); 352*e4b17023SJohn Marino 353*e4b17023SJohn Marino __current.first = __split_pos2; 354*e4b17023SJohn Marino //__current.second = __end; //already set anyway 355*e4b17023SJohn Marino continue; 356*e4b17023SJohn Marino } 357*e4b17023SJohn Marino } 358*e4b17023SJohn Marino else 359*e4b17023SJohn Marino { 360*e4b17023SJohn Marino __gnu_sequential::sort(__begin, __end, __comp); 361*e4b17023SJohn Marino __elements_done += __n; 362*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 363*e4b17023SJohn Marino __total_elements_done += __n; 364*e4b17023SJohn Marino #endif 365*e4b17023SJohn Marino 366*e4b17023SJohn Marino // Prefer own stack, small pieces. 367*e4b17023SJohn Marino if (__tl._M_leftover_parts.pop_front(__current)) 368*e4b17023SJohn Marino continue; 369*e4b17023SJohn Marino 370*e4b17023SJohn Marino # pragma omp atomic 371*e4b17023SJohn Marino *__tl._M_elements_leftover -= __elements_done; 372*e4b17023SJohn Marino 373*e4b17023SJohn Marino __elements_done = 0; 374*e4b17023SJohn Marino 375*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 376*e4b17023SJohn Marino double __search_start = omp_get_wtime(); 377*e4b17023SJohn Marino #endif 378*e4b17023SJohn Marino 379*e4b17023SJohn Marino // Look for new work. 380*e4b17023SJohn Marino bool __successfully_stolen = false; 381*e4b17023SJohn Marino while (__wait && *__tl._M_elements_leftover > 0 382*e4b17023SJohn Marino && !__successfully_stolen 383*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 384*e4b17023SJohn Marino // Possible dead-lock. 385*e4b17023SJohn Marino && (omp_get_wtime() < (__search_start + 1.0)) 386*e4b17023SJohn Marino #endif 387*e4b17023SJohn Marino ) 388*e4b17023SJohn Marino { 389*e4b17023SJohn Marino _ThreadIndex __victim; 390*e4b17023SJohn Marino __victim = __rng(__num_threads); 391*e4b17023SJohn Marino 392*e4b17023SJohn Marino // Large pieces. 393*e4b17023SJohn Marino __successfully_stolen = (__victim != __iam) 394*e4b17023SJohn Marino && __tls[__victim]->_M_leftover_parts.pop_back(__current); 395*e4b17023SJohn Marino if (!__successfully_stolen) 396*e4b17023SJohn Marino __yield(); 397*e4b17023SJohn Marino #if !defined(__ICC) && !defined(__ECC) 398*e4b17023SJohn Marino # pragma omp flush 399*e4b17023SJohn Marino #endif 400*e4b17023SJohn Marino } 401*e4b17023SJohn Marino 402*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 403*e4b17023SJohn Marino if (omp_get_wtime() >= (__search_start + 1.0)) 404*e4b17023SJohn Marino { 405*e4b17023SJohn Marino sleep(1); 406*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() 407*e4b17023SJohn Marino < (__search_start + 1.0)); 408*e4b17023SJohn Marino } 409*e4b17023SJohn Marino #endif 410*e4b17023SJohn Marino if (!__successfully_stolen) 411*e4b17023SJohn Marino { 412*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 413*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); 414*e4b17023SJohn Marino #endif 415*e4b17023SJohn Marino return; 416*e4b17023SJohn Marino } 417*e4b17023SJohn Marino } 418*e4b17023SJohn Marino } 419*e4b17023SJohn Marino } 420*e4b17023SJohn Marino 421*e4b17023SJohn Marino /** @brief Top-level quicksort routine. 422*e4b17023SJohn Marino * @param __begin Begin iterator of sequence. 423*e4b17023SJohn Marino * @param __end End iterator of sequence. 424*e4b17023SJohn Marino * @param __comp Comparator. 425*e4b17023SJohn Marino * @param __num_threads Number of threads that are allowed to work on 426*e4b17023SJohn Marino * this part. 427*e4b17023SJohn Marino */ 428*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 429*e4b17023SJohn Marino void __parallel_sort_qsb(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)430*e4b17023SJohn Marino __parallel_sort_qsb(_RAIter __begin, _RAIter __end, 431*e4b17023SJohn Marino _Compare __comp, _ThreadIndex __num_threads) 432*e4b17023SJohn Marino { 433*e4b17023SJohn Marino _GLIBCXX_CALL(__end - __begin) 434*e4b17023SJohn Marino 435*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 436*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 437*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 438*e4b17023SJohn Marino typedef std::pair<_RAIter, _RAIter> _Piece; 439*e4b17023SJohn Marino 440*e4b17023SJohn Marino typedef _QSBThreadLocal<_RAIter> _TLSType; 441*e4b17023SJohn Marino 442*e4b17023SJohn Marino _DifferenceType __n = __end - __begin; 443*e4b17023SJohn Marino 444*e4b17023SJohn Marino if (__n <= 1) 445*e4b17023SJohn Marino return; 446*e4b17023SJohn Marino 447*e4b17023SJohn Marino // At least one element per processor. 448*e4b17023SJohn Marino if (__num_threads > __n) 449*e4b17023SJohn Marino __num_threads = static_cast<_ThreadIndex>(__n); 450*e4b17023SJohn Marino 451*e4b17023SJohn Marino // Initialize thread local storage 452*e4b17023SJohn Marino _TLSType** __tls = new _TLSType*[__num_threads]; 453*e4b17023SJohn Marino _DifferenceType __queue_size = (__num_threads 454*e4b17023SJohn Marino * (_ThreadIndex)(__rd_log2(__n) + 1)); 455*e4b17023SJohn Marino for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) 456*e4b17023SJohn Marino __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); 457*e4b17023SJohn Marino 458*e4b17023SJohn Marino // There can never be more than ceil(__rd_log2(__n)) ranges on the 459*e4b17023SJohn Marino // stack, because 460*e4b17023SJohn Marino // 1. Only one processor pushes onto the stack 461*e4b17023SJohn Marino // 2. The largest range has at most length __n 462*e4b17023SJohn Marino // 3. Each range is larger than half of the range remaining 463*e4b17023SJohn Marino volatile _DifferenceType __elements_leftover = __n; 464*e4b17023SJohn Marino for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 465*e4b17023SJohn Marino { 466*e4b17023SJohn Marino __tls[__i]->_M_elements_leftover = &__elements_leftover; 467*e4b17023SJohn Marino __tls[__i]->_M_num_threads = __num_threads; 468*e4b17023SJohn Marino __tls[__i]->_M_global = std::make_pair(__begin, __end); 469*e4b17023SJohn Marino 470*e4b17023SJohn Marino // Just in case nothing is left to assign. 471*e4b17023SJohn Marino __tls[__i]->_M_initial = std::make_pair(__end, __end); 472*e4b17023SJohn Marino } 473*e4b17023SJohn Marino 474*e4b17023SJohn Marino // Main recursion call. 475*e4b17023SJohn Marino __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, 476*e4b17023SJohn Marino __num_threads, true); 477*e4b17023SJohn Marino 478*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS 479*e4b17023SJohn Marino // All stack must be empty. 480*e4b17023SJohn Marino _Piece __dummy; 481*e4b17023SJohn Marino for (_ThreadIndex __i = 1; __i < __num_threads; ++__i) 482*e4b17023SJohn Marino _GLIBCXX_PARALLEL_ASSERT( 483*e4b17023SJohn Marino !__tls[__i]->_M_leftover_parts.pop_back(__dummy)); 484*e4b17023SJohn Marino #endif 485*e4b17023SJohn Marino 486*e4b17023SJohn Marino for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 487*e4b17023SJohn Marino delete __tls[__i]; 488*e4b17023SJohn Marino delete[] __tls; 489*e4b17023SJohn Marino } 490*e4b17023SJohn Marino } // namespace __gnu_parallel 491*e4b17023SJohn Marino 492*e4b17023SJohn Marino #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ 493