xref: /dflybsd-src/contrib/gcc-4.7/libstdc++-v3/include/parallel/balanced_quicksort.h (revision 04febcfb30580676d3e95f58a16c5137ee478b32)
1*e4b17023SJohn Marino // -*- C++ -*-
2*e4b17023SJohn Marino 
3*e4b17023SJohn Marino // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
4*e4b17023SJohn Marino //
5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library.  This library is free
6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the terms
7*e4b17023SJohn Marino // of the GNU General Public License as published by the Free Software
8*e4b17023SJohn Marino // Foundation; either version 3, or (at your option) any later
9*e4b17023SJohn Marino // version.
10*e4b17023SJohn Marino 
11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, but
12*e4b17023SJohn Marino // WITHOUT ANY WARRANTY; without even the implied warranty of
13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14*e4b17023SJohn Marino // General Public License for more details.
15*e4b17023SJohn Marino 
16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional
17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version
18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation.
19*e4b17023SJohn Marino 
20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and
21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program;
22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>.
24*e4b17023SJohn Marino 
25*e4b17023SJohn Marino /** @file parallel/balanced_quicksort.h
26*e4b17023SJohn Marino  *  @brief Implementation of a dynamically load-balanced parallel quicksort.
27*e4b17023SJohn Marino  *
28*e4b17023SJohn Marino  *  It works in-place and needs only logarithmic extra memory.
29*e4b17023SJohn Marino  *  The algorithm is similar to the one proposed in
30*e4b17023SJohn Marino  *
31*e4b17023SJohn Marino  *  P. Tsigas and Y. Zhang.
32*e4b17023SJohn Marino  *  A simple, fast parallel implementation of quicksort and
33*e4b17023SJohn Marino  *  its performance evaluation on SUN enterprise 10000.
34*e4b17023SJohn Marino  *  In 11th Euromicro Conference on Parallel, Distributed and
35*e4b17023SJohn Marino  *  Network-Based Processing, page 372, 2003.
36*e4b17023SJohn Marino  *
37*e4b17023SJohn Marino  *  This file is a GNU parallel extension to the Standard C++ Library.
38*e4b17023SJohn Marino  */
39*e4b17023SJohn Marino 
40*e4b17023SJohn Marino // Written by Johannes Singler.
41*e4b17023SJohn Marino 
42*e4b17023SJohn Marino #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43*e4b17023SJohn Marino #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
44*e4b17023SJohn Marino 
45*e4b17023SJohn Marino #include <parallel/basic_iterator.h>
46*e4b17023SJohn Marino #include <bits/stl_algo.h>
47*e4b17023SJohn Marino #include <bits/stl_function.h>
48*e4b17023SJohn Marino 
49*e4b17023SJohn Marino #include <parallel/settings.h>
50*e4b17023SJohn Marino #include <parallel/partition.h>
51*e4b17023SJohn Marino #include <parallel/random_number.h>
52*e4b17023SJohn Marino #include <parallel/queue.h>
53*e4b17023SJohn Marino 
54*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
55*e4b17023SJohn Marino #include <parallel/checkers.h>
56*e4b17023SJohn Marino #endif
57*e4b17023SJohn Marino 
58*e4b17023SJohn Marino namespace __gnu_parallel
59*e4b17023SJohn Marino {
60*e4b17023SJohn Marino   /** @brief Information local to one thread in the parallel quicksort run. */
61*e4b17023SJohn Marino   template<typename _RAIter>
62*e4b17023SJohn Marino     struct _QSBThreadLocal
63*e4b17023SJohn Marino     {
64*e4b17023SJohn Marino       typedef std::iterator_traits<_RAIter> _TraitsType;
65*e4b17023SJohn Marino       typedef typename _TraitsType::difference_type _DifferenceType;
66*e4b17023SJohn Marino 
67*e4b17023SJohn Marino       /** @brief Continuous part of the sequence, described by an
68*e4b17023SJohn Marino       iterator pair. */
69*e4b17023SJohn Marino       typedef std::pair<_RAIter, _RAIter> _Piece;
70*e4b17023SJohn Marino 
71*e4b17023SJohn Marino       /** @brief Initial piece to work on. */
72*e4b17023SJohn Marino       _Piece _M_initial;
73*e4b17023SJohn Marino 
74*e4b17023SJohn Marino       /** @brief Work-stealing queue. */
75*e4b17023SJohn Marino       _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
76*e4b17023SJohn Marino 
77*e4b17023SJohn Marino       /** @brief Number of threads involved in this algorithm. */
78*e4b17023SJohn Marino       _ThreadIndex _M_num_threads;
79*e4b17023SJohn Marino 
80*e4b17023SJohn Marino       /** @brief Pointer to a counter of elements left over to sort. */
81*e4b17023SJohn Marino       volatile _DifferenceType* _M_elements_leftover;
82*e4b17023SJohn Marino 
83*e4b17023SJohn Marino       /** @brief The complete sequence to sort. */
84*e4b17023SJohn Marino       _Piece _M_global;
85*e4b17023SJohn Marino 
86*e4b17023SJohn Marino       /** @brief Constructor.
87*e4b17023SJohn Marino        *  @param __queue_size size of the work-stealing queue. */
_QSBThreadLocal_QSBThreadLocal88*e4b17023SJohn Marino       _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
89*e4b17023SJohn Marino     };
90*e4b17023SJohn Marino 
91*e4b17023SJohn Marino   /** @brief Balanced quicksort divide step.
92*e4b17023SJohn Marino     *  @param __begin Begin iterator of subsequence.
93*e4b17023SJohn Marino     *  @param __end End iterator of subsequence.
94*e4b17023SJohn Marino     *  @param __comp Comparator.
95*e4b17023SJohn Marino     *  @param __num_threads Number of threads that are allowed to work on
96*e4b17023SJohn Marino     *  this part.
97*e4b17023SJohn Marino     *  @pre @c (__end-__begin)>=1 */
98*e4b17023SJohn Marino   template<typename _RAIter, typename _Compare>
99*e4b17023SJohn Marino     typename std::iterator_traits<_RAIter>::difference_type
__qsb_divide(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)100*e4b17023SJohn Marino     __qsb_divide(_RAIter __begin, _RAIter __end,
101*e4b17023SJohn Marino 		 _Compare __comp, _ThreadIndex __num_threads)
102*e4b17023SJohn Marino     {
103*e4b17023SJohn Marino       _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
104*e4b17023SJohn Marino 
105*e4b17023SJohn Marino       typedef std::iterator_traits<_RAIter> _TraitsType;
106*e4b17023SJohn Marino       typedef typename _TraitsType::value_type _ValueType;
107*e4b17023SJohn Marino       typedef typename _TraitsType::difference_type _DifferenceType;
108*e4b17023SJohn Marino 
109*e4b17023SJohn Marino       _RAIter __pivot_pos =
110*e4b17023SJohn Marino 	__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
111*e4b17023SJohn Marino 				    __end  - 1, __comp);
112*e4b17023SJohn Marino 
113*e4b17023SJohn Marino #if defined(_GLIBCXX_ASSERTIONS)
114*e4b17023SJohn Marino       // Must be in between somewhere.
115*e4b17023SJohn Marino       _DifferenceType __n = __end - __begin;
116*e4b17023SJohn Marino 
117*e4b17023SJohn Marino       _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
118*e4b17023SJohn Marino 				&& !__comp(*(__begin + __n / 2),
119*e4b17023SJohn Marino 					   *__pivot_pos))
120*e4b17023SJohn Marino 			       || (!__comp(*__pivot_pos, *__begin)
121*e4b17023SJohn Marino 				   && !__comp(*(__end - 1), *__pivot_pos))
122*e4b17023SJohn Marino 			       || (!__comp(*__pivot_pos, *(__begin + __n / 2))
123*e4b17023SJohn Marino 				   && !__comp(*__begin, *__pivot_pos))
124*e4b17023SJohn Marino 			       || (!__comp(*__pivot_pos, *(__begin + __n / 2))
125*e4b17023SJohn Marino 				   && !__comp(*(__end - 1), *__pivot_pos))
126*e4b17023SJohn Marino 			       || (!__comp(*__pivot_pos, *(__end - 1))
127*e4b17023SJohn Marino 				   && !__comp(*__begin, *__pivot_pos))
128*e4b17023SJohn Marino 			       || (!__comp(*__pivot_pos, *(__end - 1))
129*e4b17023SJohn Marino 				   && !__comp(*(__begin + __n / 2),
130*e4b17023SJohn Marino 					      *__pivot_pos)));
131*e4b17023SJohn Marino #endif
132*e4b17023SJohn Marino 
133*e4b17023SJohn Marino       // Swap pivot value to end.
134*e4b17023SJohn Marino       if (__pivot_pos != (__end - 1))
135*e4b17023SJohn Marino 	std::iter_swap(__pivot_pos, __end - 1);
136*e4b17023SJohn Marino       __pivot_pos = __end - 1;
137*e4b17023SJohn Marino 
138*e4b17023SJohn Marino       __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool>
139*e4b17023SJohn Marino 	__pred(__comp, *__pivot_pos);
140*e4b17023SJohn Marino 
141*e4b17023SJohn Marino       // Divide, returning __end - __begin - 1 in the worst case.
142*e4b17023SJohn Marino       _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
143*e4b17023SJohn Marino 							 __pred,
144*e4b17023SJohn Marino 							 __num_threads);
145*e4b17023SJohn Marino 
146*e4b17023SJohn Marino       // Swap back pivot to middle.
147*e4b17023SJohn Marino       std::iter_swap(__begin + __split_pos, __pivot_pos);
148*e4b17023SJohn Marino       __pivot_pos = __begin + __split_pos;
149*e4b17023SJohn Marino 
150*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
151*e4b17023SJohn Marino       _RAIter __r;
152*e4b17023SJohn Marino       for (__r = __begin; __r != __pivot_pos; ++__r)
153*e4b17023SJohn Marino 	_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
154*e4b17023SJohn Marino       for (; __r != __end; ++__r)
155*e4b17023SJohn Marino 	_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
156*e4b17023SJohn Marino #endif
157*e4b17023SJohn Marino 
158*e4b17023SJohn Marino       return __split_pos;
159*e4b17023SJohn Marino     }
160*e4b17023SJohn Marino 
161*e4b17023SJohn Marino   /** @brief Quicksort conquer step.
162*e4b17023SJohn Marino     *  @param __tls Array of thread-local storages.
163*e4b17023SJohn Marino     *  @param __begin Begin iterator of subsequence.
164*e4b17023SJohn Marino     *  @param __end End iterator of subsequence.
165*e4b17023SJohn Marino     *  @param __comp Comparator.
166*e4b17023SJohn Marino     *  @param __iam Number of the thread processing this function.
167*e4b17023SJohn Marino     *  @param __num_threads
168*e4b17023SJohn Marino     *          Number of threads that are allowed to work on this part. */
169*e4b17023SJohn Marino   template<typename _RAIter, typename _Compare>
170*e4b17023SJohn Marino     void
__qsb_conquer(_QSBThreadLocal<_RAIter> ** __tls,_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __iam,_ThreadIndex __num_threads,bool __parent_wait)171*e4b17023SJohn Marino     __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
172*e4b17023SJohn Marino 		  _RAIter __begin, _RAIter __end,
173*e4b17023SJohn Marino 		  _Compare __comp,
174*e4b17023SJohn Marino 		  _ThreadIndex __iam, _ThreadIndex __num_threads,
175*e4b17023SJohn Marino 		  bool __parent_wait)
176*e4b17023SJohn Marino     {
177*e4b17023SJohn Marino       typedef std::iterator_traits<_RAIter> _TraitsType;
178*e4b17023SJohn Marino       typedef typename _TraitsType::value_type _ValueType;
179*e4b17023SJohn Marino       typedef typename _TraitsType::difference_type _DifferenceType;
180*e4b17023SJohn Marino 
181*e4b17023SJohn Marino       _DifferenceType __n = __end - __begin;
182*e4b17023SJohn Marino 
183*e4b17023SJohn Marino       if (__num_threads <= 1 || __n <= 1)
184*e4b17023SJohn Marino 	{
185*e4b17023SJohn Marino           __tls[__iam]->_M_initial.first  = __begin;
186*e4b17023SJohn Marino           __tls[__iam]->_M_initial.second = __end;
187*e4b17023SJohn Marino 
188*e4b17023SJohn Marino           __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
189*e4b17023SJohn Marino 
190*e4b17023SJohn Marino           return;
191*e4b17023SJohn Marino 	}
192*e4b17023SJohn Marino 
193*e4b17023SJohn Marino       // Divide step.
194*e4b17023SJohn Marino       _DifferenceType __split_pos =
195*e4b17023SJohn Marino 	__qsb_divide(__begin, __end, __comp, __num_threads);
196*e4b17023SJohn Marino 
197*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
198*e4b17023SJohn Marino       _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
199*e4b17023SJohn Marino                                __split_pos < (__end - __begin));
200*e4b17023SJohn Marino #endif
201*e4b17023SJohn Marino 
202*e4b17023SJohn Marino       _ThreadIndex
203*e4b17023SJohn Marino 	__num_threads_leftside = std::max<_ThreadIndex>
204*e4b17023SJohn Marino 	(1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
205*e4b17023SJohn Marino 				   * __num_threads / __n));
206*e4b17023SJohn Marino 
207*e4b17023SJohn Marino #     pragma omp atomic
208*e4b17023SJohn Marino       *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
209*e4b17023SJohn Marino 
210*e4b17023SJohn Marino       // Conquer step.
211*e4b17023SJohn Marino #     pragma omp parallel num_threads(2)
212*e4b17023SJohn Marino       {
213*e4b17023SJohn Marino 	bool __wait;
214*e4b17023SJohn Marino 	if(omp_get_num_threads() < 2)
215*e4b17023SJohn Marino           __wait = false;
216*e4b17023SJohn Marino 	else
217*e4b17023SJohn Marino           __wait = __parent_wait;
218*e4b17023SJohn Marino 
219*e4b17023SJohn Marino #       pragma omp sections
220*e4b17023SJohn Marino 	{
221*e4b17023SJohn Marino #         pragma omp section
222*e4b17023SJohn Marino 	  {
223*e4b17023SJohn Marino 	    __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
224*e4b17023SJohn Marino 			  __iam, __num_threads_leftside, __wait);
225*e4b17023SJohn Marino 	    __wait = __parent_wait;
226*e4b17023SJohn Marino 	  }
227*e4b17023SJohn Marino 	  // The pivot_pos is left in place, to ensure termination.
228*e4b17023SJohn Marino #         pragma omp section
229*e4b17023SJohn Marino 	  {
230*e4b17023SJohn Marino 	    __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
231*e4b17023SJohn Marino 			  __iam + __num_threads_leftside,
232*e4b17023SJohn Marino 			  __num_threads - __num_threads_leftside, __wait);
233*e4b17023SJohn Marino 	    __wait = __parent_wait;
234*e4b17023SJohn Marino 	  }
235*e4b17023SJohn Marino 	}
236*e4b17023SJohn Marino       }
237*e4b17023SJohn Marino     }
238*e4b17023SJohn Marino 
239*e4b17023SJohn Marino   /**
240*e4b17023SJohn Marino     *  @brief Quicksort step doing load-balanced local sort.
241*e4b17023SJohn Marino     *  @param __tls Array of thread-local storages.
242*e4b17023SJohn Marino     *  @param __comp Comparator.
243*e4b17023SJohn Marino     *  @param __iam Number of the thread processing this function.
244*e4b17023SJohn Marino     */
245*e4b17023SJohn Marino   template<typename _RAIter, typename _Compare>
246*e4b17023SJohn Marino     void
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter> ** __tls,_Compare & __comp,_ThreadIndex __iam,bool __wait)247*e4b17023SJohn Marino     __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
248*e4b17023SJohn Marino 				  _Compare& __comp, _ThreadIndex __iam,
249*e4b17023SJohn Marino 				  bool __wait)
250*e4b17023SJohn Marino     {
251*e4b17023SJohn Marino       typedef std::iterator_traits<_RAIter> _TraitsType;
252*e4b17023SJohn Marino       typedef typename _TraitsType::value_type _ValueType;
253*e4b17023SJohn Marino       typedef typename _TraitsType::difference_type _DifferenceType;
254*e4b17023SJohn Marino       typedef std::pair<_RAIter, _RAIter> _Piece;
255*e4b17023SJohn Marino 
256*e4b17023SJohn Marino       _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
257*e4b17023SJohn Marino 
258*e4b17023SJohn Marino       _DifferenceType
259*e4b17023SJohn Marino 	__base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
260*e4b17023SJohn Marino       if (__base_case_n < 2)
261*e4b17023SJohn Marino 	__base_case_n = 2;
262*e4b17023SJohn Marino       _ThreadIndex __num_threads = __tl._M_num_threads;
263*e4b17023SJohn Marino 
264*e4b17023SJohn Marino       // Every thread has its own random number generator.
265*e4b17023SJohn Marino       _RandomNumber __rng(__iam + 1);
266*e4b17023SJohn Marino 
267*e4b17023SJohn Marino       _Piece __current = __tl._M_initial;
268*e4b17023SJohn Marino 
269*e4b17023SJohn Marino       _DifferenceType __elements_done = 0;
270*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
271*e4b17023SJohn Marino       _DifferenceType __total_elements_done = 0;
272*e4b17023SJohn Marino #endif
273*e4b17023SJohn Marino 
274*e4b17023SJohn Marino       for (;;)
275*e4b17023SJohn Marino 	{
276*e4b17023SJohn Marino           // Invariant: __current must be a valid (maybe empty) range.
277*e4b17023SJohn Marino           _RAIter __begin = __current.first, __end = __current.second;
278*e4b17023SJohn Marino           _DifferenceType __n = __end - __begin;
279*e4b17023SJohn Marino 
280*e4b17023SJohn Marino           if (__n > __base_case_n)
281*e4b17023SJohn Marino             {
282*e4b17023SJohn Marino               // Divide.
283*e4b17023SJohn Marino               _RAIter __pivot_pos = __begin +  __rng(__n);
284*e4b17023SJohn Marino 
285*e4b17023SJohn Marino               // Swap __pivot_pos value to end.
286*e4b17023SJohn Marino               if (__pivot_pos != (__end - 1))
287*e4b17023SJohn Marino         	std::iter_swap(__pivot_pos, __end - 1);
288*e4b17023SJohn Marino               __pivot_pos = __end - 1;
289*e4b17023SJohn Marino 
290*e4b17023SJohn Marino               __gnu_parallel::__binder2nd
291*e4b17023SJohn Marino 		<_Compare, _ValueType, _ValueType, bool>
292*e4b17023SJohn Marino 		__pred(__comp, *__pivot_pos);
293*e4b17023SJohn Marino 
294*e4b17023SJohn Marino               // Divide, leave pivot unchanged in last place.
295*e4b17023SJohn Marino               _RAIter __split_pos1, __split_pos2;
296*e4b17023SJohn Marino               __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
297*e4b17023SJohn Marino 							 __pred);
298*e4b17023SJohn Marino 
299*e4b17023SJohn Marino               // Left side: < __pivot_pos; __right side: >= __pivot_pos.
300*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
301*e4b17023SJohn Marino               _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
302*e4b17023SJohn Marino                                        && __split_pos1 < __end);
303*e4b17023SJohn Marino #endif
304*e4b17023SJohn Marino               // Swap pivot back to middle.
305*e4b17023SJohn Marino               if (__split_pos1 != __pivot_pos)
306*e4b17023SJohn Marino         	std::iter_swap(__split_pos1, __pivot_pos);
307*e4b17023SJohn Marino               __pivot_pos = __split_pos1;
308*e4b17023SJohn Marino 
309*e4b17023SJohn Marino               // In case all elements are equal, __split_pos1 == 0.
310*e4b17023SJohn Marino               if ((__split_pos1 + 1 - __begin) < (__n >> 7)
311*e4b17023SJohn Marino 		  || (__end - __split_pos1) < (__n >> 7))
312*e4b17023SJohn Marino         	{
313*e4b17023SJohn Marino                   // Very unequal split, one part smaller than one 128th
314*e4b17023SJohn Marino                   // elements not strictly larger than the pivot.
315*e4b17023SJohn Marino                   __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
316*e4b17023SJohn Marino                     <_Compare, _ValueType, _ValueType, bool>, _ValueType>
317*e4b17023SJohn Marino                     __pred(__gnu_parallel::__binder1st
318*e4b17023SJohn Marino                 	 <_Compare, _ValueType, _ValueType, bool>
319*e4b17023SJohn Marino 			   (__comp, *__pivot_pos));
320*e4b17023SJohn Marino 
321*e4b17023SJohn Marino                   // Find other end of pivot-equal range.
322*e4b17023SJohn Marino                   __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
323*e4b17023SJohn Marino 							     __end, __pred);
324*e4b17023SJohn Marino         	}
325*e4b17023SJohn Marino               else
326*e4b17023SJohn Marino         	// Only skip the pivot.
327*e4b17023SJohn Marino         	__split_pos2 = __split_pos1 + 1;
328*e4b17023SJohn Marino 
329*e4b17023SJohn Marino               // Elements equal to pivot are done.
330*e4b17023SJohn Marino               __elements_done += (__split_pos2 - __split_pos1);
331*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
332*e4b17023SJohn Marino               __total_elements_done += (__split_pos2 - __split_pos1);
333*e4b17023SJohn Marino #endif
334*e4b17023SJohn Marino               // Always push larger part onto stack.
335*e4b17023SJohn Marino               if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
336*e4b17023SJohn Marino         	{
337*e4b17023SJohn Marino                   // Right side larger.
338*e4b17023SJohn Marino                   if ((__split_pos2) != __end)
339*e4b17023SJohn Marino                     __tl._M_leftover_parts.push_front
340*e4b17023SJohn Marino 		      (std::make_pair(__split_pos2, __end));
341*e4b17023SJohn Marino 
342*e4b17023SJohn Marino                   //__current.first = __begin;    //already set anyway
343*e4b17023SJohn Marino                   __current.second = __split_pos1;
344*e4b17023SJohn Marino                   continue;
345*e4b17023SJohn Marino         	}
346*e4b17023SJohn Marino               else
347*e4b17023SJohn Marino         	{
348*e4b17023SJohn Marino                   // Left side larger.
349*e4b17023SJohn Marino                   if (__begin != __split_pos1)
350*e4b17023SJohn Marino                     __tl._M_leftover_parts.push_front(std::make_pair
351*e4b17023SJohn Marino 						      (__begin, __split_pos1));
352*e4b17023SJohn Marino 
353*e4b17023SJohn Marino                   __current.first = __split_pos2;
354*e4b17023SJohn Marino                   //__current.second = __end;     //already set anyway
355*e4b17023SJohn Marino                   continue;
356*e4b17023SJohn Marino         	}
357*e4b17023SJohn Marino             }
358*e4b17023SJohn Marino           else
359*e4b17023SJohn Marino             {
360*e4b17023SJohn Marino               __gnu_sequential::sort(__begin, __end, __comp);
361*e4b17023SJohn Marino               __elements_done += __n;
362*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
363*e4b17023SJohn Marino               __total_elements_done += __n;
364*e4b17023SJohn Marino #endif
365*e4b17023SJohn Marino 
366*e4b17023SJohn Marino               // Prefer own stack, small pieces.
367*e4b17023SJohn Marino               if (__tl._M_leftover_parts.pop_front(__current))
368*e4b17023SJohn Marino         	continue;
369*e4b17023SJohn Marino 
370*e4b17023SJohn Marino #             pragma omp atomic
371*e4b17023SJohn Marino               *__tl._M_elements_leftover -= __elements_done;
372*e4b17023SJohn Marino 
373*e4b17023SJohn Marino               __elements_done = 0;
374*e4b17023SJohn Marino 
375*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
376*e4b17023SJohn Marino               double __search_start = omp_get_wtime();
377*e4b17023SJohn Marino #endif
378*e4b17023SJohn Marino 
379*e4b17023SJohn Marino               // Look for new work.
380*e4b17023SJohn Marino               bool __successfully_stolen = false;
381*e4b17023SJohn Marino               while (__wait && *__tl._M_elements_leftover > 0
382*e4b17023SJohn Marino                      && !__successfully_stolen
383*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
384*e4b17023SJohn Marino                       // Possible dead-lock.
385*e4b17023SJohn Marino                      && (omp_get_wtime() < (__search_start + 1.0))
386*e4b17023SJohn Marino #endif
387*e4b17023SJohn Marino 		     )
388*e4b17023SJohn Marino         	{
389*e4b17023SJohn Marino                   _ThreadIndex __victim;
390*e4b17023SJohn Marino                   __victim = __rng(__num_threads);
391*e4b17023SJohn Marino 
392*e4b17023SJohn Marino                   // Large pieces.
393*e4b17023SJohn Marino                   __successfully_stolen = (__victim != __iam)
394*e4b17023SJohn Marino 		    && __tls[__victim]->_M_leftover_parts.pop_back(__current);
395*e4b17023SJohn Marino                   if (!__successfully_stolen)
396*e4b17023SJohn Marino                     __yield();
397*e4b17023SJohn Marino #if !defined(__ICC) && !defined(__ECC)
398*e4b17023SJohn Marino #                 pragma omp flush
399*e4b17023SJohn Marino #endif
400*e4b17023SJohn Marino         	}
401*e4b17023SJohn Marino 
402*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
403*e4b17023SJohn Marino               if (omp_get_wtime() >= (__search_start + 1.0))
404*e4b17023SJohn Marino         	{
405*e4b17023SJohn Marino                   sleep(1);
406*e4b17023SJohn Marino                   _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
407*e4b17023SJohn Marino                                            < (__search_start + 1.0));
408*e4b17023SJohn Marino         	}
409*e4b17023SJohn Marino #endif
410*e4b17023SJohn Marino               if (!__successfully_stolen)
411*e4b17023SJohn Marino         	{
412*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
413*e4b17023SJohn Marino                   _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
414*e4b17023SJohn Marino #endif
415*e4b17023SJohn Marino                   return;
416*e4b17023SJohn Marino         	}
417*e4b17023SJohn Marino             }
418*e4b17023SJohn Marino 	}
419*e4b17023SJohn Marino     }
420*e4b17023SJohn Marino 
421*e4b17023SJohn Marino   /** @brief Top-level quicksort routine.
422*e4b17023SJohn Marino     *  @param __begin Begin iterator of sequence.
423*e4b17023SJohn Marino     *  @param __end End iterator of sequence.
424*e4b17023SJohn Marino     *  @param __comp Comparator.
425*e4b17023SJohn Marino     *  @param __num_threads Number of threads that are allowed to work on
426*e4b17023SJohn Marino     *  this part.
427*e4b17023SJohn Marino     */
428*e4b17023SJohn Marino   template<typename _RAIter, typename _Compare>
429*e4b17023SJohn Marino     void
__parallel_sort_qsb(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)430*e4b17023SJohn Marino     __parallel_sort_qsb(_RAIter __begin, _RAIter __end,
431*e4b17023SJohn Marino 			_Compare __comp, _ThreadIndex __num_threads)
432*e4b17023SJohn Marino     {
433*e4b17023SJohn Marino       _GLIBCXX_CALL(__end - __begin)
434*e4b17023SJohn Marino 
435*e4b17023SJohn Marino       typedef std::iterator_traits<_RAIter> _TraitsType;
436*e4b17023SJohn Marino       typedef typename _TraitsType::value_type _ValueType;
437*e4b17023SJohn Marino       typedef typename _TraitsType::difference_type _DifferenceType;
438*e4b17023SJohn Marino       typedef std::pair<_RAIter, _RAIter> _Piece;
439*e4b17023SJohn Marino 
440*e4b17023SJohn Marino       typedef _QSBThreadLocal<_RAIter> _TLSType;
441*e4b17023SJohn Marino 
442*e4b17023SJohn Marino       _DifferenceType __n = __end - __begin;
443*e4b17023SJohn Marino 
444*e4b17023SJohn Marino       if (__n <= 1)
445*e4b17023SJohn Marino 	return;
446*e4b17023SJohn Marino 
447*e4b17023SJohn Marino       // At least one element per processor.
448*e4b17023SJohn Marino       if (__num_threads > __n)
449*e4b17023SJohn Marino 	__num_threads = static_cast<_ThreadIndex>(__n);
450*e4b17023SJohn Marino 
451*e4b17023SJohn Marino       // Initialize thread local storage
452*e4b17023SJohn Marino       _TLSType** __tls = new _TLSType*[__num_threads];
453*e4b17023SJohn Marino       _DifferenceType __queue_size = (__num_threads
454*e4b17023SJohn Marino 				      * (_ThreadIndex)(__rd_log2(__n) + 1));
455*e4b17023SJohn Marino       for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
456*e4b17023SJohn Marino 	__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
457*e4b17023SJohn Marino 
458*e4b17023SJohn Marino       // There can never be more than ceil(__rd_log2(__n)) ranges on the
459*e4b17023SJohn Marino       // stack, because
460*e4b17023SJohn Marino       // 1. Only one processor pushes onto the stack
461*e4b17023SJohn Marino       // 2. The largest range has at most length __n
462*e4b17023SJohn Marino       // 3. Each range is larger than half of the range remaining
463*e4b17023SJohn Marino       volatile _DifferenceType __elements_leftover = __n;
464*e4b17023SJohn Marino       for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
465*e4b17023SJohn Marino 	{
466*e4b17023SJohn Marino           __tls[__i]->_M_elements_leftover = &__elements_leftover;
467*e4b17023SJohn Marino           __tls[__i]->_M_num_threads = __num_threads;
468*e4b17023SJohn Marino           __tls[__i]->_M_global = std::make_pair(__begin, __end);
469*e4b17023SJohn Marino 
470*e4b17023SJohn Marino           // Just in case nothing is left to assign.
471*e4b17023SJohn Marino           __tls[__i]->_M_initial = std::make_pair(__end, __end);
472*e4b17023SJohn Marino 	}
473*e4b17023SJohn Marino 
474*e4b17023SJohn Marino       // Main recursion call.
475*e4b17023SJohn Marino       __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
476*e4b17023SJohn Marino 		    __num_threads, true);
477*e4b17023SJohn Marino 
478*e4b17023SJohn Marino #if _GLIBCXX_ASSERTIONS
479*e4b17023SJohn Marino       // All stack must be empty.
480*e4b17023SJohn Marino       _Piece __dummy;
481*e4b17023SJohn Marino       for (_ThreadIndex __i = 1; __i < __num_threads; ++__i)
482*e4b17023SJohn Marino 	_GLIBCXX_PARALLEL_ASSERT(
483*e4b17023SJohn Marino           !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
484*e4b17023SJohn Marino #endif
485*e4b17023SJohn Marino 
486*e4b17023SJohn Marino       for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
487*e4b17023SJohn Marino 	delete __tls[__i];
488*e4b17023SJohn Marino       delete[] __tls;
489*e4b17023SJohn Marino     }
490*e4b17023SJohn Marino } // namespace __gnu_parallel
491*e4b17023SJohn Marino 
492*e4b17023SJohn Marino #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
493