xref: /dflybsd-src/contrib/gcc-8.0/libstdc++-v3/include/parallel/quicksort.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj // -*- C++ -*-
2*38fd1498Szrj 
3*38fd1498Szrj // Copyright (C) 2007-2018 Free Software Foundation, Inc.
4*38fd1498Szrj //
5*38fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
6*38fd1498Szrj // software; you can redistribute it and/or modify it under the terms
7*38fd1498Szrj // of the GNU General Public License as published by the Free Software
8*38fd1498Szrj // Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj // version.
10*38fd1498Szrj 
11*38fd1498Szrj // This library is distributed in the hope that it will be useful, but
12*38fd1498Szrj // WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14*38fd1498Szrj // General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
18*38fd1498Szrj // 3.1, as published by the Free Software Foundation.
19*38fd1498Szrj 
20*38fd1498Szrj // You should have received a copy of the GNU General Public License and
21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*38fd1498Szrj // <http://www.gnu.org/licenses/>.
24*38fd1498Szrj 
/** @file parallel/quicksort.h
 *  @brief Implementation of an unbalanced parallel quicksort (in-place).
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */
29*38fd1498Szrj 
30*38fd1498Szrj // Written by Johannes Singler.
31*38fd1498Szrj 
32*38fd1498Szrj #ifndef _GLIBCXX_PARALLEL_QUICKSORT_H
33*38fd1498Szrj #define _GLIBCXX_PARALLEL_QUICKSORT_H 1
34*38fd1498Szrj 
35*38fd1498Szrj #include <parallel/parallel.h>
36*38fd1498Szrj #include <parallel/partition.h>
37*38fd1498Szrj 
38*38fd1498Szrj namespace __gnu_parallel
39*38fd1498Szrj {
40*38fd1498Szrj   /** @brief Unbalanced quicksort divide step.
41*38fd1498Szrj    *  @param __begin Begin iterator of subsequence.
42*38fd1498Szrj    *  @param __end End iterator of subsequence.
43*38fd1498Szrj    *  @param __comp Comparator.
44*38fd1498Szrj    *  @param __pivot_rank Desired __rank of the pivot.
45*38fd1498Szrj    *  @param __num_samples Choose pivot from that many samples.
46*38fd1498Szrj    *  @param __num_threads Number of threads that are allowed to work on
47*38fd1498Szrj    *  this part.
48*38fd1498Szrj    */
49*38fd1498Szrj   template<typename _RAIter, typename _Compare>
50*38fd1498Szrj     typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin,_RAIter __end,_Compare __comp,typename std::iterator_traits<_RAIter>::difference_type __pivot_rank,typename std::iterator_traits<_RAIter>::difference_type __num_samples,_ThreadIndex __num_threads)51*38fd1498Szrj     __parallel_sort_qs_divide(_RAIter __begin, _RAIter __end,
52*38fd1498Szrj 			      _Compare __comp, typename std::iterator_traits
53*38fd1498Szrj 			      <_RAIter>::difference_type __pivot_rank,
54*38fd1498Szrj 			      typename std::iterator_traits
55*38fd1498Szrj 			      <_RAIter>::difference_type
56*38fd1498Szrj 			      __num_samples, _ThreadIndex __num_threads)
57*38fd1498Szrj     {
58*38fd1498Szrj       typedef std::iterator_traits<_RAIter> _TraitsType;
59*38fd1498Szrj       typedef typename _TraitsType::value_type _ValueType;
60*38fd1498Szrj       typedef typename _TraitsType::difference_type _DifferenceType;
61*38fd1498Szrj 
62*38fd1498Szrj       _DifferenceType __n = __end - __begin;
63*38fd1498Szrj       __num_samples = std::min(__num_samples, __n);
64*38fd1498Szrj 
65*38fd1498Szrj       // Allocate uninitialized, to avoid default constructor.
66*38fd1498Szrj       _ValueType* __samples = static_cast<_ValueType*>
67*38fd1498Szrj 	(::operator new(__num_samples * sizeof(_ValueType)));
68*38fd1498Szrj 
69*38fd1498Szrj       for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
70*38fd1498Szrj         {
71*38fd1498Szrj           const unsigned long long __index = static_cast<unsigned long long>
72*38fd1498Szrj 	    (__s) * __n / __num_samples;
73*38fd1498Szrj           ::new(&(__samples[__s])) _ValueType(__begin[__index]);
74*38fd1498Szrj         }
75*38fd1498Szrj 
76*38fd1498Szrj       __gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
77*38fd1498Szrj 
78*38fd1498Szrj       _ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n];
79*38fd1498Szrj 
80*38fd1498Szrj       __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool>
81*38fd1498Szrj         __pred(__comp, __pivot);
82*38fd1498Szrj       _DifferenceType __split = __parallel_partition(__begin, __end,
83*38fd1498Szrj 						     __pred, __num_threads);
84*38fd1498Szrj 
85*38fd1498Szrj       for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
86*38fd1498Szrj 	__samples[__s].~_ValueType();
87*38fd1498Szrj       ::operator delete(__samples);
88*38fd1498Szrj 
89*38fd1498Szrj       return __split;
90*38fd1498Szrj     }
91*38fd1498Szrj 
92*38fd1498Szrj   /** @brief Unbalanced quicksort conquer step.
93*38fd1498Szrj    *  @param __begin Begin iterator of subsequence.
94*38fd1498Szrj    *  @param __end End iterator of subsequence.
95*38fd1498Szrj    *  @param __comp Comparator.
96*38fd1498Szrj    *  @param __num_threads Number of threads that are allowed to work on
97*38fd1498Szrj    *  this part.
98*38fd1498Szrj    */
99*38fd1498Szrj   template<typename _RAIter, typename _Compare>
100*38fd1498Szrj     void
__parallel_sort_qs_conquer(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)101*38fd1498Szrj     __parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end,
102*38fd1498Szrj 			       _Compare __comp,
103*38fd1498Szrj 			       _ThreadIndex __num_threads)
104*38fd1498Szrj     {
105*38fd1498Szrj       typedef std::iterator_traits<_RAIter> _TraitsType;
106*38fd1498Szrj       typedef typename _TraitsType::value_type _ValueType;
107*38fd1498Szrj       typedef typename _TraitsType::difference_type _DifferenceType;
108*38fd1498Szrj 
109*38fd1498Szrj       if (__num_threads <= 1)
110*38fd1498Szrj         {
111*38fd1498Szrj           __gnu_sequential::sort(__begin, __end, __comp);
112*38fd1498Szrj           return;
113*38fd1498Szrj         }
114*38fd1498Szrj 
115*38fd1498Szrj       _DifferenceType __n = __end - __begin, __pivot_rank;
116*38fd1498Szrj 
117*38fd1498Szrj       if (__n <= 1)
118*38fd1498Szrj         return;
119*38fd1498Szrj 
120*38fd1498Szrj       _ThreadIndex __num_threads_left;
121*38fd1498Szrj 
122*38fd1498Szrj       if ((__num_threads % 2) == 1)
123*38fd1498Szrj         __num_threads_left = __num_threads / 2 + 1;
124*38fd1498Szrj       else
125*38fd1498Szrj         __num_threads_left = __num_threads / 2;
126*38fd1498Szrj 
127*38fd1498Szrj       __pivot_rank = __n * __num_threads_left / __num_threads;
128*38fd1498Szrj 
129*38fd1498Szrj       _DifferenceType __split = __parallel_sort_qs_divide
130*38fd1498Szrj 	(__begin, __end, __comp, __pivot_rank,
131*38fd1498Szrj 	 _Settings::get().sort_qs_num_samples_preset, __num_threads);
132*38fd1498Szrj 
133*38fd1498Szrj #pragma omp parallel sections num_threads(2)
134*38fd1498Szrj       {
135*38fd1498Szrj #pragma omp section
136*38fd1498Szrj         __parallel_sort_qs_conquer(__begin, __begin + __split,
137*38fd1498Szrj 				   __comp, __num_threads_left);
138*38fd1498Szrj #pragma omp section
139*38fd1498Szrj         __parallel_sort_qs_conquer(__begin + __split, __end,
140*38fd1498Szrj 				   __comp, __num_threads - __num_threads_left);
141*38fd1498Szrj       }
142*38fd1498Szrj     }
143*38fd1498Szrj 
144*38fd1498Szrj 
145*38fd1498Szrj   /** @brief Unbalanced quicksort main call.
146*38fd1498Szrj    *  @param __begin Begin iterator of input sequence.
147*38fd1498Szrj    *  @param __end End iterator input sequence, ignored.
148*38fd1498Szrj    *  @param __comp Comparator.
149*38fd1498Szrj    *  @param __num_threads Number of threads that are allowed to work on
150*38fd1498Szrj    *  this part.
151*38fd1498Szrj    */
152*38fd1498Szrj   template<typename _RAIter, typename _Compare>
153*38fd1498Szrj     void
__parallel_sort_qs(_RAIter __begin,_RAIter __end,_Compare __comp,_ThreadIndex __num_threads)154*38fd1498Szrj     __parallel_sort_qs(_RAIter __begin, _RAIter __end,
155*38fd1498Szrj 		       _Compare __comp,
156*38fd1498Szrj 		       _ThreadIndex __num_threads)
157*38fd1498Szrj     {
158*38fd1498Szrj       _GLIBCXX_CALL(__n)
159*38fd1498Szrj 
160*38fd1498Szrj       typedef std::iterator_traits<_RAIter> _TraitsType;
161*38fd1498Szrj       typedef typename _TraitsType::value_type _ValueType;
162*38fd1498Szrj       typedef typename _TraitsType::difference_type _DifferenceType;
163*38fd1498Szrj 
164*38fd1498Szrj       _DifferenceType __n = __end - __begin;
165*38fd1498Szrj 
166*38fd1498Szrj       // At least one element per processor.
167*38fd1498Szrj       if (__num_threads > __n)
168*38fd1498Szrj         __num_threads = static_cast<_ThreadIndex>(__n);
169*38fd1498Szrj 
170*38fd1498Szrj       __parallel_sort_qs_conquer(
171*38fd1498Szrj         __begin, __begin + __n, __comp, __num_threads);
172*38fd1498Szrj     }
173*38fd1498Szrj 
174*38fd1498Szrj } //namespace __gnu_parallel
175*38fd1498Szrj 
176*38fd1498Szrj #endif /* _GLIBCXX_PARALLEL_QUICKSORT_H */
177