// -*- C++ -*-

// Copyright (C) 2007-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/quicksort.h
 *  @brief Implementation of an unbalanced parallel quicksort (in-place).
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_QUICKSORT_H
#define _GLIBCXX_PARALLEL_QUICKSORT_H 1

#include <parallel/parallel.h>
#include <parallel/partition.h>

namespace __gnu_parallel
{
  /** @brief Unbalanced quicksort divide step.
   *
   *  Copies up to @c __num_samples evenly spaced elements of the
   *  subsequence, sorts the copies, selects as pivot the sample whose
   *  position among the sorted samples corresponds to @c __pivot_rank
   *  relative to the subsequence length, and then partitions the
   *  subsequence with @c __parallel_partition using a predicate built
   *  from @c __comp with the pivot bound as its second argument.
   *
   *  @param __begin Begin iterator of subsequence.
   *  @param __end End iterator of subsequence.
   *  @param __comp Comparator.
   *  @param __pivot_rank Desired rank of the pivot; values outside
   *  [0, length) are clamped to the first/last sample.
   *  @param __num_samples Choose pivot from that many samples; clamped
   *  to [1, length].
   *  @param __num_threads Number of threads that are allowed to work on
   *  this part.
   *  @return The split position returned by @c __parallel_partition
   *  (callers use it as an offset from @c __begin), or 0 for an empty
   *  subsequence.
   */
  template<typename _RAIter, typename _Compare>
    typename std::iterator_traits<_RAIter>::difference_type
    __parallel_sort_qs_divide(_RAIter __begin, _RAIter __end,
                              _Compare __comp, typename std::iterator_traits
                              <_RAIter>::difference_type __pivot_rank,
                              typename std::iterator_traits
                              <_RAIter>::difference_type
                              __num_samples, _ThreadIndex __num_threads)
    {
      typedef std::iterator_traits<_RAIter> _TraitsType;
      typedef typename _TraitsType::value_type _ValueType;
      typedef typename _TraitsType::difference_type _DifferenceType;

      const _DifferenceType __n = __end - __begin;

      // An empty subsequence has nothing to sample or partition; bailing
      // out here also avoids dividing by __n == 0 further down.
      if (__n <= 0)
        return 0;

      // At most one sample per element, but always at least one so that
      // a constructed pivot exists.
      __num_samples = std::min(__num_samples, __n);
      if (__num_samples < 1)
        __num_samples = 1;

      // Owner of the raw sample storage.  Its destructor destroys exactly
      // the samples constructed so far and frees the memory, so nothing
      // leaks if a copy constructor, the comparator or the partitioning
      // step throws.  Never copied.
      struct _SampleStorage
      {
        _ValueType* _M_data;
        _DifferenceType _M_constructed;

        // Allocate uninitialized, to avoid default constructor.
        explicit
        _SampleStorage(_DifferenceType __capacity)
        : _M_data(static_cast<_ValueType*>
                  (::operator new(__capacity * sizeof(_ValueType)))),
          _M_constructed(0)
        { }

        ~_SampleStorage()
        {
          for (_DifferenceType __s = 0; __s < _M_constructed; ++__s)
            _M_data[__s].~_ValueType();
          ::operator delete(_M_data);
        }
      };

      _SampleStorage __storage(__num_samples);
      _ValueType* const __samples = __storage._M_data;

      for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
        {
          // Evenly spaced source position; the product is formed in
          // unsigned long long to keep it from overflowing.
          const unsigned long long __index = static_cast<unsigned long long>
            (__s) * __n / __num_samples;
          ::new(static_cast<void*>(__samples + __s))
            _ValueType(__begin[__index]);
          __storage._M_constructed = __s + 1;
        }

      __gnu_sequential::sort(__samples, __samples + __num_samples, __comp);

      // Map the desired rank onto the sorted samples, using the same wide
      // arithmetic as the sampling loop and never leaving the buffer.
      _DifferenceType __pivot_index;
      if (__pivot_rank <= 0)
        __pivot_index = 0;
      else if (__pivot_rank >= __n)
        __pivot_index = __num_samples - 1;
      else
        __pivot_index = static_cast<_DifferenceType>
          (static_cast<unsigned long long>(__pivot_rank)
           * __num_samples / __n);

      _ValueType& __pivot = __samples[__pivot_index];

      __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool>
        __pred(__comp, __pivot);
      _DifferenceType __split = __parallel_partition(__begin, __end,
                                                     __pred, __num_threads);

      return __split;
    }

  /** @brief Unbalanced quicksort conquer step.
   *
   *  With at most one thread the subsequence is sorted sequentially.
   *  Otherwise it is divided around a pivot whose desired rank is
   *  proportional to the share of threads given to the left part, and
   *  both parts are handled recursively in two OpenMP sections.
   *
   *  @param __begin Begin iterator of subsequence.
   *  @param __end End iterator of subsequence.
   *  @param __comp Comparator.
   *  @param __num_threads Number of threads that are allowed to work on
   *  this part.
   */
  template<typename _RAIter, typename _Compare>
    void
    __parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end,
                               _Compare __comp,
                               _ThreadIndex __num_threads)
    {
      typedef typename std::iterator_traits<_RAIter>::difference_type
        _DifferenceType;

      if (__num_threads <= 1)
        {
          __gnu_sequential::sort(__begin, __end, __comp);
          return;
        }

      const _DifferenceType __n = __end - __begin;

      if (__n <= 1)
        return;

      // The left part gets half of the threads, rounded up.
      const _ThreadIndex __num_threads_left
        = static_cast<_ThreadIndex>((__num_threads + 1) / 2);

      // Aim for a split proportional to the thread distribution.
      const _DifferenceType __pivot_rank
        = __n * __num_threads_left / __num_threads;

      const _DifferenceType __split = __parallel_sort_qs_divide
        (__begin, __end, __comp, __pivot_rank,
         _Settings::get().sort_qs_num_samples_preset, __num_threads);

#pragma omp parallel sections num_threads(2)
      {
#pragma omp section
        __parallel_sort_qs_conquer(__begin, __begin + __split,
                                   __comp, __num_threads_left);
#pragma omp section
        __parallel_sort_qs_conquer(__begin + __split, __end,
                                   __comp, __num_threads - __num_threads_left);
      }
    }


  /** @brief Unbalanced quicksort main call.
   *  @param __begin Begin iterator of input sequence.
   *  @param __end End iterator of input sequence.
   *  @param __comp Comparator.
   *  @param __num_threads Number of threads that are allowed to work on
   *  this part; reduced to the sequence length if larger.
   */
  template<typename _RAIter, typename _Compare>
    void
    __parallel_sort_qs(_RAIter __begin, _RAIter __end,
                       _Compare __comp,
                       _ThreadIndex __num_threads)
    {
      typedef typename std::iterator_traits<_RAIter>::difference_type
        _DifferenceType;

      const _DifferenceType __n = __end - __begin;

      // Placed after the declaration of __n: the macro may expand to code
      // that evaluates its argument, which requires __n to be in scope.
      _GLIBCXX_CALL(__n)

      // At least one element per processor.
      if (__num_threads > __n)
        __num_threads = static_cast<_ThreadIndex>(__n);

      __parallel_sort_qs_conquer(__begin, __end, __comp, __num_threads);
    }

} //namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_QUICKSORT_H */