1*e4b17023SJohn Marino // -*- C++ -*- 2*e4b17023SJohn Marino 3*e4b17023SJohn Marino // Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4*e4b17023SJohn Marino // 5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free 6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the terms 7*e4b17023SJohn Marino // of the GNU General Public License as published by the Free Software 8*e4b17023SJohn Marino // Foundation; either version 3, or (at your option) any later 9*e4b17023SJohn Marino // version. 10*e4b17023SJohn Marino 11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, but 12*e4b17023SJohn Marino // WITHOUT ANY WARRANTY; without even the implied warranty of 13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*e4b17023SJohn Marino // General Public License for more details. 15*e4b17023SJohn Marino 16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional 17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version 18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and 21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program; 22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>. 24*e4b17023SJohn Marino 25*e4b17023SJohn Marino /** @file parallel/multiway_mergesort.h 26*e4b17023SJohn Marino * @brief Parallel multiway merge sort. 27*e4b17023SJohn Marino * This file is a GNU parallel extension to the Standard C++ Library. 28*e4b17023SJohn Marino */ 29*e4b17023SJohn Marino 30*e4b17023SJohn Marino // Written by Johannes Singler. 31*e4b17023SJohn Marino 32*e4b17023SJohn Marino #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 33*e4b17023SJohn Marino #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 34*e4b17023SJohn Marino 35*e4b17023SJohn Marino #include <vector> 36*e4b17023SJohn Marino 37*e4b17023SJohn Marino #include <parallel/basic_iterator.h> 38*e4b17023SJohn Marino #include <bits/stl_algo.h> 39*e4b17023SJohn Marino #include <parallel/parallel.h> 40*e4b17023SJohn Marino #include <parallel/multiway_merge.h> 41*e4b17023SJohn Marino 42*e4b17023SJohn Marino namespace __gnu_parallel 43*e4b17023SJohn Marino { 44*e4b17023SJohn Marino /** @brief Subsequence description. */ 45*e4b17023SJohn Marino template<typename _DifferenceTp> 46*e4b17023SJohn Marino struct _Piece 47*e4b17023SJohn Marino { 48*e4b17023SJohn Marino typedef _DifferenceTp _DifferenceType; 49*e4b17023SJohn Marino 50*e4b17023SJohn Marino /** @brief Begin of subsequence. */ 51*e4b17023SJohn Marino _DifferenceType _M_begin; 52*e4b17023SJohn Marino 53*e4b17023SJohn Marino /** @brief End of subsequence. */ 54*e4b17023SJohn Marino _DifferenceType _M_end; 55*e4b17023SJohn Marino }; 56*e4b17023SJohn Marino 57*e4b17023SJohn Marino /** @brief Data accessed by all threads. 58*e4b17023SJohn Marino * 59*e4b17023SJohn Marino * PMWMS = parallel multiway mergesort */ 60*e4b17023SJohn Marino template<typename _RAIter> 61*e4b17023SJohn Marino struct _PMWMSSortingData 62*e4b17023SJohn Marino { 63*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 64*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 65*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 66*e4b17023SJohn Marino 67*e4b17023SJohn Marino /** @brief Number of threads involved. */ 68*e4b17023SJohn Marino _ThreadIndex _M_num_threads; 69*e4b17023SJohn Marino 70*e4b17023SJohn Marino /** @brief Input __begin. */ 71*e4b17023SJohn Marino _RAIter _M_source; 72*e4b17023SJohn Marino 73*e4b17023SJohn Marino /** @brief Start indices, per thread. */ 74*e4b17023SJohn Marino _DifferenceType* _M_starts; 75*e4b17023SJohn Marino 76*e4b17023SJohn Marino /** @brief Storage in which to sort. */ 77*e4b17023SJohn Marino _ValueType** _M_temporary; 78*e4b17023SJohn Marino 79*e4b17023SJohn Marino /** @brief Samples. */ 80*e4b17023SJohn Marino _ValueType* _M_samples; 81*e4b17023SJohn Marino 82*e4b17023SJohn Marino /** @brief Offsets to add to the found positions. */ 83*e4b17023SJohn Marino _DifferenceType* _M_offsets; 84*e4b17023SJohn Marino 85*e4b17023SJohn Marino /** @brief Pieces of data to merge @c [thread][__sequence] */ 86*e4b17023SJohn Marino std::vector<_Piece<_DifferenceType> >* _M_pieces; 87*e4b17023SJohn Marino }; 88*e4b17023SJohn Marino 89*e4b17023SJohn Marino /** 90*e4b17023SJohn Marino * @brief Select _M_samples from a sequence. 91*e4b17023SJohn Marino * @param __sd Pointer to algorithm data. _Result will be placed in 92*e4b17023SJohn Marino * @c __sd->_M_samples. 93*e4b17023SJohn Marino * @param __num_samples Number of _M_samples to select. 94*e4b17023SJohn Marino */ 95*e4b17023SJohn Marino template<typename _RAIter, typename _DifferenceTp> 96*e4b17023SJohn Marino void __determine_samples(_PMWMSSortingData<_RAIter> * __sd,_DifferenceTp __num_samples)97*e4b17023SJohn Marino __determine_samples(_PMWMSSortingData<_RAIter>* __sd, 98*e4b17023SJohn Marino _DifferenceTp __num_samples) 99*e4b17023SJohn Marino { 100*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 101*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 102*e4b17023SJohn Marino typedef _DifferenceTp _DifferenceType; 103*e4b17023SJohn Marino 104*e4b17023SJohn Marino _ThreadIndex __iam = omp_get_thread_num(); 105*e4b17023SJohn Marino 106*e4b17023SJohn Marino _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; 107*e4b17023SJohn Marino 108*e4b17023SJohn Marino __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 109*e4b17023SJohn Marino __num_samples + 1, __es); 110*e4b17023SJohn Marino 111*e4b17023SJohn Marino for (_DifferenceType __i = 0; __i < __num_samples; ++__i) 112*e4b17023SJohn Marino ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) 113*e4b17023SJohn Marino _ValueType(__sd->_M_source[__sd->_M_starts[__iam] 114*e4b17023SJohn Marino + __es[__i + 1]]); 115*e4b17023SJohn Marino 116*e4b17023SJohn Marino delete[] __es; 117*e4b17023SJohn Marino } 118*e4b17023SJohn Marino 119*e4b17023SJohn Marino /** @brief Split consistently. */ 120*e4b17023SJohn Marino template<bool __exact, typename _RAIter, 121*e4b17023SJohn Marino typename _Compare, typename _SortingPlacesIterator> 122*e4b17023SJohn Marino struct _SplitConsistently 123*e4b17023SJohn Marino { }; 124*e4b17023SJohn Marino 125*e4b17023SJohn Marino /** @brief Split by exact splitting. */ 126*e4b17023SJohn Marino template<typename _RAIter, typename _Compare, 127*e4b17023SJohn Marino typename _SortingPlacesIterator> 128*e4b17023SJohn Marino struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator> 129*e4b17023SJohn Marino { 130*e4b17023SJohn Marino void 131*e4b17023SJohn Marino operator()(const _ThreadIndex __iam, 132*e4b17023SJohn Marino _PMWMSSortingData<_RAIter>* __sd, 133*e4b17023SJohn Marino _Compare& __comp, 134*e4b17023SJohn Marino const typename 135*e4b17023SJohn Marino std::iterator_traits<_RAIter>::difference_type 136*e4b17023SJohn Marino __num_samples) const 137*e4b17023SJohn Marino { 138*e4b17023SJohn Marino # pragma omp barrier 139*e4b17023SJohn Marino 140*e4b17023SJohn Marino std::vector<std::pair<_SortingPlacesIterator, 141*e4b17023SJohn Marino _SortingPlacesIterator> > 142*e4b17023SJohn Marino __seqs(__sd->_M_num_threads); 143*e4b17023SJohn Marino for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 144*e4b17023SJohn Marino __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], 145*e4b17023SJohn Marino __sd->_M_temporary[__s] 146*e4b17023SJohn Marino + (__sd->_M_starts[__s + 1] 147*e4b17023SJohn Marino - __sd->_M_starts[__s])); 148*e4b17023SJohn Marino 149*e4b17023SJohn Marino std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); 150*e4b17023SJohn Marino 151*e4b17023SJohn Marino // if not last thread 152*e4b17023SJohn Marino if (__iam < __sd->_M_num_threads - 1) 153*e4b17023SJohn Marino multiseq_partition(__seqs.begin(), __seqs.end(), 154*e4b17023SJohn Marino __sd->_M_starts[__iam + 1], __offsets.begin(), 155*e4b17023SJohn Marino __comp); 156*e4b17023SJohn Marino 157*e4b17023SJohn Marino for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 158*e4b17023SJohn Marino { 159*e4b17023SJohn Marino // for each sequence 160*e4b17023SJohn Marino if (__iam < (__sd->_M_num_threads - 1)) 161*e4b17023SJohn Marino __sd->_M_pieces[__iam][__seq]._M_end 162*e4b17023SJohn Marino = __offsets[__seq] - __seqs[__seq].first; 163*e4b17023SJohn Marino else 164*e4b17023SJohn Marino // very end of this sequence 165*e4b17023SJohn Marino __sd->_M_pieces[__iam][__seq]._M_end = 166*e4b17023SJohn Marino __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; 167*e4b17023SJohn Marino } 168*e4b17023SJohn Marino 169*e4b17023SJohn Marino # pragma omp barrier 170*e4b17023SJohn Marino 171*e4b17023SJohn Marino for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 172*e4b17023SJohn Marino { 173*e4b17023SJohn Marino // For each sequence. 174*e4b17023SJohn Marino if (__iam > 0) 175*e4b17023SJohn Marino __sd->_M_pieces[__iam][__seq]._M_begin = 176*e4b17023SJohn Marino __sd->_M_pieces[__iam - 1][__seq]._M_end; 177*e4b17023SJohn Marino else 178*e4b17023SJohn Marino // Absolute beginning. 179*e4b17023SJohn Marino __sd->_M_pieces[__iam][__seq]._M_begin = 0; 180*e4b17023SJohn Marino } 181*e4b17023SJohn Marino } 182*e4b17023SJohn Marino }; 183*e4b17023SJohn Marino 184*e4b17023SJohn Marino /** @brief Split by sampling. */ 185*e4b17023SJohn Marino template<typename _RAIter, typename _Compare, 186*e4b17023SJohn Marino typename _SortingPlacesIterator> 187*e4b17023SJohn Marino struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator> 188*e4b17023SJohn Marino { 189*e4b17023SJohn Marino void 190*e4b17023SJohn Marino operator()(const _ThreadIndex __iam, 191*e4b17023SJohn Marino _PMWMSSortingData<_RAIter>* __sd, 192*e4b17023SJohn Marino _Compare& __comp, 193*e4b17023SJohn Marino const typename 194*e4b17023SJohn Marino std::iterator_traits<_RAIter>::difference_type 195*e4b17023SJohn Marino __num_samples) const 196*e4b17023SJohn Marino { 197*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 198*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 199*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 200*e4b17023SJohn Marino 201*e4b17023SJohn Marino __determine_samples(__sd, __num_samples); 202*e4b17023SJohn Marino 203*e4b17023SJohn Marino # pragma omp barrier 204*e4b17023SJohn Marino 205*e4b17023SJohn Marino # pragma omp single 206*e4b17023SJohn Marino __gnu_sequential::sort(__sd->_M_samples, 207*e4b17023SJohn Marino __sd->_M_samples 208*e4b17023SJohn Marino + (__num_samples * __sd->_M_num_threads), 209*e4b17023SJohn Marino __comp); 210*e4b17023SJohn Marino 211*e4b17023SJohn Marino # pragma omp barrier 212*e4b17023SJohn Marino 213*e4b17023SJohn Marino for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 214*e4b17023SJohn Marino { 215*e4b17023SJohn Marino // For each sequence. 216*e4b17023SJohn Marino if (__num_samples * __iam > 0) 217*e4b17023SJohn Marino __sd->_M_pieces[__iam][__s]._M_begin = 218*e4b17023SJohn Marino std::lower_bound(__sd->_M_temporary[__s], 219*e4b17023SJohn Marino __sd->_M_temporary[__s] 220*e4b17023SJohn Marino + (__sd->_M_starts[__s + 1] 221*e4b17023SJohn Marino - __sd->_M_starts[__s]), 222*e4b17023SJohn Marino __sd->_M_samples[__num_samples * __iam], 223*e4b17023SJohn Marino __comp) 224*e4b17023SJohn Marino - __sd->_M_temporary[__s]; 225*e4b17023SJohn Marino else 226*e4b17023SJohn Marino // Absolute beginning. 227*e4b17023SJohn Marino __sd->_M_pieces[__iam][__s]._M_begin = 0; 228*e4b17023SJohn Marino 229*e4b17023SJohn Marino if ((__num_samples * (__iam + 1)) < 230*e4b17023SJohn Marino (__num_samples * __sd->_M_num_threads)) 231*e4b17023SJohn Marino __sd->_M_pieces[__iam][__s]._M_end = 232*e4b17023SJohn Marino std::lower_bound(__sd->_M_temporary[__s], 233*e4b17023SJohn Marino __sd->_M_temporary[__s] 234*e4b17023SJohn Marino + (__sd->_M_starts[__s + 1] 235*e4b17023SJohn Marino - __sd->_M_starts[__s]), 236*e4b17023SJohn Marino __sd->_M_samples[__num_samples * (__iam + 1)], 237*e4b17023SJohn Marino __comp) 238*e4b17023SJohn Marino - __sd->_M_temporary[__s]; 239*e4b17023SJohn Marino else 240*e4b17023SJohn Marino // Absolute end. 241*e4b17023SJohn Marino __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] 242*e4b17023SJohn Marino - __sd->_M_starts[__s]); 243*e4b17023SJohn Marino } 244*e4b17023SJohn Marino } 245*e4b17023SJohn Marino }; 246*e4b17023SJohn Marino 247*e4b17023SJohn Marino template<bool __stable, typename _RAIter, typename _Compare> 248*e4b17023SJohn Marino struct __possibly_stable_sort 249*e4b17023SJohn Marino { }; 250*e4b17023SJohn Marino 251*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 252*e4b17023SJohn Marino struct __possibly_stable_sort<true, _RAIter, _Compare> 253*e4b17023SJohn Marino { 254*e4b17023SJohn Marino void operator()(const _RAIter& __begin, 255*e4b17023SJohn Marino const _RAIter& __end, _Compare& __comp) const 256*e4b17023SJohn Marino { __gnu_sequential::stable_sort(__begin, __end, __comp); } 257*e4b17023SJohn Marino }; 258*e4b17023SJohn Marino 259*e4b17023SJohn Marino template<typename _RAIter, typename _Compare> 260*e4b17023SJohn Marino struct __possibly_stable_sort<false, _RAIter, _Compare> 261*e4b17023SJohn Marino { 262*e4b17023SJohn Marino void operator()(const _RAIter __begin, 263*e4b17023SJohn Marino const _RAIter __end, _Compare& __comp) const 264*e4b17023SJohn Marino { __gnu_sequential::sort(__begin, __end, __comp); } 265*e4b17023SJohn Marino }; 266*e4b17023SJohn Marino 267*e4b17023SJohn Marino template<bool __stable, typename Seq_RAIter, 268*e4b17023SJohn Marino typename _RAIter, typename _Compare, 269*e4b17023SJohn Marino typename DiffType> 270*e4b17023SJohn Marino struct __possibly_stable_multiway_merge 271*e4b17023SJohn Marino { }; 272*e4b17023SJohn Marino 273*e4b17023SJohn Marino template<typename Seq_RAIter, typename _RAIter, 274*e4b17023SJohn Marino typename _Compare, typename _DiffType> 275*e4b17023SJohn Marino struct __possibly_stable_multiway_merge<true, Seq_RAIter, 276*e4b17023SJohn Marino _RAIter, _Compare, _DiffType> 277*e4b17023SJohn Marino { 278*e4b17023SJohn Marino void operator()(const Seq_RAIter& __seqs_begin, 279*e4b17023SJohn Marino const Seq_RAIter& __seqs_end, 280*e4b17023SJohn Marino const _RAIter& __target, 281*e4b17023SJohn Marino _Compare& __comp, 282*e4b17023SJohn Marino _DiffType __length_am) const 283*e4b17023SJohn Marino { stable_multiway_merge(__seqs_begin, __seqs_end, __target, 284*e4b17023SJohn Marino __length_am, __comp, sequential_tag()); } 285*e4b17023SJohn Marino }; 286*e4b17023SJohn Marino 287*e4b17023SJohn Marino template<typename Seq_RAIter, typename _RAIter, 288*e4b17023SJohn Marino typename _Compare, typename _DiffType> 289*e4b17023SJohn Marino struct __possibly_stable_multiway_merge<false, Seq_RAIter, 290*e4b17023SJohn Marino _RAIter, _Compare, _DiffType> 291*e4b17023SJohn Marino { 292*e4b17023SJohn Marino void operator()(const Seq_RAIter& __seqs_begin, 293*e4b17023SJohn Marino const Seq_RAIter& __seqs_end, 294*e4b17023SJohn Marino const _RAIter& __target, 295*e4b17023SJohn Marino _Compare& __comp, 296*e4b17023SJohn Marino _DiffType __length_am) const 297*e4b17023SJohn Marino { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, 298*e4b17023SJohn Marino __comp, sequential_tag()); } 299*e4b17023SJohn Marino }; 300*e4b17023SJohn Marino 301*e4b17023SJohn Marino /** @brief PMWMS code executed by each thread. 302*e4b17023SJohn Marino * @param __sd Pointer to algorithm data. 303*e4b17023SJohn Marino * @param __comp Comparator. 304*e4b17023SJohn Marino */ 305*e4b17023SJohn Marino template<bool __stable, bool __exact, typename _RAIter, 306*e4b17023SJohn Marino typename _Compare> 307*e4b17023SJohn Marino void 308*e4b17023SJohn Marino parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, 309*e4b17023SJohn Marino _Compare& __comp) 310*e4b17023SJohn Marino { 311*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 312*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 313*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 314*e4b17023SJohn Marino 315*e4b17023SJohn Marino _ThreadIndex __iam = omp_get_thread_num(); 316*e4b17023SJohn Marino 317*e4b17023SJohn Marino // Length of this thread's chunk, before merging. 318*e4b17023SJohn Marino _DifferenceType __length_local = 319*e4b17023SJohn Marino __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; 320*e4b17023SJohn Marino 321*e4b17023SJohn Marino // Sort in temporary storage, leave space for sentinel. 322*e4b17023SJohn Marino 323*e4b17023SJohn Marino typedef _ValueType* _SortingPlacesIterator; 324*e4b17023SJohn Marino 325*e4b17023SJohn Marino __sd->_M_temporary[__iam] = 326*e4b17023SJohn Marino static_cast<_ValueType*>(::operator new(sizeof(_ValueType) 327*e4b17023SJohn Marino * (__length_local + 1))); 328*e4b17023SJohn Marino 329*e4b17023SJohn Marino // Copy there. 330*e4b17023SJohn Marino std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], 331*e4b17023SJohn Marino __sd->_M_source + __sd->_M_starts[__iam] 332*e4b17023SJohn Marino + __length_local, 333*e4b17023SJohn Marino __sd->_M_temporary[__iam]); 334*e4b17023SJohn Marino 335*e4b17023SJohn Marino __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() 336*e4b17023SJohn Marino (__sd->_M_temporary[__iam], 337*e4b17023SJohn Marino __sd->_M_temporary[__iam] + __length_local, 338*e4b17023SJohn Marino __comp); 339*e4b17023SJohn Marino 340*e4b17023SJohn Marino // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], 341*e4b17023SJohn Marino // __sd->_M_temporary[__iam] + __length_local. 342*e4b17023SJohn Marino 343*e4b17023SJohn Marino // No barrier here: Synchronization is done by the splitting routine. 344*e4b17023SJohn Marino 345*e4b17023SJohn Marino _DifferenceType __num_samples = 346*e4b17023SJohn Marino _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; 347*e4b17023SJohn Marino _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() 348*e4b17023SJohn Marino (__iam, __sd, __comp, __num_samples); 349*e4b17023SJohn Marino 350*e4b17023SJohn Marino // Offset from __target __begin, __length after merging. 351*e4b17023SJohn Marino _DifferenceType __offset = 0, __length_am = 0; 352*e4b17023SJohn Marino for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 353*e4b17023SJohn Marino { 354*e4b17023SJohn Marino __length_am += (__sd->_M_pieces[__iam][__s]._M_end 355*e4b17023SJohn Marino - __sd->_M_pieces[__iam][__s]._M_begin); 356*e4b17023SJohn Marino __offset += __sd->_M_pieces[__iam][__s]._M_begin; 357*e4b17023SJohn Marino } 358*e4b17023SJohn Marino 359*e4b17023SJohn Marino typedef std::vector< 360*e4b17023SJohn Marino std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > 361*e4b17023SJohn Marino _SeqVector; 362*e4b17023SJohn Marino _SeqVector __seqs(__sd->_M_num_threads); 363*e4b17023SJohn Marino 364*e4b17023SJohn Marino for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 365*e4b17023SJohn Marino { 366*e4b17023SJohn Marino __seqs[__s] = 367*e4b17023SJohn Marino std::make_pair(__sd->_M_temporary[__s] 368*e4b17023SJohn Marino + __sd->_M_pieces[__iam][__s]._M_begin, 369*e4b17023SJohn Marino __sd->_M_temporary[__s] 370*e4b17023SJohn Marino + __sd->_M_pieces[__iam][__s]._M_end); 371*e4b17023SJohn Marino } 372*e4b17023SJohn Marino 373*e4b17023SJohn Marino __possibly_stable_multiway_merge< 374*e4b17023SJohn Marino __stable, typename _SeqVector::iterator, 375*e4b17023SJohn Marino _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), 376*e4b17023SJohn Marino __sd->_M_source + __offset, __comp, 377*e4b17023SJohn Marino __length_am); 378*e4b17023SJohn Marino 379*e4b17023SJohn Marino # pragma omp barrier 380*e4b17023SJohn Marino 381*e4b17023SJohn Marino for (_DifferenceType __i = 0; __i < __length_local; ++__i) 382*e4b17023SJohn Marino __sd->_M_temporary[__iam][__i].~_ValueType(); 383*e4b17023SJohn Marino ::operator delete(__sd->_M_temporary[__iam]); 384*e4b17023SJohn Marino } 385*e4b17023SJohn Marino 386*e4b17023SJohn Marino /** @brief PMWMS main call. 387*e4b17023SJohn Marino * @param __begin Begin iterator of sequence. 388*e4b17023SJohn Marino * @param __end End iterator of sequence. 389*e4b17023SJohn Marino * @param __comp Comparator. 390*e4b17023SJohn Marino * @param __num_threads Number of threads to use. 391*e4b17023SJohn Marino */ 392*e4b17023SJohn Marino template<bool __stable, bool __exact, typename _RAIter, 393*e4b17023SJohn Marino typename _Compare> 394*e4b17023SJohn Marino void 395*e4b17023SJohn Marino parallel_sort_mwms(_RAIter __begin, _RAIter __end, 396*e4b17023SJohn Marino _Compare __comp, 397*e4b17023SJohn Marino _ThreadIndex __num_threads) 398*e4b17023SJohn Marino { 399*e4b17023SJohn Marino _GLIBCXX_CALL(__end - __begin) 400*e4b17023SJohn Marino 401*e4b17023SJohn Marino typedef std::iterator_traits<_RAIter> _TraitsType; 402*e4b17023SJohn Marino typedef typename _TraitsType::value_type _ValueType; 403*e4b17023SJohn Marino typedef typename _TraitsType::difference_type _DifferenceType; 404*e4b17023SJohn Marino 405*e4b17023SJohn Marino _DifferenceType __n = __end - __begin; 406*e4b17023SJohn Marino 407*e4b17023SJohn Marino if (__n <= 1) 408*e4b17023SJohn Marino return; 409*e4b17023SJohn Marino 410*e4b17023SJohn Marino // at least one element per thread 411*e4b17023SJohn Marino if (__num_threads > __n) 412*e4b17023SJohn Marino __num_threads = static_cast<_ThreadIndex>(__n); 413*e4b17023SJohn Marino 414*e4b17023SJohn Marino // shared variables 415*e4b17023SJohn Marino _PMWMSSortingData<_RAIter> __sd; 416*e4b17023SJohn Marino _DifferenceType* __starts; 417*e4b17023SJohn Marino _DifferenceType __size; 418*e4b17023SJohn Marino 419*e4b17023SJohn Marino # pragma omp parallel num_threads(__num_threads) 420*e4b17023SJohn Marino { 421*e4b17023SJohn Marino __num_threads = omp_get_num_threads(); //no more threads than requested 422*e4b17023SJohn Marino 423*e4b17023SJohn Marino # pragma omp single 424*e4b17023SJohn Marino { 425*e4b17023SJohn Marino __sd._M_num_threads = __num_threads; 426*e4b17023SJohn Marino __sd._M_source = __begin; 427*e4b17023SJohn Marino 428*e4b17023SJohn Marino __sd._M_temporary = new _ValueType*[__num_threads]; 429*e4b17023SJohn Marino 430*e4b17023SJohn Marino if (!__exact) 431*e4b17023SJohn Marino { 432*e4b17023SJohn Marino __size = 433*e4b17023SJohn Marino (_Settings::get().sort_mwms_oversampling * __num_threads - 1) 434*e4b17023SJohn Marino * __num_threads; 435*e4b17023SJohn Marino __sd._M_samples = static_cast<_ValueType*> 436*e4b17023SJohn Marino (::operator new(__size * sizeof(_ValueType))); 437*e4b17023SJohn Marino } 438*e4b17023SJohn Marino else 439*e4b17023SJohn Marino __sd._M_samples = 0; 440*e4b17023SJohn Marino 441*e4b17023SJohn Marino __sd._M_offsets = new _DifferenceType[__num_threads - 1]; 442*e4b17023SJohn Marino __sd._M_pieces 443*e4b17023SJohn Marino = new std::vector<_Piece<_DifferenceType> >[__num_threads]; 444*e4b17023SJohn Marino for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) 445*e4b17023SJohn Marino __sd._M_pieces[__s].resize(__num_threads); 446*e4b17023SJohn Marino __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; 447*e4b17023SJohn Marino 448*e4b17023SJohn Marino _DifferenceType __chunk_length = __n / __num_threads; 449*e4b17023SJohn Marino _DifferenceType __split = __n % __num_threads; 450*e4b17023SJohn Marino _DifferenceType __pos = 0; 451*e4b17023SJohn Marino for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 452*e4b17023SJohn Marino { 453*e4b17023SJohn Marino __starts[__i] = __pos; 454*e4b17023SJohn Marino __pos += ((__i < __split) 455*e4b17023SJohn Marino ? (__chunk_length + 1) : __chunk_length); 456*e4b17023SJohn Marino } 457*e4b17023SJohn Marino __starts[__num_threads] = __pos; 458*e4b17023SJohn Marino } //single 459*e4b17023SJohn Marino 460*e4b17023SJohn Marino // Now sort in parallel. 461*e4b17023SJohn Marino parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); 462*e4b17023SJohn Marino } //parallel 463*e4b17023SJohn Marino 464*e4b17023SJohn Marino delete[] __starts; 465*e4b17023SJohn Marino delete[] __sd._M_temporary; 466*e4b17023SJohn Marino 467*e4b17023SJohn Marino if (!__exact) 468*e4b17023SJohn Marino { 469*e4b17023SJohn Marino for (_DifferenceType __i = 0; __i < __size; ++__i) 470*e4b17023SJohn Marino __sd._M_samples[__i].~_ValueType(); 471*e4b17023SJohn Marino ::operator delete(__sd._M_samples); 472*e4b17023SJohn Marino } 473*e4b17023SJohn Marino 474*e4b17023SJohn Marino delete[] __sd._M_offsets; 475*e4b17023SJohn Marino delete[] __sd._M_pieces; 476*e4b17023SJohn Marino } 477*e4b17023SJohn Marino 478*e4b17023SJohn Marino } //namespace __gnu_parallel 479*e4b17023SJohn Marino 480*e4b17023SJohn Marino #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ 481