1*38fd1498Szrj // -*- C++ -*- 2*38fd1498Szrj 3*38fd1498Szrj // Copyright (C) 2007-2018 Free Software Foundation, Inc. 4*38fd1498Szrj // 5*38fd1498Szrj // This file is part of the GNU ISO C++ Library. This library is free 6*38fd1498Szrj // software; you can redistribute it and/or modify it under the terms 7*38fd1498Szrj // of the GNU General Public License as published by the Free Software 8*38fd1498Szrj // Foundation; either version 3, or (at your option) any later 9*38fd1498Szrj // version. 10*38fd1498Szrj 11*38fd1498Szrj // This library is distributed in the hope that it will be useful, but 12*38fd1498Szrj // WITHOUT ANY WARRANTY; without even the implied warranty of 13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*38fd1498Szrj // General Public License for more details. 15*38fd1498Szrj 16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional 17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version 18*38fd1498Szrj // 3.1, as published by the Free Software Foundation. 19*38fd1498Szrj 20*38fd1498Szrj // You should have received a copy of the GNU General Public License and 21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program; 22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*38fd1498Szrj // <http://www.gnu.org/licenses/>. 24*38fd1498Szrj 25*38fd1498Szrj /** @file parallel/multiway_mergesort.h 26*38fd1498Szrj * @brief Parallel multiway merge sort. 27*38fd1498Szrj * This file is a GNU parallel extension to the Standard C++ Library. 28*38fd1498Szrj */ 29*38fd1498Szrj 30*38fd1498Szrj // Written by Johannes Singler. 31*38fd1498Szrj 32*38fd1498Szrj #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 33*38fd1498Szrj #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 34*38fd1498Szrj 35*38fd1498Szrj #include <vector> 36*38fd1498Szrj 37*38fd1498Szrj #include <parallel/basic_iterator.h> 38*38fd1498Szrj #include <bits/stl_algo.h> 39*38fd1498Szrj #include <parallel/parallel.h> 40*38fd1498Szrj #include <parallel/multiway_merge.h> 41*38fd1498Szrj 42*38fd1498Szrj namespace __gnu_parallel 43*38fd1498Szrj { 44*38fd1498Szrj /** @brief Subsequence description. */ 45*38fd1498Szrj template<typename _DifferenceTp> 46*38fd1498Szrj struct _Piece 47*38fd1498Szrj { 48*38fd1498Szrj typedef _DifferenceTp _DifferenceType; 49*38fd1498Szrj 50*38fd1498Szrj /** @brief Begin of subsequence. */ 51*38fd1498Szrj _DifferenceType _M_begin; 52*38fd1498Szrj 53*38fd1498Szrj /** @brief End of subsequence. */ 54*38fd1498Szrj _DifferenceType _M_end; 55*38fd1498Szrj }; 56*38fd1498Szrj 57*38fd1498Szrj /** @brief Data accessed by all threads. 58*38fd1498Szrj * 59*38fd1498Szrj * PMWMS = parallel multiway mergesort */ 60*38fd1498Szrj template<typename _RAIter> 61*38fd1498Szrj struct _PMWMSSortingData 62*38fd1498Szrj { 63*38fd1498Szrj typedef std::iterator_traits<_RAIter> _TraitsType; 64*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 65*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 66*38fd1498Szrj 67*38fd1498Szrj /** @brief Number of threads involved. */ 68*38fd1498Szrj _ThreadIndex _M_num_threads; 69*38fd1498Szrj 70*38fd1498Szrj /** @brief Input __begin. */ 71*38fd1498Szrj _RAIter _M_source; 72*38fd1498Szrj 73*38fd1498Szrj /** @brief Start indices, per thread. */ 74*38fd1498Szrj _DifferenceType* _M_starts; 75*38fd1498Szrj 76*38fd1498Szrj /** @brief Storage in which to sort. */ 77*38fd1498Szrj _ValueType** _M_temporary; 78*38fd1498Szrj 79*38fd1498Szrj /** @brief Samples. */ 80*38fd1498Szrj _ValueType* _M_samples; 81*38fd1498Szrj 82*38fd1498Szrj /** @brief Offsets to add to the found positions. */ 83*38fd1498Szrj _DifferenceType* _M_offsets; 84*38fd1498Szrj 85*38fd1498Szrj /** @brief Pieces of data to merge @c [thread][__sequence] */ 86*38fd1498Szrj std::vector<_Piece<_DifferenceType> >* _M_pieces; 87*38fd1498Szrj }; 88*38fd1498Szrj 89*38fd1498Szrj /** 90*38fd1498Szrj * @brief Select _M_samples from a sequence. 91*38fd1498Szrj * @param __sd Pointer to algorithm data. _Result will be placed in 92*38fd1498Szrj * @c __sd->_M_samples. 93*38fd1498Szrj * @param __num_samples Number of _M_samples to select. 94*38fd1498Szrj */ 95*38fd1498Szrj template<typename _RAIter, typename _DifferenceTp> 96*38fd1498Szrj void __determine_samples(_PMWMSSortingData<_RAIter> * __sd,_DifferenceTp __num_samples)97*38fd1498Szrj __determine_samples(_PMWMSSortingData<_RAIter>* __sd, 98*38fd1498Szrj _DifferenceTp __num_samples) 99*38fd1498Szrj { 100*38fd1498Szrj typedef std::iterator_traits<_RAIter> _TraitsType; 101*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 102*38fd1498Szrj typedef _DifferenceTp _DifferenceType; 103*38fd1498Szrj 104*38fd1498Szrj _ThreadIndex __iam = omp_get_thread_num(); 105*38fd1498Szrj 106*38fd1498Szrj _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; 107*38fd1498Szrj 108*38fd1498Szrj __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 109*38fd1498Szrj __num_samples + 1, __es); 110*38fd1498Szrj 111*38fd1498Szrj for (_DifferenceType __i = 0; __i < __num_samples; ++__i) 112*38fd1498Szrj ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) 113*38fd1498Szrj _ValueType(__sd->_M_source[__sd->_M_starts[__iam] 114*38fd1498Szrj + __es[__i + 1]]); 115*38fd1498Szrj 116*38fd1498Szrj delete[] __es; 117*38fd1498Szrj } 118*38fd1498Szrj 119*38fd1498Szrj /** @brief Split consistently. */ 120*38fd1498Szrj template<bool __exact, typename _RAIter, 121*38fd1498Szrj typename _Compare, typename _SortingPlacesIterator> 122*38fd1498Szrj struct _SplitConsistently 123*38fd1498Szrj { }; 124*38fd1498Szrj 125*38fd1498Szrj /** @brief Split by exact splitting. */ 126*38fd1498Szrj template<typename _RAIter, typename _Compare, 127*38fd1498Szrj typename _SortingPlacesIterator> 128*38fd1498Szrj struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator> 129*38fd1498Szrj { 130*38fd1498Szrj void 131*38fd1498Szrj operator()(const _ThreadIndex __iam, 132*38fd1498Szrj _PMWMSSortingData<_RAIter>* __sd, 133*38fd1498Szrj _Compare& __comp, 134*38fd1498Szrj const typename 135*38fd1498Szrj std::iterator_traits<_RAIter>::difference_type 136*38fd1498Szrj __num_samples) const 137*38fd1498Szrj { 138*38fd1498Szrj # pragma omp barrier 139*38fd1498Szrj 140*38fd1498Szrj std::vector<std::pair<_SortingPlacesIterator, 141*38fd1498Szrj _SortingPlacesIterator> > 142*38fd1498Szrj __seqs(__sd->_M_num_threads); 143*38fd1498Szrj for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 144*38fd1498Szrj __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], 145*38fd1498Szrj __sd->_M_temporary[__s] 146*38fd1498Szrj + (__sd->_M_starts[__s + 1] 147*38fd1498Szrj - __sd->_M_starts[__s])); 148*38fd1498Szrj 149*38fd1498Szrj std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); 150*38fd1498Szrj 151*38fd1498Szrj // if not last thread 152*38fd1498Szrj if (__iam < __sd->_M_num_threads - 1) 153*38fd1498Szrj multiseq_partition(__seqs.begin(), __seqs.end(), 154*38fd1498Szrj __sd->_M_starts[__iam + 1], __offsets.begin(), 155*38fd1498Szrj __comp); 156*38fd1498Szrj 157*38fd1498Szrj for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 158*38fd1498Szrj { 159*38fd1498Szrj // for each sequence 160*38fd1498Szrj if (__iam < (__sd->_M_num_threads - 1)) 161*38fd1498Szrj __sd->_M_pieces[__iam][__seq]._M_end 162*38fd1498Szrj = __offsets[__seq] - __seqs[__seq].first; 163*38fd1498Szrj else 164*38fd1498Szrj // very end of this sequence 165*38fd1498Szrj __sd->_M_pieces[__iam][__seq]._M_end = 166*38fd1498Szrj __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; 167*38fd1498Szrj } 168*38fd1498Szrj 169*38fd1498Szrj # pragma omp barrier 170*38fd1498Szrj 171*38fd1498Szrj for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 172*38fd1498Szrj { 173*38fd1498Szrj // For each sequence. 174*38fd1498Szrj if (__iam > 0) 175*38fd1498Szrj __sd->_M_pieces[__iam][__seq]._M_begin = 176*38fd1498Szrj __sd->_M_pieces[__iam - 1][__seq]._M_end; 177*38fd1498Szrj else 178*38fd1498Szrj // Absolute beginning. 179*38fd1498Szrj __sd->_M_pieces[__iam][__seq]._M_begin = 0; 180*38fd1498Szrj } 181*38fd1498Szrj } 182*38fd1498Szrj }; 183*38fd1498Szrj 184*38fd1498Szrj /** @brief Split by sampling. */ 185*38fd1498Szrj template<typename _RAIter, typename _Compare, 186*38fd1498Szrj typename _SortingPlacesIterator> 187*38fd1498Szrj struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator> 188*38fd1498Szrj { 189*38fd1498Szrj void 190*38fd1498Szrj operator()(const _ThreadIndex __iam, 191*38fd1498Szrj _PMWMSSortingData<_RAIter>* __sd, 192*38fd1498Szrj _Compare& __comp, 193*38fd1498Szrj const typename 194*38fd1498Szrj std::iterator_traits<_RAIter>::difference_type 195*38fd1498Szrj __num_samples) const 196*38fd1498Szrj { 197*38fd1498Szrj typedef std::iterator_traits<_RAIter> _TraitsType; 198*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 199*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 200*38fd1498Szrj 201*38fd1498Szrj __determine_samples(__sd, __num_samples); 202*38fd1498Szrj 203*38fd1498Szrj # pragma omp barrier 204*38fd1498Szrj 205*38fd1498Szrj # pragma omp single 206*38fd1498Szrj __gnu_sequential::sort(__sd->_M_samples, 207*38fd1498Szrj __sd->_M_samples 208*38fd1498Szrj + (__num_samples * __sd->_M_num_threads), 209*38fd1498Szrj __comp); 210*38fd1498Szrj 211*38fd1498Szrj # pragma omp barrier 212*38fd1498Szrj 213*38fd1498Szrj for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 214*38fd1498Szrj { 215*38fd1498Szrj // For each sequence. 216*38fd1498Szrj if (__num_samples * __iam > 0) 217*38fd1498Szrj __sd->_M_pieces[__iam][__s]._M_begin = 218*38fd1498Szrj std::lower_bound(__sd->_M_temporary[__s], 219*38fd1498Szrj __sd->_M_temporary[__s] 220*38fd1498Szrj + (__sd->_M_starts[__s + 1] 221*38fd1498Szrj - __sd->_M_starts[__s]), 222*38fd1498Szrj __sd->_M_samples[__num_samples * __iam], 223*38fd1498Szrj __comp) 224*38fd1498Szrj - __sd->_M_temporary[__s]; 225*38fd1498Szrj else 226*38fd1498Szrj // Absolute beginning. 227*38fd1498Szrj __sd->_M_pieces[__iam][__s]._M_begin = 0; 228*38fd1498Szrj 229*38fd1498Szrj if ((__num_samples * (__iam + 1)) < 230*38fd1498Szrj (__num_samples * __sd->_M_num_threads)) 231*38fd1498Szrj __sd->_M_pieces[__iam][__s]._M_end = 232*38fd1498Szrj std::lower_bound(__sd->_M_temporary[__s], 233*38fd1498Szrj __sd->_M_temporary[__s] 234*38fd1498Szrj + (__sd->_M_starts[__s + 1] 235*38fd1498Szrj - __sd->_M_starts[__s]), 236*38fd1498Szrj __sd->_M_samples[__num_samples * (__iam + 1)], 237*38fd1498Szrj __comp) 238*38fd1498Szrj - __sd->_M_temporary[__s]; 239*38fd1498Szrj else 240*38fd1498Szrj // Absolute end. 241*38fd1498Szrj __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] 242*38fd1498Szrj - __sd->_M_starts[__s]); 243*38fd1498Szrj } 244*38fd1498Szrj } 245*38fd1498Szrj }; 246*38fd1498Szrj 247*38fd1498Szrj template<bool __stable, typename _RAIter, typename _Compare> 248*38fd1498Szrj struct __possibly_stable_sort 249*38fd1498Szrj { }; 250*38fd1498Szrj 251*38fd1498Szrj template<typename _RAIter, typename _Compare> 252*38fd1498Szrj struct __possibly_stable_sort<true, _RAIter, _Compare> 253*38fd1498Szrj { 254*38fd1498Szrj void operator()(const _RAIter& __begin, 255*38fd1498Szrj const _RAIter& __end, _Compare& __comp) const 256*38fd1498Szrj { __gnu_sequential::stable_sort(__begin, __end, __comp); } 257*38fd1498Szrj }; 258*38fd1498Szrj 259*38fd1498Szrj template<typename _RAIter, typename _Compare> 260*38fd1498Szrj struct __possibly_stable_sort<false, _RAIter, _Compare> 261*38fd1498Szrj { 262*38fd1498Szrj void operator()(const _RAIter __begin, 263*38fd1498Szrj const _RAIter __end, _Compare& __comp) const 264*38fd1498Szrj { __gnu_sequential::sort(__begin, __end, __comp); } 265*38fd1498Szrj }; 266*38fd1498Szrj 267*38fd1498Szrj template<bool __stable, typename Seq_RAIter, 268*38fd1498Szrj typename _RAIter, typename _Compare, 269*38fd1498Szrj typename DiffType> 270*38fd1498Szrj struct __possibly_stable_multiway_merge 271*38fd1498Szrj { }; 272*38fd1498Szrj 273*38fd1498Szrj template<typename Seq_RAIter, typename _RAIter, 274*38fd1498Szrj typename _Compare, typename _DiffType> 275*38fd1498Szrj struct __possibly_stable_multiway_merge<true, Seq_RAIter, 276*38fd1498Szrj _RAIter, _Compare, _DiffType> 277*38fd1498Szrj { 278*38fd1498Szrj void operator()(const Seq_RAIter& __seqs_begin, 279*38fd1498Szrj const Seq_RAIter& __seqs_end, 280*38fd1498Szrj const _RAIter& __target, 281*38fd1498Szrj _Compare& __comp, 282*38fd1498Szrj _DiffType __length_am) const 283*38fd1498Szrj { stable_multiway_merge(__seqs_begin, __seqs_end, __target, 284*38fd1498Szrj __length_am, __comp, sequential_tag()); } 285*38fd1498Szrj }; 286*38fd1498Szrj 287*38fd1498Szrj template<typename Seq_RAIter, typename _RAIter, 288*38fd1498Szrj typename _Compare, typename _DiffType> 289*38fd1498Szrj struct __possibly_stable_multiway_merge<false, Seq_RAIter, 290*38fd1498Szrj _RAIter, _Compare, _DiffType> 291*38fd1498Szrj { 292*38fd1498Szrj void operator()(const Seq_RAIter& __seqs_begin, 293*38fd1498Szrj const Seq_RAIter& __seqs_end, 294*38fd1498Szrj const _RAIter& __target, 295*38fd1498Szrj _Compare& __comp, 296*38fd1498Szrj _DiffType __length_am) const 297*38fd1498Szrj { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, 298*38fd1498Szrj __comp, sequential_tag()); } 299*38fd1498Szrj }; 300*38fd1498Szrj 301*38fd1498Szrj /** @brief PMWMS code executed by each thread. 302*38fd1498Szrj * @param __sd Pointer to algorithm data. 303*38fd1498Szrj * @param __comp Comparator. 304*38fd1498Szrj */ 305*38fd1498Szrj template<bool __stable, bool __exact, typename _RAIter, 306*38fd1498Szrj typename _Compare> 307*38fd1498Szrj void 308*38fd1498Szrj parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, 309*38fd1498Szrj _Compare& __comp) 310*38fd1498Szrj { 311*38fd1498Szrj typedef std::iterator_traits<_RAIter> _TraitsType; 312*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 313*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 314*38fd1498Szrj 315*38fd1498Szrj _ThreadIndex __iam = omp_get_thread_num(); 316*38fd1498Szrj 317*38fd1498Szrj // Length of this thread's chunk, before merging. 318*38fd1498Szrj _DifferenceType __length_local = 319*38fd1498Szrj __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; 320*38fd1498Szrj 321*38fd1498Szrj // Sort in temporary storage, leave space for sentinel. 322*38fd1498Szrj 323*38fd1498Szrj typedef _ValueType* _SortingPlacesIterator; 324*38fd1498Szrj 325*38fd1498Szrj __sd->_M_temporary[__iam] = 326*38fd1498Szrj static_cast<_ValueType*>(::operator new(sizeof(_ValueType) 327*38fd1498Szrj * (__length_local + 1))); 328*38fd1498Szrj 329*38fd1498Szrj // Copy there. 330*38fd1498Szrj std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], 331*38fd1498Szrj __sd->_M_source + __sd->_M_starts[__iam] 332*38fd1498Szrj + __length_local, 333*38fd1498Szrj __sd->_M_temporary[__iam]); 334*38fd1498Szrj 335*38fd1498Szrj __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() 336*38fd1498Szrj (__sd->_M_temporary[__iam], 337*38fd1498Szrj __sd->_M_temporary[__iam] + __length_local, 338*38fd1498Szrj __comp); 339*38fd1498Szrj 340*38fd1498Szrj // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], 341*38fd1498Szrj // __sd->_M_temporary[__iam] + __length_local. 342*38fd1498Szrj 343*38fd1498Szrj // No barrier here: Synchronization is done by the splitting routine. 344*38fd1498Szrj 345*38fd1498Szrj _DifferenceType __num_samples = 346*38fd1498Szrj _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; 347*38fd1498Szrj _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() 348*38fd1498Szrj (__iam, __sd, __comp, __num_samples); 349*38fd1498Szrj 350*38fd1498Szrj // Offset from __target __begin, __length after merging. 351*38fd1498Szrj _DifferenceType __offset = 0, __length_am = 0; 352*38fd1498Szrj for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 353*38fd1498Szrj { 354*38fd1498Szrj __length_am += (__sd->_M_pieces[__iam][__s]._M_end 355*38fd1498Szrj - __sd->_M_pieces[__iam][__s]._M_begin); 356*38fd1498Szrj __offset += __sd->_M_pieces[__iam][__s]._M_begin; 357*38fd1498Szrj } 358*38fd1498Szrj 359*38fd1498Szrj typedef std::vector< 360*38fd1498Szrj std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > 361*38fd1498Szrj _SeqVector; 362*38fd1498Szrj _SeqVector __seqs(__sd->_M_num_threads); 363*38fd1498Szrj 364*38fd1498Szrj for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 365*38fd1498Szrj { 366*38fd1498Szrj __seqs[__s] = 367*38fd1498Szrj std::make_pair(__sd->_M_temporary[__s] 368*38fd1498Szrj + __sd->_M_pieces[__iam][__s]._M_begin, 369*38fd1498Szrj __sd->_M_temporary[__s] 370*38fd1498Szrj + __sd->_M_pieces[__iam][__s]._M_end); 371*38fd1498Szrj } 372*38fd1498Szrj 373*38fd1498Szrj __possibly_stable_multiway_merge< 374*38fd1498Szrj __stable, typename _SeqVector::iterator, 375*38fd1498Szrj _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), 376*38fd1498Szrj __sd->_M_source + __offset, __comp, 377*38fd1498Szrj __length_am); 378*38fd1498Szrj 379*38fd1498Szrj # pragma omp barrier 380*38fd1498Szrj 381*38fd1498Szrj for (_DifferenceType __i = 0; __i < __length_local; ++__i) 382*38fd1498Szrj __sd->_M_temporary[__iam][__i].~_ValueType(); 383*38fd1498Szrj ::operator delete(__sd->_M_temporary[__iam]); 384*38fd1498Szrj } 385*38fd1498Szrj 386*38fd1498Szrj /** @brief PMWMS main call. 387*38fd1498Szrj * @param __begin Begin iterator of sequence. 388*38fd1498Szrj * @param __end End iterator of sequence. 389*38fd1498Szrj * @param __comp Comparator. 390*38fd1498Szrj * @param __num_threads Number of threads to use. 391*38fd1498Szrj */ 392*38fd1498Szrj template<bool __stable, bool __exact, typename _RAIter, 393*38fd1498Szrj typename _Compare> 394*38fd1498Szrj void 395*38fd1498Szrj parallel_sort_mwms(_RAIter __begin, _RAIter __end, 396*38fd1498Szrj _Compare __comp, 397*38fd1498Szrj _ThreadIndex __num_threads) 398*38fd1498Szrj { 399*38fd1498Szrj _GLIBCXX_CALL(__end - __begin) 400*38fd1498Szrj 401*38fd1498Szrj typedef std::iterator_traits<_RAIter> _TraitsType; 402*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 403*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 404*38fd1498Szrj 405*38fd1498Szrj _DifferenceType __n = __end - __begin; 406*38fd1498Szrj 407*38fd1498Szrj if (__n <= 1) 408*38fd1498Szrj return; 409*38fd1498Szrj 410*38fd1498Szrj // at least one element per thread 411*38fd1498Szrj if (__num_threads > __n) 412*38fd1498Szrj __num_threads = static_cast<_ThreadIndex>(__n); 413*38fd1498Szrj 414*38fd1498Szrj // shared variables 415*38fd1498Szrj _PMWMSSortingData<_RAIter> __sd; 416*38fd1498Szrj _DifferenceType* __starts; 417*38fd1498Szrj _DifferenceType __size; 418*38fd1498Szrj 419*38fd1498Szrj # pragma omp parallel num_threads(__num_threads) 420*38fd1498Szrj { 421*38fd1498Szrj __num_threads = omp_get_num_threads(); //no more threads than requested 422*38fd1498Szrj 423*38fd1498Szrj # pragma omp single 424*38fd1498Szrj { 425*38fd1498Szrj __sd._M_num_threads = __num_threads; 426*38fd1498Szrj __sd._M_source = __begin; 427*38fd1498Szrj 428*38fd1498Szrj __sd._M_temporary = new _ValueType*[__num_threads]; 429*38fd1498Szrj 430*38fd1498Szrj if (!__exact) 431*38fd1498Szrj { 432*38fd1498Szrj __size = 433*38fd1498Szrj (_Settings::get().sort_mwms_oversampling * __num_threads - 1) 434*38fd1498Szrj * __num_threads; 435*38fd1498Szrj __sd._M_samples = static_cast<_ValueType*> 436*38fd1498Szrj (::operator new(__size * sizeof(_ValueType))); 437*38fd1498Szrj } 438*38fd1498Szrj else 439*38fd1498Szrj __sd._M_samples = 0; 440*38fd1498Szrj 441*38fd1498Szrj __sd._M_offsets = new _DifferenceType[__num_threads - 1]; 442*38fd1498Szrj __sd._M_pieces 443*38fd1498Szrj = new std::vector<_Piece<_DifferenceType> >[__num_threads]; 444*38fd1498Szrj for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) 445*38fd1498Szrj __sd._M_pieces[__s].resize(__num_threads); 446*38fd1498Szrj __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; 447*38fd1498Szrj 448*38fd1498Szrj _DifferenceType __chunk_length = __n / __num_threads; 449*38fd1498Szrj _DifferenceType __split = __n % __num_threads; 450*38fd1498Szrj _DifferenceType __pos = 0; 451*38fd1498Szrj for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 452*38fd1498Szrj { 453*38fd1498Szrj __starts[__i] = __pos; 454*38fd1498Szrj __pos += ((__i < __split) 455*38fd1498Szrj ? (__chunk_length + 1) : __chunk_length); 456*38fd1498Szrj } 457*38fd1498Szrj __starts[__num_threads] = __pos; 458*38fd1498Szrj } //single 459*38fd1498Szrj 460*38fd1498Szrj // Now sort in parallel. 461*38fd1498Szrj parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); 462*38fd1498Szrj } //parallel 463*38fd1498Szrj 464*38fd1498Szrj delete[] __starts; 465*38fd1498Szrj delete[] __sd._M_temporary; 466*38fd1498Szrj 467*38fd1498Szrj if (!__exact) 468*38fd1498Szrj { 469*38fd1498Szrj for (_DifferenceType __i = 0; __i < __size; ++__i) 470*38fd1498Szrj __sd._M_samples[__i].~_ValueType(); 471*38fd1498Szrj ::operator delete(__sd._M_samples); 472*38fd1498Szrj } 473*38fd1498Szrj 474*38fd1498Szrj delete[] __sd._M_offsets; 475*38fd1498Szrj delete[] __sd._M_pieces; 476*38fd1498Szrj } 477*38fd1498Szrj 478*38fd1498Szrj } //namespace __gnu_parallel 479*38fd1498Szrj 480*38fd1498Szrj #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ 481