xref: /llvm-project/libcxx/include/__algorithm/sort.h (revision 4f15267d3dd797a15901fe9352f0d5fa121b9095)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug>
25 #include <__debug_utils/randomize_range.h>
26 #include <__functional/operations.h>
27 #include <__functional/ranges_operations.h>
28 #include <__iterator/iterator_traits.h>
29 #include <__memory/destruct_n.h>
30 #include <__memory/unique_ptr.h>
31 #include <__type_traits/conditional.h>
32 #include <__type_traits/disjunction.h>
33 #include <__type_traits/is_arithmetic.h>
34 #include <__utility/move.h>
35 #include <__utility/pair.h>
36 #include <climits>
37 #include <cstdint>
38 
39 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
40 #  pragma GCC system_header
41 #endif
42 
43 _LIBCPP_BEGIN_NAMESPACE_STD
44 
45 // stable, 2-3 compares, 0-2 swaps
46 
47 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
48 _LIBCPP_HIDE_FROM_ABI
49 _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
50                                                _Compare __c) {
51   using _Ops = _IterOps<_AlgPolicy>;
52 
53   unsigned __r = 0;
54   if (!__c(*__y, *__x))   // if x <= y
55   {
56     if (!__c(*__z, *__y)) // if y <= z
57       return __r;         // x <= y && y <= z
58                           // x <= y && y > z
59     _Ops::iter_swap(__y, __z);     // x <= z && y < z
60     __r = 1;
61     if (__c(*__y, *__x))  // if x > y
62     {
63       _Ops::iter_swap(__x, __y);   // x < y && y <= z
64       __r = 2;
65     }
66     return __r;           // x <= y && y < z
67   }
68   if (__c(*__z, *__y))    // x > y, if y > z
69   {
70     _Ops::iter_swap(__x, __z);     // x < y && y < z
71     __r = 1;
72     return __r;
73   }
74   _Ops::iter_swap(__x, __y);       // x > y && y <= z
75   __r = 1;                // x < y && x <= z
76   if (__c(*__z, *__y))    // if y > z
77   {
78     _Ops::iter_swap(__y, __z);     // x <= y && y < z
79     __r = 2;
80   }
81   return __r;
82 }                         // x <= y && y <= z
83 
84 // stable, 3-6 compares, 0-5 swaps
85 
86 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
87 _LIBCPP_HIDE_FROM_ABI
88 unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
89                  _Compare __c) {
90   using _Ops   = _IterOps<_AlgPolicy>;
91   unsigned __r = std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
92   if (__c(*__x4, *__x3)) {
93     _Ops::iter_swap(__x3, __x4);
94     ++__r;
95     if (__c(*__x3, *__x2)) {
96       _Ops::iter_swap(__x2, __x3);
97       ++__r;
98       if (__c(*__x2, *__x1)) {
99         _Ops::iter_swap(__x1, __x2);
100         ++__r;
101       }
102     }
103   }
104   return __r;
105 }
106 
107 // stable, 4-10 compares, 0-9 swaps
108 
109 template <class _AlgPolicy, class _Comp, class _ForwardIterator>
110 _LIBCPP_HIDE_FROM_ABI unsigned __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
111                                        _ForwardIterator __x4, _ForwardIterator __x5, _Comp __comp) {
112   using _Ops = _IterOps<_AlgPolicy>;
113 
114   unsigned __r = std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
115   if (__comp(*__x5, *__x4)) {
116     _Ops::iter_swap(__x4, __x5);
117     ++__r;
118     if (__comp(*__x4, *__x3)) {
119       _Ops::iter_swap(__x3, __x4);
120       ++__r;
121       if (__comp(*__x3, *__x2)) {
122         _Ops::iter_swap(__x2, __x3);
123         ++__r;
124         if (__comp(*__x2, *__x1)) {
125           _Ops::iter_swap(__x1, __x2);
126           ++__r;
127         }
128       }
129     }
130   }
131   return __r;
132 }
133 
134 // The comparator being simple is a prerequisite for using the branchless optimization.
135 template <class _Tp>
136 struct __is_simple_comparator : false_type {};
137 template <class _Tp>
138 struct __is_simple_comparator<__less<_Tp>&> : true_type {};
139 template <class _Tp>
140 struct __is_simple_comparator<less<_Tp>&> : true_type {};
141 template <class _Tp>
142 struct __is_simple_comparator<greater<_Tp>&> : true_type {};
143 #if _LIBCPP_STD_VER >= 20
144 template <>
145 struct __is_simple_comparator<ranges::less&> : true_type {};
146 template <>
147 struct __is_simple_comparator<ranges::greater&> : true_type {};
148 #endif
149 
150 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
151 using __use_branchless_sort =
152     integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
153                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
154 
155 namespace __detail {
156 
157 // Size in bits for the bitset in use.
158 enum { __block_size = sizeof(uint64_t) * 8 };
159 
160 } // namespace __detail
161 
162 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
163 template <class _Compare, class _RandomAccessIterator>
164 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
165   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
166   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
167   bool __r = __c(*__x, *__y);
168   value_type __tmp = __r ? *__x : *__y;
169   *__y = __r ? *__y : *__x;
170   *__x = __tmp;
171 }
172 
173 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
174 // under the assumption that *__y and *__z are already ordered.
175 template <class _Compare, class _RandomAccessIterator>
176 inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y,
177                                                           _RandomAccessIterator __z, _Compare __c) {
178   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
179   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
180   bool __r = __c(*__z, *__x);
181   value_type __tmp = __r ? *__z : *__x;
182   *__z = __r ? *__x : *__z;
183   __r = __c(__tmp, *__y);
184   *__x = __r ? *__x : *__y;
185   *__y = __r ? *__y : __tmp;
186 }
187 
188 template <class, class _Compare, class _RandomAccessIterator>
189 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
190 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
191                          _Compare __c) {
192   std::__cond_swap<_Compare>(__x2, __x3, __c);
193   std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
194 }
195 
196 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
197 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
198 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
199                          _Compare __c) {
200   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
201 }
202 
203 template <class, class _Compare, class _RandomAccessIterator>
204 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
205 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
206                          _RandomAccessIterator __x4, _Compare __c) {
207   std::__cond_swap<_Compare>(__x1, __x3, __c);
208   std::__cond_swap<_Compare>(__x2, __x4, __c);
209   std::__cond_swap<_Compare>(__x1, __x2, __c);
210   std::__cond_swap<_Compare>(__x3, __x4, __c);
211   std::__cond_swap<_Compare>(__x2, __x3, __c);
212 }
213 
214 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
215 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
216 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
217                          _RandomAccessIterator __x4, _Compare __c) {
218   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
219 }
220 
221 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
222 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
223 __sort5_maybe_branchless(
224     _RandomAccessIterator __x1,
225     _RandomAccessIterator __x2,
226     _RandomAccessIterator __x3,
227     _RandomAccessIterator __x4,
228     _RandomAccessIterator __x5,
229     _Compare __c) {
230   std::__cond_swap<_Compare>(__x1, __x2, __c);
231   std::__cond_swap<_Compare>(__x4, __x5, __c);
232   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
233   std::__cond_swap<_Compare>(__x2, __x5, __c);
234   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
235   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
236 }
237 
238 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
239 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
240 __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
241                          _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
242   std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
243       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
244 }
245 
246 // Assumes size > 0
247 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
248 _LIBCPP_HIDE_FROM_ABI
249 _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
250                                                     _Compare __comp) {
251   _BidirectionalIterator __lm1 = __last;
252   for (--__lm1; __first != __lm1; ++__first) {
253     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
254     if (__i != __first)
255       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
256   }
257 }
258 
259 // Sort the iterator range [__first, __last) using the comparator __comp using
260 // the insertion sort algorithm.
261 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
262 _LIBCPP_HIDE_FROM_ABI
263 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
264   using _Ops = _IterOps<_AlgPolicy>;
265 
266   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
267   if (__first == __last)
268     return;
269   _BidirectionalIterator __i = __first;
270   for (++__i; __i != __last; ++__i) {
271     _BidirectionalIterator __j = __i;
272     --__j;
273     if (__comp(*__i, *__j)) {
274       value_type __t(_Ops::__iter_move(__i));
275       _BidirectionalIterator __k = __j;
276       __j                        = __i;
277       do {
278         *__j = _Ops::__iter_move(__k);
279         __j  = __k;
280       } while (__j != __first && __comp(__t, *--__k));
281       *__j = std::move(__t);
282     }
283   }
284 }
285 
286 // Sort the iterator range [__first, __last) using the comparator __comp using
287 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
288 // The implementation below has not bounds check (unguarded) for the inner loop.
289 // Assumes that there is an element in the position (__first - 1) and that each
290 // element in the input range is greater or equal to the element at __first - 1.
291 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
292 _LIBCPP_HIDE_FROM_ABI void
293 __insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
294   using _Ops = _IterOps<_AlgPolicy>;
295   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
296   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
297   if (__first == __last)
298     return;
299   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
300     _RandomAccessIterator __j = __i - difference_type(1);
301     if (__comp(*__i, *__j)) {
302       value_type __t(_Ops::__iter_move(__i));
303       _RandomAccessIterator __k = __j;
304       __j = __i;
305       do {
306         *__j = _Ops::__iter_move(__k);
307         __j = __k;
308       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
309       *__j = std::move(__t);
310     }
311   }
312 }
313 
314 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
315 _LIBCPP_HIDE_FROM_ABI bool __insertion_sort_incomplete(
316     _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
317   using _Ops = _IterOps<_AlgPolicy>;
318 
319   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
320   switch (__last - __first) {
321   case 0:
322   case 1:
323     return true;
324   case 2:
325     if (__comp(*--__last, *__first))
326       _Ops::iter_swap(__first, __last);
327     return true;
328   case 3:
329     std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
330     return true;
331   case 4:
332     std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
333         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
334     return true;
335   case 5:
336     std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
337         __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
338         --__last, __comp);
339     return true;
340   }
341   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
342   _RandomAccessIterator __j = __first + difference_type(2);
343   std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
344   const unsigned __limit = 8;
345   unsigned __count = 0;
346   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
347     if (__comp(*__i, *__j)) {
348       value_type __t(_Ops::__iter_move(__i));
349       _RandomAccessIterator __k = __j;
350       __j = __i;
351       do {
352         *__j = _Ops::__iter_move(__k);
353         __j = __k;
354       } while (__j != __first && __comp(__t, *--__k));
355       *__j = std::move(__t);
356       if (++__count == __limit)
357         return ++__i == __last;
358     }
359     __j = __i;
360   }
361   return true;
362 }
363 
364 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
365 _LIBCPP_HIDE_FROM_ABI
366 void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1,
367                            typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) {
368   using _Ops = _IterOps<_AlgPolicy>;
369 
370   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
371   if (__first1 != __last1) {
372     __destruct_n __d(0);
373     unique_ptr<value_type, __destruct_n&> __h(__first2, __d);
374     value_type* __last2 = __first2;
375     ::new ((void*)__last2) value_type(_Ops::__iter_move(__first1));
376     __d.template __incr<value_type>();
377     for (++__last2; ++__first1 != __last1; ++__last2) {
378       value_type* __j2 = __last2;
379       value_type* __i2 = __j2;
380       if (__comp(*__first1, *--__i2)) {
381         ::new ((void*)__j2) value_type(std::move(*__i2));
382         __d.template __incr<value_type>();
383         for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2)
384           *__j2 = std::move(*__i2);
385         *__j2 = _Ops::__iter_move(__first1);
386       } else {
387         ::new ((void*)__j2) value_type(_Ops::__iter_move(__first1));
388         __d.template __incr<value_type>();
389       }
390     }
391     __h.release();
392   }
393 }
394 
395 template <class _AlgPolicy, class _RandomAccessIterator>
396 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
397     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
398   using _Ops = _IterOps<_AlgPolicy>;
399   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
400   // Swap one pair on each iteration as long as both bitsets have at least one
401   // element for swapping.
402   while (__left_bitset != 0 && __right_bitset != 0) {
403     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
404     __left_bitset              = __libcpp_blsr(__left_bitset);
405     difference_type __tz_right = __libcpp_ctz(__right_bitset);
406     __right_bitset             = __libcpp_blsr(__right_bitset);
407     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
408   }
409 }
410 
411 template <class _Compare,
412           class _RandomAccessIterator,
413           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
414 inline _LIBCPP_HIDE_FROM_ABI void
415 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
416   // Possible vectorization. With a proper "-march" flag, the following loop
417   // will be compiled into a set of SIMD instructions.
418   _RandomAccessIterator __iter = __first;
419   for (int __j = 0; __j < __detail::__block_size;) {
420     bool __comp_result = !__comp(*__iter, __pivot);
421     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
422     __j++;
423     ++__iter;
424   }
425 }
426 
427 template <class _Compare,
428           class _RandomAccessIterator,
429           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
430 inline _LIBCPP_HIDE_FROM_ABI void
431 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
432   // Possible vectorization. With a proper "-march" flag, the following loop
433   // will be compiled into a set of SIMD instructions.
434   _RandomAccessIterator __iter = __lm1;
435   for (int __j = 0; __j < __detail::__block_size;) {
436     bool __comp_result = __comp(*__iter, __pivot);
437     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
438     __j++;
439     --__iter;
440   }
441 }
442 
443 template <class _AlgPolicy,
444           class _Compare,
445           class _RandomAccessIterator,
446           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
447 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
448     _RandomAccessIterator& __first,
449     _RandomAccessIterator& __lm1,
450     _Compare __comp,
451     _ValueType& __pivot,
452     uint64_t& __left_bitset,
453     uint64_t& __right_bitset) {
454   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
455   difference_type __remaining_len = __lm1 - __first + 1;
456   difference_type __l_size;
457   difference_type __r_size;
458   if (__left_bitset == 0 && __right_bitset == 0) {
459     __l_size = __remaining_len / 2;
460     __r_size = __remaining_len - __l_size;
461   } else if (__left_bitset == 0) {
462     // We know at least one side is a full block.
463     __l_size = __remaining_len - __detail::__block_size;
464     __r_size = __detail::__block_size;
465   } else { // if (__right_bitset == 0)
466     __l_size = __detail::__block_size;
467     __r_size = __remaining_len - __detail::__block_size;
468   }
469   // Record the comparison outcomes for the elements currently on the left side.
470   if (__left_bitset == 0) {
471     _RandomAccessIterator __iter = __first;
472     for (int __j = 0; __j < __l_size; __j++) {
473       bool __comp_result = !__comp(*__iter, __pivot);
474       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
475       ++__iter;
476     }
477   }
478   // Record the comparison outcomes for the elements currently on the right
479   // side.
480   if (__right_bitset == 0) {
481     _RandomAccessIterator __iter = __lm1;
482     for (int __j = 0; __j < __r_size; __j++) {
483       bool __comp_result = __comp(*__iter, __pivot);
484       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
485       --__iter;
486     }
487   }
488   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
489   __first += (__left_bitset == 0) ? __l_size : 0;
490   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
491 }
492 
493 template <class _AlgPolicy, class _RandomAccessIterator>
494 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
495     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
496   using _Ops = _IterOps<_AlgPolicy>;
497   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
498   if (__left_bitset) {
499     // Swap within the left side.  Need to find set positions in the reverse
500     // order.
501     while (__left_bitset != 0) {
502       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
503       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
504       _RandomAccessIterator __it = __first + __tz_left;
505       if (__it != __lm1) {
506         _Ops::iter_swap(__it, __lm1);
507       }
508       --__lm1;
509     }
510     __first = __lm1 + difference_type(1);
511   } else if (__right_bitset) {
512     // Swap within the right side.  Need to find set positions in the reverse
513     // order.
514     while (__right_bitset != 0) {
515       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
516       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
517       _RandomAccessIterator __it = __lm1 - __tz_right;
518       if (__it != __first) {
519         _Ops::iter_swap(__it, __first);
520       }
521       ++__first;
522     }
523   }
524 }
525 
526 // Partition [__first, __last) using the comparator __comp.  *__first has the
527 // chosen pivot.  Elements that are equivalent are kept to the left of the
528 // pivot.  Returns the iterator for the pivot and a bool value which is true if
529 // the provided range is already sorted, false otherwise.  We assume that the
530 // length of the range is at least three elements.
531 //
532 // __bitset_partition uses bitsets for storing outcomes of the comparisons
533 // between the pivot and other elements.
534 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
535 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
536 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
537   using _Ops = _IterOps<_AlgPolicy>;
538   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
539   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
540   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
541 
542   _RandomAccessIterator __begin = __first;
543   value_type __pivot(_Ops::__iter_move(__first));
544   // Find the first element greater than the pivot.
545   if (__comp(__pivot, *(__last - difference_type(1)))) {
546     // Not guarded since we know the last element is greater than the pivot.
547     while (!__comp(__pivot, *++__first)) {
548     }
549   } else {
550     while (++__first < __last && !__comp(__pivot, *__first)) {
551     }
552   }
553   // Find the last element less than or equal to the pivot.
554   if (__first < __last) {
555     // It will be always guarded because __introsort will do the median-of-three
556     // before calling this.
557     while (__comp(__pivot, *--__last)) {
558     }
559   }
560   // If the first element greater than the pivot is at or after the
561   // last element less than or equal to the pivot, then we have covered the
562   // entire range without swapping elements.  This implies the range is already
563   // partitioned.
564   bool __already_partitioned = __first >= __last;
565   if (!__already_partitioned) {
566     _Ops::iter_swap(__first, __last);
567     ++__first;
568   }
569 
570   // In [__first, __last) __last is not inclusive. From now on, it uses last
571   // minus one to be inclusive on both sides.
572   _RandomAccessIterator __lm1 = __last - difference_type(1);
573   uint64_t __left_bitset      = 0;
574   uint64_t __right_bitset     = 0;
575 
576   // Reminder: length = __lm1 - __first + 1.
577   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
578     // Record the comparison outcomes for the elements currently on the left
579     // side.
580     if (__left_bitset == 0)
581       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
582     // Record the comparison outcomes for the elements currently on the right
583     // side.
584     if (__right_bitset == 0)
585       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
586     // Swap the elements recorded to be the candidates for swapping in the
587     // bitsets.
588     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
589     // Only advance the iterator if all the elements that need to be moved to
590     // other side were moved.
591     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
592     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
593   }
594   // Now, we have a less-than a block worth of elements on at least one of the
595   // sides.
596   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
597       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
598   // At least one the bitsets would be empty.  For the non-empty one, we need to
599   // properly partition the elements that appear within that bitset.
600   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
601 
602   // Move the pivot to its correct position.
603   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
604   if (__begin != __pivot_pos) {
605     *__begin = _Ops::__iter_move(__pivot_pos);
606   }
607   *__pivot_pos = std::move(__pivot);
608   return std::make_pair(__pivot_pos, __already_partitioned);
609 }
610 
611 // Partition [__first, __last) using the comparator __comp.  *__first has the
612 // chosen pivot.  Elements that are equivalent are kept to the right of the
613 // pivot.  Returns the iterator for the pivot and a bool value which is true if
614 // the provided range is already sorted, false otherwise.  We assume that the
615 // length of the range is at least three elements.
616 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
617 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
618 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
619   using _Ops = _IterOps<_AlgPolicy>;
620   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
621   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
622   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
623   _RandomAccessIterator __begin = __first;
624   value_type __pivot(_Ops::__iter_move(__first));
625   // Find the first element greater or equal to the pivot.  It will be always
626   // guarded because __introsort will do the median-of-three before calling
627   // this.
628   while (__comp(*++__first, __pivot))
629     ;
630 
631   // Find the last element less than the pivot.
632   if (__begin == __first - difference_type(1)) {
633     while (__first < __last && !__comp(*--__last, __pivot))
634       ;
635   } else {
636     // Guarded.
637     while (!__comp(*--__last, __pivot))
638       ;
639   }
640 
641   // If the first element greater than or equal to the pivot is at or after the
642   // last element less than the pivot, then we have covered the entire range
643   // without swapping elements.  This implies the range is already partitioned.
644   bool __already_partitioned = __first >= __last;
645   // Go through the remaining elements.  Swap pairs of elements (one to the
646   // right of the pivot and the other to left of the pivot) that are not on the
647   // correct side of the pivot.
648   while (__first < __last) {
649     _Ops::iter_swap(__first, __last);
650     while (__comp(*++__first, __pivot))
651       ;
652     while (!__comp(*--__last, __pivot))
653       ;
654   }
655   // Move the pivot to its correct position.
656   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
657   if (__begin != __pivot_pos) {
658     *__begin = _Ops::__iter_move(__pivot_pos);
659   }
660   *__pivot_pos = std::move(__pivot);
661   return std::make_pair(__pivot_pos, __already_partitioned);
662 }
663 
664 // Similar to the above function.  Elements equivalent to the pivot are put to
665 // the left of the pivot.  Returns the iterator to the pivot element.
666 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
667 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
668 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
669   using _Ops = _IterOps<_AlgPolicy>;
670   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
671   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
672   _RandomAccessIterator __begin = __first;
673   value_type __pivot(_Ops::__iter_move(__first));
674   if (__comp(__pivot, *(__last - difference_type(1)))) {
675     // Guarded.
676     while (!__comp(__pivot, *++__first)) {
677     }
678   } else {
679     while (++__first < __last && !__comp(__pivot, *__first)) {
680     }
681   }
682 
683   if (__first < __last) {
684     // It will be always guarded because __introsort will do the
685     // median-of-three before calling this.
686     while (__comp(__pivot, *--__last)) {
687     }
688   }
689   while (__first < __last) {
690     _Ops::iter_swap(__first, __last);
691     while (!__comp(__pivot, *++__first))
692       ;
693     while (__comp(__pivot, *--__last))
694       ;
695   }
696   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
697   if (__begin != __pivot_pos) {
698     *__begin = _Ops::__iter_move(__pivot_pos);
699   }
700   *__pivot_pos = std::move(__pivot);
701   return __first;
702 }
703 
704 // The main sorting function.  Implements introsort combined with other ideas:
705 //  - option of using block quick sort for partitioning,
706 //  - guarded and unguarded insertion sort for small lengths,
707 //  - Tuckey's ninther technique for computing the pivot,
708 //  - check on whether partition was not required.
709 // The implementation is partly based on Orson Peters' pattern-defeating
710 // quicksort, published at: <https://github.com/orlp/pdqsort>.
711 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
712 void __introsort(_RandomAccessIterator __first,
713                  _RandomAccessIterator __last,
714                  _Compare __comp,
715                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
716                  bool __leftmost = true) {
717   using _Ops = _IterOps<_AlgPolicy>;
718   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
719   using _Comp_ref = __comp_ref_type<_Compare>;
720   // Upper bound for using insertion sort for sorting.
721   _LIBCPP_CONSTEXPR difference_type __limit = 24;
722   // Lower bound for using Tuckey's ninther technique for median computation.
723   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
724   while (true) {
725     difference_type __len = __last - __first;
726     switch (__len) {
727     case 0:
728     case 1:
729       return;
730     case 2:
731       if (__comp(*--__last, *__first))
732         _Ops::iter_swap(__first, __last);
733       return;
734     case 3:
735       std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
736       return;
737     case 4:
738       std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
739           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
740       return;
741     case 5:
742       std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
743           __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
744           --__last, __comp);
745       return;
746     }
747     // Use insertion sort if the length of the range is below the specified limit.
748     if (__len < __limit) {
749       if (__leftmost) {
750         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
751       } else {
752         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
753       }
754       return;
755     }
756     if (__depth == 0) {
757       // Fallback to heap sort as Introsort suggests.
758       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
759       return;
760     }
761     --__depth;
762     {
763       difference_type __half_len = __len / 2;
764       // Use Tuckey's ninther technique or median of 3 for pivot selection
765       // depending on the length of the range being sorted.
766       if (__len > __ninther_threshold) {
767         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
768         std::__sort3<_AlgPolicy, _Compare>(
769             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
770         std::__sort3<_AlgPolicy, _Compare>(
771             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
772         std::__sort3<_AlgPolicy, _Compare>(
773             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
774         _Ops::iter_swap(__first, __first + __half_len);
775       } else {
776         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
777       }
778     }
779     // The elements to the left of the current iterator range are already
780     // sorted.  If the current iterator range to be sorted is not the
781     // leftmost part of the entire iterator range and the pivot is same as
782     // the highest element in the range to the left, then we know that all
783     // the elements in the range [first, pivot] would be equal to the pivot,
784     // assuming the equal elements are put on the left side when
785     // partitioned.  This also means that we do not need to sort the left
786     // side of the partition.
787     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
788       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
789           __first, __last, _Comp_ref(__comp));
790       continue;
791     }
792     // Use bitset partition only if asked for.
793     auto __ret =
794         _UseBitSetPartition
795             ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
796             : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
797     _RandomAccessIterator __i = __ret.first;
798     // [__first, __i) < *__i and *__i <= [__i+1, __last)
799     // If we were given a perfect partition, see if insertion sort is quick...
800     if (__ret.second) {
801       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
802       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
803         if (__fs)
804           return;
805         __last = __i;
806         continue;
807       } else {
808         if (__fs) {
809           __first = ++__i;
810           continue;
811         }
812       }
813     }
814     // Sort the left partiton recursively and the right partition with tail recursion elimination.
815     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
816         __first, __i, __comp, __depth, __leftmost);
817     __leftmost = false;
818     __first    = ++__i;
819   }
820 }
821 
822 template <typename _Number>
823 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
824   if (__n == 0)
825     return 0;
826   if (sizeof(__n) <= sizeof(unsigned))
827     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
828   if (sizeof(__n) <= sizeof(unsigned long))
829     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
830   if (sizeof(__n) <= sizeof(unsigned long long))
831     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
832 
833   _Number __log2 = 0;
834   while (__n > 1) {
835     __log2++;
836     __n >>= 1;
837   }
838   return __log2;
839 }
840 
841 template <class _Comp, class _RandomAccessIterator>
842 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
843 
844 extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
845 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
846 extern template _LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
847 #endif
848 extern template _LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
849 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
850 extern template _LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
851 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
852 extern template _LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
853 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
854 extern template _LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
855 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
856 extern template _LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
857 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&);
858 extern template _LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
859 extern template _LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
860 extern template _LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
861 
862 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
863 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
864 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
865   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
866   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
867 
868   // Only use bitset partitioning for arithmetic types.  We should also check
869   // that the default comparator is in use so that we are sure that there are no
870   // branches in the comparator.
871   std::__introsort<_AlgPolicy,
872                    _Comp&,
873                    _RandomAccessIterator,
874                    __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(
875       __first, __last, __comp, __depth_limit);
876 }
877 
878 template <class _Type, class... _Options>
879 using __is_any_of = _Or<is_same<_Type, _Options>...>;
880 
881 template <class _Type>
882 using __sort_is_specialized_in_library = __is_any_of<
883     _Type,
884     char,
885 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
886     wchar_t,
887 #endif
888     signed char,
889     unsigned char,
890     short,
891     unsigned short,
892     int,
893     unsigned int,
894     long,
895     unsigned long,
896     long long,
897     unsigned long long,
898     float,
899     double,
900     long double>;
901 
902 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
903 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<_Type>& __comp) {
904   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
905 }
906 
907 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
908 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
909   __less<_Type> __comp;
910   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
911 }
912 
913 #if _LIBCPP_STD_VER >= 14
914 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
915 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
916   __less<_Type> __comp;
917   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
918 }
919 #endif
920 
921 #if _LIBCPP_STD_VER >= 20
922 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
923 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
924   __less<_Type> __comp;
925   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
926 }
927 #endif
928 
929 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
930 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
931 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
932   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
933 
934   if (__libcpp_is_constant_evaluated()) {
935     std::__partial_sort<_AlgPolicy>(
936         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
937   } else {
938     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
939   }
940 }
941 
942 template <class _RandomAccessIterator, class _Comp>
943 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
944 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
945   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
946 }
947 
948 template <class _RandomAccessIterator>
949 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
950 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
951   std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
952 }
953 
954 _LIBCPP_END_NAMESPACE_STD
955 
956 #endif // _LIBCPP___ALGORITHM_SORT_H
957