xref: /llvm-project/libcxx/include/__algorithm/sort.h (revision c54c90da4fb3940528de36df6dc69f3189705b2b)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug>
25 #include <__debug_utils/randomize_range.h>
26 #include <__functional/operations.h>
27 #include <__functional/ranges_operations.h>
28 #include <__iterator/iterator_traits.h>
29 #include <__memory/destruct_n.h>
30 #include <__memory/unique_ptr.h>
31 #include <__type_traits/conditional.h>
32 #include <__type_traits/disjunction.h>
33 #include <__type_traits/is_arithmetic.h>
34 #include <__utility/move.h>
35 #include <__utility/pair.h>
36 #include <climits>
37 #include <cstdint>
38 
39 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
40 #  pragma GCC system_header
41 #endif
42 
43 _LIBCPP_BEGIN_NAMESPACE_STD
44 
45 // stable, 2-3 compares, 0-2 swaps
46 
47 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
48 _LIBCPP_HIDE_FROM_ABI
49 _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
50                                                _Compare __c) {
51   using _Ops = _IterOps<_AlgPolicy>;
52 
53   unsigned __r = 0;
54   if (!__c(*__y, *__x))   // if x <= y
55   {
56     if (!__c(*__z, *__y)) // if y <= z
57       return __r;         // x <= y && y <= z
58                           // x <= y && y > z
59     _Ops::iter_swap(__y, __z);     // x <= z && y < z
60     __r = 1;
61     if (__c(*__y, *__x))  // if x > y
62     {
63       _Ops::iter_swap(__x, __y);   // x < y && y <= z
64       __r = 2;
65     }
66     return __r;           // x <= y && y < z
67   }
68   if (__c(*__z, *__y))    // x > y, if y > z
69   {
70     _Ops::iter_swap(__x, __z);     // x < y && y < z
71     __r = 1;
72     return __r;
73   }
74   _Ops::iter_swap(__x, __y);       // x > y && y <= z
75   __r = 1;                // x < y && x <= z
76   if (__c(*__z, *__y))    // if y > z
77   {
78     _Ops::iter_swap(__y, __z);     // x <= y && y < z
79     __r = 2;
80   }
81   return __r;
82 }                         // x <= y && y <= z
83 
84 // stable, 3-6 compares, 0-5 swaps
85 
86 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
87 _LIBCPP_HIDE_FROM_ABI
88 void __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
89                  _Compare __c) {
90   using _Ops   = _IterOps<_AlgPolicy>;
91   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
92   if (__c(*__x4, *__x3)) {
93     _Ops::iter_swap(__x3, __x4);
94     if (__c(*__x3, *__x2)) {
95       _Ops::iter_swap(__x2, __x3);
96       if (__c(*__x2, *__x1)) {
97         _Ops::iter_swap(__x1, __x2);
98       }
99     }
100   }
101 }
102 
103 // stable, 4-10 compares, 0-9 swaps
104 
105 template <class _AlgPolicy, class _Comp, class _ForwardIterator>
106 _LIBCPP_HIDE_FROM_ABI void __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
107                                    _ForwardIterator __x4, _ForwardIterator __x5, _Comp __comp) {
108   using _Ops = _IterOps<_AlgPolicy>;
109 
110   std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
111   if (__comp(*__x5, *__x4)) {
112     _Ops::iter_swap(__x4, __x5);
113     if (__comp(*__x4, *__x3)) {
114       _Ops::iter_swap(__x3, __x4);
115       if (__comp(*__x3, *__x2)) {
116         _Ops::iter_swap(__x2, __x3);
117         if (__comp(*__x2, *__x1)) {
118           _Ops::iter_swap(__x1, __x2);
119         }
120       }
121     }
122   }
123 }
124 
125 // The comparator being simple is a prerequisite for using the branchless optimization.
126 template <class _Tp>
127 struct __is_simple_comparator : false_type {};
128 template <class _Tp>
129 struct __is_simple_comparator<__less<_Tp>&> : true_type {};
130 template <class _Tp>
131 struct __is_simple_comparator<less<_Tp>&> : true_type {};
132 template <class _Tp>
133 struct __is_simple_comparator<greater<_Tp>&> : true_type {};
134 #if _LIBCPP_STD_VER >= 20
135 template <>
136 struct __is_simple_comparator<ranges::less&> : true_type {};
137 template <>
138 struct __is_simple_comparator<ranges::greater&> : true_type {};
139 #endif
140 
141 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
142 using __use_branchless_sort =
143     integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
144                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
145 
146 namespace __detail {
147 
148 // Size in bits for the bitset in use.
149 enum { __block_size = sizeof(uint64_t) * 8 };
150 
151 } // namespace __detail
152 
153 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
154 template <class _Compare, class _RandomAccessIterator>
155 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
156   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
157   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
158   bool __r = __c(*__x, *__y);
159   value_type __tmp = __r ? *__x : *__y;
160   *__y = __r ? *__y : *__x;
161   *__x = __tmp;
162 }
163 
164 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
165 // under the assumption that *__y and *__z are already ordered.
166 template <class _Compare, class _RandomAccessIterator>
167 inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y,
168                                                           _RandomAccessIterator __z, _Compare __c) {
169   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
170   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
171   bool __r = __c(*__z, *__x);
172   value_type __tmp = __r ? *__z : *__x;
173   *__z = __r ? *__x : *__z;
174   __r = __c(__tmp, *__y);
175   *__x = __r ? *__x : *__y;
176   *__y = __r ? *__y : __tmp;
177 }
178 
179 template <class, class _Compare, class _RandomAccessIterator>
180 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
181 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
182                          _Compare __c) {
183   std::__cond_swap<_Compare>(__x2, __x3, __c);
184   std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
185 }
186 
187 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
188 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
189 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
190                          _Compare __c) {
191   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
192 }
193 
194 template <class, class _Compare, class _RandomAccessIterator>
195 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
196 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
197                          _RandomAccessIterator __x4, _Compare __c) {
198   std::__cond_swap<_Compare>(__x1, __x3, __c);
199   std::__cond_swap<_Compare>(__x2, __x4, __c);
200   std::__cond_swap<_Compare>(__x1, __x2, __c);
201   std::__cond_swap<_Compare>(__x3, __x4, __c);
202   std::__cond_swap<_Compare>(__x2, __x3, __c);
203 }
204 
205 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
206 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
207 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
208                          _RandomAccessIterator __x4, _Compare __c) {
209   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
210 }
211 
212 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
213 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
214 __sort5_maybe_branchless(
215     _RandomAccessIterator __x1,
216     _RandomAccessIterator __x2,
217     _RandomAccessIterator __x3,
218     _RandomAccessIterator __x4,
219     _RandomAccessIterator __x5,
220     _Compare __c) {
221   std::__cond_swap<_Compare>(__x1, __x2, __c);
222   std::__cond_swap<_Compare>(__x4, __x5, __c);
223   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
224   std::__cond_swap<_Compare>(__x2, __x5, __c);
225   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
226   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
227 }
228 
229 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
230 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
231 __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
232                          _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
233   std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
234       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
235 }
236 
237 // Assumes size > 0
238 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
239 _LIBCPP_HIDE_FROM_ABI
240 _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
241                                                     _Compare __comp) {
242   _BidirectionalIterator __lm1 = __last;
243   for (--__lm1; __first != __lm1; ++__first) {
244     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
245     if (__i != __first)
246       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
247   }
248 }
249 
250 // Sort the iterator range [__first, __last) using the comparator __comp using
251 // the insertion sort algorithm.
252 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
253 _LIBCPP_HIDE_FROM_ABI
254 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
255   using _Ops = _IterOps<_AlgPolicy>;
256 
257   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
258   if (__first == __last)
259     return;
260   _BidirectionalIterator __i = __first;
261   for (++__i; __i != __last; ++__i) {
262     _BidirectionalIterator __j = __i;
263     --__j;
264     if (__comp(*__i, *__j)) {
265       value_type __t(_Ops::__iter_move(__i));
266       _BidirectionalIterator __k = __j;
267       __j                        = __i;
268       do {
269         *__j = _Ops::__iter_move(__k);
270         __j  = __k;
271       } while (__j != __first && __comp(__t, *--__k));
272       *__j = std::move(__t);
273     }
274   }
275 }
276 
277 // Sort the iterator range [__first, __last) using the comparator __comp using
278 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
279 // The implementation below has no bounds check (unguarded) for the inner loop.
280 // Assumes that there is an element in the position (__first - 1) and that each
281 // element in the input range is greater or equal to the element at __first - 1.
282 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
283 _LIBCPP_HIDE_FROM_ABI void
284 __insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
285   using _Ops = _IterOps<_AlgPolicy>;
286   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
287   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
288   if (__first == __last)
289     return;
290   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
291     _RandomAccessIterator __j = __i - difference_type(1);
292     if (__comp(*__i, *__j)) {
293       value_type __t(_Ops::__iter_move(__i));
294       _RandomAccessIterator __k = __j;
295       __j = __i;
296       do {
297         *__j = _Ops::__iter_move(__k);
298         __j = __k;
299       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
300       *__j = std::move(__t);
301     }
302   }
303 }
304 
305 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
306 _LIBCPP_HIDE_FROM_ABI bool __insertion_sort_incomplete(
307     _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
308   using _Ops = _IterOps<_AlgPolicy>;
309 
310   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
311   switch (__last - __first) {
312   case 0:
313   case 1:
314     return true;
315   case 2:
316     if (__comp(*--__last, *__first))
317       _Ops::iter_swap(__first, __last);
318     return true;
319   case 3:
320     std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
321     return true;
322   case 4:
323     std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
324         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
325     return true;
326   case 5:
327     std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
328         __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
329         --__last, __comp);
330     return true;
331   }
332   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
333   _RandomAccessIterator __j = __first + difference_type(2);
334   std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
335   const unsigned __limit = 8;
336   unsigned __count = 0;
337   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
338     if (__comp(*__i, *__j)) {
339       value_type __t(_Ops::__iter_move(__i));
340       _RandomAccessIterator __k = __j;
341       __j = __i;
342       do {
343         *__j = _Ops::__iter_move(__k);
344         __j = __k;
345       } while (__j != __first && __comp(__t, *--__k));
346       *__j = std::move(__t);
347       if (++__count == __limit)
348         return ++__i == __last;
349     }
350     __j = __i;
351   }
352   return true;
353 }
354 
355 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
356 _LIBCPP_HIDE_FROM_ABI
357 void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1,
358                            typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) {
359   using _Ops = _IterOps<_AlgPolicy>;
360 
361   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
362   if (__first1 != __last1) {
363     __destruct_n __d(0);
364     unique_ptr<value_type, __destruct_n&> __h(__first2, __d);
365     value_type* __last2 = __first2;
366     ::new ((void*)__last2) value_type(_Ops::__iter_move(__first1));
367     __d.template __incr<value_type>();
368     for (++__last2; ++__first1 != __last1; ++__last2) {
369       value_type* __j2 = __last2;
370       value_type* __i2 = __j2;
371       if (__comp(*__first1, *--__i2)) {
372         ::new ((void*)__j2) value_type(std::move(*__i2));
373         __d.template __incr<value_type>();
374         for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2)
375           *__j2 = std::move(*__i2);
376         *__j2 = _Ops::__iter_move(__first1);
377       } else {
378         ::new ((void*)__j2) value_type(_Ops::__iter_move(__first1));
379         __d.template __incr<value_type>();
380       }
381     }
382     __h.release();
383   }
384 }
385 
386 template <class _AlgPolicy, class _RandomAccessIterator>
387 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
388     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
389   using _Ops = _IterOps<_AlgPolicy>;
390   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
391   // Swap one pair on each iteration as long as both bitsets have at least one
392   // element for swapping.
393   while (__left_bitset != 0 && __right_bitset != 0) {
394     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
395     __left_bitset              = __libcpp_blsr(__left_bitset);
396     difference_type __tz_right = __libcpp_ctz(__right_bitset);
397     __right_bitset             = __libcpp_blsr(__right_bitset);
398     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
399   }
400 }
401 
402 template <class _Compare,
403           class _RandomAccessIterator,
404           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
405 inline _LIBCPP_HIDE_FROM_ABI void
406 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
407   // Possible vectorization. With a proper "-march" flag, the following loop
408   // will be compiled into a set of SIMD instructions.
409   _RandomAccessIterator __iter = __first;
410   for (int __j = 0; __j < __detail::__block_size;) {
411     bool __comp_result = !__comp(*__iter, __pivot);
412     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
413     __j++;
414     ++__iter;
415   }
416 }
417 
418 template <class _Compare,
419           class _RandomAccessIterator,
420           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
421 inline _LIBCPP_HIDE_FROM_ABI void
422 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
423   // Possible vectorization. With a proper "-march" flag, the following loop
424   // will be compiled into a set of SIMD instructions.
425   _RandomAccessIterator __iter = __lm1;
426   for (int __j = 0; __j < __detail::__block_size;) {
427     bool __comp_result = __comp(*__iter, __pivot);
428     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
429     __j++;
430     --__iter;
431   }
432 }
433 
434 template <class _AlgPolicy,
435           class _Compare,
436           class _RandomAccessIterator,
437           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
438 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
439     _RandomAccessIterator& __first,
440     _RandomAccessIterator& __lm1,
441     _Compare __comp,
442     _ValueType& __pivot,
443     uint64_t& __left_bitset,
444     uint64_t& __right_bitset) {
445   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
446   difference_type __remaining_len = __lm1 - __first + 1;
447   difference_type __l_size;
448   difference_type __r_size;
449   if (__left_bitset == 0 && __right_bitset == 0) {
450     __l_size = __remaining_len / 2;
451     __r_size = __remaining_len - __l_size;
452   } else if (__left_bitset == 0) {
453     // We know at least one side is a full block.
454     __l_size = __remaining_len - __detail::__block_size;
455     __r_size = __detail::__block_size;
456   } else { // if (__right_bitset == 0)
457     __l_size = __detail::__block_size;
458     __r_size = __remaining_len - __detail::__block_size;
459   }
460   // Record the comparison outcomes for the elements currently on the left side.
461   if (__left_bitset == 0) {
462     _RandomAccessIterator __iter = __first;
463     for (int __j = 0; __j < __l_size; __j++) {
464       bool __comp_result = !__comp(*__iter, __pivot);
465       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
466       ++__iter;
467     }
468   }
469   // Record the comparison outcomes for the elements currently on the right
470   // side.
471   if (__right_bitset == 0) {
472     _RandomAccessIterator __iter = __lm1;
473     for (int __j = 0; __j < __r_size; __j++) {
474       bool __comp_result = __comp(*__iter, __pivot);
475       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
476       --__iter;
477     }
478   }
479   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
480   __first += (__left_bitset == 0) ? __l_size : 0;
481   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
482 }
483 
484 template <class _AlgPolicy, class _RandomAccessIterator>
485 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
486     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
487   using _Ops = _IterOps<_AlgPolicy>;
488   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
489   if (__left_bitset) {
490     // Swap within the left side.  Need to find set positions in the reverse
491     // order.
492     while (__left_bitset != 0) {
493       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
494       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
495       _RandomAccessIterator __it = __first + __tz_left;
496       if (__it != __lm1) {
497         _Ops::iter_swap(__it, __lm1);
498       }
499       --__lm1;
500     }
501     __first = __lm1 + difference_type(1);
502   } else if (__right_bitset) {
503     // Swap within the right side.  Need to find set positions in the reverse
504     // order.
505     while (__right_bitset != 0) {
506       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
507       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
508       _RandomAccessIterator __it = __lm1 - __tz_right;
509       if (__it != __first) {
510         _Ops::iter_swap(__it, __first);
511       }
512       ++__first;
513     }
514   }
515 }
516 
517 // Partition [__first, __last) using the comparator __comp.  *__first has the
518 // chosen pivot.  Elements that are equivalent are kept to the left of the
519 // pivot.  Returns the iterator for the pivot and a bool value which is true if
520 // the provided range is already sorted, false otherwise.  We assume that the
521 // length of the range is at least three elements.
522 //
523 // __bitset_partition uses bitsets for storing outcomes of the comparisons
524 // between the pivot and other elements.
525 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
526 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
527 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
528   using _Ops = _IterOps<_AlgPolicy>;
529   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
530   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
531   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
532 
533   _RandomAccessIterator __begin = __first;
534   value_type __pivot(_Ops::__iter_move(__first));
535   // Find the first element greater than the pivot.
536   if (__comp(__pivot, *(__last - difference_type(1)))) {
537     // Not guarded since we know the last element is greater than the pivot.
538     while (!__comp(__pivot, *++__first)) {
539     }
540   } else {
541     while (++__first < __last && !__comp(__pivot, *__first)) {
542     }
543   }
544   // Find the last element less than or equal to the pivot.
545   if (__first < __last) {
546     // It will be always guarded because __introsort will do the median-of-three
547     // before calling this.
548     while (__comp(__pivot, *--__last)) {
549     }
550   }
551   // If the first element greater than the pivot is at or after the
552   // last element less than or equal to the pivot, then we have covered the
553   // entire range without swapping elements.  This implies the range is already
554   // partitioned.
555   bool __already_partitioned = __first >= __last;
556   if (!__already_partitioned) {
557     _Ops::iter_swap(__first, __last);
558     ++__first;
559   }
560 
561   // In [__first, __last) __last is not inclusive. From now on, it uses last
562   // minus one to be inclusive on both sides.
563   _RandomAccessIterator __lm1 = __last - difference_type(1);
564   uint64_t __left_bitset      = 0;
565   uint64_t __right_bitset     = 0;
566 
567   // Reminder: length = __lm1 - __first + 1.
568   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
569     // Record the comparison outcomes for the elements currently on the left
570     // side.
571     if (__left_bitset == 0)
572       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
573     // Record the comparison outcomes for the elements currently on the right
574     // side.
575     if (__right_bitset == 0)
576       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
577     // Swap the elements recorded to be the candidates for swapping in the
578     // bitsets.
579     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
580     // Only advance the iterator if all the elements that need to be moved to
581     // other side were moved.
582     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
583     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
584   }
585   // Now, we have a less-than a block worth of elements on at least one of the
586   // sides.
587   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
588       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
589   // At least one the bitsets would be empty.  For the non-empty one, we need to
590   // properly partition the elements that appear within that bitset.
591   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
592 
593   // Move the pivot to its correct position.
594   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
595   if (__begin != __pivot_pos) {
596     *__begin = _Ops::__iter_move(__pivot_pos);
597   }
598   *__pivot_pos = std::move(__pivot);
599   return std::make_pair(__pivot_pos, __already_partitioned);
600 }
601 
602 // Partition [__first, __last) using the comparator __comp.  *__first has the
603 // chosen pivot.  Elements that are equivalent are kept to the right of the
604 // pivot.  Returns the iterator for the pivot and a bool value which is true if
605 // the provided range is already sorted, false otherwise.  We assume that the
606 // length of the range is at least three elements.
607 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
608 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
609 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
610   using _Ops = _IterOps<_AlgPolicy>;
611   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
612   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
613   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
614   _RandomAccessIterator __begin = __first;
615   value_type __pivot(_Ops::__iter_move(__first));
616   // Find the first element greater or equal to the pivot.  It will be always
617   // guarded because __introsort will do the median-of-three before calling
618   // this.
619   while (__comp(*++__first, __pivot))
620     ;
621 
622   // Find the last element less than the pivot.
623   if (__begin == __first - difference_type(1)) {
624     while (__first < __last && !__comp(*--__last, __pivot))
625       ;
626   } else {
627     // Guarded.
628     while (!__comp(*--__last, __pivot))
629       ;
630   }
631 
632   // If the first element greater than or equal to the pivot is at or after the
633   // last element less than the pivot, then we have covered the entire range
634   // without swapping elements.  This implies the range is already partitioned.
635   bool __already_partitioned = __first >= __last;
636   // Go through the remaining elements.  Swap pairs of elements (one to the
637   // right of the pivot and the other to left of the pivot) that are not on the
638   // correct side of the pivot.
639   while (__first < __last) {
640     _Ops::iter_swap(__first, __last);
641     while (__comp(*++__first, __pivot))
642       ;
643     while (!__comp(*--__last, __pivot))
644       ;
645   }
646   // Move the pivot to its correct position.
647   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
648   if (__begin != __pivot_pos) {
649     *__begin = _Ops::__iter_move(__pivot_pos);
650   }
651   *__pivot_pos = std::move(__pivot);
652   return std::make_pair(__pivot_pos, __already_partitioned);
653 }
654 
655 // Similar to the above function.  Elements equivalent to the pivot are put to
656 // the left of the pivot.  Returns the iterator to the pivot element.
657 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
658 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
659 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
660   using _Ops = _IterOps<_AlgPolicy>;
661   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
662   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
663   _RandomAccessIterator __begin = __first;
664   value_type __pivot(_Ops::__iter_move(__first));
665   if (__comp(__pivot, *(__last - difference_type(1)))) {
666     // Guarded.
667     while (!__comp(__pivot, *++__first)) {
668     }
669   } else {
670     while (++__first < __last && !__comp(__pivot, *__first)) {
671     }
672   }
673 
674   if (__first < __last) {
675     // It will be always guarded because __introsort will do the
676     // median-of-three before calling this.
677     while (__comp(__pivot, *--__last)) {
678     }
679   }
680   while (__first < __last) {
681     _Ops::iter_swap(__first, __last);
682     while (!__comp(__pivot, *++__first))
683       ;
684     while (__comp(__pivot, *--__last))
685       ;
686   }
687   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
688   if (__begin != __pivot_pos) {
689     *__begin = _Ops::__iter_move(__pivot_pos);
690   }
691   *__pivot_pos = std::move(__pivot);
692   return __first;
693 }
694 
695 // The main sorting function.  Implements introsort combined with other ideas:
696 //  - option of using block quick sort for partitioning,
697 //  - guarded and unguarded insertion sort for small lengths,
698 //  - Tuckey's ninther technique for computing the pivot,
699 //  - check on whether partition was not required.
700 // The implementation is partly based on Orson Peters' pattern-defeating
701 // quicksort, published at: <https://github.com/orlp/pdqsort>.
702 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
703 void __introsort(_RandomAccessIterator __first,
704                  _RandomAccessIterator __last,
705                  _Compare __comp,
706                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
707                  bool __leftmost = true) {
708   using _Ops = _IterOps<_AlgPolicy>;
709   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
710   using _Comp_ref = __comp_ref_type<_Compare>;
711   // Upper bound for using insertion sort for sorting.
712   _LIBCPP_CONSTEXPR difference_type __limit = 24;
713   // Lower bound for using Tuckey's ninther technique for median computation.
714   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
715   while (true) {
716     difference_type __len = __last - __first;
717     switch (__len) {
718     case 0:
719     case 1:
720       return;
721     case 2:
722       if (__comp(*--__last, *__first))
723         _Ops::iter_swap(__first, __last);
724       return;
725     case 3:
726       std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
727       return;
728     case 4:
729       std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
730           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
731       return;
732     case 5:
733       std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
734           __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
735           --__last, __comp);
736       return;
737     }
738     // Use insertion sort if the length of the range is below the specified limit.
739     if (__len < __limit) {
740       if (__leftmost) {
741         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
742       } else {
743         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
744       }
745       return;
746     }
747     if (__depth == 0) {
748       // Fallback to heap sort as Introsort suggests.
749       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
750       return;
751     }
752     --__depth;
753     {
754       difference_type __half_len = __len / 2;
755       // Use Tuckey's ninther technique or median of 3 for pivot selection
756       // depending on the length of the range being sorted.
757       if (__len > __ninther_threshold) {
758         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
759         std::__sort3<_AlgPolicy, _Compare>(
760             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
761         std::__sort3<_AlgPolicy, _Compare>(
762             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
763         std::__sort3<_AlgPolicy, _Compare>(
764             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
765         _Ops::iter_swap(__first, __first + __half_len);
766       } else {
767         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
768       }
769     }
770     // The elements to the left of the current iterator range are already
771     // sorted.  If the current iterator range to be sorted is not the
772     // leftmost part of the entire iterator range and the pivot is same as
773     // the highest element in the range to the left, then we know that all
774     // the elements in the range [first, pivot] would be equal to the pivot,
775     // assuming the equal elements are put on the left side when
776     // partitioned.  This also means that we do not need to sort the left
777     // side of the partition.
778     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
779       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
780           __first, __last, _Comp_ref(__comp));
781       continue;
782     }
783     // Use bitset partition only if asked for.
784     auto __ret =
785         _UseBitSetPartition
786             ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
787             : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
788     _RandomAccessIterator __i = __ret.first;
789     // [__first, __i) < *__i and *__i <= [__i+1, __last)
790     // If we were given a perfect partition, see if insertion sort is quick...
791     if (__ret.second) {
792       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
793       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
794         if (__fs)
795           return;
796         __last = __i;
797         continue;
798       } else {
799         if (__fs) {
800           __first = ++__i;
801           continue;
802         }
803       }
804     }
805     // Sort the left partiton recursively and the right partition with tail recursion elimination.
806     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
807         __first, __i, __comp, __depth, __leftmost);
808     __leftmost = false;
809     __first    = ++__i;
810   }
811 }
812 
813 template <typename _Number>
814 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
815   if (__n == 0)
816     return 0;
817   if (sizeof(__n) <= sizeof(unsigned))
818     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
819   if (sizeof(__n) <= sizeof(unsigned long))
820     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
821   if (sizeof(__n) <= sizeof(unsigned long long))
822     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
823 
824   _Number __log2 = 0;
825   while (__n > 1) {
826     __log2++;
827     __n >>= 1;
828   }
829   return __log2;
830 }
831 
832 template <class _Comp, class _RandomAccessIterator>
833 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
834 
835 extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
836 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
837 extern template _LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
838 #endif
839 extern template _LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
840 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
841 extern template _LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
842 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
843 extern template _LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
844 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
845 extern template _LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
846 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
847 extern template _LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
848 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&);
849 extern template _LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
850 extern template _LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
851 extern template _LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
852 
853 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
854 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
855 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
856   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
857   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
858 
859   // Only use bitset partitioning for arithmetic types.  We should also check
860   // that the default comparator is in use so that we are sure that there are no
861   // branches in the comparator.
862   std::__introsort<_AlgPolicy,
863                    _Comp&,
864                    _RandomAccessIterator,
865                    __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(
866       __first, __last, __comp, __depth_limit);
867 }
868 
869 template <class _Type, class... _Options>
870 using __is_any_of = _Or<is_same<_Type, _Options>...>;
871 
872 template <class _Type>
873 using __sort_is_specialized_in_library = __is_any_of<
874     _Type,
875     char,
876 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
877     wchar_t,
878 #endif
879     signed char,
880     unsigned char,
881     short,
882     unsigned short,
883     int,
884     unsigned int,
885     long,
886     unsigned long,
887     long long,
888     unsigned long long,
889     float,
890     double,
891     long double>;
892 
893 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
894 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<_Type>& __comp) {
895   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
896 }
897 
898 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
899 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
900   __less<_Type> __comp;
901   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
902 }
903 
904 #if _LIBCPP_STD_VER >= 14
905 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
906 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
907   __less<_Type> __comp;
908   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
909 }
910 #endif
911 
912 #if _LIBCPP_STD_VER >= 20
913 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
914 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
915   __less<_Type> __comp;
916   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
917 }
918 #endif
919 
920 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
921 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
922 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
923   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
924 
925   if (__libcpp_is_constant_evaluated()) {
926     std::__partial_sort<_AlgPolicy>(
927         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
928   } else {
929     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
930   }
931 }
932 
933 template <class _RandomAccessIterator, class _Comp>
934 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
935 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
936   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
937 }
938 
939 template <class _RandomAccessIterator>
940 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
941 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
942   std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
943 }
944 
945 _LIBCPP_END_NAMESPACE_STD
946 
947 #endif // _LIBCPP___ALGORITHM_SORT_H
948