xref: /llvm-project/libcxx/include/__algorithm/sort.h (revision 35f9fd64350e5fffba727a9cb1df336ccb39d3be)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug>
25 #include <__debug_utils/randomize_range.h>
26 #include <__functional/operations.h>
27 #include <__functional/ranges_operations.h>
28 #include <__iterator/iterator_traits.h>
29 #include <__type_traits/conditional.h>
30 #include <__type_traits/disjunction.h>
31 #include <__type_traits/is_arithmetic.h>
32 #include <__utility/move.h>
33 #include <__utility/pair.h>
34 #include <climits>
35 #include <cstdint>
36 
37 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
38 #  pragma GCC system_header
39 #endif
40 
41 _LIBCPP_BEGIN_NAMESPACE_STD
42 
43 // stable, 2-3 compares, 0-2 swaps
44 
45 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
46 _LIBCPP_HIDE_FROM_ABI
47 _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
48                                                _Compare __c) {
49   using _Ops = _IterOps<_AlgPolicy>;
50 
51   unsigned __r = 0;
52   if (!__c(*__y, *__x))   // if x <= y
53   {
54     if (!__c(*__z, *__y)) // if y <= z
55       return __r;         // x <= y && y <= z
56                           // x <= y && y > z
57     _Ops::iter_swap(__y, __z);     // x <= z && y < z
58     __r = 1;
59     if (__c(*__y, *__x))  // if x > y
60     {
61       _Ops::iter_swap(__x, __y);   // x < y && y <= z
62       __r = 2;
63     }
64     return __r;           // x <= y && y < z
65   }
66   if (__c(*__z, *__y))    // x > y, if y > z
67   {
68     _Ops::iter_swap(__x, __z);     // x < y && y < z
69     __r = 1;
70     return __r;
71   }
72   _Ops::iter_swap(__x, __y);       // x > y && y <= z
73   __r = 1;                // x < y && x <= z
74   if (__c(*__z, *__y))    // if y > z
75   {
76     _Ops::iter_swap(__y, __z);     // x <= y && y < z
77     __r = 2;
78   }
79   return __r;
80 }                         // x <= y && y <= z
81 
82 // stable, 3-6 compares, 0-5 swaps
83 
84 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
85 _LIBCPP_HIDE_FROM_ABI
86 void __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
87                  _Compare __c) {
88   using _Ops   = _IterOps<_AlgPolicy>;
89   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
90   if (__c(*__x4, *__x3)) {
91     _Ops::iter_swap(__x3, __x4);
92     if (__c(*__x3, *__x2)) {
93       _Ops::iter_swap(__x2, __x3);
94       if (__c(*__x2, *__x1)) {
95         _Ops::iter_swap(__x1, __x2);
96       }
97     }
98   }
99 }
100 
101 // stable, 4-10 compares, 0-9 swaps
102 
103 template <class _AlgPolicy, class _Comp, class _ForwardIterator>
104 _LIBCPP_HIDE_FROM_ABI void __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
105                                    _ForwardIterator __x4, _ForwardIterator __x5, _Comp __comp) {
106   using _Ops = _IterOps<_AlgPolicy>;
107 
108   std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
109   if (__comp(*__x5, *__x4)) {
110     _Ops::iter_swap(__x4, __x5);
111     if (__comp(*__x4, *__x3)) {
112       _Ops::iter_swap(__x3, __x4);
113       if (__comp(*__x3, *__x2)) {
114         _Ops::iter_swap(__x2, __x3);
115         if (__comp(*__x2, *__x1)) {
116           _Ops::iter_swap(__x1, __x2);
117         }
118       }
119     }
120   }
121 }
122 
123 // The comparator being simple is a prerequisite for using the branchless optimization.
124 template <class _Tp>
125 struct __is_simple_comparator : false_type {};
126 template <class _Tp>
127 struct __is_simple_comparator<__less<_Tp>&> : true_type {};
128 template <class _Tp>
129 struct __is_simple_comparator<less<_Tp>&> : true_type {};
130 template <class _Tp>
131 struct __is_simple_comparator<greater<_Tp>&> : true_type {};
132 #if _LIBCPP_STD_VER >= 20
133 template <>
134 struct __is_simple_comparator<ranges::less&> : true_type {};
135 template <>
136 struct __is_simple_comparator<ranges::greater&> : true_type {};
137 #endif
138 
139 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
140 using __use_branchless_sort =
141     integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
142                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
143 
144 namespace __detail {
145 
146 // Size in bits for the bitset in use.
147 enum { __block_size = sizeof(uint64_t) * 8 };
148 
149 } // namespace __detail
150 
151 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
152 template <class _Compare, class _RandomAccessIterator>
153 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
154   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
155   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
156   bool __r = __c(*__x, *__y);
157   value_type __tmp = __r ? *__x : *__y;
158   *__y = __r ? *__y : *__x;
159   *__x = __tmp;
160 }
161 
162 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
163 // under the assumption that *__y and *__z are already ordered.
164 template <class _Compare, class _RandomAccessIterator>
165 inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y,
166                                                           _RandomAccessIterator __z, _Compare __c) {
167   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
168   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
169   bool __r = __c(*__z, *__x);
170   value_type __tmp = __r ? *__z : *__x;
171   *__z = __r ? *__x : *__z;
172   __r = __c(__tmp, *__y);
173   *__x = __r ? *__x : *__y;
174   *__y = __r ? *__y : __tmp;
175 }
176 
177 template <class, class _Compare, class _RandomAccessIterator>
178 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
179 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
180                          _Compare __c) {
181   std::__cond_swap<_Compare>(__x2, __x3, __c);
182   std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
183 }
184 
185 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
186 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
187 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
188                          _Compare __c) {
189   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
190 }
191 
192 template <class, class _Compare, class _RandomAccessIterator>
193 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
194 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
195                          _RandomAccessIterator __x4, _Compare __c) {
196   std::__cond_swap<_Compare>(__x1, __x3, __c);
197   std::__cond_swap<_Compare>(__x2, __x4, __c);
198   std::__cond_swap<_Compare>(__x1, __x2, __c);
199   std::__cond_swap<_Compare>(__x3, __x4, __c);
200   std::__cond_swap<_Compare>(__x2, __x3, __c);
201 }
202 
203 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
204 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
205 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
206                          _RandomAccessIterator __x4, _Compare __c) {
207   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
208 }
209 
210 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
211 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
212 __sort5_maybe_branchless(
213     _RandomAccessIterator __x1,
214     _RandomAccessIterator __x2,
215     _RandomAccessIterator __x3,
216     _RandomAccessIterator __x4,
217     _RandomAccessIterator __x5,
218     _Compare __c) {
219   std::__cond_swap<_Compare>(__x1, __x2, __c);
220   std::__cond_swap<_Compare>(__x4, __x5, __c);
221   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
222   std::__cond_swap<_Compare>(__x2, __x5, __c);
223   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
224   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
225 }
226 
227 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
228 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
229 __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
230                          _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
231   std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
232       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
233 }
234 
235 // Assumes size > 0
236 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
237 _LIBCPP_HIDE_FROM_ABI
238 _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
239                                                     _Compare __comp) {
240   _BidirectionalIterator __lm1 = __last;
241   for (--__lm1; __first != __lm1; ++__first) {
242     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
243     if (__i != __first)
244       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
245   }
246 }
247 
248 // Sort the iterator range [__first, __last) using the comparator __comp using
249 // the insertion sort algorithm.
250 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
251 _LIBCPP_HIDE_FROM_ABI
252 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
253   using _Ops = _IterOps<_AlgPolicy>;
254 
255   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
256   if (__first == __last)
257     return;
258   _BidirectionalIterator __i = __first;
259   for (++__i; __i != __last; ++__i) {
260     _BidirectionalIterator __j = __i;
261     --__j;
262     if (__comp(*__i, *__j)) {
263       value_type __t(_Ops::__iter_move(__i));
264       _BidirectionalIterator __k = __j;
265       __j                        = __i;
266       do {
267         *__j = _Ops::__iter_move(__k);
268         __j  = __k;
269       } while (__j != __first && __comp(__t, *--__k));
270       *__j = std::move(__t);
271     }
272   }
273 }
274 
275 // Sort the iterator range [__first, __last) using the comparator __comp using
276 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
277 // The implementation below has no bounds check (unguarded) for the inner loop.
278 // Assumes that there is an element in the position (__first - 1) and that each
279 // element in the input range is greater or equal to the element at __first - 1.
280 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
281 _LIBCPP_HIDE_FROM_ABI void
282 __insertion_sort_unguarded(_RandomAccessIterator const __first, _RandomAccessIterator __last, _Compare __comp) {
283   using _Ops = _IterOps<_AlgPolicy>;
284   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
285   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
286   if (__first == __last)
287     return;
288   const _RandomAccessIterator __leftmost = __first - difference_type(1); (void)__leftmost; // can be unused when assertions are disabled
289   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
290     _RandomAccessIterator __j = __i - difference_type(1);
291     if (__comp(*__i, *__j)) {
292       value_type __t(_Ops::__iter_move(__i));
293       _RandomAccessIterator __k = __j;
294       __j = __i;
295       do {
296         *__j = _Ops::__iter_move(__k);
297         __j = __k;
298         _LIBCPP_ASSERT(__k != __leftmost, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
299       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
300       *__j = std::move(__t);
301     }
302   }
303 }
304 
305 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
306 _LIBCPP_HIDE_FROM_ABI bool __insertion_sort_incomplete(
307     _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
308   using _Ops = _IterOps<_AlgPolicy>;
309 
310   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
311   switch (__last - __first) {
312   case 0:
313   case 1:
314     return true;
315   case 2:
316     if (__comp(*--__last, *__first))
317       _Ops::iter_swap(__first, __last);
318     return true;
319   case 3:
320     std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
321     return true;
322   case 4:
323     std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
324         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
325     return true;
326   case 5:
327     std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
328         __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
329         --__last, __comp);
330     return true;
331   }
332   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
333   _RandomAccessIterator __j = __first + difference_type(2);
334   std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
335   const unsigned __limit = 8;
336   unsigned __count = 0;
337   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
338     if (__comp(*__i, *__j)) {
339       value_type __t(_Ops::__iter_move(__i));
340       _RandomAccessIterator __k = __j;
341       __j = __i;
342       do {
343         *__j = _Ops::__iter_move(__k);
344         __j = __k;
345       } while (__j != __first && __comp(__t, *--__k));
346       *__j = std::move(__t);
347       if (++__count == __limit)
348         return ++__i == __last;
349     }
350     __j = __i;
351   }
352   return true;
353 }
354 
355 template <class _AlgPolicy, class _RandomAccessIterator>
356 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
357     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
358   using _Ops = _IterOps<_AlgPolicy>;
359   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
360   // Swap one pair on each iteration as long as both bitsets have at least one
361   // element for swapping.
362   while (__left_bitset != 0 && __right_bitset != 0) {
363     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
364     __left_bitset              = __libcpp_blsr(__left_bitset);
365     difference_type __tz_right = __libcpp_ctz(__right_bitset);
366     __right_bitset             = __libcpp_blsr(__right_bitset);
367     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
368   }
369 }
370 
371 template <class _Compare,
372           class _RandomAccessIterator,
373           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
374 inline _LIBCPP_HIDE_FROM_ABI void
375 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
376   // Possible vectorization. With a proper "-march" flag, the following loop
377   // will be compiled into a set of SIMD instructions.
378   _RandomAccessIterator __iter = __first;
379   for (int __j = 0; __j < __detail::__block_size;) {
380     bool __comp_result = !__comp(*__iter, __pivot);
381     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
382     __j++;
383     ++__iter;
384   }
385 }
386 
387 template <class _Compare,
388           class _RandomAccessIterator,
389           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
390 inline _LIBCPP_HIDE_FROM_ABI void
391 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
392   // Possible vectorization. With a proper "-march" flag, the following loop
393   // will be compiled into a set of SIMD instructions.
394   _RandomAccessIterator __iter = __lm1;
395   for (int __j = 0; __j < __detail::__block_size;) {
396     bool __comp_result = __comp(*__iter, __pivot);
397     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
398     __j++;
399     --__iter;
400   }
401 }
402 
403 template <class _AlgPolicy,
404           class _Compare,
405           class _RandomAccessIterator,
406           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
407 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
408     _RandomAccessIterator& __first,
409     _RandomAccessIterator& __lm1,
410     _Compare __comp,
411     _ValueType& __pivot,
412     uint64_t& __left_bitset,
413     uint64_t& __right_bitset) {
414   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
415   difference_type __remaining_len = __lm1 - __first + 1;
416   difference_type __l_size;
417   difference_type __r_size;
418   if (__left_bitset == 0 && __right_bitset == 0) {
419     __l_size = __remaining_len / 2;
420     __r_size = __remaining_len - __l_size;
421   } else if (__left_bitset == 0) {
422     // We know at least one side is a full block.
423     __l_size = __remaining_len - __detail::__block_size;
424     __r_size = __detail::__block_size;
425   } else { // if (__right_bitset == 0)
426     __l_size = __detail::__block_size;
427     __r_size = __remaining_len - __detail::__block_size;
428   }
429   // Record the comparison outcomes for the elements currently on the left side.
430   if (__left_bitset == 0) {
431     _RandomAccessIterator __iter = __first;
432     for (int __j = 0; __j < __l_size; __j++) {
433       bool __comp_result = !__comp(*__iter, __pivot);
434       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
435       ++__iter;
436     }
437   }
438   // Record the comparison outcomes for the elements currently on the right
439   // side.
440   if (__right_bitset == 0) {
441     _RandomAccessIterator __iter = __lm1;
442     for (int __j = 0; __j < __r_size; __j++) {
443       bool __comp_result = __comp(*__iter, __pivot);
444       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
445       --__iter;
446     }
447   }
448   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
449   __first += (__left_bitset == 0) ? __l_size : 0;
450   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
451 }
452 
453 template <class _AlgPolicy, class _RandomAccessIterator>
454 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
455     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
456   using _Ops = _IterOps<_AlgPolicy>;
457   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
458   if (__left_bitset) {
459     // Swap within the left side.  Need to find set positions in the reverse
460     // order.
461     while (__left_bitset != 0) {
462       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
463       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
464       _RandomAccessIterator __it = __first + __tz_left;
465       if (__it != __lm1) {
466         _Ops::iter_swap(__it, __lm1);
467       }
468       --__lm1;
469     }
470     __first = __lm1 + difference_type(1);
471   } else if (__right_bitset) {
472     // Swap within the right side.  Need to find set positions in the reverse
473     // order.
474     while (__right_bitset != 0) {
475       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
476       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
477       _RandomAccessIterator __it = __lm1 - __tz_right;
478       if (__it != __first) {
479         _Ops::iter_swap(__it, __first);
480       }
481       ++__first;
482     }
483   }
484 }
485 
486 // Partition [__first, __last) using the comparator __comp.  *__first has the
487 // chosen pivot.  Elements that are equivalent are kept to the left of the
488 // pivot.  Returns the iterator for the pivot and a bool value which is true if
489 // the provided range is already sorted, false otherwise.  We assume that the
490 // length of the range is at least three elements.
491 //
492 // __bitset_partition uses bitsets for storing outcomes of the comparisons
493 // between the pivot and other elements.
494 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
495 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
496 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
497   using _Ops = _IterOps<_AlgPolicy>;
498   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
499   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
500   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
501   const _RandomAccessIterator __begin = __first;            // used for bounds checking, those are not moved around
502   const _RandomAccessIterator __end = __last; (void)__end;  //
503 
504   value_type __pivot(_Ops::__iter_move(__first));
505   // Find the first element greater than the pivot.
506   if (__comp(__pivot, *(__last - difference_type(1)))) {
507     // Not guarded since we know the last element is greater than the pivot.
508     do {
509       ++__first;
510       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
511     } while (!__comp(__pivot, *__first));
512   } else {
513     while (++__first < __last && !__comp(__pivot, *__first)) {
514     }
515   }
516   // Find the last element less than or equal to the pivot.
517   if (__first < __last) {
518     // It will be always guarded because __introsort will do the median-of-three
519     // before calling this.
520     do {
521       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
522       --__last;
523     } while (__comp(__pivot, *__last));
524   }
525   // If the first element greater than the pivot is at or after the
526   // last element less than or equal to the pivot, then we have covered the
527   // entire range without swapping elements.  This implies the range is already
528   // partitioned.
529   bool __already_partitioned = __first >= __last;
530   if (!__already_partitioned) {
531     _Ops::iter_swap(__first, __last);
532     ++__first;
533   }
534 
535   // In [__first, __last) __last is not inclusive. From now on, it uses last
536   // minus one to be inclusive on both sides.
537   _RandomAccessIterator __lm1 = __last - difference_type(1);
538   uint64_t __left_bitset      = 0;
539   uint64_t __right_bitset     = 0;
540 
541   // Reminder: length = __lm1 - __first + 1.
542   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
543     // Record the comparison outcomes for the elements currently on the left
544     // side.
545     if (__left_bitset == 0)
546       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
547     // Record the comparison outcomes for the elements currently on the right
548     // side.
549     if (__right_bitset == 0)
550       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
551     // Swap the elements recorded to be the candidates for swapping in the
552     // bitsets.
553     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
554     // Only advance the iterator if all the elements that need to be moved to
555     // other side were moved.
556     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
557     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
558   }
559   // Now, we have a less-than a block worth of elements on at least one of the
560   // sides.
561   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
562       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
563   // At least one the bitsets would be empty.  For the non-empty one, we need to
564   // properly partition the elements that appear within that bitset.
565   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
566 
567   // Move the pivot to its correct position.
568   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
569   if (__begin != __pivot_pos) {
570     *__begin = _Ops::__iter_move(__pivot_pos);
571   }
572   *__pivot_pos = std::move(__pivot);
573   return std::make_pair(__pivot_pos, __already_partitioned);
574 }
575 
576 // Partition [__first, __last) using the comparator __comp.  *__first has the
577 // chosen pivot.  Elements that are equivalent are kept to the right of the
578 // pivot.  Returns the iterator for the pivot and a bool value which is true if
579 // the provided range is already sorted, false otherwise.  We assume that the
580 // length of the range is at least three elements.
581 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
582 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
583 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
584   using _Ops = _IterOps<_AlgPolicy>;
585   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
586   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
587   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
588   const _RandomAccessIterator __begin = __first;            // used for bounds checking, those are not moved around
589   const _RandomAccessIterator __end = __last; (void)__end;  //
590   value_type __pivot(_Ops::__iter_move(__first));
591   // Find the first element greater or equal to the pivot.  It will be always
592   // guarded because __introsort will do the median-of-three before calling
593   // this.
594   do {
595     ++__first;
596     _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
597   } while (__comp(*__first, __pivot));
598 
599   // Find the last element less than the pivot.
600   if (__begin == __first - difference_type(1)) {
601     while (__first < __last && !__comp(*--__last, __pivot))
602       ;
603   } else {
604     // Guarded.
605     do {
606       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
607       --__last;
608     } while (!__comp(*__last, __pivot));
609   }
610 
611   // If the first element greater than or equal to the pivot is at or after the
612   // last element less than the pivot, then we have covered the entire range
613   // without swapping elements.  This implies the range is already partitioned.
614   bool __already_partitioned = __first >= __last;
615   // Go through the remaining elements.  Swap pairs of elements (one to the
616   // right of the pivot and the other to left of the pivot) that are not on the
617   // correct side of the pivot.
618   while (__first < __last) {
619     _Ops::iter_swap(__first, __last);
620     do {
621       ++__first;
622       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
623     } while (__comp(*__first, __pivot));
624     do {
625       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
626       --__last;
627     } while (!__comp(*__last, __pivot));
628   }
629   // Move the pivot to its correct position.
630   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
631   if (__begin != __pivot_pos) {
632     *__begin = _Ops::__iter_move(__pivot_pos);
633   }
634   *__pivot_pos = std::move(__pivot);
635   return std::make_pair(__pivot_pos, __already_partitioned);
636 }
637 
638 // Similar to the above function.  Elements equivalent to the pivot are put to
639 // the left of the pivot.  Returns the iterator to the pivot element.
640 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
641 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
642 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
643   using _Ops = _IterOps<_AlgPolicy>;
644   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
645   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
646   // TODO(LLVM18): Make __begin const, see https://reviews.llvm.org/D147089#4349748
647   _RandomAccessIterator __begin = __first;                  // used for bounds checking, those are not moved around
648   const _RandomAccessIterator __end = __last; (void)__end;  //
649   value_type __pivot(_Ops::__iter_move(__first));
650   if (__comp(__pivot, *(__last - difference_type(1)))) {
651     // Guarded.
652     do {
653       ++__first;
654       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
655     } while (!__comp(__pivot, *__first));
656   } else {
657     while (++__first < __last && !__comp(__pivot, *__first)) {
658     }
659   }
660 
661   if (__first < __last) {
662     // It will be always guarded because __introsort will do the
663     // median-of-three before calling this.
664     do {
665       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
666       --__last;
667     } while (__comp(__pivot, *__last));
668   }
669   while (__first < __last) {
670     _Ops::iter_swap(__first, __last);
671     do {
672       ++__first;
673       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
674     } while (!__comp(__pivot, *__first));
675     do {
676       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
677       --__last;
678     } while (__comp(__pivot, *__last));
679   }
680   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
681   if (__begin != __pivot_pos) {
682     *__begin = _Ops::__iter_move(__pivot_pos);
683   }
684   *__pivot_pos = std::move(__pivot);
685   return __first;
686 }
687 
688 // The main sorting function.  Implements introsort combined with other ideas:
689 //  - option of using block quick sort for partitioning,
690 //  - guarded and unguarded insertion sort for small lengths,
691 //  - Tuckey's ninther technique for computing the pivot,
692 //  - check on whether partition was not required.
693 // The implementation is partly based on Orson Peters' pattern-defeating
694 // quicksort, published at: <https://github.com/orlp/pdqsort>.
695 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
696 void __introsort(_RandomAccessIterator __first,
697                  _RandomAccessIterator __last,
698                  _Compare __comp,
699                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
700                  bool __leftmost = true) {
701   using _Ops = _IterOps<_AlgPolicy>;
702   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
703   using _Comp_ref = __comp_ref_type<_Compare>;
704   // Upper bound for using insertion sort for sorting.
705   _LIBCPP_CONSTEXPR difference_type __limit = 24;
706   // Lower bound for using Tuckey's ninther technique for median computation.
707   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
708   while (true) {
709     difference_type __len = __last - __first;
710     switch (__len) {
711     case 0:
712     case 1:
713       return;
714     case 2:
715       if (__comp(*--__last, *__first))
716         _Ops::iter_swap(__first, __last);
717       return;
718     case 3:
719       std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
720       return;
721     case 4:
722       std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
723           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
724       return;
725     case 5:
726       std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
727           __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
728           --__last, __comp);
729       return;
730     }
731     // Use insertion sort if the length of the range is below the specified limit.
732     if (__len < __limit) {
733       if (__leftmost) {
734         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
735       } else {
736         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
737       }
738       return;
739     }
740     if (__depth == 0) {
741       // Fallback to heap sort as Introsort suggests.
742       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
743       return;
744     }
745     --__depth;
746     {
747       difference_type __half_len = __len / 2;
748       // Use Tuckey's ninther technique or median of 3 for pivot selection
749       // depending on the length of the range being sorted.
750       if (__len > __ninther_threshold) {
751         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
752         std::__sort3<_AlgPolicy, _Compare>(
753             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
754         std::__sort3<_AlgPolicy, _Compare>(
755             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
756         std::__sort3<_AlgPolicy, _Compare>(
757             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
758         _Ops::iter_swap(__first, __first + __half_len);
759       } else {
760         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
761       }
762     }
763     // The elements to the left of the current iterator range are already
764     // sorted.  If the current iterator range to be sorted is not the
765     // leftmost part of the entire iterator range and the pivot is same as
766     // the highest element in the range to the left, then we know that all
767     // the elements in the range [first, pivot] would be equal to the pivot,
768     // assuming the equal elements are put on the left side when
769     // partitioned.  This also means that we do not need to sort the left
770     // side of the partition.
771     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
772       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
773           __first, __last, _Comp_ref(__comp));
774       continue;
775     }
776     // Use bitset partition only if asked for.
777     auto __ret =
778         _UseBitSetPartition
779             ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
780             : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
781     _RandomAccessIterator __i = __ret.first;
782     // [__first, __i) < *__i and *__i <= [__i+1, __last)
783     // If we were given a perfect partition, see if insertion sort is quick...
784     if (__ret.second) {
785       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
786       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
787         if (__fs)
788           return;
789         __last = __i;
790         continue;
791       } else {
792         if (__fs) {
793           __first = ++__i;
794           continue;
795         }
796       }
797     }
798     // Sort the left partiton recursively and the right partition with tail recursion elimination.
799     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
800         __first, __i, __comp, __depth, __leftmost);
801     __leftmost = false;
802     __first    = ++__i;
803   }
804 }
805 
806 template <typename _Number>
807 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
808   if (__n == 0)
809     return 0;
810   if (sizeof(__n) <= sizeof(unsigned))
811     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
812   if (sizeof(__n) <= sizeof(unsigned long))
813     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
814   if (sizeof(__n) <= sizeof(unsigned long long))
815     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
816 
817   _Number __log2 = 0;
818   while (__n > 1) {
819     __log2++;
820     __n >>= 1;
821   }
822   return __log2;
823 }
824 
825 template <class _Comp, class _RandomAccessIterator>
826 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
827 
828 extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
829 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
830 extern template _LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
831 #endif
832 extern template _LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
833 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
834 extern template _LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
835 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
836 extern template _LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
837 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
838 extern template _LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
839 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
840 extern template _LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
841 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&);
842 extern template _LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
843 extern template _LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
844 extern template _LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
845 
846 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
847 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
848 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
849   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
850   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
851 
852   // Only use bitset partitioning for arithmetic types.  We should also check
853   // that the default comparator is in use so that we are sure that there are no
854   // branches in the comparator.
855   std::__introsort<_AlgPolicy,
856                    _Comp&,
857                    _RandomAccessIterator,
858                    __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(
859       __first, __last, __comp, __depth_limit);
860 }
861 
862 template <class _Type, class... _Options>
863 using __is_any_of = _Or<is_same<_Type, _Options>...>;
864 
865 template <class _Type>
866 using __sort_is_specialized_in_library = __is_any_of<
867     _Type,
868     char,
869 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
870     wchar_t,
871 #endif
872     signed char,
873     unsigned char,
874     short,
875     unsigned short,
876     int,
877     unsigned int,
878     long,
879     unsigned long,
880     long long,
881     unsigned long long,
882     float,
883     double,
884     long double>;
885 
886 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
887 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<_Type>& __comp) {
888   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
889 }
890 
891 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
892 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
893   __less<_Type> __comp;
894   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
895 }
896 
897 #if _LIBCPP_STD_VER >= 14
898 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
899 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
900   __less<_Type> __comp;
901   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
902 }
903 #endif
904 
905 #if _LIBCPP_STD_VER >= 20
906 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
907 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
908   __less<_Type> __comp;
909   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
910 }
911 #endif
912 
913 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
914 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
915 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
916   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
917 
918   if (__libcpp_is_constant_evaluated()) {
919     std::__partial_sort<_AlgPolicy>(
920         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
921   } else {
922     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
923   }
924 }
925 
926 template <class _RandomAccessIterator, class _Comp>
927 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
928 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
929   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
930 }
931 
932 template <class _RandomAccessIterator>
933 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
934 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
935   std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
936 }
937 
938 _LIBCPP_END_NAMESPACE_STD
939 
940 #endif // _LIBCPP___ALGORITHM_SORT_H
941