xref: /llvm-project/libcxx/include/__algorithm/sort.h (revision 7e1ee1e10dc0b77914de714b8f420c487e5705c6)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug>
25 #include <__debug_utils/randomize_range.h>
26 #include <__debug_utils/strict_weak_ordering_check.h>
27 #include <__functional/operations.h>
28 #include <__functional/ranges_operations.h>
29 #include <__iterator/iterator_traits.h>
30 #include <__type_traits/conditional.h>
31 #include <__type_traits/disjunction.h>
32 #include <__type_traits/is_arithmetic.h>
33 #include <__utility/move.h>
34 #include <__utility/pair.h>
35 #include <climits>
36 #include <cstdint>
37 
38 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
39 #  pragma GCC system_header
40 #endif
41 
42 _LIBCPP_BEGIN_NAMESPACE_STD
43 
44 // stable, 2-3 compares, 0-2 swaps
45 
46 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
47 _LIBCPP_HIDE_FROM_ABI
48 _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
49                                                _Compare __c) {
50   using _Ops = _IterOps<_AlgPolicy>;
51 
52   unsigned __r = 0;
53   if (!__c(*__y, *__x))   // if x <= y
54   {
55     if (!__c(*__z, *__y)) // if y <= z
56       return __r;         // x <= y && y <= z
57                           // x <= y && y > z
58     _Ops::iter_swap(__y, __z);     // x <= z && y < z
59     __r = 1;
60     if (__c(*__y, *__x))  // if x > y
61     {
62       _Ops::iter_swap(__x, __y);   // x < y && y <= z
63       __r = 2;
64     }
65     return __r;           // x <= y && y < z
66   }
67   if (__c(*__z, *__y))    // x > y, if y > z
68   {
69     _Ops::iter_swap(__x, __z);     // x < y && y < z
70     __r = 1;
71     return __r;
72   }
73   _Ops::iter_swap(__x, __y);       // x > y && y <= z
74   __r = 1;                // x < y && x <= z
75   if (__c(*__z, *__y))    // if y > z
76   {
77     _Ops::iter_swap(__y, __z);     // x <= y && y < z
78     __r = 2;
79   }
80   return __r;
81 }                         // x <= y && y <= z
82 
83 // stable, 3-6 compares, 0-5 swaps
84 
85 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
86 _LIBCPP_HIDE_FROM_ABI
87 void __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
88                  _Compare __c) {
89   using _Ops   = _IterOps<_AlgPolicy>;
90   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
91   if (__c(*__x4, *__x3)) {
92     _Ops::iter_swap(__x3, __x4);
93     if (__c(*__x3, *__x2)) {
94       _Ops::iter_swap(__x2, __x3);
95       if (__c(*__x2, *__x1)) {
96         _Ops::iter_swap(__x1, __x2);
97       }
98     }
99   }
100 }
101 
102 // stable, 4-10 compares, 0-9 swaps
103 
104 template <class _AlgPolicy, class _Comp, class _ForwardIterator>
105 _LIBCPP_HIDE_FROM_ABI void __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
106                                    _ForwardIterator __x4, _ForwardIterator __x5, _Comp __comp) {
107   using _Ops = _IterOps<_AlgPolicy>;
108 
109   std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
110   if (__comp(*__x5, *__x4)) {
111     _Ops::iter_swap(__x4, __x5);
112     if (__comp(*__x4, *__x3)) {
113       _Ops::iter_swap(__x3, __x4);
114       if (__comp(*__x3, *__x2)) {
115         _Ops::iter_swap(__x2, __x3);
116         if (__comp(*__x2, *__x1)) {
117           _Ops::iter_swap(__x1, __x2);
118         }
119       }
120     }
121   }
122 }
123 
124 // The comparator being simple is a prerequisite for using the branchless optimization.
125 template <class _Tp>
126 struct __is_simple_comparator : false_type {};
127 template <class _Tp>
128 struct __is_simple_comparator<__less<_Tp>&> : true_type {};
129 template <class _Tp>
130 struct __is_simple_comparator<less<_Tp>&> : true_type {};
131 template <class _Tp>
132 struct __is_simple_comparator<greater<_Tp>&> : true_type {};
133 #if _LIBCPP_STD_VER >= 20
134 template <>
135 struct __is_simple_comparator<ranges::less&> : true_type {};
136 template <>
137 struct __is_simple_comparator<ranges::greater&> : true_type {};
138 #endif
139 
140 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
141 using __use_branchless_sort =
142     integral_constant<bool, __libcpp_is_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
143                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
144 
145 namespace __detail {
146 
147 // Size in bits for the bitset in use.
148 enum { __block_size = sizeof(uint64_t) * 8 };
149 
150 } // namespace __detail
151 
152 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
153 template <class _Compare, class _RandomAccessIterator>
154 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
155   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
156   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
157   bool __r = __c(*__x, *__y);
158   value_type __tmp = __r ? *__x : *__y;
159   *__y = __r ? *__y : *__x;
160   *__x = __tmp;
161 }
162 
163 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
164 // under the assumption that *__y and *__z are already ordered.
165 template <class _Compare, class _RandomAccessIterator>
166 inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y,
167                                                           _RandomAccessIterator __z, _Compare __c) {
168   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
169   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
170   bool __r = __c(*__z, *__x);
171   value_type __tmp = __r ? *__z : *__x;
172   *__z = __r ? *__x : *__z;
173   __r = __c(__tmp, *__y);
174   *__x = __r ? *__x : *__y;
175   *__y = __r ? *__y : __tmp;
176 }
177 
178 template <class, class _Compare, class _RandomAccessIterator>
179 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
180 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
181                          _Compare __c) {
182   std::__cond_swap<_Compare>(__x2, __x3, __c);
183   std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
184 }
185 
186 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
187 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
188 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
189                          _Compare __c) {
190   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
191 }
192 
193 template <class, class _Compare, class _RandomAccessIterator>
194 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
195 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
196                          _RandomAccessIterator __x4, _Compare __c) {
197   std::__cond_swap<_Compare>(__x1, __x3, __c);
198   std::__cond_swap<_Compare>(__x2, __x4, __c);
199   std::__cond_swap<_Compare>(__x1, __x2, __c);
200   std::__cond_swap<_Compare>(__x3, __x4, __c);
201   std::__cond_swap<_Compare>(__x2, __x3, __c);
202 }
203 
204 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
205 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
206 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
207                          _RandomAccessIterator __x4, _Compare __c) {
208   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
209 }
210 
211 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
212 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
213 __sort5_maybe_branchless(
214     _RandomAccessIterator __x1,
215     _RandomAccessIterator __x2,
216     _RandomAccessIterator __x3,
217     _RandomAccessIterator __x4,
218     _RandomAccessIterator __x5,
219     _Compare __c) {
220   std::__cond_swap<_Compare>(__x1, __x2, __c);
221   std::__cond_swap<_Compare>(__x4, __x5, __c);
222   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
223   std::__cond_swap<_Compare>(__x2, __x5, __c);
224   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
225   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
226 }
227 
228 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
229 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
230 __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
231                          _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
232   std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
233       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
234 }
235 
236 // Assumes size > 0
237 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
238 _LIBCPP_HIDE_FROM_ABI
239 _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
240                                                     _Compare __comp) {
241   _BidirectionalIterator __lm1 = __last;
242   for (--__lm1; __first != __lm1; ++__first) {
243     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
244     if (__i != __first)
245       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
246   }
247 }
248 
249 // Sort the iterator range [__first, __last) using the comparator __comp using
250 // the insertion sort algorithm.
251 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
252 _LIBCPP_HIDE_FROM_ABI
253 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
254   using _Ops = _IterOps<_AlgPolicy>;
255 
256   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
257   if (__first == __last)
258     return;
259   _BidirectionalIterator __i = __first;
260   for (++__i; __i != __last; ++__i) {
261     _BidirectionalIterator __j = __i;
262     --__j;
263     if (__comp(*__i, *__j)) {
264       value_type __t(_Ops::__iter_move(__i));
265       _BidirectionalIterator __k = __j;
266       __j                        = __i;
267       do {
268         *__j = _Ops::__iter_move(__k);
269         __j  = __k;
270       } while (__j != __first && __comp(__t, *--__k));
271       *__j = std::move(__t);
272     }
273   }
274 }
275 
276 // Sort the iterator range [__first, __last) using the comparator __comp using
277 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
278 // The implementation below has no bounds check (unguarded) for the inner loop.
279 // Assumes that there is an element in the position (__first - 1) and that each
280 // element in the input range is greater or equal to the element at __first - 1.
281 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
282 _LIBCPP_HIDE_FROM_ABI void
283 __insertion_sort_unguarded(_RandomAccessIterator const __first, _RandomAccessIterator __last, _Compare __comp) {
284   using _Ops = _IterOps<_AlgPolicy>;
285   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
286   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
287   if (__first == __last)
288     return;
289   const _RandomAccessIterator __leftmost = __first - difference_type(1); (void)__leftmost; // can be unused when assertions are disabled
290   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
291     _RandomAccessIterator __j = __i - difference_type(1);
292     if (__comp(*__i, *__j)) {
293       value_type __t(_Ops::__iter_move(__i));
294       _RandomAccessIterator __k = __j;
295       __j = __i;
296       do {
297         *__j = _Ops::__iter_move(__k);
298         __j = __k;
299         _LIBCPP_ASSERT(__k != __leftmost, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
300       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
301       *__j = std::move(__t);
302     }
303   }
304 }
305 
306 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
307 _LIBCPP_HIDE_FROM_ABI bool __insertion_sort_incomplete(
308     _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
309   using _Ops = _IterOps<_AlgPolicy>;
310 
311   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
312   switch (__last - __first) {
313   case 0:
314   case 1:
315     return true;
316   case 2:
317     if (__comp(*--__last, *__first))
318       _Ops::iter_swap(__first, __last);
319     return true;
320   case 3:
321     std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
322     return true;
323   case 4:
324     std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
325         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
326     return true;
327   case 5:
328     std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
329         __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
330         --__last, __comp);
331     return true;
332   }
333   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
334   _RandomAccessIterator __j = __first + difference_type(2);
335   std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
336   const unsigned __limit = 8;
337   unsigned __count = 0;
338   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
339     if (__comp(*__i, *__j)) {
340       value_type __t(_Ops::__iter_move(__i));
341       _RandomAccessIterator __k = __j;
342       __j = __i;
343       do {
344         *__j = _Ops::__iter_move(__k);
345         __j = __k;
346       } while (__j != __first && __comp(__t, *--__k));
347       *__j = std::move(__t);
348       if (++__count == __limit)
349         return ++__i == __last;
350     }
351     __j = __i;
352   }
353   return true;
354 }
355 
356 template <class _AlgPolicy, class _RandomAccessIterator>
357 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
358     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
359   using _Ops = _IterOps<_AlgPolicy>;
360   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
361   // Swap one pair on each iteration as long as both bitsets have at least one
362   // element for swapping.
363   while (__left_bitset != 0 && __right_bitset != 0) {
364     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
365     __left_bitset              = __libcpp_blsr(__left_bitset);
366     difference_type __tz_right = __libcpp_ctz(__right_bitset);
367     __right_bitset             = __libcpp_blsr(__right_bitset);
368     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
369   }
370 }
371 
372 template <class _Compare,
373           class _RandomAccessIterator,
374           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
375 inline _LIBCPP_HIDE_FROM_ABI void
376 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
377   // Possible vectorization. With a proper "-march" flag, the following loop
378   // will be compiled into a set of SIMD instructions.
379   _RandomAccessIterator __iter = __first;
380   for (int __j = 0; __j < __detail::__block_size;) {
381     bool __comp_result = !__comp(*__iter, __pivot);
382     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
383     __j++;
384     ++__iter;
385   }
386 }
387 
388 template <class _Compare,
389           class _RandomAccessIterator,
390           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
391 inline _LIBCPP_HIDE_FROM_ABI void
392 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
393   // Possible vectorization. With a proper "-march" flag, the following loop
394   // will be compiled into a set of SIMD instructions.
395   _RandomAccessIterator __iter = __lm1;
396   for (int __j = 0; __j < __detail::__block_size;) {
397     bool __comp_result = __comp(*__iter, __pivot);
398     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
399     __j++;
400     --__iter;
401   }
402 }
403 
404 template <class _AlgPolicy,
405           class _Compare,
406           class _RandomAccessIterator,
407           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
408 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
409     _RandomAccessIterator& __first,
410     _RandomAccessIterator& __lm1,
411     _Compare __comp,
412     _ValueType& __pivot,
413     uint64_t& __left_bitset,
414     uint64_t& __right_bitset) {
415   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
416   difference_type __remaining_len = __lm1 - __first + 1;
417   difference_type __l_size;
418   difference_type __r_size;
419   if (__left_bitset == 0 && __right_bitset == 0) {
420     __l_size = __remaining_len / 2;
421     __r_size = __remaining_len - __l_size;
422   } else if (__left_bitset == 0) {
423     // We know at least one side is a full block.
424     __l_size = __remaining_len - __detail::__block_size;
425     __r_size = __detail::__block_size;
426   } else { // if (__right_bitset == 0)
427     __l_size = __detail::__block_size;
428     __r_size = __remaining_len - __detail::__block_size;
429   }
430   // Record the comparison outcomes for the elements currently on the left side.
431   if (__left_bitset == 0) {
432     _RandomAccessIterator __iter = __first;
433     for (int __j = 0; __j < __l_size; __j++) {
434       bool __comp_result = !__comp(*__iter, __pivot);
435       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
436       ++__iter;
437     }
438   }
439   // Record the comparison outcomes for the elements currently on the right
440   // side.
441   if (__right_bitset == 0) {
442     _RandomAccessIterator __iter = __lm1;
443     for (int __j = 0; __j < __r_size; __j++) {
444       bool __comp_result = __comp(*__iter, __pivot);
445       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
446       --__iter;
447     }
448   }
449   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
450   __first += (__left_bitset == 0) ? __l_size : 0;
451   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
452 }
453 
454 template <class _AlgPolicy, class _RandomAccessIterator>
455 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
456     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
457   using _Ops = _IterOps<_AlgPolicy>;
458   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
459   if (__left_bitset) {
460     // Swap within the left side.  Need to find set positions in the reverse
461     // order.
462     while (__left_bitset != 0) {
463       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
464       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
465       _RandomAccessIterator __it = __first + __tz_left;
466       if (__it != __lm1) {
467         _Ops::iter_swap(__it, __lm1);
468       }
469       --__lm1;
470     }
471     __first = __lm1 + difference_type(1);
472   } else if (__right_bitset) {
473     // Swap within the right side.  Need to find set positions in the reverse
474     // order.
475     while (__right_bitset != 0) {
476       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
477       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
478       _RandomAccessIterator __it = __lm1 - __tz_right;
479       if (__it != __first) {
480         _Ops::iter_swap(__it, __first);
481       }
482       ++__first;
483     }
484   }
485 }
486 
487 // Partition [__first, __last) using the comparator __comp.  *__first has the
488 // chosen pivot.  Elements that are equivalent are kept to the left of the
489 // pivot.  Returns the iterator for the pivot and a bool value which is true if
490 // the provided range is already sorted, false otherwise.  We assume that the
491 // length of the range is at least three elements.
492 //
493 // __bitset_partition uses bitsets for storing outcomes of the comparisons
494 // between the pivot and other elements.
495 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
496 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
497 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
498   using _Ops = _IterOps<_AlgPolicy>;
499   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
500   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
501   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
502   const _RandomAccessIterator __begin = __first;            // used for bounds checking, those are not moved around
503   const _RandomAccessIterator __end = __last; (void)__end;  //
504 
505   value_type __pivot(_Ops::__iter_move(__first));
506   // Find the first element greater than the pivot.
507   if (__comp(__pivot, *(__last - difference_type(1)))) {
508     // Not guarded since we know the last element is greater than the pivot.
509     do {
510       ++__first;
511       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
512     } while (!__comp(__pivot, *__first));
513   } else {
514     while (++__first < __last && !__comp(__pivot, *__first)) {
515     }
516   }
517   // Find the last element less than or equal to the pivot.
518   if (__first < __last) {
519     // It will be always guarded because __introsort will do the median-of-three
520     // before calling this.
521     do {
522       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
523       --__last;
524     } while (__comp(__pivot, *__last));
525   }
526   // If the first element greater than the pivot is at or after the
527   // last element less than or equal to the pivot, then we have covered the
528   // entire range without swapping elements.  This implies the range is already
529   // partitioned.
530   bool __already_partitioned = __first >= __last;
531   if (!__already_partitioned) {
532     _Ops::iter_swap(__first, __last);
533     ++__first;
534   }
535 
536   // In [__first, __last) __last is not inclusive. From now on, it uses last
537   // minus one to be inclusive on both sides.
538   _RandomAccessIterator __lm1 = __last - difference_type(1);
539   uint64_t __left_bitset      = 0;
540   uint64_t __right_bitset     = 0;
541 
542   // Reminder: length = __lm1 - __first + 1.
543   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
544     // Record the comparison outcomes for the elements currently on the left
545     // side.
546     if (__left_bitset == 0)
547       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
548     // Record the comparison outcomes for the elements currently on the right
549     // side.
550     if (__right_bitset == 0)
551       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
552     // Swap the elements recorded to be the candidates for swapping in the
553     // bitsets.
554     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
555     // Only advance the iterator if all the elements that need to be moved to
556     // other side were moved.
557     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
558     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
559   }
560   // Now, we have a less-than a block worth of elements on at least one of the
561   // sides.
562   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
563       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
564   // At least one the bitsets would be empty.  For the non-empty one, we need to
565   // properly partition the elements that appear within that bitset.
566   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
567 
568   // Move the pivot to its correct position.
569   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
570   if (__begin != __pivot_pos) {
571     *__begin = _Ops::__iter_move(__pivot_pos);
572   }
573   *__pivot_pos = std::move(__pivot);
574   return std::make_pair(__pivot_pos, __already_partitioned);
575 }
576 
577 // Partition [__first, __last) using the comparator __comp.  *__first has the
578 // chosen pivot.  Elements that are equivalent are kept to the right of the
579 // pivot.  Returns the iterator for the pivot and a bool value which is true if
580 // the provided range is already sorted, false otherwise.  We assume that the
581 // length of the range is at least three elements.
582 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
583 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
584 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
585   using _Ops = _IterOps<_AlgPolicy>;
586   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
587   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
588   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
589   const _RandomAccessIterator __begin = __first;            // used for bounds checking, those are not moved around
590   const _RandomAccessIterator __end = __last; (void)__end;  //
591   value_type __pivot(_Ops::__iter_move(__first));
592   // Find the first element greater or equal to the pivot.  It will be always
593   // guarded because __introsort will do the median-of-three before calling
594   // this.
595   do {
596     ++__first;
597     _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
598   } while (__comp(*__first, __pivot));
599 
600   // Find the last element less than the pivot.
601   if (__begin == __first - difference_type(1)) {
602     while (__first < __last && !__comp(*--__last, __pivot))
603       ;
604   } else {
605     // Guarded.
606     do {
607       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
608       --__last;
609     } while (!__comp(*__last, __pivot));
610   }
611 
612   // If the first element greater than or equal to the pivot is at or after the
613   // last element less than the pivot, then we have covered the entire range
614   // without swapping elements.  This implies the range is already partitioned.
615   bool __already_partitioned = __first >= __last;
616   // Go through the remaining elements.  Swap pairs of elements (one to the
617   // right of the pivot and the other to left of the pivot) that are not on the
618   // correct side of the pivot.
619   while (__first < __last) {
620     _Ops::iter_swap(__first, __last);
621     do {
622       ++__first;
623       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
624     } while (__comp(*__first, __pivot));
625     do {
626       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
627       --__last;
628     } while (!__comp(*__last, __pivot));
629   }
630   // Move the pivot to its correct position.
631   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
632   if (__begin != __pivot_pos) {
633     *__begin = _Ops::__iter_move(__pivot_pos);
634   }
635   *__pivot_pos = std::move(__pivot);
636   return std::make_pair(__pivot_pos, __already_partitioned);
637 }
638 
639 // Similar to the above function.  Elements equivalent to the pivot are put to
640 // the left of the pivot.  Returns the iterator to the pivot element.
641 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
642 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
643 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
644   using _Ops = _IterOps<_AlgPolicy>;
645   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
646   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
647   // TODO(LLVM18): Make __begin const, see https://reviews.llvm.org/D147089#4349748
648   _RandomAccessIterator __begin = __first;                  // used for bounds checking, those are not moved around
649   const _RandomAccessIterator __end = __last; (void)__end;  //
650   value_type __pivot(_Ops::__iter_move(__first));
651   if (__comp(__pivot, *(__last - difference_type(1)))) {
652     // Guarded.
653     do {
654       ++__first;
655       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
656     } while (!__comp(__pivot, *__first));
657   } else {
658     while (++__first < __last && !__comp(__pivot, *__first)) {
659     }
660   }
661 
662   if (__first < __last) {
663     // It will be always guarded because __introsort will do the
664     // median-of-three before calling this.
665     do {
666       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
667       --__last;
668     } while (__comp(__pivot, *__last));
669   }
670   while (__first < __last) {
671     _Ops::iter_swap(__first, __last);
672     do {
673       ++__first;
674       _LIBCPP_ASSERT(__first != __end, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
675     } while (!__comp(__pivot, *__first));
676     do {
677       _LIBCPP_ASSERT(__last != __begin, "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
678       --__last;
679     } while (__comp(__pivot, *__last));
680   }
681   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
682   if (__begin != __pivot_pos) {
683     *__begin = _Ops::__iter_move(__pivot_pos);
684   }
685   *__pivot_pos = std::move(__pivot);
686   return __first;
687 }
688 
689 // The main sorting function.  Implements introsort combined with other ideas:
690 //  - option of using block quick sort for partitioning,
691 //  - guarded and unguarded insertion sort for small lengths,
692 //  - Tuckey's ninther technique for computing the pivot,
693 //  - check on whether partition was not required.
694 // The implementation is partly based on Orson Peters' pattern-defeating
695 // quicksort, published at: <https://github.com/orlp/pdqsort>.
696 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
697 void __introsort(_RandomAccessIterator __first,
698                  _RandomAccessIterator __last,
699                  _Compare __comp,
700                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
701                  bool __leftmost = true) {
702   using _Ops = _IterOps<_AlgPolicy>;
703   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
704   using _Comp_ref = __comp_ref_type<_Compare>;
705   // Upper bound for using insertion sort for sorting.
706   _LIBCPP_CONSTEXPR difference_type __limit = 24;
707   // Lower bound for using Tuckey's ninther technique for median computation.
708   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
709   while (true) {
710     difference_type __len = __last - __first;
711     switch (__len) {
712     case 0:
713     case 1:
714       return;
715     case 2:
716       if (__comp(*--__last, *__first))
717         _Ops::iter_swap(__first, __last);
718       return;
719     case 3:
720       std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
721       return;
722     case 4:
723       std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
724           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
725       return;
726     case 5:
727       std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
728           __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
729           --__last, __comp);
730       return;
731     }
732     // Use insertion sort if the length of the range is below the specified limit.
733     if (__len < __limit) {
734       if (__leftmost) {
735         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
736       } else {
737         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
738       }
739       return;
740     }
741     if (__depth == 0) {
742       // Fallback to heap sort as Introsort suggests.
743       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
744       return;
745     }
746     --__depth;
747     {
748       difference_type __half_len = __len / 2;
749       // Use Tuckey's ninther technique or median of 3 for pivot selection
750       // depending on the length of the range being sorted.
751       if (__len > __ninther_threshold) {
752         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
753         std::__sort3<_AlgPolicy, _Compare>(
754             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
755         std::__sort3<_AlgPolicy, _Compare>(
756             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
757         std::__sort3<_AlgPolicy, _Compare>(
758             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
759         _Ops::iter_swap(__first, __first + __half_len);
760       } else {
761         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
762       }
763     }
764     // The elements to the left of the current iterator range are already
765     // sorted.  If the current iterator range to be sorted is not the
766     // leftmost part of the entire iterator range and the pivot is same as
767     // the highest element in the range to the left, then we know that all
768     // the elements in the range [first, pivot] would be equal to the pivot,
769     // assuming the equal elements are put on the left side when
770     // partitioned.  This also means that we do not need to sort the left
771     // side of the partition.
772     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
773       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
774           __first, __last, _Comp_ref(__comp));
775       continue;
776     }
777     // Use bitset partition only if asked for.
778     auto __ret =
779         _UseBitSetPartition
780             ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
781             : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
782     _RandomAccessIterator __i = __ret.first;
783     // [__first, __i) < *__i and *__i <= [__i+1, __last)
784     // If we were given a perfect partition, see if insertion sort is quick...
785     if (__ret.second) {
786       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
787       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
788         if (__fs)
789           return;
790         __last = __i;
791         continue;
792       } else {
793         if (__fs) {
794           __first = ++__i;
795           continue;
796         }
797       }
798     }
799     // Sort the left partiton recursively and the right partition with tail recursion elimination.
800     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
801         __first, __i, __comp, __depth, __leftmost);
802     __leftmost = false;
803     __first    = ++__i;
804   }
805 }
806 
807 template <typename _Number>
808 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
809   if (__n == 0)
810     return 0;
811   if (sizeof(__n) <= sizeof(unsigned))
812     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
813   if (sizeof(__n) <= sizeof(unsigned long))
814     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
815   if (sizeof(__n) <= sizeof(unsigned long long))
816     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
817 
818   _Number __log2 = 0;
819   while (__n > 1) {
820     __log2++;
821     __n >>= 1;
822   }
823   return __log2;
824 }
825 
826 template <class _Comp, class _RandomAccessIterator>
827 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
828 
829 extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
830 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
831 extern template _LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
832 #endif
833 extern template _LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
834 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
835 extern template _LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
836 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
837 extern template _LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
838 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
839 extern template _LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
840 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
841 extern template _LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
842 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&);
843 extern template _LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
844 extern template _LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
845 extern template _LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
846 
847 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
848 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
849 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
850   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
851   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
852 
853   // Only use bitset partitioning for arithmetic types.  We should also check
854   // that the default comparator is in use so that we are sure that there are no
855   // branches in the comparator.
856   std::__introsort<_AlgPolicy,
857                    _Comp&,
858                    _RandomAccessIterator,
859                    __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(
860       __first, __last, __comp, __depth_limit);
861 }
862 
863 template <class _Type, class... _Options>
864 using __is_any_of = _Or<is_same<_Type, _Options>...>;
865 
866 template <class _Type>
867 using __sort_is_specialized_in_library = __is_any_of<
868     _Type,
869     char,
870 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
871     wchar_t,
872 #endif
873     signed char,
874     unsigned char,
875     short,
876     unsigned short,
877     int,
878     unsigned int,
879     long,
880     unsigned long,
881     long long,
882     unsigned long long,
883     float,
884     double,
885     long double>;
886 
887 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
888 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<_Type>& __comp) {
889   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
890 }
891 
892 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
893 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
894   __less<_Type> __comp;
895   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
896 }
897 
898 #if _LIBCPP_STD_VER >= 14
899 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
900 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
901   __less<_Type> __comp;
902   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
903 }
904 #endif
905 
906 #if _LIBCPP_STD_VER >= 20
907 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
908 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
909   __less<_Type> __comp;
910   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
911 }
912 #endif
913 
914 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
915 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
916 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
917   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
918 
919   if (__libcpp_is_constant_evaluated()) {
920     std::__partial_sort<_AlgPolicy>(
921         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
922   } else {
923     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
924   }
925   std::__check_strict_weak_ordering_sorted(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
926 }
927 
928 template <class _RandomAccessIterator, class _Comp>
929 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
930 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
931   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
932 }
933 
934 template <class _RandomAccessIterator>
935 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
936 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
937   std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
938 }
939 
940 _LIBCPP_END_NAMESPACE_STD
941 
942 #endif // _LIBCPP___ALGORITHM_SORT_H
943