xref: /llvm-project/libcxx/include/__algorithm/sort.h (revision d4ffccfce103b01401b8a9222e373f2d404f8439)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug_utils/randomize_range.h>
25 #include <__debug_utils/strict_weak_ordering_check.h>
26 #include <__functional/operations.h>
27 #include <__functional/ranges_operations.h>
28 #include <__iterator/iterator_traits.h>
29 #include <__type_traits/conditional.h>
30 #include <__type_traits/desugars_to.h>
31 #include <__type_traits/disjunction.h>
32 #include <__type_traits/is_arithmetic.h>
33 #include <__type_traits/is_constant_evaluated.h>
34 #include <__type_traits/is_trivially_copyable.h>
35 #include <__utility/move.h>
36 #include <__utility/pair.h>
37 #include <climits>
38 #include <cstdint>
39 
40 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
41 #  pragma GCC system_header
42 #endif
43 
44 _LIBCPP_PUSH_MACROS
45 #include <__undef_macros>
46 
47 _LIBCPP_BEGIN_NAMESPACE_STD
48 
49 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
50 inline const bool __use_branchless_sort =
51     __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value &&
52     (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> ||
53      __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>);
54 
55 namespace __detail {
56 
57 // Size in bits for the bitset in use.
58 enum { __block_size = sizeof(uint64_t) * 8 };
59 
60 } // namespace __detail
61 
62 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
63 template <class _Compare, class _RandomAccessIterator>
64 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
65 __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
66   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
67   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
68   bool __r         = __c(*__x, *__y);
69   value_type __tmp = __r ? *__x : *__y;
70   *__y             = __r ? *__y : *__x;
71   *__x             = __tmp;
72   return !__r;
73 }
74 
75 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
76 // under the assumption that *__y and *__z are already ordered.
77 template <class _Compare, class _RandomAccessIterator>
78 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
79 __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
80   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
81   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
82   bool __r1        = __c(*__z, *__x);
83   value_type __tmp = __r1 ? *__z : *__x;
84   *__z             = __r1 ? *__x : *__z;
85   bool __r2        = __c(__tmp, *__y);
86   *__x             = __r2 ? *__x : *__y;
87   *__y             = __r2 ? *__y : __tmp;
88   return !__r1 || !__r2;
89 }
90 
91 // stable, 2-3 compares, 0-2 swaps
92 
93 template <class,
94           class _Compare,
95           class _RandomAccessIterator,
96           __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
97 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
98 __sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
99   bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c);
100   bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
101   return __swapped1 || __swapped2;
102 }
103 
104 template <class _AlgPolicy,
105           class _Compare,
106           class _RandomAccessIterator,
107           __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
108 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
109 __sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
110   using _Ops = _IterOps<_AlgPolicy>;
111 
112   if (!__c(*__y, *__x)) // if x <= y
113   {
114     if (!__c(*__z, *__y))        // if y <= z
115       return false;              // x <= y && y <= z
116                                  // x <= y && y > z
117     _Ops::iter_swap(__y, __z);   // x <= z && y < z
118     if (__c(*__y, *__x))         // if x > y
119       _Ops::iter_swap(__x, __y); // x < y && y <= z
120     return true;                 // x <= y && y < z
121   }
122   if (__c(*__z, *__y)) // x > y, if y > z
123   {
124     _Ops::iter_swap(__x, __z); // x < y && y < z
125     return true;
126   }
127   _Ops::iter_swap(__x, __y); // x > y && y <= z
128   // x < y && x <= z
129   if (__c(*__z, *__y))         // if y > z
130     _Ops::iter_swap(__y, __z); // x <= y && y < z
131   return true;
132 } // x <= y && y <= z
133 
134 // stable, 3-6 compares, 0-5 swaps
135 
136 template <class,
137           class _Compare,
138           class _RandomAccessIterator,
139           __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
140 inline _LIBCPP_HIDE_FROM_ABI void
141 __sort4(_RandomAccessIterator __x1,
142         _RandomAccessIterator __x2,
143         _RandomAccessIterator __x3,
144         _RandomAccessIterator __x4,
145         _Compare __c) {
146   std::__cond_swap<_Compare>(__x1, __x3, __c);
147   std::__cond_swap<_Compare>(__x2, __x4, __c);
148   std::__cond_swap<_Compare>(__x1, __x2, __c);
149   std::__cond_swap<_Compare>(__x3, __x4, __c);
150   std::__cond_swap<_Compare>(__x2, __x3, __c);
151 }
152 
153 template <class _AlgPolicy,
154           class _Compare,
155           class _RandomAccessIterator,
156           __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
157 inline _LIBCPP_HIDE_FROM_ABI void
158 __sort4(_RandomAccessIterator __x1,
159         _RandomAccessIterator __x2,
160         _RandomAccessIterator __x3,
161         _RandomAccessIterator __x4,
162         _Compare __c) {
163   using _Ops = _IterOps<_AlgPolicy>;
164   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
165   if (__c(*__x4, *__x3)) {
166     _Ops::iter_swap(__x3, __x4);
167     if (__c(*__x3, *__x2)) {
168       _Ops::iter_swap(__x2, __x3);
169       if (__c(*__x2, *__x1)) {
170         _Ops::iter_swap(__x1, __x2);
171       }
172     }
173   }
174 }
175 
176 // stable, 4-10 compares, 0-9 swaps
177 
178 template <class _AlgPolicy,
179           class _Compare,
180           class _RandomAccessIterator,
181           __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
182 inline _LIBCPP_HIDE_FROM_ABI void
183 __sort5(_RandomAccessIterator __x1,
184         _RandomAccessIterator __x2,
185         _RandomAccessIterator __x3,
186         _RandomAccessIterator __x4,
187         _RandomAccessIterator __x5,
188         _Compare __c) {
189   std::__cond_swap<_Compare>(__x1, __x2, __c);
190   std::__cond_swap<_Compare>(__x4, __x5, __c);
191   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
192   std::__cond_swap<_Compare>(__x2, __x5, __c);
193   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
194   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
195 }
196 
197 template <class _AlgPolicy,
198           class _Compare,
199           class _RandomAccessIterator,
200           __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
201 inline _LIBCPP_HIDE_FROM_ABI void
202 __sort5(_RandomAccessIterator __x1,
203         _RandomAccessIterator __x2,
204         _RandomAccessIterator __x3,
205         _RandomAccessIterator __x4,
206         _RandomAccessIterator __x5,
207         _Compare __comp) {
208   using _Ops = _IterOps<_AlgPolicy>;
209 
210   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp);
211   if (__comp(*__x5, *__x4)) {
212     _Ops::iter_swap(__x4, __x5);
213     if (__comp(*__x4, *__x3)) {
214       _Ops::iter_swap(__x3, __x4);
215       if (__comp(*__x3, *__x2)) {
216         _Ops::iter_swap(__x2, __x3);
217         if (__comp(*__x2, *__x1)) {
218           _Ops::iter_swap(__x1, __x2);
219         }
220       }
221     }
222   }
223 }
224 
225 // Assumes size > 0
226 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
227 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
228 __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
229   _BidirectionalIterator __lm1 = __last;
230   for (--__lm1; __first != __lm1; ++__first) {
231     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
232     if (__i != __first)
233       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
234   }
235 }
236 
237 // Sort the iterator range [__first, __last) using the comparator __comp using
238 // the insertion sort algorithm.
239 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
240 _LIBCPP_HIDE_FROM_ABI void
241 __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
242   using _Ops = _IterOps<_AlgPolicy>;
243 
244   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
245   if (__first == __last)
246     return;
247   _BidirectionalIterator __i = __first;
248   for (++__i; __i != __last; ++__i) {
249     _BidirectionalIterator __j = __i;
250     --__j;
251     if (__comp(*__i, *__j)) {
252       value_type __t(_Ops::__iter_move(__i));
253       _BidirectionalIterator __k = __j;
254       __j                        = __i;
255       do {
256         *__j = _Ops::__iter_move(__k);
257         __j  = __k;
258       } while (__j != __first && __comp(__t, *--__k));
259       *__j = std::move(__t);
260     }
261   }
262 }
263 
264 // Sort the iterator range [__first, __last) using the comparator __comp using
265 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
266 // The implementation below has no bounds check (unguarded) for the inner loop.
267 // Assumes that there is an element in the position (__first - 1) and that each
268 // element in the input range is greater or equal to the element at __first - 1.
269 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
270 _LIBCPP_HIDE_FROM_ABI void
271 __insertion_sort_unguarded(_RandomAccessIterator const __first, _RandomAccessIterator __last, _Compare __comp) {
272   using _Ops = _IterOps<_AlgPolicy>;
273   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
274   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
275   if (__first == __last)
276     return;
277   const _RandomAccessIterator __leftmost = __first - difference_type(1);
278   (void)__leftmost; // can be unused when assertions are disabled
279   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
280     _RandomAccessIterator __j = __i - difference_type(1);
281     if (__comp(*__i, *__j)) {
282       value_type __t(_Ops::__iter_move(__i));
283       _RandomAccessIterator __k = __j;
284       __j                       = __i;
285       do {
286         *__j = _Ops::__iter_move(__k);
287         __j  = __k;
288         _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
289             __k != __leftmost,
290             "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
291       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
292       *__j = std::move(__t);
293     }
294   }
295 }
296 
297 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
298 _LIBCPP_HIDE_FROM_ABI bool
299 __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
300   using _Ops = _IterOps<_AlgPolicy>;
301 
302   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
303   switch (__last - __first) {
304   case 0:
305   case 1:
306     return true;
307   case 2:
308     if (__comp(*--__last, *__first))
309       _Ops::iter_swap(__first, __last);
310     return true;
311   case 3:
312     std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
313     return true;
314   case 4:
315     std::__sort4<_AlgPolicy, _Comp>(
316         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
317     return true;
318   case 5:
319     std::__sort5<_AlgPolicy, _Comp>(
320         __first,
321         __first + difference_type(1),
322         __first + difference_type(2),
323         __first + difference_type(3),
324         --__last,
325         __comp);
326     return true;
327   }
328   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
329   _RandomAccessIterator __j = __first + difference_type(2);
330   std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
331   const unsigned __limit = 8;
332   unsigned __count       = 0;
333   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
334     if (__comp(*__i, *__j)) {
335       value_type __t(_Ops::__iter_move(__i));
336       _RandomAccessIterator __k = __j;
337       __j                       = __i;
338       do {
339         *__j = _Ops::__iter_move(__k);
340         __j  = __k;
341       } while (__j != __first && __comp(__t, *--__k));
342       *__j = std::move(__t);
343       if (++__count == __limit)
344         return ++__i == __last;
345     }
346     __j = __i;
347   }
348   return true;
349 }
350 
351 template <class _AlgPolicy, class _RandomAccessIterator>
352 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
353     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
354   using _Ops = _IterOps<_AlgPolicy>;
355   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
356   // Swap one pair on each iteration as long as both bitsets have at least one
357   // element for swapping.
358   while (__left_bitset != 0 && __right_bitset != 0) {
359     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
360     __left_bitset              = __libcpp_blsr(__left_bitset);
361     difference_type __tz_right = __libcpp_ctz(__right_bitset);
362     __right_bitset             = __libcpp_blsr(__right_bitset);
363     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
364   }
365 }
366 
367 template <class _Compare,
368           class _RandomAccessIterator,
369           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
370 inline _LIBCPP_HIDE_FROM_ABI void
371 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
372   // Possible vectorization. With a proper "-march" flag, the following loop
373   // will be compiled into a set of SIMD instructions.
374   _RandomAccessIterator __iter = __first;
375   for (int __j = 0; __j < __detail::__block_size;) {
376     bool __comp_result = !__comp(*__iter, __pivot);
377     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
378     __j++;
379     ++__iter;
380   }
381 }
382 
383 template <class _Compare,
384           class _RandomAccessIterator,
385           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
386 inline _LIBCPP_HIDE_FROM_ABI void
387 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
388   // Possible vectorization. With a proper "-march" flag, the following loop
389   // will be compiled into a set of SIMD instructions.
390   _RandomAccessIterator __iter = __lm1;
391   for (int __j = 0; __j < __detail::__block_size;) {
392     bool __comp_result = __comp(*__iter, __pivot);
393     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
394     __j++;
395     --__iter;
396   }
397 }
398 
399 template <class _AlgPolicy,
400           class _Compare,
401           class _RandomAccessIterator,
402           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
403 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
404     _RandomAccessIterator& __first,
405     _RandomAccessIterator& __lm1,
406     _Compare __comp,
407     _ValueType& __pivot,
408     uint64_t& __left_bitset,
409     uint64_t& __right_bitset) {
410   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
411   difference_type __remaining_len = __lm1 - __first + 1;
412   difference_type __l_size;
413   difference_type __r_size;
414   if (__left_bitset == 0 && __right_bitset == 0) {
415     __l_size = __remaining_len / 2;
416     __r_size = __remaining_len - __l_size;
417   } else if (__left_bitset == 0) {
418     // We know at least one side is a full block.
419     __l_size = __remaining_len - __detail::__block_size;
420     __r_size = __detail::__block_size;
421   } else { // if (__right_bitset == 0)
422     __l_size = __detail::__block_size;
423     __r_size = __remaining_len - __detail::__block_size;
424   }
425   // Record the comparison outcomes for the elements currently on the left side.
426   if (__left_bitset == 0) {
427     _RandomAccessIterator __iter = __first;
428     for (int __j = 0; __j < __l_size; __j++) {
429       bool __comp_result = !__comp(*__iter, __pivot);
430       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
431       ++__iter;
432     }
433   }
434   // Record the comparison outcomes for the elements currently on the right
435   // side.
436   if (__right_bitset == 0) {
437     _RandomAccessIterator __iter = __lm1;
438     for (int __j = 0; __j < __r_size; __j++) {
439       bool __comp_result = __comp(*__iter, __pivot);
440       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
441       --__iter;
442     }
443   }
444   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
445   __first += (__left_bitset == 0) ? __l_size : 0;
446   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
447 }
448 
449 template <class _AlgPolicy, class _RandomAccessIterator>
450 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
451     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
452   using _Ops = _IterOps<_AlgPolicy>;
453   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
454   if (__left_bitset) {
455     // Swap within the left side.  Need to find set positions in the reverse
456     // order.
457     while (__left_bitset != 0) {
458       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
459       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
460       _RandomAccessIterator __it = __first + __tz_left;
461       if (__it != __lm1) {
462         _Ops::iter_swap(__it, __lm1);
463       }
464       --__lm1;
465     }
466     __first = __lm1 + difference_type(1);
467   } else if (__right_bitset) {
468     // Swap within the right side.  Need to find set positions in the reverse
469     // order.
470     while (__right_bitset != 0) {
471       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
472       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
473       _RandomAccessIterator __it = __lm1 - __tz_right;
474       if (__it != __first) {
475         _Ops::iter_swap(__it, __first);
476       }
477       ++__first;
478     }
479   }
480 }
481 
482 // Partition [__first, __last) using the comparator __comp.  *__first has the
483 // chosen pivot.  Elements that are equivalent are kept to the left of the
484 // pivot.  Returns the iterator for the pivot and a bool value which is true if
485 // the provided range is already sorted, false otherwise.  We assume that the
486 // length of the range is at least three elements.
487 //
488 // __bitset_partition uses bitsets for storing outcomes of the comparisons
489 // between the pivot and other elements.
490 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
491 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
492 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
493   using _Ops = _IterOps<_AlgPolicy>;
494   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
495   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
496   _LIBCPP_ASSERT_INTERNAL(__last - __first >= difference_type(3), "");
497   const _RandomAccessIterator __begin = __first; // used for bounds checking, those are not moved around
498   const _RandomAccessIterator __end   = __last;
499   (void)__end; //
500 
501   value_type __pivot(_Ops::__iter_move(__first));
502   // Find the first element greater than the pivot.
503   if (__comp(__pivot, *(__last - difference_type(1)))) {
504     // Not guarded since we know the last element is greater than the pivot.
505     do {
506       ++__first;
507       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
508           __first != __end,
509           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
510     } while (!__comp(__pivot, *__first));
511   } else {
512     while (++__first < __last && !__comp(__pivot, *__first)) {
513     }
514   }
515   // Find the last element less than or equal to the pivot.
516   if (__first < __last) {
517     // It will be always guarded because __introsort will do the median-of-three
518     // before calling this.
519     do {
520       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
521           __last != __begin,
522           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
523       --__last;
524     } while (__comp(__pivot, *__last));
525   }
526   // If the first element greater than the pivot is at or after the
527   // last element less than or equal to the pivot, then we have covered the
528   // entire range without swapping elements.  This implies the range is already
529   // partitioned.
530   bool __already_partitioned = __first >= __last;
531   if (!__already_partitioned) {
532     _Ops::iter_swap(__first, __last);
533     ++__first;
534   }
535 
536   // In [__first, __last) __last is not inclusive. From now on, it uses last
537   // minus one to be inclusive on both sides.
538   _RandomAccessIterator __lm1 = __last - difference_type(1);
539   uint64_t __left_bitset      = 0;
540   uint64_t __right_bitset     = 0;
541 
542   // Reminder: length = __lm1 - __first + 1.
543   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
544     // Record the comparison outcomes for the elements currently on the left
545     // side.
546     if (__left_bitset == 0)
547       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
548     // Record the comparison outcomes for the elements currently on the right
549     // side.
550     if (__right_bitset == 0)
551       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
552     // Swap the elements recorded to be the candidates for swapping in the
553     // bitsets.
554     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
555     // Only advance the iterator if all the elements that need to be moved to
556     // other side were moved.
557     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
558     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
559   }
560   // Now, we have a less-than a block worth of elements on at least one of the
561   // sides.
562   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
563       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
564   // At least one the bitsets would be empty.  For the non-empty one, we need to
565   // properly partition the elements that appear within that bitset.
566   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
567 
568   // Move the pivot to its correct position.
569   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
570   if (__begin != __pivot_pos) {
571     *__begin = _Ops::__iter_move(__pivot_pos);
572   }
573   *__pivot_pos = std::move(__pivot);
574   return std::make_pair(__pivot_pos, __already_partitioned);
575 }
576 
577 // Partition [__first, __last) using the comparator __comp.  *__first has the
578 // chosen pivot.  Elements that are equivalent are kept to the right of the
579 // pivot.  Returns the iterator for the pivot and a bool value which is true if
580 // the provided range is already sorted, false otherwise.  We assume that the
581 // length of the range is at least three elements.
582 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
583 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
584 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
585   using _Ops = _IterOps<_AlgPolicy>;
586   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
587   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
588   _LIBCPP_ASSERT_INTERNAL(__last - __first >= difference_type(3), "");
589   const _RandomAccessIterator __begin = __first; // used for bounds checking, those are not moved around
590   const _RandomAccessIterator __end   = __last;
591   (void)__end; //
592   value_type __pivot(_Ops::__iter_move(__first));
593   // Find the first element greater or equal to the pivot.  It will be always
594   // guarded because __introsort will do the median-of-three before calling
595   // this.
596   do {
597     ++__first;
598     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
599         __first != __end,
600         "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
601   } while (__comp(*__first, __pivot));
602 
603   // Find the last element less than the pivot.
604   if (__begin == __first - difference_type(1)) {
605     while (__first < __last && !__comp(*--__last, __pivot))
606       ;
607   } else {
608     // Guarded.
609     do {
610       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
611           __last != __begin,
612           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
613       --__last;
614     } while (!__comp(*__last, __pivot));
615   }
616 
617   // If the first element greater than or equal to the pivot is at or after the
618   // last element less than the pivot, then we have covered the entire range
619   // without swapping elements.  This implies the range is already partitioned.
620   bool __already_partitioned = __first >= __last;
621   // Go through the remaining elements.  Swap pairs of elements (one to the
622   // right of the pivot and the other to left of the pivot) that are not on the
623   // correct side of the pivot.
624   while (__first < __last) {
625     _Ops::iter_swap(__first, __last);
626     do {
627       ++__first;
628       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
629           __first != __end,
630           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
631     } while (__comp(*__first, __pivot));
632     do {
633       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
634           __last != __begin,
635           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
636       --__last;
637     } while (!__comp(*__last, __pivot));
638   }
639   // Move the pivot to its correct position.
640   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
641   if (__begin != __pivot_pos) {
642     *__begin = _Ops::__iter_move(__pivot_pos);
643   }
644   *__pivot_pos = std::move(__pivot);
645   return std::make_pair(__pivot_pos, __already_partitioned);
646 }
647 
648 // Similar to the above function.  Elements equivalent to the pivot are put to
649 // the left of the pivot.  Returns the iterator to the pivot element.
650 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
651 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
652 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
653   using _Ops = _IterOps<_AlgPolicy>;
654   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
655   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
656   const _RandomAccessIterator __begin = __first; // used for bounds checking, those are not moved around
657   const _RandomAccessIterator __end   = __last;
658   (void)__end; //
659   value_type __pivot(_Ops::__iter_move(__first));
660   if (__comp(__pivot, *(__last - difference_type(1)))) {
661     // Guarded.
662     do {
663       ++__first;
664       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
665           __first != __end,
666           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
667     } while (!__comp(__pivot, *__first));
668   } else {
669     while (++__first < __last && !__comp(__pivot, *__first)) {
670     }
671   }
672 
673   if (__first < __last) {
674     // It will be always guarded because __introsort will do the
675     // median-of-three before calling this.
676     do {
677       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
678           __last != __begin,
679           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
680       --__last;
681     } while (__comp(__pivot, *__last));
682   }
683   while (__first < __last) {
684     _Ops::iter_swap(__first, __last);
685     do {
686       ++__first;
687       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
688           __first != __end,
689           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
690     } while (!__comp(__pivot, *__first));
691     do {
692       _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
693           __last != __begin,
694           "Would read out of bounds, does your comparator satisfy the strict-weak ordering requirement?");
695       --__last;
696     } while (__comp(__pivot, *__last));
697   }
698   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
699   if (__begin != __pivot_pos) {
700     *__begin = _Ops::__iter_move(__pivot_pos);
701   }
702   *__pivot_pos = std::move(__pivot);
703   return __first;
704 }
705 
706 // The main sorting function.  Implements introsort combined with other ideas:
707 //  - option of using block quick sort for partitioning,
708 //  - guarded and unguarded insertion sort for small lengths,
709 //  - Tuckey's ninther technique for computing the pivot,
710 //  - check on whether partition was not required.
711 // The implementation is partly based on Orson Peters' pattern-defeating
712 // quicksort, published at: <https://github.com/orlp/pdqsort>.
713 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
714 void __introsort(_RandomAccessIterator __first,
715                  _RandomAccessIterator __last,
716                  _Compare __comp,
717                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
718                  bool __leftmost = true) {
719   using _Ops = _IterOps<_AlgPolicy>;
720   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
721   using _Comp_ref = __comp_ref_type<_Compare>;
722   // Upper bound for using insertion sort for sorting.
723   _LIBCPP_CONSTEXPR difference_type __limit = 24;
724   // Lower bound for using Tuckey's ninther technique for median computation.
725   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
726   while (true) {
727     difference_type __len = __last - __first;
728     switch (__len) {
729     case 0:
730     case 1:
731       return;
732     case 2:
733       if (__comp(*--__last, *__first))
734         _Ops::iter_swap(__first, __last);
735       return;
736     case 3:
737       std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
738       return;
739     case 4:
740       std::__sort4<_AlgPolicy, _Compare>(
741           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
742       return;
743     case 5:
744       std::__sort5<_AlgPolicy, _Compare>(
745           __first,
746           __first + difference_type(1),
747           __first + difference_type(2),
748           __first + difference_type(3),
749           --__last,
750           __comp);
751       return;
752     }
753     // Use insertion sort if the length of the range is below the specified limit.
754     if (__len < __limit) {
755       if (__leftmost) {
756         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
757       } else {
758         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
759       }
760       return;
761     }
762     if (__depth == 0) {
763       // Fallback to heap sort as Introsort suggests.
764       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
765       return;
766     }
767     --__depth;
768     {
769       difference_type __half_len = __len / 2;
770       // Use Tuckey's ninther technique or median of 3 for pivot selection
771       // depending on the length of the range being sorted.
772       if (__len > __ninther_threshold) {
773         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
774         std::__sort3<_AlgPolicy, _Compare>(
775             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
776         std::__sort3<_AlgPolicy, _Compare>(
777             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
778         std::__sort3<_AlgPolicy, _Compare>(
779             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
780         _Ops::iter_swap(__first, __first + __half_len);
781       } else {
782         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
783       }
784     }
785     // The elements to the left of the current iterator range are already
786     // sorted.  If the current iterator range to be sorted is not the
787     // leftmost part of the entire iterator range and the pivot is same as
788     // the highest element in the range to the left, then we know that all
789     // the elements in the range [first, pivot] would be equal to the pivot,
790     // assuming the equal elements are put on the left side when
791     // partitioned.  This also means that we do not need to sort the left
792     // side of the partition.
793     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
794       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
795           __first, __last, _Comp_ref(__comp));
796       continue;
797     }
798     // Use bitset partition only if asked for.
799     auto __ret                = _UseBitSetPartition
800                                   ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
801                                   : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(
802                          __first, __last, __comp);
803     _RandomAccessIterator __i = __ret.first;
804     // [__first, __i) < *__i and *__i <= [__i+1, __last)
805     // If we were given a perfect partition, see if insertion sort is quick...
806     if (__ret.second) {
807       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
808       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
809         if (__fs)
810           return;
811         __last = __i;
812         continue;
813       } else {
814         if (__fs) {
815           __first = ++__i;
816           continue;
817         }
818       }
819     }
820     // Sort the left partiton recursively and the right partition with tail recursion elimination.
821     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
822         __first, __i, __comp, __depth, __leftmost);
823     __leftmost = false;
824     __first    = ++__i;
825   }
826 }
827 
828 template <typename _Number>
829 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
830   if (__n == 0)
831     return 0;
832   if (sizeof(__n) <= sizeof(unsigned))
833     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
834   if (sizeof(__n) <= sizeof(unsigned long))
835     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
836   if (sizeof(__n) <= sizeof(unsigned long long))
837     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
838 
839   _Number __log2 = 0;
840   while (__n > 1) {
841     __log2++;
842     __n >>= 1;
843   }
844   return __log2;
845 }
846 
847 template <class _Comp, class _RandomAccessIterator>
848 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
849 
850 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
851 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
852 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
853 #endif
854 extern template _LIBCPP_EXPORTED_FROM_ABI void
855 __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
856 extern template _LIBCPP_EXPORTED_FROM_ABI void
857 __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
858 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
859 extern template _LIBCPP_EXPORTED_FROM_ABI void
860 __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
861 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
862 extern template _LIBCPP_EXPORTED_FROM_ABI void
863 __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
864 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
865 extern template _LIBCPP_EXPORTED_FROM_ABI void
866 __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
867 extern template _LIBCPP_EXPORTED_FROM_ABI void
868 __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
869 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<unsigned long long>&, unsigned long long*>(
870     unsigned long long*, unsigned long long*, __less<unsigned long long>&);
871 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
872 extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
873 extern template _LIBCPP_EXPORTED_FROM_ABI void
874 __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
875 
876 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
877 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
878 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
879   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
880   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
881 
882   // Only use bitset partitioning for arithmetic types.  We should also check
883   // that the default comparator is in use so that we are sure that there are no
884   // branches in the comparator.
885   std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >(
886       __first, __last, __comp, __depth_limit);
887 }
888 
889 template <class _Type, class... _Options>
890 using __is_any_of = _Or<is_same<_Type, _Options>...>;
891 
892 template <class _Type>
893 using __sort_is_specialized_in_library = __is_any_of<
894     _Type,
895     char,
896 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
897     wchar_t,
898 #endif
899     signed char,
900     unsigned char,
901     short,
902     unsigned short,
903     int,
904     unsigned int,
905     long,
906     unsigned long,
907     long long,
908     unsigned long long,
909     float,
910     double,
911     long double>;
912 
913 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
914 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<>&) {
915   __less<_Type> __comp;
916   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
917 }
918 
919 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
920 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
921   __less<_Type> __comp;
922   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
923 }
924 
925 #if _LIBCPP_STD_VER >= 14
926 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
927 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
928   __less<_Type> __comp;
929   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
930 }
931 #endif
932 
933 #if _LIBCPP_STD_VER >= 20
934 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
935 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
936   __less<_Type> __comp;
937   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
938 }
939 #endif
940 
941 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
942 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
943 __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
944   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
945 
946   if (__libcpp_is_constant_evaluated()) {
947     std::__partial_sort<_AlgPolicy>(
948         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
949   } else {
950     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
951   }
952   std::__check_strict_weak_ordering_sorted(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
953 }
954 
955 template <class _RandomAccessIterator, class _Comp>
956 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
957 sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
958   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
959 }
960 
961 template <class _RandomAccessIterator>
962 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
963 sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
964   std::sort(__first, __last, __less<>());
965 }
966 
967 _LIBCPP_END_NAMESPACE_STD
968 
969 _LIBCPP_POP_MACROS
970 
971 #endif // _LIBCPP___ALGORITHM_SORT_H
972