xref: /llvm-project/libc/src/stdlib/quick_sort.h (revision a738d81cd2822698539b0482af48d49d91ea5a2e)
1a6d2da8bSlntue //===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
2a6d2da8bSlntue //
3a6d2da8bSlntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a6d2da8bSlntue // See https://llvm.org/LICENSE.txt for license information.
5a6d2da8bSlntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a6d2da8bSlntue //
7a6d2da8bSlntue //===----------------------------------------------------------------------===//
8a6d2da8bSlntue 
9a6d2da8bSlntue #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
10a6d2da8bSlntue #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
11a6d2da8bSlntue 
12*a738d81cSLukas Bergdoll #include "src/__support/CPP/bit.h"
13*a738d81cSLukas Bergdoll #include "src/__support/CPP/cstddef.h"
14a6d2da8bSlntue #include "src/__support/macros/config.h"
15*a738d81cSLukas Bergdoll #include "src/stdlib/qsort_pivot.h"
16a6d2da8bSlntue 
17a6d2da8bSlntue #include <stdint.h>
18a6d2da8bSlntue 
19a6d2da8bSlntue namespace LIBC_NAMESPACE_DECL {
20a6d2da8bSlntue namespace internal {
21a6d2da8bSlntue 
22*a738d81cSLukas Bergdoll // Branchless Lomuto partition based on the implementation by Lukas
23*a738d81cSLukas Bergdoll // Bergdoll and Orson Peters
24*a738d81cSLukas Bergdoll // https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md.
25*a738d81cSLukas Bergdoll // Simplified to avoid having to stack allocate.
26*a738d81cSLukas Bergdoll template <typename A, typename F>
27*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition_lomuto_branchless(const A &array,
28*a738d81cSLukas Bergdoll                                                const void *pivot,
29*a738d81cSLukas Bergdoll                                                const F &is_less) {
30*a738d81cSLukas Bergdoll   const size_t array_len = array.len();
31*a738d81cSLukas Bergdoll 
32*a738d81cSLukas Bergdoll   size_t left = 0;
33*a738d81cSLukas Bergdoll   size_t right = 0;
34*a738d81cSLukas Bergdoll 
35*a738d81cSLukas Bergdoll   while (right < array_len) {
36*a738d81cSLukas Bergdoll     const bool right_is_lt = is_less(array.get(right), pivot);
37*a738d81cSLukas Bergdoll     array.swap(left, right);
38*a738d81cSLukas Bergdoll     left += static_cast<size_t>(right_is_lt);
39*a738d81cSLukas Bergdoll     right += 1;
40*a738d81cSLukas Bergdoll   }
41*a738d81cSLukas Bergdoll 
42*a738d81cSLukas Bergdoll   return left;
43*a738d81cSLukas Bergdoll }
44*a738d81cSLukas Bergdoll 
45*a738d81cSLukas Bergdoll // Optimized for large types that are expensive to move. Not optimized
46*a738d81cSLukas Bergdoll // for integers. It's possible to use a cyclic permutation here for
47*a738d81cSLukas Bergdoll // large types as done in ipnsort but the advantages of this are limited
48*a738d81cSLukas Bergdoll // as `is_less` is a small wrapper around a call to a function pointer
49*a738d81cSLukas Bergdoll // and won't incur much binary-size overhead. The other reason to use
50*a738d81cSLukas Bergdoll // cyclic permutation is to have more efficient swapping, but we don't
51*a738d81cSLukas Bergdoll // know the element size so this isn't applicable here either.
52*a738d81cSLukas Bergdoll template <typename A, typename F>
53*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot,
54*a738d81cSLukas Bergdoll                                            const F &is_less) {
55*a738d81cSLukas Bergdoll   const size_t array_len = array.len();
56*a738d81cSLukas Bergdoll 
57*a738d81cSLukas Bergdoll   size_t left = 0;
58*a738d81cSLukas Bergdoll   size_t right = array_len;
59a6d2da8bSlntue 
60a6d2da8bSlntue   while (true) {
61*a738d81cSLukas Bergdoll     while (left < right && is_less(array.get(left), pivot))
62*a738d81cSLukas Bergdoll       ++left;
63a6d2da8bSlntue 
6458ef1eb0SSimon Tatham     while (true) {
65*a738d81cSLukas Bergdoll       --right;
66*a738d81cSLukas Bergdoll       if (left >= right || is_less(array.get(right), pivot)) {
67*a738d81cSLukas Bergdoll         break;
68*a738d81cSLukas Bergdoll       }
69*a738d81cSLukas Bergdoll     }
70*a738d81cSLukas Bergdoll 
71*a738d81cSLukas Bergdoll     if (left >= right)
72*a738d81cSLukas Bergdoll       break;
73*a738d81cSLukas Bergdoll 
74*a738d81cSLukas Bergdoll     array.swap(left, right);
75*a738d81cSLukas Bergdoll     ++left;
76*a738d81cSLukas Bergdoll   }
77*a738d81cSLukas Bergdoll 
78*a738d81cSLukas Bergdoll   return left;
79*a738d81cSLukas Bergdoll }
80*a738d81cSLukas Bergdoll 
81*a738d81cSLukas Bergdoll template <typename A, typename F>
82*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition(const A &array, size_t pivot_index,
83*a738d81cSLukas Bergdoll                              const F &is_less) {
84*a738d81cSLukas Bergdoll   // Place the pivot at the beginning of the array.
85*a738d81cSLukas Bergdoll   if (pivot_index != 0) {
86*a738d81cSLukas Bergdoll     array.swap(0, pivot_index);
87*a738d81cSLukas Bergdoll   }
88*a738d81cSLukas Bergdoll 
89*a738d81cSLukas Bergdoll   const A array_without_pivot = array.make_array(1, array.len() - 1);
90*a738d81cSLukas Bergdoll   const void *pivot = array.get(0);
91*a738d81cSLukas Bergdoll 
92*a738d81cSLukas Bergdoll   size_t num_lt;
93*a738d81cSLukas Bergdoll   if constexpr (A::has_fixed_size()) {
94*a738d81cSLukas Bergdoll     // Branchless Lomuto avoid branch misprediction penalties, but
95*a738d81cSLukas Bergdoll     // it also swaps more often which is only faster if the swap is a fast
96*a738d81cSLukas Bergdoll     // constant operation.
97*a738d81cSLukas Bergdoll     num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
98*a738d81cSLukas Bergdoll   } else {
99*a738d81cSLukas Bergdoll     num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less);
100*a738d81cSLukas Bergdoll   }
101*a738d81cSLukas Bergdoll 
102*a738d81cSLukas Bergdoll   // Place the pivot between the two partitions.
103*a738d81cSLukas Bergdoll   array.swap(0, num_lt);
104*a738d81cSLukas Bergdoll 
105*a738d81cSLukas Bergdoll   return num_lt;
106*a738d81cSLukas Bergdoll }
107*a738d81cSLukas Bergdoll 
108*a738d81cSLukas Bergdoll template <typename A, typename F>
109*a738d81cSLukas Bergdoll LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot,
110*a738d81cSLukas Bergdoll                                  size_t limit, const F &is_less) {
111*a738d81cSLukas Bergdoll   while (true) {
112*a738d81cSLukas Bergdoll     const size_t array_len = array.len();
113*a738d81cSLukas Bergdoll     if (array_len <= 1)
114a6d2da8bSlntue       return;
115*a738d81cSLukas Bergdoll 
116*a738d81cSLukas Bergdoll     // If too many bad pivot choices were made, simply fall back to
117*a738d81cSLukas Bergdoll     // heapsort in order to guarantee `O(N x log(N))` worst-case.
118*a738d81cSLukas Bergdoll     if (limit == 0) {
119*a738d81cSLukas Bergdoll       heap_sort(array, is_less);
120*a738d81cSLukas Bergdoll       return;
121*a738d81cSLukas Bergdoll     }
122*a738d81cSLukas Bergdoll 
123*a738d81cSLukas Bergdoll     limit -= 1;
124*a738d81cSLukas Bergdoll 
125*a738d81cSLukas Bergdoll     const size_t pivot_index = choose_pivot(array, is_less);
126*a738d81cSLukas Bergdoll 
127*a738d81cSLukas Bergdoll     // If the chosen pivot is equal to the predecessor, then it's the smallest
128*a738d81cSLukas Bergdoll     // element in the slice. Partition the slice into elements equal to and
129*a738d81cSLukas Bergdoll     // elements greater than the pivot. This case is usually hit when the slice
130*a738d81cSLukas Bergdoll     // contains many duplicate elements.
131*a738d81cSLukas Bergdoll     if (ancestor_pivot) {
132*a738d81cSLukas Bergdoll       if (!is_less(ancestor_pivot, array.get(pivot_index))) {
133*a738d81cSLukas Bergdoll         const size_t num_lt =
134*a738d81cSLukas Bergdoll             partition(array, pivot_index,
135*a738d81cSLukas Bergdoll                       [is_less](const void *a, const void *b) -> bool {
136*a738d81cSLukas Bergdoll                         return !is_less(b, a);
137*a738d81cSLukas Bergdoll                       });
138*a738d81cSLukas Bergdoll 
139*a738d81cSLukas Bergdoll         // Continue sorting elements greater than the pivot. We know that
140*a738d81cSLukas Bergdoll         // `num_lt` cont
141*a738d81cSLukas Bergdoll         array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1));
142*a738d81cSLukas Bergdoll         ancestor_pivot = nullptr;
143*a738d81cSLukas Bergdoll         continue;
144*a738d81cSLukas Bergdoll       }
145*a738d81cSLukas Bergdoll     }
146*a738d81cSLukas Bergdoll 
147*a738d81cSLukas Bergdoll     size_t split_index = partition(array, pivot_index, is_less);
148*a738d81cSLukas Bergdoll 
149*a738d81cSLukas Bergdoll     if (array_len == 2)
150a6d2da8bSlntue       // The partition operation sorts the two element array.
151a6d2da8bSlntue       return;
15258ef1eb0SSimon Tatham 
153*a738d81cSLukas Bergdoll     // Split the array into `left`, `pivot`, and `right`.
154*a738d81cSLukas Bergdoll     A left = array.make_array(0, split_index);
155*a738d81cSLukas Bergdoll     const void *pivot = array.get(split_index);
156*a738d81cSLukas Bergdoll     const size_t right_start = split_index + 1;
157*a738d81cSLukas Bergdoll     A right = array.make_array(right_start, array.len() - right_start);
15858ef1eb0SSimon Tatham 
159*a738d81cSLukas Bergdoll     // Recurse into the left side. We have a fixed recursion limit,
160*a738d81cSLukas Bergdoll     // testing shows no real benefit for recursing into the shorter
161*a738d81cSLukas Bergdoll     // side.
162*a738d81cSLukas Bergdoll     quick_sort_impl(left, ancestor_pivot, limit, is_less);
163*a738d81cSLukas Bergdoll 
164*a738d81cSLukas Bergdoll     // Continue with the right side.
165*a738d81cSLukas Bergdoll     array = right;
166*a738d81cSLukas Bergdoll     ancestor_pivot = pivot;
167a6d2da8bSlntue   }
16858ef1eb0SSimon Tatham }
169*a738d81cSLukas Bergdoll 
170*a738d81cSLukas Bergdoll constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; }
171*a738d81cSLukas Bergdoll 
172*a738d81cSLukas Bergdoll template <typename A, typename F>
173*a738d81cSLukas Bergdoll LIBC_INLINE void quick_sort(A &array, const F &is_less) {
174*a738d81cSLukas Bergdoll   const void *ancestor_pivot = nullptr;
175*a738d81cSLukas Bergdoll   // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
176*a738d81cSLukas Bergdoll   // The binary OR by one is used to eliminate the zero-check in the logarithm.
177*a738d81cSLukas Bergdoll   const size_t limit = 2 * ilog2((array.len() | 1));
178*a738d81cSLukas Bergdoll   quick_sort_impl(array, ancestor_pivot, limit, is_less);
179a6d2da8bSlntue }
180a6d2da8bSlntue 
181a6d2da8bSlntue } // namespace internal
182a6d2da8bSlntue } // namespace LIBC_NAMESPACE_DECL
183a6d2da8bSlntue 
184a6d2da8bSlntue #endif // LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
185