1a6d2da8bSlntue //===-- Implementation header for qsort utilities ---------------*- C++ -*-===// 2a6d2da8bSlntue // 3a6d2da8bSlntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a6d2da8bSlntue // See https://llvm.org/LICENSE.txt for license information. 5a6d2da8bSlntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a6d2da8bSlntue // 7a6d2da8bSlntue //===----------------------------------------------------------------------===// 8a6d2da8bSlntue 9a6d2da8bSlntue #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H 10a6d2da8bSlntue #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H 11a6d2da8bSlntue 12*a738d81cSLukas Bergdoll #include "src/__support/CPP/bit.h" 13*a738d81cSLukas Bergdoll #include "src/__support/CPP/cstddef.h" 14a6d2da8bSlntue #include "src/__support/macros/config.h" 15*a738d81cSLukas Bergdoll #include "src/stdlib/qsort_pivot.h" 16a6d2da8bSlntue 17a6d2da8bSlntue #include <stdint.h> 18a6d2da8bSlntue 19a6d2da8bSlntue namespace LIBC_NAMESPACE_DECL { 20a6d2da8bSlntue namespace internal { 21a6d2da8bSlntue 22*a738d81cSLukas Bergdoll // Branchless Lomuto partition based on the implementation by Lukas 23*a738d81cSLukas Bergdoll // Bergdoll and Orson Peters 24*a738d81cSLukas Bergdoll // https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md. 25*a738d81cSLukas Bergdoll // Simplified to avoid having to stack allocate. 26*a738d81cSLukas Bergdoll template <typename A, typename F> 27*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition_lomuto_branchless(const A &array, 28*a738d81cSLukas Bergdoll const void *pivot, 29*a738d81cSLukas Bergdoll const F &is_less) { 30*a738d81cSLukas Bergdoll const size_t array_len = array.len(); 31*a738d81cSLukas Bergdoll 32*a738d81cSLukas Bergdoll size_t left = 0; 33*a738d81cSLukas Bergdoll size_t right = 0; 34*a738d81cSLukas Bergdoll 35*a738d81cSLukas Bergdoll while (right < array_len) { 36*a738d81cSLukas Bergdoll const bool right_is_lt = is_less(array.get(right), pivot); 37*a738d81cSLukas Bergdoll array.swap(left, right); 38*a738d81cSLukas Bergdoll left += static_cast<size_t>(right_is_lt); 39*a738d81cSLukas Bergdoll right += 1; 40*a738d81cSLukas Bergdoll } 41*a738d81cSLukas Bergdoll 42*a738d81cSLukas Bergdoll return left; 43*a738d81cSLukas Bergdoll } 44*a738d81cSLukas Bergdoll 45*a738d81cSLukas Bergdoll // Optimized for large types that are expensive to move. Not optimized 46*a738d81cSLukas Bergdoll // for integers. It's possible to use a cyclic permutation here for 47*a738d81cSLukas Bergdoll // large types as done in ipnsort but the advantages of this are limited 48*a738d81cSLukas Bergdoll // as `is_less` is a small wrapper around a call to a function pointer 49*a738d81cSLukas Bergdoll // and won't incur much binary-size overhead. The other reason to use 50*a738d81cSLukas Bergdoll // cyclic permutation is to have more efficient swapping, but we don't 51*a738d81cSLukas Bergdoll // know the element size so this isn't applicable here either. 52*a738d81cSLukas Bergdoll template <typename A, typename F> 53*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot, 54*a738d81cSLukas Bergdoll const F &is_less) { 55*a738d81cSLukas Bergdoll const size_t array_len = array.len(); 56*a738d81cSLukas Bergdoll 57*a738d81cSLukas Bergdoll size_t left = 0; 58*a738d81cSLukas Bergdoll size_t right = array_len; 59a6d2da8bSlntue 60a6d2da8bSlntue while (true) { 61*a738d81cSLukas Bergdoll while (left < right && is_less(array.get(left), pivot)) 62*a738d81cSLukas Bergdoll ++left; 63a6d2da8bSlntue 6458ef1eb0SSimon Tatham while (true) { 65*a738d81cSLukas Bergdoll --right; 66*a738d81cSLukas Bergdoll if (left >= right || is_less(array.get(right), pivot)) { 67*a738d81cSLukas Bergdoll break; 68*a738d81cSLukas Bergdoll } 69*a738d81cSLukas Bergdoll } 70*a738d81cSLukas Bergdoll 71*a738d81cSLukas Bergdoll if (left >= right) 72*a738d81cSLukas Bergdoll break; 73*a738d81cSLukas Bergdoll 74*a738d81cSLukas Bergdoll array.swap(left, right); 75*a738d81cSLukas Bergdoll ++left; 76*a738d81cSLukas Bergdoll } 77*a738d81cSLukas Bergdoll 78*a738d81cSLukas Bergdoll return left; 79*a738d81cSLukas Bergdoll } 80*a738d81cSLukas Bergdoll 81*a738d81cSLukas Bergdoll template <typename A, typename F> 82*a738d81cSLukas Bergdoll LIBC_INLINE size_t partition(const A &array, size_t pivot_index, 83*a738d81cSLukas Bergdoll const F &is_less) { 84*a738d81cSLukas Bergdoll // Place the pivot at the beginning of the array. 85*a738d81cSLukas Bergdoll if (pivot_index != 0) { 86*a738d81cSLukas Bergdoll array.swap(0, pivot_index); 87*a738d81cSLukas Bergdoll } 88*a738d81cSLukas Bergdoll 89*a738d81cSLukas Bergdoll const A array_without_pivot = array.make_array(1, array.len() - 1); 90*a738d81cSLukas Bergdoll const void *pivot = array.get(0); 91*a738d81cSLukas Bergdoll 92*a738d81cSLukas Bergdoll size_t num_lt; 93*a738d81cSLukas Bergdoll if constexpr (A::has_fixed_size()) { 94*a738d81cSLukas Bergdoll // Branchless Lomuto avoid branch misprediction penalties, but 95*a738d81cSLukas Bergdoll // it also swaps more often which is only faster if the swap is a fast 96*a738d81cSLukas Bergdoll // constant operation. 97*a738d81cSLukas Bergdoll num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less); 98*a738d81cSLukas Bergdoll } else { 99*a738d81cSLukas Bergdoll num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less); 100*a738d81cSLukas Bergdoll } 101*a738d81cSLukas Bergdoll 102*a738d81cSLukas Bergdoll // Place the pivot between the two partitions. 103*a738d81cSLukas Bergdoll array.swap(0, num_lt); 104*a738d81cSLukas Bergdoll 105*a738d81cSLukas Bergdoll return num_lt; 106*a738d81cSLukas Bergdoll } 107*a738d81cSLukas Bergdoll 108*a738d81cSLukas Bergdoll template <typename A, typename F> 109*a738d81cSLukas Bergdoll LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot, 110*a738d81cSLukas Bergdoll size_t limit, const F &is_less) { 111*a738d81cSLukas Bergdoll while (true) { 112*a738d81cSLukas Bergdoll const size_t array_len = array.len(); 113*a738d81cSLukas Bergdoll if (array_len <= 1) 114a6d2da8bSlntue return; 115*a738d81cSLukas Bergdoll 116*a738d81cSLukas Bergdoll // If too many bad pivot choices were made, simply fall back to 117*a738d81cSLukas Bergdoll // heapsort in order to guarantee `O(N x log(N))` worst-case. 118*a738d81cSLukas Bergdoll if (limit == 0) { 119*a738d81cSLukas Bergdoll heap_sort(array, is_less); 120*a738d81cSLukas Bergdoll return; 121*a738d81cSLukas Bergdoll } 122*a738d81cSLukas Bergdoll 123*a738d81cSLukas Bergdoll limit -= 1; 124*a738d81cSLukas Bergdoll 125*a738d81cSLukas Bergdoll const size_t pivot_index = choose_pivot(array, is_less); 126*a738d81cSLukas Bergdoll 127*a738d81cSLukas Bergdoll // If the chosen pivot is equal to the predecessor, then it's the smallest 128*a738d81cSLukas Bergdoll // element in the slice. Partition the slice into elements equal to and 129*a738d81cSLukas Bergdoll // elements greater than the pivot. This case is usually hit when the slice 130*a738d81cSLukas Bergdoll // contains many duplicate elements. 131*a738d81cSLukas Bergdoll if (ancestor_pivot) { 132*a738d81cSLukas Bergdoll if (!is_less(ancestor_pivot, array.get(pivot_index))) { 133*a738d81cSLukas Bergdoll const size_t num_lt = 134*a738d81cSLukas Bergdoll partition(array, pivot_index, 135*a738d81cSLukas Bergdoll [is_less](const void *a, const void *b) -> bool { 136*a738d81cSLukas Bergdoll return !is_less(b, a); 137*a738d81cSLukas Bergdoll }); 138*a738d81cSLukas Bergdoll 139*a738d81cSLukas Bergdoll // Continue sorting elements greater than the pivot. We know that 140*a738d81cSLukas Bergdoll // `num_lt` cont 141*a738d81cSLukas Bergdoll array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1)); 142*a738d81cSLukas Bergdoll ancestor_pivot = nullptr; 143*a738d81cSLukas Bergdoll continue; 144*a738d81cSLukas Bergdoll } 145*a738d81cSLukas Bergdoll } 146*a738d81cSLukas Bergdoll 147*a738d81cSLukas Bergdoll size_t split_index = partition(array, pivot_index, is_less); 148*a738d81cSLukas Bergdoll 149*a738d81cSLukas Bergdoll if (array_len == 2) 150a6d2da8bSlntue // The partition operation sorts the two element array. 151a6d2da8bSlntue return; 15258ef1eb0SSimon Tatham 153*a738d81cSLukas Bergdoll // Split the array into `left`, `pivot`, and `right`. 154*a738d81cSLukas Bergdoll A left = array.make_array(0, split_index); 155*a738d81cSLukas Bergdoll const void *pivot = array.get(split_index); 156*a738d81cSLukas Bergdoll const size_t right_start = split_index + 1; 157*a738d81cSLukas Bergdoll A right = array.make_array(right_start, array.len() - right_start); 15858ef1eb0SSimon Tatham 159*a738d81cSLukas Bergdoll // Recurse into the left side. We have a fixed recursion limit, 160*a738d81cSLukas Bergdoll // testing shows no real benefit for recursing into the shorter 161*a738d81cSLukas Bergdoll // side. 162*a738d81cSLukas Bergdoll quick_sort_impl(left, ancestor_pivot, limit, is_less); 163*a738d81cSLukas Bergdoll 164*a738d81cSLukas Bergdoll // Continue with the right side. 165*a738d81cSLukas Bergdoll array = right; 166*a738d81cSLukas Bergdoll ancestor_pivot = pivot; 167a6d2da8bSlntue } 16858ef1eb0SSimon Tatham } 169*a738d81cSLukas Bergdoll 170*a738d81cSLukas Bergdoll constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; } 171*a738d81cSLukas Bergdoll 172*a738d81cSLukas Bergdoll template <typename A, typename F> 173*a738d81cSLukas Bergdoll LIBC_INLINE void quick_sort(A &array, const F &is_less) { 174*a738d81cSLukas Bergdoll const void *ancestor_pivot = nullptr; 175*a738d81cSLukas Bergdoll // Limit the number of imbalanced partitions to `2 * floor(log2(len))`. 176*a738d81cSLukas Bergdoll // The binary OR by one is used to eliminate the zero-check in the logarithm. 177*a738d81cSLukas Bergdoll const size_t limit = 2 * ilog2((array.len() | 1)); 178*a738d81cSLukas Bergdoll quick_sort_impl(array, ancestor_pivot, limit, is_less); 179a6d2da8bSlntue } 180a6d2da8bSlntue 181a6d2da8bSlntue } // namespace internal 182a6d2da8bSlntue } // namespace LIBC_NAMESPACE_DECL 183a6d2da8bSlntue 184a6d2da8bSlntue #endif // LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H 185