1 //===-- Data structures for sorting routines --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H 10 #define LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H 11 12 #include "src/__support/CPP/cstddef.h" 13 #include "src/__support/macros/config.h" 14 15 #include <stdint.h> 16 17 namespace LIBC_NAMESPACE_DECL { 18 namespace internal { 19 20 class ArrayGenericSize { 21 cpp::byte *array_base; 22 size_t array_len; 23 size_t elem_size; 24 25 LIBC_INLINE cpp::byte *get_internal(size_t i) const { 26 return array_base + (i * elem_size); 27 } 28 29 public: 30 LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) 31 : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), 32 elem_size(e) {} 33 34 static constexpr bool has_fixed_size() { return false; } 35 36 LIBC_INLINE void *get(size_t i) const { return get_internal(i); } 37 38 LIBC_INLINE void swap(size_t i, size_t j) const { 39 // It's possible to use 8 byte blocks with `uint64_t`, but that 40 // generates more machine code as the remainder loop gets 41 // unrolled, plus 4 byte operations are more likely to be 42 // efficient on a wider variety of hardware. On x86 LLVM tends 43 // to unroll the block loop again into 2 16 byte swaps per 44 // iteration which is another reason that 4 byte blocks yields 45 // good performance even for big types. 46 using block_t = uint32_t; 47 constexpr size_t BLOCK_SIZE = sizeof(block_t); 48 49 alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; 50 51 cpp::byte *elem_i = get_internal(i); 52 cpp::byte *elem_j = get_internal(j); 53 54 const size_t elem_size_rem = elem_size % BLOCK_SIZE; 55 const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); 56 57 while (elem_i != elem_i_block_end) { 58 __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); 59 __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); 60 __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); 61 62 elem_i += BLOCK_SIZE; 63 elem_j += BLOCK_SIZE; 64 } 65 66 for (size_t n = 0; n < elem_size_rem; ++n) { 67 cpp::byte tmp = elem_i[n]; 68 elem_i[n] = elem_j[n]; 69 elem_j[n] = tmp; 70 } 71 } 72 73 LIBC_INLINE size_t len() const { return array_len; } 74 75 // Make an Array starting at index |i| and length |s|. 76 LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { 77 return ArrayGenericSize(get_internal(i), s, elem_size); 78 } 79 80 // Reset this Array to point at a different interval of the same 81 // items starting at index |i|. 82 LIBC_INLINE void reset_bounds(size_t i, size_t s) { 83 array_base = get_internal(i); 84 array_len = s; 85 } 86 }; 87 88 // Having a specialized Array type for sorting that knows at 89 // compile-time what the size of the element is, allows for much more 90 // efficient swapping and for cheaper offset calculations. 91 template <size_t ELEM_SIZE> class ArrayFixedSize { 92 cpp::byte *array_base; 93 size_t array_len; 94 95 LIBC_INLINE cpp::byte *get_internal(size_t i) const { 96 return array_base + (i * ELEM_SIZE); 97 } 98 99 public: 100 LIBC_INLINE ArrayFixedSize(void *a, size_t s) 101 : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} 102 103 // Beware this function is used a heuristic for cheap to swap types, so 104 // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad 105 // idea perf wise. 106 static constexpr bool has_fixed_size() { return true; } 107 108 LIBC_INLINE void *get(size_t i) const { return get_internal(i); } 109 110 LIBC_INLINE void swap(size_t i, size_t j) const { 111 alignas(32) cpp::byte tmp[ELEM_SIZE]; 112 113 cpp::byte *elem_i = get_internal(i); 114 cpp::byte *elem_j = get_internal(j); 115 116 __builtin_memcpy(tmp, elem_i, ELEM_SIZE); 117 __builtin_memmove(elem_i, elem_j, ELEM_SIZE); 118 __builtin_memcpy(elem_j, tmp, ELEM_SIZE); 119 } 120 121 LIBC_INLINE size_t len() const { return array_len; } 122 123 // Make an Array starting at index |i| and length |s|. 124 LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { 125 return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); 126 } 127 128 // Reset this Array to point at a different interval of the same 129 // items starting at index |i|. 130 LIBC_INLINE void reset_bounds(size_t i, size_t s) { 131 array_base = get_internal(i); 132 array_len = s; 133 } 134 }; 135 136 } // namespace internal 137 } // namespace LIBC_NAMESPACE_DECL 138 139 #endif // LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H 140