xref: /llvm-project/libc/src/stdlib/qsort_data.h (revision a738d81cd2822698539b0482af48d49d91ea5a2e)
1 //===-- Data structures for sorting routines --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
10 #define LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
11 
12 #include "src/__support/CPP/cstddef.h"
13 #include "src/__support/macros/config.h"
14 
15 #include <stdint.h>
16 
17 namespace LIBC_NAMESPACE_DECL {
18 namespace internal {
19 
20 class ArrayGenericSize {
21   cpp::byte *array_base;
22   size_t array_len;
23   size_t elem_size;
24 
25   LIBC_INLINE cpp::byte *get_internal(size_t i) const {
26     return array_base + (i * elem_size);
27   }
28 
29 public:
30   LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
31       : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
32         elem_size(e) {}
33 
34   static constexpr bool has_fixed_size() { return false; }
35 
36   LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
37 
38   LIBC_INLINE void swap(size_t i, size_t j) const {
39     // It's possible to use 8 byte blocks with `uint64_t`, but that
40     // generates more machine code as the remainder loop gets
41     // unrolled, plus 4 byte operations are more likely to be
42     // efficient on a wider variety of hardware. On x86 LLVM tends
43     // to unroll the block loop again into 2 16 byte swaps per
44     // iteration which is another reason that 4 byte blocks yields
45     // good performance even for big types.
46     using block_t = uint32_t;
47     constexpr size_t BLOCK_SIZE = sizeof(block_t);
48 
49     alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
50 
51     cpp::byte *elem_i = get_internal(i);
52     cpp::byte *elem_j = get_internal(j);
53 
54     const size_t elem_size_rem = elem_size % BLOCK_SIZE;
55     const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
56 
57     while (elem_i != elem_i_block_end) {
58       __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
59       __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
60       __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
61 
62       elem_i += BLOCK_SIZE;
63       elem_j += BLOCK_SIZE;
64     }
65 
66     for (size_t n = 0; n < elem_size_rem; ++n) {
67       cpp::byte tmp = elem_i[n];
68       elem_i[n] = elem_j[n];
69       elem_j[n] = tmp;
70     }
71   }
72 
73   LIBC_INLINE size_t len() const { return array_len; }
74 
75   // Make an Array starting at index |i| and length |s|.
76   LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
77     return ArrayGenericSize(get_internal(i), s, elem_size);
78   }
79 
80   // Reset this Array to point at a different interval of the same
81   // items starting at index |i|.
82   LIBC_INLINE void reset_bounds(size_t i, size_t s) {
83     array_base = get_internal(i);
84     array_len = s;
85   }
86 };
87 
88 // Having a specialized Array type for sorting that knows at
89 // compile-time what the size of the element is, allows for much more
90 // efficient swapping and for cheaper offset calculations.
91 template <size_t ELEM_SIZE> class ArrayFixedSize {
92   cpp::byte *array_base;
93   size_t array_len;
94 
95   LIBC_INLINE cpp::byte *get_internal(size_t i) const {
96     return array_base + (i * ELEM_SIZE);
97   }
98 
99 public:
100   LIBC_INLINE ArrayFixedSize(void *a, size_t s)
101       : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
102 
103   // Beware this function is used a heuristic for cheap to swap types, so
104   // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
105   // idea perf wise.
106   static constexpr bool has_fixed_size() { return true; }
107 
108   LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
109 
110   LIBC_INLINE void swap(size_t i, size_t j) const {
111     alignas(32) cpp::byte tmp[ELEM_SIZE];
112 
113     cpp::byte *elem_i = get_internal(i);
114     cpp::byte *elem_j = get_internal(j);
115 
116     __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
117     __builtin_memmove(elem_i, elem_j, ELEM_SIZE);
118     __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
119   }
120 
121   LIBC_INLINE size_t len() const { return array_len; }
122 
123   // Make an Array starting at index |i| and length |s|.
124   LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
125     return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
126   }
127 
128   // Reset this Array to point at a different interval of the same
129   // items starting at index |i|.
130   LIBC_INLINE void reset_bounds(size_t i, size_t s) {
131     array_base = get_internal(i);
132     array_len = s;
133   }
134 };
135 
136 } // namespace internal
137 } // namespace LIBC_NAMESPACE_DECL
138 
139 #endif // LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
140