xref: /llvm-project/libc/src/string/memory_utils/op_generic.h (revision 3950e44eb0ad8d12705d9211c76d622c76f387e9)
1 //===-- Generic implementation of memory function building blocks ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides generic C++ building blocks.
10 // Depending on the requested size, the block operation uses unsigned integral
11 // types, vector types or an array of the type with the maximum size.
12 //
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types, the maximum size would be 8
// (corresponding to uint64_t). On such a platform, if we request the size 32,
// this would be treated as a cpp::array<uint64_t, 4>.
17 //
18 // On the other hand, if the platform is x86 with support for AVX the maximum
19 // size is 32 and the operation can be handled with a single native operation.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
24 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
25 
26 #include "src/__support/CPP/array.h"
27 #include "src/__support/CPP/type_traits.h"
28 #include "src/__support/common.h"
29 #include "src/__support/endian.h"
30 #include "src/__support/macros/optimization.h"
31 #include "src/string/memory_utils/op_builtin.h"
32 #include "src/string/memory_utils/utils.h"
33 
34 #include <stdint.h>
35 
namespace __llvm_libc {
// Compiler types using the vector attributes.
// Each uint8xN_t is an N-byte wide vector of bytes; the compiler lowers
// loads and stores of these types to the target's native vector
// instructions when available (SSE/AVX, NEON, ...).
using uint8x1_t = uint8_t __attribute__((__vector_size__(1)));
using uint8x2_t = uint8_t __attribute__((__vector_size__(2)));
using uint8x4_t = uint8_t __attribute__((__vector_size__(4)));
using uint8x8_t = uint8_t __attribute__((__vector_size__(8)));
using uint8x16_t = uint8_t __attribute__((__vector_size__(16)));
using uint8x32_t = uint8_t __attribute__((__vector_size__(32)));
using uint8x64_t = uint8_t __attribute__((__vector_size__(64)));
} // namespace __llvm_libc
46 
47 namespace __llvm_libc::generic {
48 // We accept three types of values as elements for generic operations:
49 // - scalar : unsigned integral types
50 // - vector : compiler types using the vector attributes
51 // - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
52 // The following traits help discriminate between these cases.
// A scalar is any unsigned integral type (uint8_t ... uint64_t).
template <typename T>
constexpr bool is_scalar_v = cpp::is_integral_v<T> && cpp::is_unsigned_v<T>;

// A vector is one of the uint8xN_t compiler vector types declared above.
template <typename T>
constexpr bool is_vector_v =
    cpp::details::is_unqualified_any_of<T, uint8x1_t, uint8x2_t, uint8x4_t,
                                        uint8x8_t, uint8x16_t, uint8x32_t,
                                        uint8x64_t>();

// An array is only a valid element type when its own elements are scalars
// or vectors (nested arrays are rejected).
template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;

// True for any of the three element categories accepted by the generic
// operations below (scalar, vector or array thereof).
template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;

// Retrieves the number of elements N of a cpp::array<T, N>.
// Instantiating array_size_v on a non-array type is a compile error.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
77 
78 // Generic operations for the above type categories.
79 
80 template <typename T> T load(CPtr src) {
81   static_assert(is_element_type_v<T>);
82   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
83     return ::__llvm_libc::load<T>(src);
84   } else if constexpr (is_array_v<T>) {
85     using value_type = typename T::value_type;
86     T Value;
87     for (size_t I = 0; I < array_size_v<T>; ++I)
88       Value[I] = load<value_type>(src + (I * sizeof(value_type)));
89     return Value;
90   }
91 }
92 
93 template <typename T> void store(Ptr dst, T value) {
94   static_assert(is_element_type_v<T>);
95   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
96     ::__llvm_libc::store<T>(dst, value);
97   } else if constexpr (is_array_v<T>) {
98     using value_type = typename T::value_type;
99     for (size_t I = 0; I < array_size_v<T>; ++I)
100       store<value_type>(dst + (I * sizeof(value_type)), value[I]);
101   }
102 }
103 
104 template <typename T> T splat(uint8_t value) {
105   static_assert(is_scalar_v<T> || is_vector_v<T>);
106   if constexpr (is_scalar_v<T>)
107     return T(~0) / T(0xFF) * T(value);
108   else if constexpr (is_vector_v<T>) {
109     T Out;
110     // This for loop is optimized out for vector types.
111     for (size_t i = 0; i < sizeof(T); ++i)
112       Out[i] = value;
113     return Out;
114   }
115 }
116 
// Sanity check: the pointer width decides which scalar types are natively
// available below.
static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

// LLVM_LIBC_HAS_UINT64 is defined on 64-bit targets (x86-64, AArch64); it
// gates the use of uint64_t in NativeTypeMap, Bcmp and Memcmp below.
#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
#define LLVM_LIBC_HAS_UINT64
#endif
124 
namespace details {
// Checks that each type is sorted in strictly decreasing order of size.
// i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last)
// Note: the single-type overload additionally requires the last type to be
// exactly one byte wide, so a valid list always ends with a 1-byte type.
template <typename First> constexpr bool is_decreasing_size() {
  return sizeof(First) == 1;
}
template <typename First, typename Second, typename... Next>
constexpr bool is_decreasing_size() {
  if constexpr (sizeof...(Next) > 0)
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Next...>();
  else
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Second>();
}

// 'Largest<Size, Ts...>::type' is the first T in the list with
// sizeof(T) <= Size. Since lists are sorted in decreasing size order, this
// selects the largest type able to handle 'Size' bytes; it falls back to
// uint8_t when the list is exhausted.
template <size_t Size, typename... Ts> struct Largest;
template <size_t Size> struct Largest<Size> : cpp::type_identity<uint8_t> {};
template <size_t Size, typename T, typename... Ts>
struct Largest<Size, T, Ts...> {
  using next = Largest<Size, Ts...>;
  using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>;
};

} // namespace details
148 
// 'SupportedTypes' holds a list of natively supported types.
// The types are instantiations of ScalarType or VectorType.
// They should be ordered in strictly decreasing order of size.
// The 'TypeFor<Size>' alias retrieves the largest supported type that can
// handle 'Size' bytes. e.g.
//
// using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
// using Type = ST::TypeFor<10>;
// static_assert(cpp::is_same_v<Type, ScalarType<uint16_t>>);
158 
// Provides 'MaxType' (the widest type of the list) and 'TypeFor<Size>' (the
// largest listed type able to handle 'Size' bytes) for a list of types
// sorted in strictly decreasing order of size.
template <typename First, typename... Ts> struct SupportedTypes {
  static_assert(details::is_decreasing_size<First, Ts...>());

  using MaxType = First;

  template <size_t Size>
  using TypeFor = typename details::Largest<Size, First, Ts...>::type;
};
167 
// Returns the sum of the sizeof of all the TS types.
// Requires a non-empty pack (a unary fold over an empty pack is ill-formed).
template <typename... TS> static constexpr size_t sum_sizeof() {
  return (sizeof(TS) + ...);
}
172 
173 // Map from sizes to structures offering static load, store and splat methods.
174 // Note: On platforms lacking vector support, we use the ArrayType below and
175 // decompose the operation in smaller pieces.
176 
177 // Lists a generic native types to use for Memset and Memmove operations.
178 // TODO: Inject the native types within Memset and Memmove depending on the
179 // target architectures and derive MaxSize from it.
// The preferred types for bulk memory operations, listed in strictly
// decreasing size order as required by SupportedTypes.
using NativeTypeMap = SupportedTypes<uint8x64_t, //
                                     uint8x32_t, //
                                     uint8x16_t,
#if defined(LLVM_LIBC_HAS_UINT64)
                                     uint64_t, // Not available on 32bit
#endif
                                     uint32_t, //
                                     uint16_t, //
                                     uint8_t>;
189 
namespace details {

// Helper to test if a type is void.
template <typename T> inline constexpr bool is_void_v = cpp::is_same_v<T, void>;

// In case the 'Size' is not supported we can fall back to a sequence of smaller
// operations using the largest natively supported type.
// This only applies when 'Size' is a strict multiple of 'MaxSize' and the
// map provides a usable (non-void) type for 'MaxSize'.
template <size_t Size, size_t MaxSize> static constexpr bool useArrayType() {
  return (Size > MaxSize) && ((Size % MaxSize) == 0) &&
         !details::is_void_v<NativeTypeMap::TypeFor<MaxSize>>;
}

// Compute the type to handle an operation of 'Size' bytes knowing that the
// underlying platform only support native types up to MaxSize bytes.
// Either the natively supported type for 'Size' itself, or an array of
// 'Size / MaxSize' elements of the widest supported type.
template <size_t Size, size_t MaxSize>
using getTypeFor = cpp::conditional_t<
    useArrayType<Size, MaxSize>(),
    cpp::array<NativeTypeMap::TypeFor<MaxSize>, Size / MaxSize>,
    NativeTypeMap::TypeFor<Size>>;

} // namespace details
211 
212 ///////////////////////////////////////////////////////////////////////////////
213 // Memset
214 ///////////////////////////////////////////////////////////////////////////////
215 
216 template <typename T, typename... TS> struct Memset {
217   static constexpr size_t SIZE = sum_sizeof<T, TS...>();
218 
219   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
220     static_assert(is_element_type_v<T>);
221     if constexpr (is_scalar_v<T> || is_vector_v<T>) {
222       store<T>(dst, splat<T>(value));
223     } else if constexpr (is_array_v<T>) {
224       using value_type = typename T::value_type;
225       const auto Splat = splat<value_type>(value);
226       for (size_t I = 0; I < array_size_v<T>; ++I)
227         store<value_type>(dst + (I * sizeof(value_type)), Splat);
228     }
229     if constexpr (sizeof...(TS))
230       Memset<TS...>::block(dst + sizeof(T), value);
231   }
232 
233   LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
234     block(dst + count - SIZE, value);
235   }
236 
237   LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
238     block(dst, value);
239     tail(dst, value, count);
240   }
241 
242   LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
243     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
244     size_t offset = 0;
245     do {
246       block(dst + offset, value);
247       offset += SIZE;
248     } while (offset < count - SIZE);
249     tail(dst, value, count);
250   }
251 };
252 
253 ///////////////////////////////////////////////////////////////////////////////
254 // Memmove
255 ///////////////////////////////////////////////////////////////////////////////
256 
// Provides overlap-safe copy operations of SIZE = sizeof(T) bytes.
template <typename T> struct Memmove {
  static constexpr size_t SIZE = sum_sizeof<T>();

  // Copies SIZE bytes from 'src' to 'dst' with one load/store pair.
  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  // Copies a region of SIZE <= count <= 2 x SIZE bytes with two possibly
  // overlapping load/store pairs. Both loads happen before any store, which
  // keeps the copy correct when 'dst' and 'src' overlap.
  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigations are needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }

  // Align forward suitable when dst < src. The alignment is performed with
  // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    // Advance dst/src past the next SIZE boundary of the 'AlignOn' pointer
    // and shrink 'count' accordingly.
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    // Copy the prefix that was skipped over.
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward suitable when dst > src. The alignment is performed with
  // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    // Copy the suffix and shrink 'count' so the remaining region ends on an
    // aligned boundary.
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Move forward suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    // Load the tail first: the forward loop below may overwrite it when the
    // buffers overlap.
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Move backward suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    // Load the head first: the backward loop below may overwrite it when the
    // buffers overlap.
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
388 
389 ///////////////////////////////////////////////////////////////////////////////
390 // Bcmp
391 ///////////////////////////////////////////////////////////////////////////////
392 template <size_t Size> struct Bcmp {
393   static constexpr size_t SIZE = Size;
394   static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
395                                         ? sizeof(uint64_t)
396                                         : sizeof(uint32_t);
397 
398   template <typename T> LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) {
399     static_assert(sizeof(T) <= sizeof(uint32_t));
400     return load<T>(p1) ^ load<T>(p2);
401   }
402 
403   template <typename T>
404   LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) {
405     return load<T>(p1) != load<T>(p2);
406   }
407 
408   LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
409     if constexpr (Size == 1) {
410       return load_xor<uint8_t>(p1, p2);
411     } else if constexpr (Size == 2) {
412       return load_xor<uint16_t>(p1, p2);
413     } else if constexpr (Size == 4) {
414       return load_xor<uint32_t>(p1, p2);
415     } else if constexpr (Size == 8) {
416       return load_not_equal<uint64_t>(p1, p2);
417     } else if constexpr (details::useArrayType<Size, MaxSize>()) {
418       for (size_t offset = 0; offset < Size; offset += MaxSize)
419         if (auto value = Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
420           return value;
421     } else {
422       deferred_static_assert("Unimplemented Size");
423     }
424     return BcmpReturnType::ZERO();
425   }
426 
427   LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
428     return block(p1 + count - SIZE, p2 + count - SIZE);
429   }
430 
431   LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
432     return block(p1, p2) | tail(p1, p2, count);
433   }
434 
435   LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
436                                                   size_t count) {
437     static_assert(Size > 1, "a loop of size 1 does not need tail");
438     size_t offset = 0;
439     do {
440       if (auto value = block(p1 + offset, p2 + offset))
441         return value;
442       offset += SIZE;
443     } while (offset < count - SIZE);
444     return tail(p1, p2, count);
445   }
446 };
447 
448 ///////////////////////////////////////////////////////////////////////////////
449 // Memcmp
450 ///////////////////////////////////////////////////////////////////////////////
451 template <size_t Size> struct Memcmp {
452   static constexpr size_t SIZE = Size;
453   static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
454                                         ? sizeof(uint64_t)
455                                         : sizeof(uint32_t);
456 
457   template <typename T> LIBC_INLINE static T load_be(CPtr ptr) {
458     return Endian::to_big_endian(load<T>(ptr));
459   }
460 
461   template <typename T>
462   LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) {
463     return load_be<T>(p1) - load_be<T>(p2);
464   }
465 
466   template <typename T>
467   LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) {
468     const auto la = load_be<T>(p1);
469     const auto lb = load_be<T>(p2);
470     return la > lb ? 1 : la < lb ? -1 : 0;
471   }
472 
473   LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
474     if constexpr (Size == 1) {
475       return load_be_diff<uint8_t>(p1, p2);
476     } else if constexpr (Size == 2) {
477       return load_be_diff<uint16_t>(p1, p2);
478     } else if constexpr (Size == 4) {
479       return load_be_cmp<uint32_t>(p1, p2);
480     } else if constexpr (Size == 8) {
481       return load_be_cmp<uint64_t>(p1, p2);
482     } else if constexpr (details::useArrayType<Size, MaxSize>()) {
483       for (size_t offset = 0; offset < Size; offset += MaxSize)
484         if (Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
485           return Memcmp<MaxSize>::block(p1 + offset, p2 + offset);
486       return MemcmpReturnType::ZERO();
487     } else if constexpr (Size == 3) {
488       if (auto value = Memcmp<2>::block(p1, p2))
489         return value;
490       return Memcmp<1>::block(p1 + 2, p2 + 2);
491     } else {
492       deferred_static_assert("Unimplemented Size");
493     }
494   }
495 
496   LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
497     return block(p1 + count - SIZE, p2 + count - SIZE);
498   }
499 
500   LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
501                                                 size_t count) {
502     if (auto value = block(p1, p2))
503       return value;
504     return tail(p1, p2, count);
505   }
506 
507   LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
508                                                     size_t count) {
509     static_assert(Size > 1, "a loop of size 1 does not need tail");
510     size_t offset = 0;
511     do {
512       if (auto value = block(p1 + offset, p2 + offset))
513         return value;
514       offset += SIZE;
515     } while (offset < count - SIZE);
516     return tail(p1, p2, count);
517   }
518 };
519 
520 } // namespace __llvm_libc::generic
521 
522 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
523