xref: /llvm-project/libc/src/string/memory_utils/op_generic.h (revision 95b680e4c353d479fbfb96adb39696042c005e99)
169090143SGuillaume Chatelet //===-- Generic implementation of memory function building blocks ---------===//
269090143SGuillaume Chatelet //
369090143SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
469090143SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information.
569090143SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
669090143SGuillaume Chatelet //
769090143SGuillaume Chatelet //===----------------------------------------------------------------------===//
869090143SGuillaume Chatelet //
969090143SGuillaume Chatelet // This file provides generic C++ building blocks.
1069090143SGuillaume Chatelet // Depending on the requested size, the block operation uses unsigned integral
1169090143SGuillaume Chatelet // types, vector types or an array of the type with the maximum size.
1269090143SGuillaume Chatelet //
1369090143SGuillaume Chatelet // The maximum size is passed as a template argument. For instance, on x86
1469090143SGuillaume Chatelet // platforms that only supports integral types the maximum size would be 8
1569090143SGuillaume Chatelet // (corresponding to uint64_t). On this platform if we request the size 32, this
1669090143SGuillaume Chatelet // would be treated as a cpp::array<uint64_t, 4>.
1769090143SGuillaume Chatelet //
1869090143SGuillaume Chatelet // On the other hand, if the platform is x86 with support for AVX the maximum
1969090143SGuillaume Chatelet // size is 32 and the operation can be handled with a single native operation.
2069090143SGuillaume Chatelet //
2169090143SGuillaume Chatelet //===----------------------------------------------------------------------===//
2269090143SGuillaume Chatelet 
2369090143SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
2469090143SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
2569090143SGuillaume Chatelet 
2669090143SGuillaume Chatelet #include "src/__support/CPP/array.h"
2769090143SGuillaume Chatelet #include "src/__support/CPP/type_traits.h"
28282fe508SGuillaume Chatelet #include "src/__support/common.h"
29*95b680e4SDaniel Thornburgh #include "src/__support/endian_internal.h"
305ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
31406b3f2cSGuillaume Chatelet #include "src/__support/macros/optimization.h"
32a84e66a9SGuillaume Chatelet #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64
3369090143SGuillaume Chatelet #include "src/string/memory_utils/op_builtin.h"
3469090143SGuillaume Chatelet #include "src/string/memory_utils/utils.h"
3569090143SGuillaume Chatelet 
3669090143SGuillaume Chatelet #include <stdint.h>
3769090143SGuillaume Chatelet 
381c814c99SGuillaume Chatelet static_assert((UINTPTR_MAX == 4294967295U) ||
391c814c99SGuillaume Chatelet                   (UINTPTR_MAX == 18446744073709551615UL),
401c814c99SGuillaume Chatelet               "We currently only support 32- or 64-bit platforms");
411c814c99SGuillaume Chatelet 
425ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
4326c17f4bSGuillaume Chatelet // Compiler types using the vector attributes.
441c814c99SGuillaume Chatelet using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
451c814c99SGuillaume Chatelet using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
461c814c99SGuillaume Chatelet using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
475ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
4869090143SGuillaume Chatelet 
495ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
505ff3ff33SPetr Hosek namespace generic {
511c814c99SGuillaume Chatelet 
5226c17f4bSGuillaume Chatelet // We accept three types of values as elements for generic operations:
531c814c99SGuillaume Chatelet // - scalar : unsigned integral types,
541c814c99SGuillaume Chatelet // - vector : compiler types using the vector attributes or platform builtins,
5526c17f4bSGuillaume Chatelet // - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
5626c17f4bSGuillaume Chatelet // The following traits help discriminate between these cases.
5793ac4493SGuillaume Chatelet 
581c814c99SGuillaume Chatelet template <typename T> struct is_scalar : cpp::false_type {};
591c814c99SGuillaume Chatelet template <> struct is_scalar<uint8_t> : cpp::true_type {};
601c814c99SGuillaume Chatelet template <> struct is_scalar<uint16_t> : cpp::true_type {};
611c814c99SGuillaume Chatelet template <> struct is_scalar<uint32_t> : cpp::true_type {};
62a84e66a9SGuillaume Chatelet #ifdef LIBC_TYPES_HAS_INT64
631c814c99SGuillaume Chatelet template <> struct is_scalar<uint64_t> : cpp::true_type {};
64a84e66a9SGuillaume Chatelet #endif // LIBC_TYPES_HAS_INT64
652aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface.
662aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming)
671c814c99SGuillaume Chatelet template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value;
681c814c99SGuillaume Chatelet 
691c814c99SGuillaume Chatelet template <typename T> struct is_vector : cpp::false_type {};
701c814c99SGuillaume Chatelet template <> struct is_vector<generic_v128> : cpp::true_type {};
711c814c99SGuillaume Chatelet template <> struct is_vector<generic_v256> : cpp::true_type {};
721c814c99SGuillaume Chatelet template <> struct is_vector<generic_v512> : cpp::true_type {};
732aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface.
742aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming)
751c814c99SGuillaume Chatelet template <typename T> constexpr bool is_vector_v = is_vector<T>::value;
7626c17f4bSGuillaume Chatelet 
7726c17f4bSGuillaume Chatelet template <class T> struct is_array : cpp::false_type {};
7826c17f4bSGuillaume Chatelet template <class T, size_t N> struct is_array<cpp::array<T, N>> {
792aa22ca2SNick Desaulniers   // Meant to match std::numeric_limits interface.
802aa22ca2SNick Desaulniers   // NOLINTNEXTLINE(readability-identifier-naming)
8126c17f4bSGuillaume Chatelet   static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
8226c17f4bSGuillaume Chatelet };
832aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface.
842aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming)
8526c17f4bSGuillaume Chatelet template <typename T> constexpr bool is_array_v = is_array<T>::value;
8626c17f4bSGuillaume Chatelet 
872aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface.
882aa22ca2SNick Desaulniers // NOLINTBEGIN(readability-identifier-naming)
8926c17f4bSGuillaume Chatelet template <typename T>
9026c17f4bSGuillaume Chatelet constexpr bool is_element_type_v =
9126c17f4bSGuillaume Chatelet     is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;
922aa22ca2SNick Desaulniers // NOLINTEND(readability-identifier-naming)
9326c17f4bSGuillaume Chatelet 
941c814c99SGuillaume Chatelet // Helper struct to retrieve the number of elements of an array.
9526c17f4bSGuillaume Chatelet template <class T> struct array_size {};
9626c17f4bSGuillaume Chatelet template <class T, size_t N>
9726c17f4bSGuillaume Chatelet struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
982aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface.
992aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming)
10026c17f4bSGuillaume Chatelet template <typename T> constexpr size_t array_size_v = array_size<T>::value;
10126c17f4bSGuillaume Chatelet 
10226c17f4bSGuillaume Chatelet // Generic operations for the above type categories.
10326c17f4bSGuillaume Chatelet 
10426c17f4bSGuillaume Chatelet template <typename T> T load(CPtr src) {
10526c17f4bSGuillaume Chatelet   static_assert(is_element_type_v<T>);
10626c17f4bSGuillaume Chatelet   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
107b6bc9d72SGuillaume Chatelet     return ::LIBC_NAMESPACE::load<T>(src);
10826c17f4bSGuillaume Chatelet   } else if constexpr (is_array_v<T>) {
10926c17f4bSGuillaume Chatelet     using value_type = typename T::value_type;
11088d82b74SNick Desaulniers     T value;
11188d82b74SNick Desaulniers     for (size_t i = 0; i < array_size_v<T>; ++i)
11288d82b74SNick Desaulniers       value[i] = load<value_type>(src + (i * sizeof(value_type)));
11388d82b74SNick Desaulniers     return value;
1146363320bSSiva Chandra Reddy   }
11569090143SGuillaume Chatelet }
11626c17f4bSGuillaume Chatelet 
11726c17f4bSGuillaume Chatelet template <typename T> void store(Ptr dst, T value) {
11826c17f4bSGuillaume Chatelet   static_assert(is_element_type_v<T>);
11926c17f4bSGuillaume Chatelet   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
120b6bc9d72SGuillaume Chatelet     ::LIBC_NAMESPACE::store<T>(dst, value);
12126c17f4bSGuillaume Chatelet   } else if constexpr (is_array_v<T>) {
12226c17f4bSGuillaume Chatelet     using value_type = typename T::value_type;
12388d82b74SNick Desaulniers     for (size_t i = 0; i < array_size_v<T>; ++i)
12488d82b74SNick Desaulniers       store<value_type>(dst + (i * sizeof(value_type)), value[i]);
12526c17f4bSGuillaume Chatelet   }
12626c17f4bSGuillaume Chatelet }
12726c17f4bSGuillaume Chatelet 
12826c17f4bSGuillaume Chatelet template <typename T> T splat(uint8_t value) {
12926c17f4bSGuillaume Chatelet   static_assert(is_scalar_v<T> || is_vector_v<T>);
13026c17f4bSGuillaume Chatelet   if constexpr (is_scalar_v<T>)
13126c17f4bSGuillaume Chatelet     return T(~0) / T(0xFF) * T(value);
13226c17f4bSGuillaume Chatelet   else if constexpr (is_vector_v<T>) {
13388d82b74SNick Desaulniers     T out;
13469090143SGuillaume Chatelet     // This for loop is optimized out for vector types.
13526c17f4bSGuillaume Chatelet     for (size_t i = 0; i < sizeof(T); ++i)
13688d82b74SNick Desaulniers       out[i] = value;
13788d82b74SNick Desaulniers     return out;
13869090143SGuillaume Chatelet   }
139917e0d9cSGuillaume Chatelet }
14069090143SGuillaume Chatelet 
14169090143SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
14269090143SGuillaume Chatelet // Memset
14369090143SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
1441a4696d9SGuillaume Chatelet 
145458734b8SGuillaume Chatelet template <typename T> struct Memset {
1461c814c99SGuillaume Chatelet   static_assert(is_element_type_v<T>);
147458734b8SGuillaume Chatelet   static constexpr size_t SIZE = sizeof(T);
14869090143SGuillaume Chatelet 
1496363320bSSiva Chandra Reddy   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
1501a4696d9SGuillaume Chatelet     if constexpr (is_scalar_v<T> || is_vector_v<T>) {
1511a4696d9SGuillaume Chatelet       store<T>(dst, splat<T>(value));
1521a4696d9SGuillaume Chatelet     } else if constexpr (is_array_v<T>) {
1531a4696d9SGuillaume Chatelet       using value_type = typename T::value_type;
1541a4696d9SGuillaume Chatelet       const auto Splat = splat<value_type>(value);
15588d82b74SNick Desaulniers       for (size_t i = 0; i < array_size_v<T>; ++i)
15688d82b74SNick Desaulniers         store<value_type>(dst + (i * sizeof(value_type)), Splat);
15769090143SGuillaume Chatelet     }
15869090143SGuillaume Chatelet   }
15969090143SGuillaume Chatelet 
1606363320bSSiva Chandra Reddy   LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
16169090143SGuillaume Chatelet     block(dst + count - SIZE, value);
16269090143SGuillaume Chatelet   }
16369090143SGuillaume Chatelet 
1646363320bSSiva Chandra Reddy   LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
16569090143SGuillaume Chatelet     block(dst, value);
16669090143SGuillaume Chatelet     tail(dst, value, count);
16769090143SGuillaume Chatelet   }
16869090143SGuillaume Chatelet 
1693153aa4cSdoshimili   LIBC_INLINE static void loop_and_tail_offset(Ptr dst, uint8_t value,
1703153aa4cSdoshimili                                                size_t count, size_t offset) {
1711a4696d9SGuillaume Chatelet     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
17269090143SGuillaume Chatelet     do {
17369090143SGuillaume Chatelet       block(dst + offset, value);
17469090143SGuillaume Chatelet       offset += SIZE;
17569090143SGuillaume Chatelet     } while (offset < count - SIZE);
17669090143SGuillaume Chatelet     tail(dst, value, count);
17769090143SGuillaume Chatelet   }
1783153aa4cSdoshimili 
1793153aa4cSdoshimili   LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
1803153aa4cSdoshimili     return loop_and_tail_offset(dst, value, count, 0);
1813153aa4cSdoshimili   }
18269090143SGuillaume Chatelet };
18369090143SGuillaume Chatelet 
184458734b8SGuillaume Chatelet template <typename T, typename... TS> struct MemsetSequence {
185458734b8SGuillaume Chatelet   static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
186458734b8SGuillaume Chatelet   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
187458734b8SGuillaume Chatelet     Memset<T>::block(dst, value);
1881c814c99SGuillaume Chatelet     if constexpr (sizeof...(TS) > 0)
189458734b8SGuillaume Chatelet       return MemsetSequence<TS...>::block(dst + sizeof(T), value);
190458734b8SGuillaume Chatelet   }
191458734b8SGuillaume Chatelet };
192458734b8SGuillaume Chatelet 
19369090143SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
19469090143SGuillaume Chatelet // Memmove
19569090143SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
19669090143SGuillaume Chatelet 
197355a5d5eSGuillaume Chatelet template <typename T> struct Memmove {
1981c814c99SGuillaume Chatelet   static_assert(is_element_type_v<T>);
199458734b8SGuillaume Chatelet   static constexpr size_t SIZE = sizeof(T);
20069090143SGuillaume Chatelet 
2016363320bSSiva Chandra Reddy   LIBC_INLINE static void block(Ptr dst, CPtr src) {
20226c17f4bSGuillaume Chatelet     store<T>(dst, load<T>(src));
20369090143SGuillaume Chatelet   }
20469090143SGuillaume Chatelet 
2056363320bSSiva Chandra Reddy   LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
206355a5d5eSGuillaume Chatelet     const size_t offset = count - SIZE;
20769090143SGuillaume Chatelet     // The load and store operations can be performed in any order as long as
20869090143SGuillaume Chatelet     // they are not interleaved. More investigations are needed to determine
20969090143SGuillaume Chatelet     // the best order.
21026c17f4bSGuillaume Chatelet     const auto head = load<T>(src);
21126c17f4bSGuillaume Chatelet     const auto tail = load<T>(src + offset);
21226c17f4bSGuillaume Chatelet     store<T>(dst, head);
21326c17f4bSGuillaume Chatelet     store<T>(dst + offset, tail);
21469090143SGuillaume Chatelet   }
21569090143SGuillaume Chatelet 
21669090143SGuillaume Chatelet   // Align forward suitable when dst < src. The alignment is performed with
21769090143SGuillaume Chatelet   // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
21869090143SGuillaume Chatelet   //
21969090143SGuillaume Chatelet   // e.g. Moving two bytes forward, we make sure src is aligned.
22069090143SGuillaume Chatelet   // [  |       |       |       |      ]
22169090143SGuillaume Chatelet   // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
22269090143SGuillaume Chatelet   // [____LLLLLLLL_____________________]
22369090143SGuillaume Chatelet   // [___________LLLLLLLA______________]
22469090143SGuillaume Chatelet   // [_SSSSSSSS________________________]
22569090143SGuillaume Chatelet   // [________SSSSSSSS_________________]
22669090143SGuillaume Chatelet   //
22769090143SGuillaume Chatelet   // e.g. Moving two bytes forward, we make sure dst is aligned.
22869090143SGuillaume Chatelet   // [  |       |       |       |      ]
22969090143SGuillaume Chatelet   // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
23069090143SGuillaume Chatelet   // [____LLLLLLLL_____________________]
23169090143SGuillaume Chatelet   // [______LLLLLLLL___________________]
23269090143SGuillaume Chatelet   // [_SSSSSSSS________________________]
23369090143SGuillaume Chatelet   // [___SSSSSSSA______________________]
23469090143SGuillaume Chatelet   template <Arg AlignOn>
2356363320bSSiva Chandra Reddy   LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
23669090143SGuillaume Chatelet     Ptr prev_dst = dst;
23769090143SGuillaume Chatelet     CPtr prev_src = src;
23869090143SGuillaume Chatelet     size_t prev_count = count;
239355a5d5eSGuillaume Chatelet     align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
240355a5d5eSGuillaume Chatelet     adjust(SIZE, dst, src, count);
24169090143SGuillaume Chatelet     head_tail(prev_dst, prev_src, prev_count - count);
24269090143SGuillaume Chatelet   }
24369090143SGuillaume Chatelet 
24469090143SGuillaume Chatelet   // Align backward suitable when dst > src. The alignment is performed with
24569090143SGuillaume Chatelet   // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
24669090143SGuillaume Chatelet   //
24769090143SGuillaume Chatelet   // e.g. Moving two bytes backward, we make sure src is aligned.
24869090143SGuillaume Chatelet   // [  |       |       |       |      ]
24969090143SGuillaume Chatelet   // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
25069090143SGuillaume Chatelet   // [ _________________ALLLLLLL_______]
25169090143SGuillaume Chatelet   // [ ___________________LLLLLLLL_____]
25269090143SGuillaume Chatelet   // [____________________SSSSSSSS_____]
25369090143SGuillaume Chatelet   // [______________________SSSSSSSS___]
25469090143SGuillaume Chatelet   //
25569090143SGuillaume Chatelet   // e.g. Moving two bytes backward, we make sure dst is aligned.
25669090143SGuillaume Chatelet   // [  |       |       |       |      ]
25769090143SGuillaume Chatelet   // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
25869090143SGuillaume Chatelet   // [ _______________LLLLLLLL_________]
25969090143SGuillaume Chatelet   // [ ___________________LLLLLLLL_____]
26069090143SGuillaume Chatelet   // [__________________ASSSSSSS_______]
26169090143SGuillaume Chatelet   // [______________________SSSSSSSS___]
26269090143SGuillaume Chatelet   template <Arg AlignOn>
2636363320bSSiva Chandra Reddy   LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
26469090143SGuillaume Chatelet     Ptr headtail_dst = dst + count;
26569090143SGuillaume Chatelet     CPtr headtail_src = src + count;
26669090143SGuillaume Chatelet     size_t headtail_size = 0;
267355a5d5eSGuillaume Chatelet     align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
26869090143SGuillaume Chatelet                                           headtail_size);
269355a5d5eSGuillaume Chatelet     adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
27069090143SGuillaume Chatelet     head_tail(headtail_dst, headtail_src, headtail_size);
27169090143SGuillaume Chatelet     count -= headtail_size;
27269090143SGuillaume Chatelet   }
27369090143SGuillaume Chatelet 
27469090143SGuillaume Chatelet   // Move forward suitable when dst < src. We load the tail bytes before
27569090143SGuillaume Chatelet   // handling the loop.
27669090143SGuillaume Chatelet   //
27769090143SGuillaume Chatelet   // e.g. Moving two bytes
27869090143SGuillaume Chatelet   // [   |       |       |       |       |]
27969090143SGuillaume Chatelet   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
28069090143SGuillaume Chatelet   // [_________________________LLLLLLLL___]
28169090143SGuillaume Chatelet   // [___LLLLLLLL_________________________]
28269090143SGuillaume Chatelet   // [_SSSSSSSS___________________________]
28369090143SGuillaume Chatelet   // [___________LLLLLLLL_________________]
28469090143SGuillaume Chatelet   // [_________SSSSSSSS___________________]
28569090143SGuillaume Chatelet   // [___________________LLLLLLLL_________]
28669090143SGuillaume Chatelet   // [_________________SSSSSSSS___________]
28769090143SGuillaume Chatelet   // [_______________________SSSSSSSS_____]
2886363320bSSiva Chandra Reddy   LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
2896363320bSSiva Chandra Reddy                                                 size_t count) {
290355a5d5eSGuillaume Chatelet     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
291355a5d5eSGuillaume Chatelet     const size_t tail_offset = count - SIZE;
29226c17f4bSGuillaume Chatelet     const auto tail_value = load<T>(src + tail_offset);
29369090143SGuillaume Chatelet     size_t offset = 0;
294f6d99722SGuillaume Chatelet     LIBC_LOOP_NOUNROLL
29569090143SGuillaume Chatelet     do {
29669090143SGuillaume Chatelet       block(dst + offset, src + offset);
297355a5d5eSGuillaume Chatelet       offset += SIZE;
298355a5d5eSGuillaume Chatelet     } while (offset < count - SIZE);
29926c17f4bSGuillaume Chatelet     store<T>(dst + tail_offset, tail_value);
30069090143SGuillaume Chatelet   }
30169090143SGuillaume Chatelet 
30269090143SGuillaume Chatelet   // Move backward suitable when dst > src. We load the head bytes before
30369090143SGuillaume Chatelet   // handling the loop.
30469090143SGuillaume Chatelet   //
30569090143SGuillaume Chatelet   // e.g. Moving two bytes
30669090143SGuillaume Chatelet   // [   |       |       |       |       |]
30769090143SGuillaume Chatelet   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
30869090143SGuillaume Chatelet   // [___LLLLLLLL_________________________]
30969090143SGuillaume Chatelet   // [_________________________LLLLLLLL___]
31069090143SGuillaume Chatelet   // [___________________________SSSSSSSS_]
31169090143SGuillaume Chatelet   // [_________________LLLLLLLL___________]
31269090143SGuillaume Chatelet   // [___________________SSSSSSSS_________]
31369090143SGuillaume Chatelet   // [_________LLLLLLLL___________________]
31469090143SGuillaume Chatelet   // [___________SSSSSSSS_________________]
31569090143SGuillaume Chatelet   // [_____SSSSSSSS_______________________]
3166363320bSSiva Chandra Reddy   LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
3176363320bSSiva Chandra Reddy                                                  size_t count) {
318355a5d5eSGuillaume Chatelet     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
31926c17f4bSGuillaume Chatelet     const auto head_value = load<T>(src);
320355a5d5eSGuillaume Chatelet     ptrdiff_t offset = count - SIZE;
321f6d99722SGuillaume Chatelet     LIBC_LOOP_NOUNROLL
32269090143SGuillaume Chatelet     do {
32369090143SGuillaume Chatelet       block(dst + offset, src + offset);
324355a5d5eSGuillaume Chatelet       offset -= SIZE;
32569090143SGuillaume Chatelet     } while (offset >= 0);
32626c17f4bSGuillaume Chatelet     store<T>(dst, head_value);
32769090143SGuillaume Chatelet   }
32869090143SGuillaume Chatelet };
32969090143SGuillaume Chatelet 
330917e0d9cSGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
3311c814c99SGuillaume Chatelet // Low level operations for Bcmp and Memcmp that operate on memory locations.
3329ec6ebd3SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
333e49a6085SGuillaume Chatelet 
3341c814c99SGuillaume Chatelet // Same as load above but with an offset to the pointer.
3351c814c99SGuillaume Chatelet // Making the offset explicit hints the compiler to use relevant addressing mode
3361c814c99SGuillaume Chatelet // consistently.
3375bf8efd2SRoland McGrath template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) {
338b6bc9d72SGuillaume Chatelet   return ::LIBC_NAMESPACE::load<T>(ptr + offset);
339e49a6085SGuillaume Chatelet }
340e49a6085SGuillaume Chatelet 
3411c814c99SGuillaume Chatelet // Same as above but also makes sure the loaded value is in big endian format.
3421c814c99SGuillaume Chatelet // This is useful when implementing lexicograhic comparisons as big endian
3431c814c99SGuillaume Chatelet // scalar comparison directly maps to lexicographic byte comparisons.
3445bf8efd2SRoland McGrath template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) {
3451c814c99SGuillaume Chatelet   return Endian::to_big_endian(load<T>(ptr, offset));
3461c814c99SGuillaume Chatelet }
3471c814c99SGuillaume Chatelet 
3481c814c99SGuillaume Chatelet // Equality: returns true iff values at locations (p1 + offset) and (p2 +
3491c814c99SGuillaume Chatelet // offset) compare equal.
3505bf8efd2SRoland McGrath template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset);
3511c814c99SGuillaume Chatelet 
3521c814c99SGuillaume Chatelet // Not equals: returns non-zero iff values at locations (p1 + offset) and (p2 +
3531c814c99SGuillaume Chatelet // offset) differ.
3545bf8efd2SRoland McGrath template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset);
3551c814c99SGuillaume Chatelet 
3561c814c99SGuillaume Chatelet // Lexicographic comparison:
3571c814c99SGuillaume Chatelet // - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare
3581c814c99SGuillaume Chatelet //   equal.
3591c814c99SGuillaume Chatelet // - returns a negative value if value at location (p1 + offset) is
3601c814c99SGuillaume Chatelet //   lexicographically less than value at (p2 + offset).
3611c814c99SGuillaume Chatelet // - returns a positive value if value at location (p1 + offset) is
3621c814c99SGuillaume Chatelet //   lexicographically greater than value at (p2 + offset).
363bd1cba9fSGuillaume Chatelet template <typename T>
3645bf8efd2SRoland McGrath LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset);
3655e32765cSGuillaume Chatelet 
3661c814c99SGuillaume Chatelet // Lexicographic comparison of non-equal values:
3671c814c99SGuillaume Chatelet // - returns a negative value if value at location (p1 + offset) is
3681c814c99SGuillaume Chatelet //   lexicographically less than value at (p2 + offset).
3691c814c99SGuillaume Chatelet // - returns a positive value if value at location (p1 + offset) is
3701c814c99SGuillaume Chatelet //   lexicographically greater than value at (p2 + offset).
3711c814c99SGuillaume Chatelet template <typename T>
3725bf8efd2SRoland McGrath LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset);
373bd1cba9fSGuillaume Chatelet 
374bd1cba9fSGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
3751c814c99SGuillaume Chatelet // Memcmp implementation
3761c814c99SGuillaume Chatelet //
3771c814c99SGuillaume Chatelet // When building memcmp, not all types are considered equals.
3781c814c99SGuillaume Chatelet //
3791c814c99SGuillaume Chatelet // For instance, the lexicographic comparison of two uint8_t can be implemented
3801c814c99SGuillaume Chatelet // as a simple subtraction, but for wider operations the logic can be much more
3811c814c99SGuillaume Chatelet // involving, especially on little endian platforms.
3821c814c99SGuillaume Chatelet //
3831c814c99SGuillaume Chatelet // For such wider types it is a good strategy to test for equality first and
3841c814c99SGuillaume Chatelet // only do the expensive lexicographic comparison if necessary.
3851c814c99SGuillaume Chatelet //
3861c814c99SGuillaume Chatelet // Decomposing the algorithm like this for wider types allows us to have
3871c814c99SGuillaume Chatelet // efficient implementation of higher order functions like 'head_tail' or
3881c814c99SGuillaume Chatelet // 'loop_and_tail'.
389bd1cba9fSGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
390bd1cba9fSGuillaume Chatelet 
3911c814c99SGuillaume Chatelet // Type traits to decide whether we can use 'cmp' directly or if we need to
3921c814c99SGuillaume Chatelet // split the computation.
3931c814c99SGuillaume Chatelet template <typename T> struct cmp_is_expensive;
394bd1cba9fSGuillaume Chatelet 
3951c814c99SGuillaume Chatelet template <typename T> struct Memcmp {
3961c814c99SGuillaume Chatelet   static_assert(is_element_type_v<T>);
3971c814c99SGuillaume Chatelet   static constexpr size_t SIZE = sizeof(T);
398bd1cba9fSGuillaume Chatelet 
3991c814c99SGuillaume Chatelet private:
4001c814c99SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2,
4011c814c99SGuillaume Chatelet                                                    size_t offset) {
4021c814c99SGuillaume Chatelet     if constexpr (cmp_is_expensive<T>::value) {
4031c814c99SGuillaume Chatelet       if (!eq<T>(p1, p2, offset))
4041c814c99SGuillaume Chatelet         return cmp_neq<T>(p1, p2, offset);
4056f8d826bSNick Desaulniers       return MemcmpReturnType::zero();
406bd1cba9fSGuillaume Chatelet     } else {
4071c814c99SGuillaume Chatelet       return cmp<T>(p1, p2, offset);
408bd1cba9fSGuillaume Chatelet     }
4095e32765cSGuillaume Chatelet   }
4105e32765cSGuillaume Chatelet 
4111c814c99SGuillaume Chatelet public:
4121c814c99SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
4131c814c99SGuillaume Chatelet     return block_offset(p1, p2, 0);
4141c814c99SGuillaume Chatelet   }
4151c814c99SGuillaume Chatelet 
416e49a6085SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
4171c814c99SGuillaume Chatelet     return block_offset(p1, p2, count - SIZE);
418e49a6085SGuillaume Chatelet   }
419e49a6085SGuillaume Chatelet 
420e49a6085SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
421e49a6085SGuillaume Chatelet                                                 size_t count) {
4221c814c99SGuillaume Chatelet     if constexpr (cmp_is_expensive<T>::value) {
4231c814c99SGuillaume Chatelet       if (!eq<T>(p1, p2, 0))
4241c814c99SGuillaume Chatelet         return cmp_neq<T>(p1, p2, 0);
4251c814c99SGuillaume Chatelet     } else {
4261c814c99SGuillaume Chatelet       if (const auto value = cmp<T>(p1, p2, 0))
4279ec6ebd3SGuillaume Chatelet         return value;
4281c814c99SGuillaume Chatelet     }
429e49a6085SGuillaume Chatelet     return tail(p1, p2, count);
4309ec6ebd3SGuillaume Chatelet   }
4319ec6ebd3SGuillaume Chatelet 
432e49a6085SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
433e49a6085SGuillaume Chatelet                                                     size_t count) {
4341c814c99SGuillaume Chatelet     return loop_and_tail_offset(p1, p2, count, 0);
4351c814c99SGuillaume Chatelet   }
4361c814c99SGuillaume Chatelet 
4371c814c99SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType
4381c814c99SGuillaume Chatelet   loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
4391c814c99SGuillaume Chatelet     if constexpr (SIZE > 1) {
4401c814c99SGuillaume Chatelet       const size_t limit = count - SIZE;
4411c814c99SGuillaume Chatelet       LIBC_LOOP_NOUNROLL
4421c814c99SGuillaume Chatelet       for (; offset < limit; offset += SIZE) {
4431c814c99SGuillaume Chatelet         if constexpr (cmp_is_expensive<T>::value) {
4441c814c99SGuillaume Chatelet           if (!eq<T>(p1, p2, offset))
4451c814c99SGuillaume Chatelet             return cmp_neq<T>(p1, p2, offset);
4461c814c99SGuillaume Chatelet         } else {
4471c814c99SGuillaume Chatelet           if (const auto value = cmp<T>(p1, p2, offset))
4489ec6ebd3SGuillaume Chatelet             return value;
4491c814c99SGuillaume Chatelet         }
4501c814c99SGuillaume Chatelet       }
4511c814c99SGuillaume Chatelet       return block_offset(p1, p2, limit); // tail
4521c814c99SGuillaume Chatelet     } else {
4531c814c99SGuillaume Chatelet       // No need for a tail operation when SIZE == 1.
4541c814c99SGuillaume Chatelet       LIBC_LOOP_NOUNROLL
4551c814c99SGuillaume Chatelet       for (; offset < count; offset += SIZE)
4561c814c99SGuillaume Chatelet         if (auto value = cmp<T>(p1, p2, offset))
4571c814c99SGuillaume Chatelet           return value;
4586f8d826bSNick Desaulniers       return MemcmpReturnType::zero();
4591c814c99SGuillaume Chatelet     }
4601c814c99SGuillaume Chatelet   }
4611c814c99SGuillaume Chatelet 
4621c814c99SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType
4631c814c99SGuillaume Chatelet   loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
4641c814c99SGuillaume Chatelet     const AlignHelper<sizeof(T)> helper(p1);
4651c814c99SGuillaume Chatelet     if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
4661c814c99SGuillaume Chatelet       if (auto value = block(p1, p2))
4671c814c99SGuillaume Chatelet         return value;
468640c8574SNick Desaulniers       adjust(helper.offset, p1, p2, count);
4691c814c99SGuillaume Chatelet     }
4701c814c99SGuillaume Chatelet     return loop_and_tail(p1, p2, count);
4715e32765cSGuillaume Chatelet   }
4725e32765cSGuillaume Chatelet };
4735e32765cSGuillaume Chatelet 
4741c814c99SGuillaume Chatelet template <typename T, typename... TS> struct MemcmpSequence {
4751c814c99SGuillaume Chatelet   static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
4761c814c99SGuillaume Chatelet   LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
4771c814c99SGuillaume Chatelet     // TODO: test suggestion in
4781c814c99SGuillaume Chatelet     // https://reviews.llvm.org/D148717?id=515724#inline-1446890
4791c814c99SGuillaume Chatelet     // once we have a proper way to check memory operation latency.
4801c814c99SGuillaume Chatelet     if constexpr (cmp_is_expensive<T>::value) {
4811c814c99SGuillaume Chatelet       if (!eq<T>(p1, p2, 0))
4821c814c99SGuillaume Chatelet         return cmp_neq<T>(p1, p2, 0);
4831c814c99SGuillaume Chatelet     } else {
4841c814c99SGuillaume Chatelet       if (auto value = cmp<T>(p1, p2, 0))
4851c814c99SGuillaume Chatelet         return value;
4861c814c99SGuillaume Chatelet     }
4871c814c99SGuillaume Chatelet     if constexpr (sizeof...(TS) > 0)
4881c814c99SGuillaume Chatelet       return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
4891c814c99SGuillaume Chatelet     else
4906f8d826bSNick Desaulniers       return MemcmpReturnType::zero();
4911c814c99SGuillaume Chatelet   }
4921c814c99SGuillaume Chatelet };
4931c814c99SGuillaume Chatelet 
4941c814c99SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
4951c814c99SGuillaume Chatelet // Bcmp
4961c814c99SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
4971c814c99SGuillaume Chatelet template <typename T> struct Bcmp {
4981c814c99SGuillaume Chatelet   static_assert(is_element_type_v<T>);
4991c814c99SGuillaume Chatelet   static constexpr size_t SIZE = sizeof(T);
5001c814c99SGuillaume Chatelet 
5011c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
5021c814c99SGuillaume Chatelet     return neq<T>(p1, p2, 0);
5031c814c99SGuillaume Chatelet   }
5041c814c99SGuillaume Chatelet 
5051c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
5061c814c99SGuillaume Chatelet     const size_t tail_offset = count - SIZE;
5071c814c99SGuillaume Chatelet     return neq<T>(p1, p2, tail_offset);
5081c814c99SGuillaume Chatelet   }
5091c814c99SGuillaume Chatelet 
5101c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
5111c814c99SGuillaume Chatelet     if (const auto value = neq<T>(p1, p2, 0))
5121c814c99SGuillaume Chatelet       return value;
5131c814c99SGuillaume Chatelet     return tail(p1, p2, count);
5141c814c99SGuillaume Chatelet   }
5151c814c99SGuillaume Chatelet 
5161c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
5171c814c99SGuillaume Chatelet                                                   size_t count) {
5181c814c99SGuillaume Chatelet     return loop_and_tail_offset(p1, p2, count, 0);
5191c814c99SGuillaume Chatelet   }
5201c814c99SGuillaume Chatelet 
5211c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType
5221c814c99SGuillaume Chatelet   loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
5231c814c99SGuillaume Chatelet     if constexpr (SIZE > 1) {
5241c814c99SGuillaume Chatelet       const size_t limit = count - SIZE;
5251c814c99SGuillaume Chatelet       LIBC_LOOP_NOUNROLL
5261c814c99SGuillaume Chatelet       for (; offset < limit; offset += SIZE)
5271c814c99SGuillaume Chatelet         if (const auto value = neq<T>(p1, p2, offset))
5281c814c99SGuillaume Chatelet           return value;
5291c814c99SGuillaume Chatelet       return tail(p1, p2, count);
5301c814c99SGuillaume Chatelet     } else {
5311c814c99SGuillaume Chatelet       // No need for a tail operation when SIZE == 1.
5321c814c99SGuillaume Chatelet       LIBC_LOOP_NOUNROLL
5331c814c99SGuillaume Chatelet       for (; offset < count; offset += SIZE)
5341c814c99SGuillaume Chatelet         if (const auto value = neq<T>(p1, p2, offset))
5351c814c99SGuillaume Chatelet           return value;
5366f8d826bSNick Desaulniers       return BcmpReturnType::zero();
5371c814c99SGuillaume Chatelet     }
5381c814c99SGuillaume Chatelet   }
5391c814c99SGuillaume Chatelet 
5401c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType
5411c814c99SGuillaume Chatelet   loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
5421c814c99SGuillaume Chatelet     static_assert(SIZE > 1,
5431c814c99SGuillaume Chatelet                   "No need to align when processing one byte at a time");
5441c814c99SGuillaume Chatelet     const AlignHelper<sizeof(T)> helper(p1);
5451c814c99SGuillaume Chatelet     if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
5461c814c99SGuillaume Chatelet       if (auto value = block(p1, p2))
5471c814c99SGuillaume Chatelet         return value;
548640c8574SNick Desaulniers       adjust(helper.offset, p1, p2, count);
5491c814c99SGuillaume Chatelet     }
5501c814c99SGuillaume Chatelet     return loop_and_tail(p1, p2, count);
5511c814c99SGuillaume Chatelet   }
5521c814c99SGuillaume Chatelet };
5531c814c99SGuillaume Chatelet 
5541c814c99SGuillaume Chatelet template <typename T, typename... TS> struct BcmpSequence {
5551c814c99SGuillaume Chatelet   static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
5561c814c99SGuillaume Chatelet   LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
5571c814c99SGuillaume Chatelet     if (auto value = neq<T>(p1, p2, 0))
5581c814c99SGuillaume Chatelet       return value;
5591c814c99SGuillaume Chatelet     if constexpr (sizeof...(TS) > 0)
5601c814c99SGuillaume Chatelet       return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
5611c814c99SGuillaume Chatelet     else
5626f8d826bSNick Desaulniers       return BcmpReturnType::zero();
5631c814c99SGuillaume Chatelet   }
5641c814c99SGuillaume Chatelet };
5651c814c99SGuillaume Chatelet 
5661c814c99SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
5671c814c99SGuillaume Chatelet // Specializations for uint8_t
5681c814c99SGuillaume Chatelet template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {};
5691c814c99SGuillaume Chatelet template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
5701c814c99SGuillaume Chatelet   return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset);
5711c814c99SGuillaume Chatelet }
5721c814c99SGuillaume Chatelet template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
5731c814c99SGuillaume Chatelet   return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset);
5741c814c99SGuillaume Chatelet }
5751c814c99SGuillaume Chatelet template <>
5761c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
5771c814c99SGuillaume Chatelet   return static_cast<int32_t>(load<uint8_t>(p1, offset)) -
5781c814c99SGuillaume Chatelet          static_cast<int32_t>(load<uint8_t>(p2, offset));
5791c814c99SGuillaume Chatelet }
5801c814c99SGuillaume Chatelet template <>
5811c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset);
5825bf8efd2SRoland McGrath 
5835ff3ff33SPetr Hosek } // namespace generic
5845ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
58569090143SGuillaume Chatelet 
58669090143SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
587