169090143SGuillaume Chatelet //===-- Generic implementation of memory function building blocks ---------===// 269090143SGuillaume Chatelet // 369090143SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 469090143SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information. 569090143SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 669090143SGuillaume Chatelet // 769090143SGuillaume Chatelet //===----------------------------------------------------------------------===// 869090143SGuillaume Chatelet // 969090143SGuillaume Chatelet // This file provides generic C++ building blocks. 1069090143SGuillaume Chatelet // Depending on the requested size, the block operation uses unsigned integral 1169090143SGuillaume Chatelet // types, vector types or an array of the type with the maximum size. 1269090143SGuillaume Chatelet // 1369090143SGuillaume Chatelet // The maximum size is passed as a template argument. For instance, on x86 1469090143SGuillaume Chatelet // platforms that only supports integral types the maximum size would be 8 1569090143SGuillaume Chatelet // (corresponding to uint64_t). On this platform if we request the size 32, this 1669090143SGuillaume Chatelet // would be treated as a cpp::array<uint64_t, 4>. 1769090143SGuillaume Chatelet // 1869090143SGuillaume Chatelet // On the other hand, if the platform is x86 with support for AVX the maximum 1969090143SGuillaume Chatelet // size is 32 and the operation can be handled with a single native operation. 2069090143SGuillaume Chatelet // 2169090143SGuillaume Chatelet //===----------------------------------------------------------------------===// 2269090143SGuillaume Chatelet 2369090143SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H 2469090143SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H 2569090143SGuillaume Chatelet 2669090143SGuillaume Chatelet #include "src/__support/CPP/array.h" 2769090143SGuillaume Chatelet #include "src/__support/CPP/type_traits.h" 28282fe508SGuillaume Chatelet #include "src/__support/common.h" 29*95b680e4SDaniel Thornburgh #include "src/__support/endian_internal.h" 305ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 31406b3f2cSGuillaume Chatelet #include "src/__support/macros/optimization.h" 32a84e66a9SGuillaume Chatelet #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64 3369090143SGuillaume Chatelet #include "src/string/memory_utils/op_builtin.h" 3469090143SGuillaume Chatelet #include "src/string/memory_utils/utils.h" 3569090143SGuillaume Chatelet 3669090143SGuillaume Chatelet #include <stdint.h> 3769090143SGuillaume Chatelet 381c814c99SGuillaume Chatelet static_assert((UINTPTR_MAX == 4294967295U) || 391c814c99SGuillaume Chatelet (UINTPTR_MAX == 18446744073709551615UL), 401c814c99SGuillaume Chatelet "We currently only support 32- or 64-bit platforms"); 411c814c99SGuillaume Chatelet 425ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 4326c17f4bSGuillaume Chatelet // Compiler types using the vector attributes. 441c814c99SGuillaume Chatelet using generic_v128 = uint8_t __attribute__((__vector_size__(16))); 451c814c99SGuillaume Chatelet using generic_v256 = uint8_t __attribute__((__vector_size__(32))); 461c814c99SGuillaume Chatelet using generic_v512 = uint8_t __attribute__((__vector_size__(64))); 475ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 4869090143SGuillaume Chatelet 495ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 505ff3ff33SPetr Hosek namespace generic { 511c814c99SGuillaume Chatelet 5226c17f4bSGuillaume Chatelet // We accept three types of values as elements for generic operations: 531c814c99SGuillaume Chatelet // - scalar : unsigned integral types, 541c814c99SGuillaume Chatelet // - vector : compiler types using the vector attributes or platform builtins, 5526c17f4bSGuillaume Chatelet // - array : a cpp::array<T, N> where T is itself either a scalar or a vector. 5626c17f4bSGuillaume Chatelet // The following traits help discriminate between these cases. 5793ac4493SGuillaume Chatelet 581c814c99SGuillaume Chatelet template <typename T> struct is_scalar : cpp::false_type {}; 591c814c99SGuillaume Chatelet template <> struct is_scalar<uint8_t> : cpp::true_type {}; 601c814c99SGuillaume Chatelet template <> struct is_scalar<uint16_t> : cpp::true_type {}; 611c814c99SGuillaume Chatelet template <> struct is_scalar<uint32_t> : cpp::true_type {}; 62a84e66a9SGuillaume Chatelet #ifdef LIBC_TYPES_HAS_INT64 631c814c99SGuillaume Chatelet template <> struct is_scalar<uint64_t> : cpp::true_type {}; 64a84e66a9SGuillaume Chatelet #endif // LIBC_TYPES_HAS_INT64 652aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 662aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming) 671c814c99SGuillaume Chatelet template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value; 681c814c99SGuillaume Chatelet 691c814c99SGuillaume Chatelet template <typename T> struct is_vector : cpp::false_type {}; 701c814c99SGuillaume Chatelet template <> struct is_vector<generic_v128> : cpp::true_type {}; 711c814c99SGuillaume Chatelet template <> struct is_vector<generic_v256> : cpp::true_type {}; 721c814c99SGuillaume Chatelet template <> struct is_vector<generic_v512> : cpp::true_type {}; 732aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 742aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming) 751c814c99SGuillaume Chatelet template <typename T> constexpr bool is_vector_v = is_vector<T>::value; 7626c17f4bSGuillaume Chatelet 7726c17f4bSGuillaume Chatelet template <class T> struct is_array : cpp::false_type {}; 7826c17f4bSGuillaume Chatelet template <class T, size_t N> struct is_array<cpp::array<T, N>> { 792aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 802aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming) 8126c17f4bSGuillaume Chatelet static constexpr bool value = is_scalar_v<T> || is_vector_v<T>; 8226c17f4bSGuillaume Chatelet }; 832aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 842aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming) 8526c17f4bSGuillaume Chatelet template <typename T> constexpr bool is_array_v = is_array<T>::value; 8626c17f4bSGuillaume Chatelet 872aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 882aa22ca2SNick Desaulniers // NOLINTBEGIN(readability-identifier-naming) 8926c17f4bSGuillaume Chatelet template <typename T> 9026c17f4bSGuillaume Chatelet constexpr bool is_element_type_v = 9126c17f4bSGuillaume Chatelet is_scalar_v<T> || is_vector_v<T> || is_array_v<T>; 922aa22ca2SNick Desaulniers // NOLINTEND(readability-identifier-naming) 9326c17f4bSGuillaume Chatelet 941c814c99SGuillaume Chatelet // Helper struct to retrieve the number of elements of an array. 9526c17f4bSGuillaume Chatelet template <class T> struct array_size {}; 9626c17f4bSGuillaume Chatelet template <class T, size_t N> 9726c17f4bSGuillaume Chatelet struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {}; 982aa22ca2SNick Desaulniers // Meant to match std::numeric_limits interface. 992aa22ca2SNick Desaulniers // NOLINTNEXTLINE(readability-identifier-naming) 10026c17f4bSGuillaume Chatelet template <typename T> constexpr size_t array_size_v = array_size<T>::value; 10126c17f4bSGuillaume Chatelet 10226c17f4bSGuillaume Chatelet // Generic operations for the above type categories. 10326c17f4bSGuillaume Chatelet 10426c17f4bSGuillaume Chatelet template <typename T> T load(CPtr src) { 10526c17f4bSGuillaume Chatelet static_assert(is_element_type_v<T>); 10626c17f4bSGuillaume Chatelet if constexpr (is_scalar_v<T> || is_vector_v<T>) { 107b6bc9d72SGuillaume Chatelet return ::LIBC_NAMESPACE::load<T>(src); 10826c17f4bSGuillaume Chatelet } else if constexpr (is_array_v<T>) { 10926c17f4bSGuillaume Chatelet using value_type = typename T::value_type; 11088d82b74SNick Desaulniers T value; 11188d82b74SNick Desaulniers for (size_t i = 0; i < array_size_v<T>; ++i) 11288d82b74SNick Desaulniers value[i] = load<value_type>(src + (i * sizeof(value_type))); 11388d82b74SNick Desaulniers return value; 1146363320bSSiva Chandra Reddy } 11569090143SGuillaume Chatelet } 11626c17f4bSGuillaume Chatelet 11726c17f4bSGuillaume Chatelet template <typename T> void store(Ptr dst, T value) { 11826c17f4bSGuillaume Chatelet static_assert(is_element_type_v<T>); 11926c17f4bSGuillaume Chatelet if constexpr (is_scalar_v<T> || is_vector_v<T>) { 120b6bc9d72SGuillaume Chatelet ::LIBC_NAMESPACE::store<T>(dst, value); 12126c17f4bSGuillaume Chatelet } else if constexpr (is_array_v<T>) { 12226c17f4bSGuillaume Chatelet using value_type = typename T::value_type; 12388d82b74SNick Desaulniers for (size_t i = 0; i < array_size_v<T>; ++i) 12488d82b74SNick Desaulniers store<value_type>(dst + (i * sizeof(value_type)), value[i]); 12526c17f4bSGuillaume Chatelet } 12626c17f4bSGuillaume Chatelet } 12726c17f4bSGuillaume Chatelet 12826c17f4bSGuillaume Chatelet template <typename T> T splat(uint8_t value) { 12926c17f4bSGuillaume Chatelet static_assert(is_scalar_v<T> || is_vector_v<T>); 13026c17f4bSGuillaume Chatelet if constexpr (is_scalar_v<T>) 13126c17f4bSGuillaume Chatelet return T(~0) / T(0xFF) * T(value); 13226c17f4bSGuillaume Chatelet else if constexpr (is_vector_v<T>) { 13388d82b74SNick Desaulniers T out; 13469090143SGuillaume Chatelet // This for loop is optimized out for vector types. 13526c17f4bSGuillaume Chatelet for (size_t i = 0; i < sizeof(T); ++i) 13688d82b74SNick Desaulniers out[i] = value; 13788d82b74SNick Desaulniers return out; 13869090143SGuillaume Chatelet } 139917e0d9cSGuillaume Chatelet } 14069090143SGuillaume Chatelet 14169090143SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 14269090143SGuillaume Chatelet // Memset 14369090143SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 1441a4696d9SGuillaume Chatelet 145458734b8SGuillaume Chatelet template <typename T> struct Memset { 1461c814c99SGuillaume Chatelet static_assert(is_element_type_v<T>); 147458734b8SGuillaume Chatelet static constexpr size_t SIZE = sizeof(T); 14869090143SGuillaume Chatelet 1496363320bSSiva Chandra Reddy LIBC_INLINE static void block(Ptr dst, uint8_t value) { 1501a4696d9SGuillaume Chatelet if constexpr (is_scalar_v<T> || is_vector_v<T>) { 1511a4696d9SGuillaume Chatelet store<T>(dst, splat<T>(value)); 1521a4696d9SGuillaume Chatelet } else if constexpr (is_array_v<T>) { 1531a4696d9SGuillaume Chatelet using value_type = typename T::value_type; 1541a4696d9SGuillaume Chatelet const auto Splat = splat<value_type>(value); 15588d82b74SNick Desaulniers for (size_t i = 0; i < array_size_v<T>; ++i) 15688d82b74SNick Desaulniers store<value_type>(dst + (i * sizeof(value_type)), Splat); 15769090143SGuillaume Chatelet } 15869090143SGuillaume Chatelet } 15969090143SGuillaume Chatelet 1606363320bSSiva Chandra Reddy LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) { 16169090143SGuillaume Chatelet block(dst + count - SIZE, value); 16269090143SGuillaume Chatelet } 16369090143SGuillaume Chatelet 1646363320bSSiva Chandra Reddy LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) { 16569090143SGuillaume Chatelet block(dst, value); 16669090143SGuillaume Chatelet tail(dst, value, count); 16769090143SGuillaume Chatelet } 16869090143SGuillaume Chatelet 1693153aa4cSdoshimili LIBC_INLINE static void loop_and_tail_offset(Ptr dst, uint8_t value, 1703153aa4cSdoshimili size_t count, size_t offset) { 1711a4696d9SGuillaume Chatelet static_assert(SIZE > 1, "a loop of size 1 does not need tail"); 17269090143SGuillaume Chatelet do { 17369090143SGuillaume Chatelet block(dst + offset, value); 17469090143SGuillaume Chatelet offset += SIZE; 17569090143SGuillaume Chatelet } while (offset < count - SIZE); 17669090143SGuillaume Chatelet tail(dst, value, count); 17769090143SGuillaume Chatelet } 1783153aa4cSdoshimili 1793153aa4cSdoshimili LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) { 1803153aa4cSdoshimili return loop_and_tail_offset(dst, value, count, 0); 1813153aa4cSdoshimili } 18269090143SGuillaume Chatelet }; 18369090143SGuillaume Chatelet 184458734b8SGuillaume Chatelet template <typename T, typename... TS> struct MemsetSequence { 185458734b8SGuillaume Chatelet static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS)); 186458734b8SGuillaume Chatelet LIBC_INLINE static void block(Ptr dst, uint8_t value) { 187458734b8SGuillaume Chatelet Memset<T>::block(dst, value); 1881c814c99SGuillaume Chatelet if constexpr (sizeof...(TS) > 0) 189458734b8SGuillaume Chatelet return MemsetSequence<TS...>::block(dst + sizeof(T), value); 190458734b8SGuillaume Chatelet } 191458734b8SGuillaume Chatelet }; 192458734b8SGuillaume Chatelet 19369090143SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 19469090143SGuillaume Chatelet // Memmove 19569090143SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 19669090143SGuillaume Chatelet 197355a5d5eSGuillaume Chatelet template <typename T> struct Memmove { 1981c814c99SGuillaume Chatelet static_assert(is_element_type_v<T>); 199458734b8SGuillaume Chatelet static constexpr size_t SIZE = sizeof(T); 20069090143SGuillaume Chatelet 2016363320bSSiva Chandra Reddy LIBC_INLINE static void block(Ptr dst, CPtr src) { 20226c17f4bSGuillaume Chatelet store<T>(dst, load<T>(src)); 20369090143SGuillaume Chatelet } 20469090143SGuillaume Chatelet 2056363320bSSiva Chandra Reddy LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) { 206355a5d5eSGuillaume Chatelet const size_t offset = count - SIZE; 20769090143SGuillaume Chatelet // The load and store operations can be performed in any order as long as 20869090143SGuillaume Chatelet // they are not interleaved. More investigations are needed to determine 20969090143SGuillaume Chatelet // the best order. 21026c17f4bSGuillaume Chatelet const auto head = load<T>(src); 21126c17f4bSGuillaume Chatelet const auto tail = load<T>(src + offset); 21226c17f4bSGuillaume Chatelet store<T>(dst, head); 21326c17f4bSGuillaume Chatelet store<T>(dst + offset, tail); 21469090143SGuillaume Chatelet } 21569090143SGuillaume Chatelet 21669090143SGuillaume Chatelet // Align forward suitable when dst < src. The alignment is performed with 21769090143SGuillaume Chatelet // an HeadTail operation of count ∈ [Alignment, 2 x Alignment]. 21869090143SGuillaume Chatelet // 21969090143SGuillaume Chatelet // e.g. Moving two bytes forward, we make sure src is aligned. 22069090143SGuillaume Chatelet // [ | | | | ] 22169090143SGuillaume Chatelet // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] 22269090143SGuillaume Chatelet // [____LLLLLLLL_____________________] 22369090143SGuillaume Chatelet // [___________LLLLLLLA______________] 22469090143SGuillaume Chatelet // [_SSSSSSSS________________________] 22569090143SGuillaume Chatelet // [________SSSSSSSS_________________] 22669090143SGuillaume Chatelet // 22769090143SGuillaume Chatelet // e.g. Moving two bytes forward, we make sure dst is aligned. 22869090143SGuillaume Chatelet // [ | | | | ] 22969090143SGuillaume Chatelet // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] 23069090143SGuillaume Chatelet // [____LLLLLLLL_____________________] 23169090143SGuillaume Chatelet // [______LLLLLLLL___________________] 23269090143SGuillaume Chatelet // [_SSSSSSSS________________________] 23369090143SGuillaume Chatelet // [___SSSSSSSA______________________] 23469090143SGuillaume Chatelet template <Arg AlignOn> 2356363320bSSiva Chandra Reddy LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) { 23669090143SGuillaume Chatelet Ptr prev_dst = dst; 23769090143SGuillaume Chatelet CPtr prev_src = src; 23869090143SGuillaume Chatelet size_t prev_count = count; 239355a5d5eSGuillaume Chatelet align_to_next_boundary<SIZE, AlignOn>(dst, src, count); 240355a5d5eSGuillaume Chatelet adjust(SIZE, dst, src, count); 24169090143SGuillaume Chatelet head_tail(prev_dst, prev_src, prev_count - count); 24269090143SGuillaume Chatelet } 24369090143SGuillaume Chatelet 24469090143SGuillaume Chatelet // Align backward suitable when dst > src. The alignment is performed with 24569090143SGuillaume Chatelet // an HeadTail operation of count ∈ [Alignment, 2 x Alignment]. 24669090143SGuillaume Chatelet // 24769090143SGuillaume Chatelet // e.g. Moving two bytes backward, we make sure src is aligned. 24869090143SGuillaume Chatelet // [ | | | | ] 24969090143SGuillaume Chatelet // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] 25069090143SGuillaume Chatelet // [ _________________ALLLLLLL_______] 25169090143SGuillaume Chatelet // [ ___________________LLLLLLLL_____] 25269090143SGuillaume Chatelet // [____________________SSSSSSSS_____] 25369090143SGuillaume Chatelet // [______________________SSSSSSSS___] 25469090143SGuillaume Chatelet // 25569090143SGuillaume Chatelet // e.g. Moving two bytes backward, we make sure dst is aligned. 25669090143SGuillaume Chatelet // [ | | | | ] 25769090143SGuillaume Chatelet // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] 25869090143SGuillaume Chatelet // [ _______________LLLLLLLL_________] 25969090143SGuillaume Chatelet // [ ___________________LLLLLLLL_____] 26069090143SGuillaume Chatelet // [__________________ASSSSSSS_______] 26169090143SGuillaume Chatelet // [______________________SSSSSSSS___] 26269090143SGuillaume Chatelet template <Arg AlignOn> 2636363320bSSiva Chandra Reddy LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) { 26469090143SGuillaume Chatelet Ptr headtail_dst = dst + count; 26569090143SGuillaume Chatelet CPtr headtail_src = src + count; 26669090143SGuillaume Chatelet size_t headtail_size = 0; 267355a5d5eSGuillaume Chatelet align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src, 26869090143SGuillaume Chatelet headtail_size); 269355a5d5eSGuillaume Chatelet adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size); 27069090143SGuillaume Chatelet head_tail(headtail_dst, headtail_src, headtail_size); 27169090143SGuillaume Chatelet count -= headtail_size; 27269090143SGuillaume Chatelet } 27369090143SGuillaume Chatelet 27469090143SGuillaume Chatelet // Move forward suitable when dst < src. We load the tail bytes before 27569090143SGuillaume Chatelet // handling the loop. 27669090143SGuillaume Chatelet // 27769090143SGuillaume Chatelet // e.g. Moving two bytes 27869090143SGuillaume Chatelet // [ | | | | |] 27969090143SGuillaume Chatelet // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 28069090143SGuillaume Chatelet // [_________________________LLLLLLLL___] 28169090143SGuillaume Chatelet // [___LLLLLLLL_________________________] 28269090143SGuillaume Chatelet // [_SSSSSSSS___________________________] 28369090143SGuillaume Chatelet // [___________LLLLLLLL_________________] 28469090143SGuillaume Chatelet // [_________SSSSSSSS___________________] 28569090143SGuillaume Chatelet // [___________________LLLLLLLL_________] 28669090143SGuillaume Chatelet // [_________________SSSSSSSS___________] 28769090143SGuillaume Chatelet // [_______________________SSSSSSSS_____] 2886363320bSSiva Chandra Reddy LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src, 2896363320bSSiva Chandra Reddy size_t count) { 290355a5d5eSGuillaume Chatelet static_assert(SIZE > 1, "a loop of size 1 does not need tail"); 291355a5d5eSGuillaume Chatelet const size_t tail_offset = count - SIZE; 29226c17f4bSGuillaume Chatelet const auto tail_value = load<T>(src + tail_offset); 29369090143SGuillaume Chatelet size_t offset = 0; 294f6d99722SGuillaume Chatelet LIBC_LOOP_NOUNROLL 29569090143SGuillaume Chatelet do { 29669090143SGuillaume Chatelet block(dst + offset, src + offset); 297355a5d5eSGuillaume Chatelet offset += SIZE; 298355a5d5eSGuillaume Chatelet } while (offset < count - SIZE); 29926c17f4bSGuillaume Chatelet store<T>(dst + tail_offset, tail_value); 30069090143SGuillaume Chatelet } 30169090143SGuillaume Chatelet 30269090143SGuillaume Chatelet // Move backward suitable when dst > src. We load the head bytes before 30369090143SGuillaume Chatelet // handling the loop. 30469090143SGuillaume Chatelet // 30569090143SGuillaume Chatelet // e.g. Moving two bytes 30669090143SGuillaume Chatelet // [ | | | | |] 30769090143SGuillaume Chatelet // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 30869090143SGuillaume Chatelet // [___LLLLLLLL_________________________] 30969090143SGuillaume Chatelet // [_________________________LLLLLLLL___] 31069090143SGuillaume Chatelet // [___________________________SSSSSSSS_] 31169090143SGuillaume Chatelet // [_________________LLLLLLLL___________] 31269090143SGuillaume Chatelet // [___________________SSSSSSSS_________] 31369090143SGuillaume Chatelet // [_________LLLLLLLL___________________] 31469090143SGuillaume Chatelet // [___________SSSSSSSS_________________] 31569090143SGuillaume Chatelet // [_____SSSSSSSS_______________________] 3166363320bSSiva Chandra Reddy LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src, 3176363320bSSiva Chandra Reddy size_t count) { 318355a5d5eSGuillaume Chatelet static_assert(SIZE > 1, "a loop of size 1 does not need tail"); 31926c17f4bSGuillaume Chatelet const auto head_value = load<T>(src); 320355a5d5eSGuillaume Chatelet ptrdiff_t offset = count - SIZE; 321f6d99722SGuillaume Chatelet LIBC_LOOP_NOUNROLL 32269090143SGuillaume Chatelet do { 32369090143SGuillaume Chatelet block(dst + offset, src + offset); 324355a5d5eSGuillaume Chatelet offset -= SIZE; 32569090143SGuillaume Chatelet } while (offset >= 0); 32626c17f4bSGuillaume Chatelet store<T>(dst, head_value); 32769090143SGuillaume Chatelet } 32869090143SGuillaume Chatelet }; 32969090143SGuillaume Chatelet 330917e0d9cSGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 3311c814c99SGuillaume Chatelet // Low level operations for Bcmp and Memcmp that operate on memory locations. 3329ec6ebd3SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 333e49a6085SGuillaume Chatelet 3341c814c99SGuillaume Chatelet // Same as load above but with an offset to the pointer. 3351c814c99SGuillaume Chatelet // Making the offset explicit hints the compiler to use relevant addressing mode 3361c814c99SGuillaume Chatelet // consistently. 3375bf8efd2SRoland McGrath template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) { 338b6bc9d72SGuillaume Chatelet return ::LIBC_NAMESPACE::load<T>(ptr + offset); 339e49a6085SGuillaume Chatelet } 340e49a6085SGuillaume Chatelet 3411c814c99SGuillaume Chatelet // Same as above but also makes sure the loaded value is in big endian format. 3421c814c99SGuillaume Chatelet // This is useful when implementing lexicograhic comparisons as big endian 3431c814c99SGuillaume Chatelet // scalar comparison directly maps to lexicographic byte comparisons. 3445bf8efd2SRoland McGrath template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) { 3451c814c99SGuillaume Chatelet return Endian::to_big_endian(load<T>(ptr, offset)); 3461c814c99SGuillaume Chatelet } 3471c814c99SGuillaume Chatelet 3481c814c99SGuillaume Chatelet // Equality: returns true iff values at locations (p1 + offset) and (p2 + 3491c814c99SGuillaume Chatelet // offset) compare equal. 3505bf8efd2SRoland McGrath template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset); 3511c814c99SGuillaume Chatelet 3521c814c99SGuillaume Chatelet // Not equals: returns non-zero iff values at locations (p1 + offset) and (p2 + 3531c814c99SGuillaume Chatelet // offset) differ. 3545bf8efd2SRoland McGrath template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset); 3551c814c99SGuillaume Chatelet 3561c814c99SGuillaume Chatelet // Lexicographic comparison: 3571c814c99SGuillaume Chatelet // - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare 3581c814c99SGuillaume Chatelet // equal. 3591c814c99SGuillaume Chatelet // - returns a negative value if value at location (p1 + offset) is 3601c814c99SGuillaume Chatelet // lexicographically less than value at (p2 + offset). 3611c814c99SGuillaume Chatelet // - returns a positive value if value at location (p1 + offset) is 3621c814c99SGuillaume Chatelet // lexicographically greater than value at (p2 + offset). 363bd1cba9fSGuillaume Chatelet template <typename T> 3645bf8efd2SRoland McGrath LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset); 3655e32765cSGuillaume Chatelet 3661c814c99SGuillaume Chatelet // Lexicographic comparison of non-equal values: 3671c814c99SGuillaume Chatelet // - returns a negative value if value at location (p1 + offset) is 3681c814c99SGuillaume Chatelet // lexicographically less than value at (p2 + offset). 3691c814c99SGuillaume Chatelet // - returns a positive value if value at location (p1 + offset) is 3701c814c99SGuillaume Chatelet // lexicographically greater than value at (p2 + offset). 3711c814c99SGuillaume Chatelet template <typename T> 3725bf8efd2SRoland McGrath LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset); 373bd1cba9fSGuillaume Chatelet 374bd1cba9fSGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 3751c814c99SGuillaume Chatelet // Memcmp implementation 3761c814c99SGuillaume Chatelet // 3771c814c99SGuillaume Chatelet // When building memcmp, not all types are considered equals. 3781c814c99SGuillaume Chatelet // 3791c814c99SGuillaume Chatelet // For instance, the lexicographic comparison of two uint8_t can be implemented 3801c814c99SGuillaume Chatelet // as a simple subtraction, but for wider operations the logic can be much more 3811c814c99SGuillaume Chatelet // involving, especially on little endian platforms. 3821c814c99SGuillaume Chatelet // 3831c814c99SGuillaume Chatelet // For such wider types it is a good strategy to test for equality first and 3841c814c99SGuillaume Chatelet // only do the expensive lexicographic comparison if necessary. 3851c814c99SGuillaume Chatelet // 3861c814c99SGuillaume Chatelet // Decomposing the algorithm like this for wider types allows us to have 3871c814c99SGuillaume Chatelet // efficient implementation of higher order functions like 'head_tail' or 3881c814c99SGuillaume Chatelet // 'loop_and_tail'. 389bd1cba9fSGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 390bd1cba9fSGuillaume Chatelet 3911c814c99SGuillaume Chatelet // Type traits to decide whether we can use 'cmp' directly or if we need to 3921c814c99SGuillaume Chatelet // split the computation. 3931c814c99SGuillaume Chatelet template <typename T> struct cmp_is_expensive; 394bd1cba9fSGuillaume Chatelet 3951c814c99SGuillaume Chatelet template <typename T> struct Memcmp { 3961c814c99SGuillaume Chatelet static_assert(is_element_type_v<T>); 3971c814c99SGuillaume Chatelet static constexpr size_t SIZE = sizeof(T); 398bd1cba9fSGuillaume Chatelet 3991c814c99SGuillaume Chatelet private: 4001c814c99SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2, 4011c814c99SGuillaume Chatelet size_t offset) { 4021c814c99SGuillaume Chatelet if constexpr (cmp_is_expensive<T>::value) { 4031c814c99SGuillaume Chatelet if (!eq<T>(p1, p2, offset)) 4041c814c99SGuillaume Chatelet return cmp_neq<T>(p1, p2, offset); 4056f8d826bSNick Desaulniers return MemcmpReturnType::zero(); 406bd1cba9fSGuillaume Chatelet } else { 4071c814c99SGuillaume Chatelet return cmp<T>(p1, p2, offset); 408bd1cba9fSGuillaume Chatelet } 4095e32765cSGuillaume Chatelet } 4105e32765cSGuillaume Chatelet 4111c814c99SGuillaume Chatelet public: 4121c814c99SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) { 4131c814c99SGuillaume Chatelet return block_offset(p1, p2, 0); 4141c814c99SGuillaume Chatelet } 4151c814c99SGuillaume Chatelet 416e49a6085SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { 4171c814c99SGuillaume Chatelet return block_offset(p1, p2, count - SIZE); 418e49a6085SGuillaume Chatelet } 419e49a6085SGuillaume Chatelet 420e49a6085SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2, 421e49a6085SGuillaume Chatelet size_t count) { 4221c814c99SGuillaume Chatelet if constexpr (cmp_is_expensive<T>::value) { 4231c814c99SGuillaume Chatelet if (!eq<T>(p1, p2, 0)) 4241c814c99SGuillaume Chatelet return cmp_neq<T>(p1, p2, 0); 4251c814c99SGuillaume Chatelet } else { 4261c814c99SGuillaume Chatelet if (const auto value = cmp<T>(p1, p2, 0)) 4279ec6ebd3SGuillaume Chatelet return value; 4281c814c99SGuillaume Chatelet } 429e49a6085SGuillaume Chatelet return tail(p1, p2, count); 4309ec6ebd3SGuillaume Chatelet } 4319ec6ebd3SGuillaume Chatelet 432e49a6085SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2, 433e49a6085SGuillaume Chatelet size_t count) { 4341c814c99SGuillaume Chatelet return loop_and_tail_offset(p1, p2, count, 0); 4351c814c99SGuillaume Chatelet } 4361c814c99SGuillaume Chatelet 4371c814c99SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType 4381c814c99SGuillaume Chatelet loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) { 4391c814c99SGuillaume Chatelet if constexpr (SIZE > 1) { 4401c814c99SGuillaume Chatelet const size_t limit = count - SIZE; 4411c814c99SGuillaume Chatelet LIBC_LOOP_NOUNROLL 4421c814c99SGuillaume Chatelet for (; offset < limit; offset += SIZE) { 4431c814c99SGuillaume Chatelet if constexpr (cmp_is_expensive<T>::value) { 4441c814c99SGuillaume Chatelet if (!eq<T>(p1, p2, offset)) 4451c814c99SGuillaume Chatelet return cmp_neq<T>(p1, p2, offset); 4461c814c99SGuillaume Chatelet } else { 4471c814c99SGuillaume Chatelet if (const auto value = cmp<T>(p1, p2, offset)) 4489ec6ebd3SGuillaume Chatelet return value; 4491c814c99SGuillaume Chatelet } 4501c814c99SGuillaume Chatelet } 4511c814c99SGuillaume Chatelet return block_offset(p1, p2, limit); // tail 4521c814c99SGuillaume Chatelet } else { 4531c814c99SGuillaume Chatelet // No need for a tail operation when SIZE == 1. 4541c814c99SGuillaume Chatelet LIBC_LOOP_NOUNROLL 4551c814c99SGuillaume Chatelet for (; offset < count; offset += SIZE) 4561c814c99SGuillaume Chatelet if (auto value = cmp<T>(p1, p2, offset)) 4571c814c99SGuillaume Chatelet return value; 4586f8d826bSNick Desaulniers return MemcmpReturnType::zero(); 4591c814c99SGuillaume Chatelet } 4601c814c99SGuillaume Chatelet } 4611c814c99SGuillaume Chatelet 4621c814c99SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType 4631c814c99SGuillaume Chatelet loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) { 4641c814c99SGuillaume Chatelet const AlignHelper<sizeof(T)> helper(p1); 4651c814c99SGuillaume Chatelet if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) { 4661c814c99SGuillaume Chatelet if (auto value = block(p1, p2)) 4671c814c99SGuillaume Chatelet return value; 468640c8574SNick Desaulniers adjust(helper.offset, p1, p2, count); 4691c814c99SGuillaume Chatelet } 4701c814c99SGuillaume Chatelet return loop_and_tail(p1, p2, count); 4715e32765cSGuillaume Chatelet } 4725e32765cSGuillaume Chatelet }; 4735e32765cSGuillaume Chatelet 4741c814c99SGuillaume Chatelet template <typename T, typename... TS> struct MemcmpSequence { 4751c814c99SGuillaume Chatelet static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS)); 4761c814c99SGuillaume Chatelet LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) { 4771c814c99SGuillaume Chatelet // TODO: test suggestion in 4781c814c99SGuillaume Chatelet // https://reviews.llvm.org/D148717?id=515724#inline-1446890 4791c814c99SGuillaume Chatelet // once we have a proper way to check memory operation latency. 4801c814c99SGuillaume Chatelet if constexpr (cmp_is_expensive<T>::value) { 4811c814c99SGuillaume Chatelet if (!eq<T>(p1, p2, 0)) 4821c814c99SGuillaume Chatelet return cmp_neq<T>(p1, p2, 0); 4831c814c99SGuillaume Chatelet } else { 4841c814c99SGuillaume Chatelet if (auto value = cmp<T>(p1, p2, 0)) 4851c814c99SGuillaume Chatelet return value; 4861c814c99SGuillaume Chatelet } 4871c814c99SGuillaume Chatelet if constexpr (sizeof...(TS) > 0) 4881c814c99SGuillaume Chatelet return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T)); 4891c814c99SGuillaume Chatelet else 4906f8d826bSNick Desaulniers return MemcmpReturnType::zero(); 4911c814c99SGuillaume Chatelet } 4921c814c99SGuillaume Chatelet }; 4931c814c99SGuillaume Chatelet 4941c814c99SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 4951c814c99SGuillaume Chatelet // Bcmp 4961c814c99SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 4971c814c99SGuillaume Chatelet template <typename T> struct Bcmp { 4981c814c99SGuillaume Chatelet static_assert(is_element_type_v<T>); 4991c814c99SGuillaume Chatelet static constexpr size_t SIZE = sizeof(T); 5001c814c99SGuillaume Chatelet 5011c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) { 5021c814c99SGuillaume Chatelet return neq<T>(p1, p2, 0); 5031c814c99SGuillaume Chatelet } 5041c814c99SGuillaume Chatelet 5051c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { 5061c814c99SGuillaume Chatelet const size_t tail_offset = count - SIZE; 5071c814c99SGuillaume Chatelet return neq<T>(p1, p2, tail_offset); 5081c814c99SGuillaume Chatelet } 5091c814c99SGuillaume Chatelet 5101c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) { 5111c814c99SGuillaume Chatelet if (const auto value = neq<T>(p1, p2, 0)) 5121c814c99SGuillaume Chatelet return value; 5131c814c99SGuillaume Chatelet return tail(p1, p2, count); 5141c814c99SGuillaume Chatelet } 5151c814c99SGuillaume Chatelet 5161c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2, 5171c814c99SGuillaume Chatelet size_t count) { 5181c814c99SGuillaume Chatelet return loop_and_tail_offset(p1, p2, count, 0); 5191c814c99SGuillaume Chatelet } 5201c814c99SGuillaume Chatelet 5211c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType 5221c814c99SGuillaume Chatelet loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) { 5231c814c99SGuillaume Chatelet if constexpr (SIZE > 1) { 5241c814c99SGuillaume Chatelet const size_t limit = count - SIZE; 5251c814c99SGuillaume Chatelet LIBC_LOOP_NOUNROLL 5261c814c99SGuillaume Chatelet for (; offset < limit; offset += SIZE) 5271c814c99SGuillaume Chatelet if (const auto value = neq<T>(p1, p2, offset)) 5281c814c99SGuillaume Chatelet return value; 5291c814c99SGuillaume Chatelet return tail(p1, p2, count); 5301c814c99SGuillaume Chatelet } else { 5311c814c99SGuillaume Chatelet // No need for a tail operation when SIZE == 1. 5321c814c99SGuillaume Chatelet LIBC_LOOP_NOUNROLL 5331c814c99SGuillaume Chatelet for (; offset < count; offset += SIZE) 5341c814c99SGuillaume Chatelet if (const auto value = neq<T>(p1, p2, offset)) 5351c814c99SGuillaume Chatelet return value; 5366f8d826bSNick Desaulniers return BcmpReturnType::zero(); 5371c814c99SGuillaume Chatelet } 5381c814c99SGuillaume Chatelet } 5391c814c99SGuillaume Chatelet 5401c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType 5411c814c99SGuillaume Chatelet loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) { 5421c814c99SGuillaume Chatelet static_assert(SIZE > 1, 5431c814c99SGuillaume Chatelet "No need to align when processing one byte at a time"); 5441c814c99SGuillaume Chatelet const AlignHelper<sizeof(T)> helper(p1); 5451c814c99SGuillaume Chatelet if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) { 5461c814c99SGuillaume Chatelet if (auto value = block(p1, p2)) 5471c814c99SGuillaume Chatelet return value; 548640c8574SNick Desaulniers adjust(helper.offset, p1, p2, count); 5491c814c99SGuillaume Chatelet } 5501c814c99SGuillaume Chatelet return loop_and_tail(p1, p2, count); 5511c814c99SGuillaume Chatelet } 5521c814c99SGuillaume Chatelet }; 5531c814c99SGuillaume Chatelet 5541c814c99SGuillaume Chatelet template <typename T, typename... TS> struct BcmpSequence { 5551c814c99SGuillaume Chatelet static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS)); 5561c814c99SGuillaume Chatelet LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) { 5571c814c99SGuillaume Chatelet if (auto value = neq<T>(p1, p2, 0)) 5581c814c99SGuillaume Chatelet return value; 5591c814c99SGuillaume Chatelet if constexpr (sizeof...(TS) > 0) 5601c814c99SGuillaume Chatelet return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T)); 5611c814c99SGuillaume Chatelet else 5626f8d826bSNick Desaulniers return BcmpReturnType::zero(); 5631c814c99SGuillaume Chatelet } 5641c814c99SGuillaume Chatelet }; 5651c814c99SGuillaume Chatelet 5661c814c99SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 5671c814c99SGuillaume Chatelet // Specializations for uint8_t 5681c814c99SGuillaume Chatelet template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {}; 5691c814c99SGuillaume Chatelet template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) { 5701c814c99SGuillaume Chatelet return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset); 5711c814c99SGuillaume Chatelet } 5721c814c99SGuillaume Chatelet template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) { 5731c814c99SGuillaume Chatelet return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset); 5741c814c99SGuillaume Chatelet } 5751c814c99SGuillaume Chatelet template <> 5761c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) { 5771c814c99SGuillaume Chatelet return static_cast<int32_t>(load<uint8_t>(p1, offset)) - 5781c814c99SGuillaume Chatelet static_cast<int32_t>(load<uint8_t>(p2, offset)); 5791c814c99SGuillaume Chatelet } 5801c814c99SGuillaume Chatelet template <> 5811c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset); 5825bf8efd2SRoland McGrath 5835ff3ff33SPetr Hosek } // namespace generic 5845ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 58569090143SGuillaume Chatelet 58669090143SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H 587