//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE_DECL {

// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}
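
// As an illustration (hypothetical address): for a pointer whose address is
// 0x1003,
//   distance_to_align_down<16>(ptr) == 3   // 0x1003 & 0xF
//   distance_to_align_up<16>(ptr)   == 13  // -0x1003 & 0xF
// Both return 0 when the address is already a multiple of 16.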

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns alignment.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}

// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}

// Returns true iff memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement
  // (i.e., bit_cast has the same behavior as a regular signed to unsigned
  // cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // Integer promotion would be caught here.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
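
// As an illustration (hypothetical buffer): given `char buf[16]`,
//   is_disjoint(buf, buf + 8, 8) is true   // the two 8-byte regions only touch
//   is_disjoint(buf, buf + 4, 8) is false  // the regions share 4 bytes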

#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant count copy.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis that wrongly detects out of bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}

using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If type is unsigned (bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and a non-zero value.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
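
// As an illustration (hypothetical caller code), the strict constructor
// rejects implicit conversions from other integral types:
//   MemcmpReturnType ok = int32_t(-1);  // compiles, exact type match
//   MemcmpReturnType ko = uint64_t(1);  // does not compile, U != int32_t
//   int c = static_cast<int>(ok);       // explicit conversion for the C API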

// This implements the semantics of 'memcmp' returning a negative value when
// 'a' is less than 'b', '0' when 'a' equals 'b' and a positive number
// otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we only need to look at diff[32:32] to determine the
  //   sign bit for the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above by the expression below. We could have also
  // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
  // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}
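
// Worked example (illustrative values): cmp_uint32_t(0, 1) gives diff == -1,
// i.e. 0xFFFFFFFFFFFFFFFF; (diff >> 1) | (diff & 0xFFFF) keeps the low 32 bits
// all ones so the truncated int32_t is negative. cmp_uint32_t(1, 0) gives
// diff == 1 and (0 | 1) == 1, a positive value. Equal inputs return 0.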

// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantics of 'memcmp' when we know that 'a'
// and 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative values respectively as they are one value apart:
  //   xor eax, eax         <- free
  //   cmp rdi, rsi         <- serializing
  //   adc eax, 2147483647  <- serializing

  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse ordering. Because signed integers are not symmetric
  // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
  // `-INT_MIN` is not representable as an int32_t.

  // As a consequence, we use 5 and -5 which is still OK in terms of latency.
  //   cmp rdi, rsi    <- serializing
  //   mov ecx, -5     <- can be done in parallel
  //   mov eax, 5      <- can be done in parallel
  //   cmovb eax, ecx  <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}

// Loads bytes from memory (possibly unaligned) and materializes them as a
// value of type T.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T out;
  memcpy_inline<sizeof(T)>(&out, ptr);
  return out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}
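
// As an illustration, a 4-byte copy that tolerates unaligned pointers can be
// written as:
//   store<uint32_t>(dst, load<uint32_t>(src));
// which typically lowers to a single 4-byte load and store on architectures
// with unaligned access support.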

// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t SHIFT = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << SHIFT);
    else if constexpr (Endian::IS_BIG)
      return (value << SHIFT) | next;
    else
      static_assert(cpp::always_false<T>, "Invalid endianness");
  } else {
    return value;
  }
}

// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t SHIFT = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
  } else {
    static_assert(cpp::always_false<T>, "Invalid endianness");
  }
}

// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}
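
// As an illustration, when `src` and `dst` are known to be 2-byte aligned, a
// 32-bit access can be decomposed into two aligned 16-bit accesses:
//   uint32_t v = load32_aligned<uint16_t, uint16_t>(src, offset);
//   store32_aligned<uint16_t, uint16_t>(v, dst, offset);
// The halves are recombined (or split) according to the target endianness as
// described above.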

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= offset;
}

// Advances p1 and p2 so that p1 gets aligned to the next SIZE-byte boundary
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
void align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                               size_t &count) {
  adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
  p1 = assume_aligned<SIZE>(p1);
}

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T>
LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
  const T *dummy = p1;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}
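
// As an illustration (hypothetical values): with p1 at address 0x1005 and
// count == 100, align_p1_to_next_boundary<32>(p1, p2, count) advances both
// pointers by 27 bytes and leaves count == 73, with p1 now 32-byte aligned.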

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    static_assert(cpp::always_false<T1>,
                  "AlignOn must be either Arg::P1 or Arg::P2");
}

template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
  uintptr_t offset;
};

LIBC_INLINE void prefetch_for_write(CPtr dst) {
  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
}

LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H