xref: /llvm-project/libc/src/string/memory_utils/utils.h (revision 95b680e4c353d479fbfb96adb39696042c005e99)
//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
885314e9bSGuillaume Chatelet 
9270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
10270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
1185314e9bSGuillaume Chatelet 
12a786096fSGuillaume Chatelet #include "src/__support/CPP/bit.h"
13d7917fdcSGuillaume Chatelet #include "src/__support/CPP/cstddef.h"
14a786096fSGuillaume Chatelet #include "src/__support/CPP/type_traits.h"
15*95b680e4SDaniel Thornburgh #include "src/__support/endian_internal.h"
16e2f8c556SGuillaume Chatelet #include "src/__support/macros/attributes.h" // LIBC_INLINE
175ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
181c814c99SGuillaume Chatelet #include "src/__support/macros/properties/architectures.h"
1985314e9bSGuillaume Chatelet 
2085314e9bSGuillaume Chatelet #include <stddef.h> // size_t
211c814c99SGuillaume Chatelet #include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN
2285314e9bSGuillaume Chatelet 
235ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
2485314e9bSGuillaume Chatelet 
25a786096fSGuillaume Chatelet // Returns the number of bytes to substract from ptr to get to the previous
26a786096fSGuillaume Chatelet // multiple of alignment. If ptr is already aligned returns 0.
275bf8efd2SRoland McGrath template <size_t alignment>
285bf8efd2SRoland McGrath LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
291d894788SGuillaume Chatelet   static_assert(cpp::has_single_bit(alignment),
301d894788SGuillaume Chatelet                 "alignment must be a power of 2");
3104a309ddSGuillaume Chatelet   return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
3204a309ddSGuillaume Chatelet }
3304a309ddSGuillaume Chatelet 
34a786096fSGuillaume Chatelet // Returns the number of bytes to add to ptr to get to the next multiple of
35a786096fSGuillaume Chatelet // alignment. If ptr is already aligned returns 0.
365bf8efd2SRoland McGrath template <size_t alignment>
375bf8efd2SRoland McGrath LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
381d894788SGuillaume Chatelet   static_assert(cpp::has_single_bit(alignment),
391d894788SGuillaume Chatelet                 "alignment must be a power of 2");
4085314e9bSGuillaume Chatelet   // The logic is not straightforward and involves unsigned modulo arithmetic
4185314e9bSGuillaume Chatelet   // but the generated code is as fast as it can be.
4285314e9bSGuillaume Chatelet   return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
4385314e9bSGuillaume Chatelet }
4485314e9bSGuillaume Chatelet 
45a786096fSGuillaume Chatelet // Returns the number of bytes to add to ptr to get to the next multiple of
46a786096fSGuillaume Chatelet // alignment. If ptr is already aligned returns alignment.
47a786096fSGuillaume Chatelet template <size_t alignment>
485bf8efd2SRoland McGrath LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
49a786096fSGuillaume Chatelet   return alignment - distance_to_align_down<alignment>(ptr);
5085314e9bSGuillaume Chatelet }
5185314e9bSGuillaume Chatelet 
52a786096fSGuillaume Chatelet // Returns the same pointer but notifies the compiler that it is aligned.
53019a477cSRoland McGrath template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
545bf47e14SGuillaume Chatelet   return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
555bf47e14SGuillaume Chatelet }
56a786096fSGuillaume Chatelet 
572cfae7cdSGuillaume Chatelet // Returns true iff memory regions [p1, p1 + size] and [p2, p2 + size] are
582cfae7cdSGuillaume Chatelet // disjoint.
592cfae7cdSGuillaume Chatelet LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
60f364a7a8SDmitry Vyukov   const ptrdiff_t sdiff =
61f364a7a8SDmitry Vyukov       static_cast<const char *>(p1) - static_cast<const char *>(p2);
62f364a7a8SDmitry Vyukov   // We use bit_cast to make sure that we don't run into accidental integer
63f364a7a8SDmitry Vyukov   // promotion. Notably the unary minus operator goes through integer promotion
64f364a7a8SDmitry Vyukov   // at the expression level. We assume arithmetic to be two's complement (i.e.,
65f364a7a8SDmitry Vyukov   // bit_cast has the same behavior as a regular signed to unsigned cast).
66f364a7a8SDmitry Vyukov   static_assert(-1 == ~0, "not 2's complement");
67f364a7a8SDmitry Vyukov   const size_t udiff = cpp::bit_cast<size_t>(sdiff);
68f364a7a8SDmitry Vyukov   // Integer promition would be caught here.
69f364a7a8SDmitry Vyukov   const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
70f364a7a8SDmitry Vyukov   // This is expected to compile a conditional move.
71f364a7a8SDmitry Vyukov   return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
722cfae7cdSGuillaume Chatelet }
732cfae7cdSGuillaume Chatelet 
// Feature flags: each macro below is defined only when the compiler provides
// the corresponding fixed-size inline builtin.

// Set when '__builtin_memcpy_inline' is available.
#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

// Set when '__builtin_memset_inline' is available.
#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif
81a786096fSGuillaume Chatelet 
82060a43ceSGuillaume Chatelet // Performs a constant count copy.
83060a43ceSGuillaume Chatelet template <size_t Size>
846363320bSSiva Chandra Reddy LIBC_INLINE void memcpy_inline(void *__restrict dst,
85060a43ceSGuillaume Chatelet                                const void *__restrict src) {
86060a43ceSGuillaume Chatelet #ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
87060a43ceSGuillaume Chatelet   __builtin_memcpy_inline(dst, src, Size);
88060a43ceSGuillaume Chatelet #else
89f4a35492SGuillaume Chatelet   // In memory functions `memcpy_inline` is instantiated several times with
90f4a35492SGuillaume Chatelet   // different value of the Size parameter. This doesn't play well with GCC's
91019a477cSRoland McGrath   // Value Range Analysis that wrongly detects out of bounds accesses. We
92bc4f3e31SGuillaume Chatelet   // disable these warnings for the purpose of this function.
93f4a35492SGuillaume Chatelet #pragma GCC diagnostic push
94f4a35492SGuillaume Chatelet #pragma GCC diagnostic ignored "-Warray-bounds"
95bc4f3e31SGuillaume Chatelet #pragma GCC diagnostic ignored "-Wstringop-overread"
96bc4f3e31SGuillaume Chatelet #pragma GCC diagnostic ignored "-Wstringop-overflow"
97060a43ceSGuillaume Chatelet   for (size_t i = 0; i < Size; ++i)
98060a43ceSGuillaume Chatelet     static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
99f4a35492SGuillaume Chatelet #pragma GCC diagnostic pop
100060a43ceSGuillaume Chatelet #endif
101060a43ceSGuillaume Chatelet }
102060a43ceSGuillaume Chatelet 
103d7917fdcSGuillaume Chatelet using Ptr = cpp::byte *;        // Pointer to raw data.
104d7917fdcSGuillaume Chatelet using CPtr = const cpp::byte *; // Const pointer to raw data.
105060a43ceSGuillaume Chatelet 
10669090143SGuillaume Chatelet // This type makes sure that we don't accidentally promote an integral type to
10769090143SGuillaume Chatelet // another one. It is only constructible from the exact T type.
10869090143SGuillaume Chatelet template <typename T> struct StrictIntegralType {
10969090143SGuillaume Chatelet   static_assert(cpp::is_integral_v<T>);
11069090143SGuillaume Chatelet 
11169090143SGuillaume Chatelet   // Can only be constructed from a T.
11269090143SGuillaume Chatelet   template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
113019a477cSRoland McGrath   LIBC_INLINE StrictIntegralType(U value) : value(value) {}
11469090143SGuillaume Chatelet 
11569090143SGuillaume Chatelet   // Allows using the type in an if statement.
116019a477cSRoland McGrath   LIBC_INLINE explicit operator bool() const { return value; }
11769090143SGuillaume Chatelet 
11869090143SGuillaume Chatelet   // If type is unsigned (bcmp) we allow bitwise OR operations.
119019a477cSRoland McGrath   LIBC_INLINE StrictIntegralType
120019a477cSRoland McGrath   operator|(const StrictIntegralType &Rhs) const {
12169090143SGuillaume Chatelet     static_assert(!cpp::is_signed_v<T>);
12269090143SGuillaume Chatelet     return value | Rhs.value;
12369090143SGuillaume Chatelet   }
12469090143SGuillaume Chatelet 
12569090143SGuillaume Chatelet   // For interation with the C API we allow explicit conversion back to the
12669090143SGuillaume Chatelet   // `int` type.
127019a477cSRoland McGrath   LIBC_INLINE explicit operator int() const {
12869090143SGuillaume Chatelet     // bit_cast makes sure that T and int have the same size.
12969090143SGuillaume Chatelet     return cpp::bit_cast<int>(value);
13069090143SGuillaume Chatelet   }
13169090143SGuillaume Chatelet 
13269090143SGuillaume Chatelet   // Helper to get the zero value.
1336f8d826bSNick Desaulniers   LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
1346f8d826bSNick Desaulniers   LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }
13569090143SGuillaume Chatelet 
13669090143SGuillaume Chatelet private:
13769090143SGuillaume Chatelet   T value;
13869090143SGuillaume Chatelet };
13969090143SGuillaume Chatelet 
14069090143SGuillaume Chatelet using MemcmpReturnType = StrictIntegralType<int32_t>;
14169090143SGuillaume Chatelet using BcmpReturnType = StrictIntegralType<uint32_t>;
14269090143SGuillaume Chatelet 
1431c814c99SGuillaume Chatelet // This implements the semantic of 'memcmp' returning a negative value when 'a'
1441c814c99SGuillaume Chatelet // is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
1451c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
1461c814c99SGuillaume Chatelet   // We perform the difference as an int64_t.
1471c814c99SGuillaume Chatelet   const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
1481c814c99SGuillaume Chatelet   // For the int64_t to int32_t conversion we want the following properties:
1491c814c99SGuillaume Chatelet   // - int32_t[31:31] == 1 iff diff < 0
1501c814c99SGuillaume Chatelet   // - int32_t[31:0] == 0 iff diff == 0
1511c814c99SGuillaume Chatelet 
1521c814c99SGuillaume Chatelet   // We also observe that:
1531c814c99SGuillaume Chatelet   // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
1541c814c99SGuillaume Chatelet   // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
1551c814c99SGuillaume Chatelet   // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
1561c814c99SGuillaume Chatelet   // - https://godbolt.org/z/8W7qWP6e5
1571c814c99SGuillaume Chatelet   // - This implies that we can only look at diff[32:32] for determining the
1581c814c99SGuillaume Chatelet   // sign bit for the returned int32_t.
1591c814c99SGuillaume Chatelet 
1601c814c99SGuillaume Chatelet   // So, we do the following:
1611c814c99SGuillaume Chatelet   // - int32_t[31:31] = diff[32:32]
1621c814c99SGuillaume Chatelet   // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.
1631c814c99SGuillaume Chatelet 
1641c814c99SGuillaume Chatelet   // And, we can achieve the above by the expression below. We could have also
1651c814c99SGuillaume Chatelet   // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
1661c814c99SGuillaume Chatelet   // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
1671c814c99SGuillaume Chatelet   return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
1681c814c99SGuillaume Chatelet }
1691c814c99SGuillaume Chatelet 
1701c814c99SGuillaume Chatelet // Returns a negative value if 'a' is less than 'b' and a positive value
1711c814c99SGuillaume Chatelet // otherwise. This implements the semantic of 'memcmp' when we know that 'a' and
1721c814c99SGuillaume Chatelet // 'b' differ.
1731c814c99SGuillaume Chatelet LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
17448ba7da9SGuillaume Chatelet #if defined(LIBC_TARGET_ARCH_IS_X86)
1751c814c99SGuillaume Chatelet   // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
1761c814c99SGuillaume Chatelet   // positive and negative value respectively as they are one value apart:
1771c814c99SGuillaume Chatelet   //   xor     eax, eax         <- free
1781c814c99SGuillaume Chatelet   //   cmp     rdi, rsi         <- serializing
1791c814c99SGuillaume Chatelet   //   adc     eax, 2147483647  <- serializing
1801c814c99SGuillaume Chatelet 
1811c814c99SGuillaume Chatelet   // Unfortunately we found instances of client code that negate the result of
1821c814c99SGuillaume Chatelet   // 'memcmp' to reverse ordering. Because signed integers are not symmetric
1831c814c99SGuillaume Chatelet   // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
1841c814c99SGuillaume Chatelet   // `-INT_MIN` is not representable as an int32_t.
1851c814c99SGuillaume Chatelet 
1861c814c99SGuillaume Chatelet   // As a consequence, we use 5 and -5 which is still OK nice in terms of
1871c814c99SGuillaume Chatelet   // latency.
1881c814c99SGuillaume Chatelet   //   cmp     rdi, rsi         <- serializing
1891c814c99SGuillaume Chatelet   //   mov     ecx, -5          <- can be done in parallel
1901c814c99SGuillaume Chatelet   //   mov     eax, 5           <- can be done in parallel
1911c814c99SGuillaume Chatelet   //   cmovb   eax, ecx         <- serializing
1921c814c99SGuillaume Chatelet   static constexpr int32_t POSITIVE = 5;
1931c814c99SGuillaume Chatelet   static constexpr int32_t NEGATIVE = -5;
1941c814c99SGuillaume Chatelet #else
1951c814c99SGuillaume Chatelet   // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
1961c814c99SGuillaume Chatelet   // On ARMv8, both strategies lead to the same performance.
1971c814c99SGuillaume Chatelet   static constexpr int32_t POSITIVE = 1;
1981c814c99SGuillaume Chatelet   static constexpr int32_t NEGATIVE = -1;
1991c814c99SGuillaume Chatelet #endif
2001c814c99SGuillaume Chatelet   static_assert(POSITIVE > 0);
2011c814c99SGuillaume Chatelet   static_assert(NEGATIVE < 0);
2021c814c99SGuillaume Chatelet   return a < b ? NEGATIVE : POSITIVE;
2031c814c99SGuillaume Chatelet }
2041c814c99SGuillaume Chatelet 
205a786096fSGuillaume Chatelet // Loads bytes from memory (possibly unaligned) and materializes them as
206a786096fSGuillaume Chatelet // type.
2076363320bSSiva Chandra Reddy template <typename T> LIBC_INLINE T load(CPtr ptr) {
20888d82b74SNick Desaulniers   T out;
20988d82b74SNick Desaulniers   memcpy_inline<sizeof(T)>(&out, ptr);
21088d82b74SNick Desaulniers   return out;
211060a43ceSGuillaume Chatelet }
212060a43ceSGuillaume Chatelet 
213a786096fSGuillaume Chatelet // Stores a value of type T in memory (possibly unaligned).
2146363320bSSiva Chandra Reddy template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
215060a43ceSGuillaume Chatelet   memcpy_inline<sizeof(T)>(ptr, &value);
216060a43ceSGuillaume Chatelet }
217060a43ceSGuillaume Chatelet 
// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.
222f4a35492SGuillaume Chatelet 
223f4a35492SGuillaume Chatelet // Loads a 'ValueType' by decomposing it into several loads that are assumed to
224f4a35492SGuillaume Chatelet // be aligned.
225f4a35492SGuillaume Chatelet // e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
226f4a35492SGuillaume Chatelet template <typename ValueType, typename T, typename... TS>
227019a477cSRoland McGrath LIBC_INLINE ValueType load_aligned(CPtr src) {
228f4a35492SGuillaume Chatelet   static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
229f4a35492SGuillaume Chatelet   const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
230f4a35492SGuillaume Chatelet   if constexpr (sizeof...(TS) > 0) {
23188d82b74SNick Desaulniers     constexpr size_t SHIFT = sizeof(T) * 8;
232f4a35492SGuillaume Chatelet     const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
233f4a35492SGuillaume Chatelet     if constexpr (Endian::IS_LITTLE)
23488d82b74SNick Desaulniers       return value | (next << SHIFT);
235f4a35492SGuillaume Chatelet     else if constexpr (Endian::IS_BIG)
23688d82b74SNick Desaulniers       return (value << SHIFT) | next;
237f4a35492SGuillaume Chatelet     else
2381d894788SGuillaume Chatelet       static_assert(cpp::always_false<T>, "Invalid endianness");
239f4a35492SGuillaume Chatelet   } else {
240f4a35492SGuillaume Chatelet     return value;
241f4a35492SGuillaume Chatelet   }
242f4a35492SGuillaume Chatelet }
243f4a35492SGuillaume Chatelet 
244f4a35492SGuillaume Chatelet // Alias for loading a 'uint32_t'.
245f4a35492SGuillaume Chatelet template <typename T, typename... TS>
246019a477cSRoland McGrath LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
247f4a35492SGuillaume Chatelet   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
248f4a35492SGuillaume Chatelet   return load_aligned<uint32_t, T, TS...>(src + offset);
249f4a35492SGuillaume Chatelet }
250f4a35492SGuillaume Chatelet 
251f4a35492SGuillaume Chatelet // Alias for loading a 'uint64_t'.
252f4a35492SGuillaume Chatelet template <typename T, typename... TS>
253019a477cSRoland McGrath LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
254f4a35492SGuillaume Chatelet   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
255f4a35492SGuillaume Chatelet   return load_aligned<uint64_t, T, TS...>(src + offset);
256f4a35492SGuillaume Chatelet }
257f4a35492SGuillaume Chatelet 
258f4a35492SGuillaume Chatelet // Stores a 'ValueType' by decomposing it into several stores that are assumed
259f4a35492SGuillaume Chatelet // to be aligned.
260f4a35492SGuillaume Chatelet // e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
261f4a35492SGuillaume Chatelet template <typename ValueType, typename T, typename... TS>
262019a477cSRoland McGrath LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
263f4a35492SGuillaume Chatelet   static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
26488d82b74SNick Desaulniers   constexpr size_t SHIFT = sizeof(T) * 8;
265f4a35492SGuillaume Chatelet   if constexpr (Endian::IS_LITTLE) {
266f4a35492SGuillaume Chatelet     store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
267f4a35492SGuillaume Chatelet     if constexpr (sizeof...(TS) > 0)
26888d82b74SNick Desaulniers       store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
269f4a35492SGuillaume Chatelet   } else if constexpr (Endian::IS_BIG) {
270f4a35492SGuillaume Chatelet     constexpr size_t OFFSET = (0 + ... + sizeof(TS));
271f4a35492SGuillaume Chatelet     store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
272f4a35492SGuillaume Chatelet     if constexpr (sizeof...(TS) > 0)
27388d82b74SNick Desaulniers       store_aligned<ValueType, TS...>(value >> SHIFT, dst);
274f4a35492SGuillaume Chatelet   } else {
2751d894788SGuillaume Chatelet     static_assert(cpp::always_false<T>, "Invalid endianness");
276f4a35492SGuillaume Chatelet   }
277f4a35492SGuillaume Chatelet }
278f4a35492SGuillaume Chatelet 
279f4a35492SGuillaume Chatelet // Alias for storing a 'uint32_t'.
280f4a35492SGuillaume Chatelet template <typename T, typename... TS>
281019a477cSRoland McGrath LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
282f4a35492SGuillaume Chatelet   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
283f4a35492SGuillaume Chatelet   store_aligned<uint32_t, T, TS...>(value, dst + offset);
284f4a35492SGuillaume Chatelet }
285f4a35492SGuillaume Chatelet 
286f4a35492SGuillaume Chatelet // Alias for storing a 'uint64_t'.
287f4a35492SGuillaume Chatelet template <typename T, typename... TS>
288019a477cSRoland McGrath LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
289f4a35492SGuillaume Chatelet   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
290f4a35492SGuillaume Chatelet   store_aligned<uint64_t, T, TS...>(value, dst + offset);
291f4a35492SGuillaume Chatelet }
292f4a35492SGuillaume Chatelet 
293a786096fSGuillaume Chatelet // Advances the pointers p1 and p2 by offset bytes and decrease count by the
294a786096fSGuillaume Chatelet // same amount.
295060a43ceSGuillaume Chatelet template <typename T1, typename T2>
2966363320bSSiva Chandra Reddy LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
297060a43ceSGuillaume Chatelet                         T2 *__restrict &p2, size_t &count) {
298060a43ceSGuillaume Chatelet   p1 += offset;
299060a43ceSGuillaume Chatelet   p2 += offset;
300060a43ceSGuillaume Chatelet   count -= offset;
301060a43ceSGuillaume Chatelet }
302060a43ceSGuillaume Chatelet 
303a786096fSGuillaume Chatelet // Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
304a786096fSGuillaume Chatelet // and decrease count by the same amount.
305060a43ceSGuillaume Chatelet // We make sure the compiler knows about the adjusted pointer alignment.
306a786096fSGuillaume Chatelet template <size_t SIZE, typename T1, typename T2>
307a786096fSGuillaume Chatelet void align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
308a786096fSGuillaume Chatelet                                size_t &count) {
309a786096fSGuillaume Chatelet   adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
310a786096fSGuillaume Chatelet   p1 = assume_aligned<SIZE>(p1);
311060a43ceSGuillaume Chatelet }
312060a43ceSGuillaume Chatelet 
313a786096fSGuillaume Chatelet // Same as align_p1_to_next_boundary above but with a single pointer instead.
3148628ca29SGuillaume Chatelet template <size_t SIZE, typename T>
3158628ca29SGuillaume Chatelet LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
3168628ca29SGuillaume Chatelet   const T *dummy = p1;
317a786096fSGuillaume Chatelet   align_p1_to_next_boundary<SIZE>(p1, dummy, count);
318d8415b02SSterling Augustine }
319a786096fSGuillaume Chatelet 
// Selects one of the two pointer arguments of a binary memory operation.
// 'Dst' and 'Src' are aliases for the first and second pointer respectively.
enum class Arg {
  P1,
  P2,
  Dst = P1,
  Src = P2
};
322a786096fSGuillaume Chatelet 
323a786096fSGuillaume Chatelet // Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
324a786096fSGuillaume Chatelet // Precondition: &p1 != &p2
325a786096fSGuillaume Chatelet template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
326019a477cSRoland McGrath LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
327a786096fSGuillaume Chatelet                                         size_t &count) {
328a786096fSGuillaume Chatelet   if constexpr (AlignOn == Arg::P1)
329a786096fSGuillaume Chatelet     align_p1_to_next_boundary<SIZE>(p1, p2, count);
330a786096fSGuillaume Chatelet   else if constexpr (AlignOn == Arg::P2)
331a786096fSGuillaume Chatelet     align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
332a786096fSGuillaume Chatelet   else
3331d894788SGuillaume Chatelet     static_assert(cpp::always_false<T1>,
3341d894788SGuillaume Chatelet                   "AlignOn must be either Arg::P1 or Arg::P2");
335060a43ceSGuillaume Chatelet }
3365bf47e14SGuillaume Chatelet 
3371c814c99SGuillaume Chatelet template <size_t SIZE> struct AlignHelper {
338019a477cSRoland McGrath   LIBC_INLINE AlignHelper(CPtr ptr)
339640c8574SNick Desaulniers       : offset(distance_to_next_aligned<SIZE>(ptr)) {}
3401c814c99SGuillaume Chatelet 
341640c8574SNick Desaulniers   LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
342640c8574SNick Desaulniers   uintptr_t offset;
3431c814c99SGuillaume Chatelet };
3441c814c99SGuillaume Chatelet 
3453153aa4cSdoshimili LIBC_INLINE void prefetch_for_write(CPtr dst) {
3463153aa4cSdoshimili   __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
3473153aa4cSdoshimili }
3483153aa4cSdoshimili 
3493153aa4cSdoshimili LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
3503153aa4cSdoshimili   __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
3513153aa4cSdoshimili }
3523153aa4cSdoshimili 
3535ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
35485314e9bSGuillaume Chatelet 
355270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
356