string/memory_utils/utils.h

//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
85314e9bSGuillaume Chatelet
270547f3SGuillaume Chatelet#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
270547f3SGuillaume Chatelet#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
85314e9bSGuillaume Chatelet
a786096fSGuillaume Chatelet#include "src/__support/CPP/bit.h"
d7917fdcSGuillaume Chatelet#include "src/__support/CPP/cstddef.h"
a786096fSGuillaume Chatelet#include "src/__support/CPP/type_traits.h"
*95b680e4SDaniel Thornburgh#include "src/__support/endian_internal.h"
e2f8c556SGuillaume Chatelet#include "src/__support/macros/attributes.h" // LIBC_INLINE
5ff3ff33SPetr Hosek#include "src/__support/macros/config.h"
1c814c99SGuillaume Chatelet#include "src/__support/macros/properties/architectures.h"
85314e9bSGuillaume Chatelet
85314e9bSGuillaume Chatelet#include <stddef.h> // size_t
1c814c99SGuillaume Chatelet#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN
85314e9bSGuillaume Chatelet
5ff3ff33SPetr Hoseknamespace LIBC_NAMESPACE_DECL {
85314e9bSGuillaume Chatelet
// Returns the number of bytes to subtract from 'ptr' to reach the previous
// multiple of 'alignment'. Returns 0 if 'ptr' is already aligned.
// 'alignment' must be a power of two; this is enforced at compile time.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // For a power of two, masking with 'alignment - 1' keeps exactly the low
  // bits that make up 'address % alignment'.
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}
04a309ddSGuillaume Chatelet
// Returns the number of bytes to add to 'ptr' to reach the next multiple of
// 'alignment'. Returns 0 if 'ptr' is already aligned.
// 'alignment' must be a power of two; this is enforced at compile time.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // Negating the unsigned address is well defined (modulo 2^N); the low bits
  // of '-address' are exactly the distance up to the next multiple of
  // 'alignment'. The logic is not straightforward but the generated code is
  // as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}
85314e9bSGuillaume Chatelet
a786096fSGuillaume Chatelet// Returns the number of bytes to add to ptr to get to the next multiple of
a786096fSGuillaume Chatelet// alignment. If ptr is already aligned returns alignment.
a786096fSGuillaume Chatelettemplate <size_t alignment>
5bf8efd2SRoland McGrathLIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
a786096fSGuillaume Chatelet  return alignment - distance_to_align_down<alignment>(ptr);
85314e9bSGuillaume Chatelet}
85314e9bSGuillaume Chatelet
// Returns 'ptr' unchanged while telling the compiler that it is aligned to
// 'alignment' bytes. This is a pure optimization hint: nothing is checked at
// runtime, so the caller must guarantee that the alignment actually holds.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}
a786096fSGuillaume Chatelet
// Returns true iff the memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint, i.e. iff the distance between 'p1' and 'p2' is at least 'size'
// bytes. Two regions that merely touch (p2 == p1 + size) are disjoint, and
// empty regions (size == 0) are always disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement (i.e.,
  // bit_cast has the same behavior as a regular signed to unsigned cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // Integer promotion would be caught here: bit_cast rejects operands whose
  // size differs from size_t.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // Compare 'size' against |sdiff|. This is expected to compile to a
  // conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
2cfae7cdSGuillaume Chatelet
// Feature-detection macros: defined when the compiler provides the
// corresponding constant-size inline memory builtin.
#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Copies exactly 'Size' bytes from 'src' to 'dst', where 'Size' is a compile
// time constant. Both pointers are '__restrict' qualified: the two regions
// must not overlap.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis that wrongly detects out of bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  // Plain byte-wise copy used when the builtin is unavailable.
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}
060a43ceSGuillaume Chatelet
using Ptr = cpp::byte *;        // Mutable pointer to raw bytes.
using CPtr = const cpp::byte *; // Read-only pointer to raw bytes.
060a43ceSGuillaume Chatelet
// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type, and it only
// converts back to 'bool' or 'int' through explicit conversions.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T: the constructor is disabled for every
  // other argument type, so no implicit integral conversion can sneak in.
  // It is constexpr so that the constexpr factories below are usable in
  // constant expressions.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE constexpr StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement. True iff the value is non-zero.
  LIBC_INLINE constexpr explicit operator bool() const { return value != 0; }

  // If type is unsigned (bcmp) we allow bitwise OR operations.
  LIBC_INLINE constexpr StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    // 'value | Rhs.value' undergoes integral promotion when T is narrower than
    // int; cast back to T so the strict constructor accepts the result.
    return StrictIntegralType(static_cast<T>(value | Rhs.value));
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the canonical zero and non-zero (T(1)) values.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

// Return types of the memcmp-like (signed, ordered) and bcmp-like (unsigned,
// equality only) primitives.
using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
69090143SGuillaume Chatelet
1c814c99SGuillaume Chatelet// This implements the semantic of 'memcmp' returning a negative value when 'a'
1c814c99SGuillaume Chatelet// is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
1c814c99SGuillaume ChateletLIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
1c814c99SGuillaume Chatelet  // We perform the difference as an int64_t.
1c814c99SGuillaume Chatelet  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
1c814c99SGuillaume Chatelet  // For the int64_t to int32_t conversion we want the following properties:
1c814c99SGuillaume Chatelet  // - int32_t[31:31] == 1 iff diff < 0
1c814c99SGuillaume Chatelet  // - int32_t[31:0] == 0 iff diff == 0
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet  // We also observe that:
1c814c99SGuillaume Chatelet  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
1c814c99SGuillaume Chatelet  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
1c814c99SGuillaume Chatelet  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
1c814c99SGuillaume Chatelet  // - https://godbolt.org/z/8W7qWP6e5
1c814c99SGuillaume Chatelet  // - This implies that we can only look at diff[32:32] for determining the
1c814c99SGuillaume Chatelet  // sign bit for the returned int32_t.
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet  // So, we do the following:
1c814c99SGuillaume Chatelet  // - int32_t[31:31] = diff[32:32]
1c814c99SGuillaume Chatelet  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet  // And, we can achieve the above by the expression below. We could have also
1c814c99SGuillaume Chatelet  // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
1c814c99SGuillaume Chatelet  // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
1c814c99SGuillaume Chatelet  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
1c814c99SGuillaume Chatelet}
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet// Returns a negative value if 'a' is less than 'b' and a positive value
1c814c99SGuillaume Chatelet// otherwise. This implements the semantic of 'memcmp' when we know that 'a' and
1c814c99SGuillaume Chatelet// 'b' differ.
1c814c99SGuillaume ChateletLIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
48ba7da9SGuillaume Chatelet#if defined(LIBC_TARGET_ARCH_IS_X86)
1c814c99SGuillaume Chatelet  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
1c814c99SGuillaume Chatelet  // positive and negative value respectively as they are one value apart:
1c814c99SGuillaume Chatelet  //   xor     eax, eax         <- free
1c814c99SGuillaume Chatelet  //   cmp     rdi, rsi         <- serializing
1c814c99SGuillaume Chatelet  //   adc     eax, 2147483647  <- serializing
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet  // Unfortunately we found instances of client code that negate the result of
1c814c99SGuillaume Chatelet  // 'memcmp' to reverse ordering. Because signed integers are not symmetric
1c814c99SGuillaume Chatelet  // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
1c814c99SGuillaume Chatelet  // `-INT_MIN` is not representable as an int32_t.
1c814c99SGuillaume Chatelet
1c814c99SGuillaume Chatelet  // As a consequence, we use 5 and -5 which is still OK nice in terms of
1c814c99SGuillaume Chatelet  // latency.
1c814c99SGuillaume Chatelet  //   cmp     rdi, rsi         <- serializing
1c814c99SGuillaume Chatelet  //   mov     ecx, -5          <- can be done in parallel
1c814c99SGuillaume Chatelet  //   mov     eax, 5           <- can be done in parallel
1c814c99SGuillaume Chatelet  //   cmovb   eax, ecx         <- serializing
1c814c99SGuillaume Chatelet  static constexpr int32_t POSITIVE = 5;
1c814c99SGuillaume Chatelet  static constexpr int32_t NEGATIVE = -5;
1c814c99SGuillaume Chatelet#else
1c814c99SGuillaume Chatelet  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
1c814c99SGuillaume Chatelet  // On ARMv8, both strategies lead to the same performance.
1c814c99SGuillaume Chatelet  static constexpr int32_t POSITIVE = 1;
1c814c99SGuillaume Chatelet  static constexpr int32_t NEGATIVE = -1;
1c814c99SGuillaume Chatelet#endif
1c814c99SGuillaume Chatelet  static_assert(POSITIVE > 0);
1c814c99SGuillaume Chatelet  static_assert(NEGATIVE < 0);
1c814c99SGuillaume Chatelet  return a < b ? NEGATIVE : POSITIVE;
1c814c99SGuillaume Chatelet}
1c814c99SGuillaume Chatelet
a786096fSGuillaume Chatelet// Loads bytes from memory (possibly unaligned) and materializes them as
a786096fSGuillaume Chatelet// type.
6363320bSSiva Chandra Reddytemplate <typename T> LIBC_INLINE T load(CPtr ptr) {
88d82b74SNick Desaulniers  T out;
88d82b74SNick Desaulniers  memcpy_inline<sizeof(T)>(&out, ptr);
88d82b74SNick Desaulniers  return out;
060a43ceSGuillaume Chatelet}
060a43ceSGuillaume Chatelet
a786096fSGuillaume Chatelet// Stores a value of type T in memory (possibly unaligned).
6363320bSSiva Chandra Reddytemplate <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
060a43ceSGuillaume Chatelet  memcpy_inline<sizeof(T)>(ptr, &value);
060a43ceSGuillaume Chatelet}
060a43ceSGuillaume Chatelet
// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Loads a 'ValueType' by decomposing it into several loads that are assumed to
f4a35492SGuillaume Chatelet// be aligned.
f4a35492SGuillaume Chatelet// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
f4a35492SGuillaume Chatelettemplate <typename ValueType, typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE ValueType load_aligned(CPtr src) {
f4a35492SGuillaume Chatelet  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
f4a35492SGuillaume Chatelet  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
f4a35492SGuillaume Chatelet  if constexpr (sizeof...(TS) > 0) {
88d82b74SNick Desaulniers    constexpr size_t SHIFT = sizeof(T) * 8;
f4a35492SGuillaume Chatelet    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
f4a35492SGuillaume Chatelet    if constexpr (Endian::IS_LITTLE)
88d82b74SNick Desaulniers      return value | (next << SHIFT);
f4a35492SGuillaume Chatelet    else if constexpr (Endian::IS_BIG)
88d82b74SNick Desaulniers      return (value << SHIFT) | next;
f4a35492SGuillaume Chatelet    else
1d894788SGuillaume Chatelet      static_assert(cpp::always_false<T>, "Invalid endianness");
f4a35492SGuillaume Chatelet  } else {
f4a35492SGuillaume Chatelet    return value;
f4a35492SGuillaume Chatelet  }
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Alias for loading a 'uint32_t'.
f4a35492SGuillaume Chatelettemplate <typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
f4a35492SGuillaume Chatelet  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
f4a35492SGuillaume Chatelet  return load_aligned<uint32_t, T, TS...>(src + offset);
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Alias for loading a 'uint64_t'.
f4a35492SGuillaume Chatelettemplate <typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
f4a35492SGuillaume Chatelet  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
f4a35492SGuillaume Chatelet  return load_aligned<uint64_t, T, TS...>(src + offset);
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Stores a 'ValueType' by decomposing it into several stores that are assumed
f4a35492SGuillaume Chatelet// to be aligned.
f4a35492SGuillaume Chatelet// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
f4a35492SGuillaume Chatelettemplate <typename ValueType, typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
f4a35492SGuillaume Chatelet  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
88d82b74SNick Desaulniers  constexpr size_t SHIFT = sizeof(T) * 8;
f4a35492SGuillaume Chatelet  if constexpr (Endian::IS_LITTLE) {
f4a35492SGuillaume Chatelet    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
f4a35492SGuillaume Chatelet    if constexpr (sizeof...(TS) > 0)
88d82b74SNick Desaulniers      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
f4a35492SGuillaume Chatelet  } else if constexpr (Endian::IS_BIG) {
f4a35492SGuillaume Chatelet    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
f4a35492SGuillaume Chatelet    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
f4a35492SGuillaume Chatelet    if constexpr (sizeof...(TS) > 0)
88d82b74SNick Desaulniers      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
f4a35492SGuillaume Chatelet  } else {
1d894788SGuillaume Chatelet    static_assert(cpp::always_false<T>, "Invalid endianness");
f4a35492SGuillaume Chatelet  }
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Alias for storing a 'uint32_t'.
f4a35492SGuillaume Chatelettemplate <typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
f4a35492SGuillaume Chatelet  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
f4a35492SGuillaume Chatelet  store_aligned<uint32_t, T, TS...>(value, dst + offset);
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
f4a35492SGuillaume Chatelet// Alias for storing a 'uint64_t'.
f4a35492SGuillaume Chatelettemplate <typename T, typename... TS>
019a477cSRoland McGrathLIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
f4a35492SGuillaume Chatelet  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
f4a35492SGuillaume Chatelet  store_aligned<uint64_t, T, TS...>(value, dst + offset);
f4a35492SGuillaume Chatelet}
f4a35492SGuillaume Chatelet
// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount. All three are updated in place. No bounds check is performed:
// 'count' is unsigned and wraps around if 'offset' is larger than 'count'.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  // Explicit conversion: same modular arithmetic as the implicit one, without
  // mixing signed and unsigned operands.
  count -= static_cast<size_t>(offset);
}
060a43ceSGuillaume Chatelet
a786096fSGuillaume Chatelet// Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
a786096fSGuillaume Chatelet// and decrease count by the same amount.
060a43ceSGuillaume Chatelet// We make sure the compiler knows about the adjusted pointer alignment.
a786096fSGuillaume Chatelettemplate <size_t SIZE, typename T1, typename T2>
a786096fSGuillaume Chateletvoid align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
a786096fSGuillaume Chatelet                               size_t &count) {
a786096fSGuillaume Chatelet  adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
a786096fSGuillaume Chatelet  p1 = assume_aligned<SIZE>(p1);
060a43ceSGuillaume Chatelet}
060a43ceSGuillaume Chatelet
a786096fSGuillaume Chatelet// Same as align_p1_to_next_boundary above but with a single pointer instead.
8628ca29SGuillaume Chatelettemplate <size_t SIZE, typename T>
8628ca29SGuillaume ChateletLIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
8628ca29SGuillaume Chatelet  const T *dummy = p1;
a786096fSGuillaume Chatelet  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
d8415b02SSterling Augustine}
a786096fSGuillaume Chatelet
// An enum class that discriminates between the first and second pointer.
// 'Dst' and 'Src' are aliases of 'P1' and 'P2' respectively.
enum class Arg { P1, P2, Dst = P1, Src = P2 };
a786096fSGuillaume Chatelet
a786096fSGuillaume Chatelet// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
a786096fSGuillaume Chatelet// Precondition: &p1 != &p2
a786096fSGuillaume Chatelettemplate <size_t SIZE, Arg AlignOn, typename T1, typename T2>
019a477cSRoland McGrathLIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
a786096fSGuillaume Chatelet                                        size_t &count) {
a786096fSGuillaume Chatelet  if constexpr (AlignOn == Arg::P1)
a786096fSGuillaume Chatelet    align_p1_to_next_boundary<SIZE>(p1, p2, count);
a786096fSGuillaume Chatelet  else if constexpr (AlignOn == Arg::P2)
a786096fSGuillaume Chatelet    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
a786096fSGuillaume Chatelet  else
1d894788SGuillaume Chatelet    static_assert(cpp::always_false<T1>,
1d894788SGuillaume Chatelet                  "AlignOn must be either Arg::P1 or Arg::P2");
060a43ceSGuillaume Chatelet}
5bf47e14SGuillaume Chatelet
1c814c99SGuillaume Chatelettemplate <size_t SIZE> struct AlignHelper {
019a477cSRoland McGrath  LIBC_INLINE AlignHelper(CPtr ptr)
640c8574SNick Desaulniers      : offset(distance_to_next_aligned<SIZE>(ptr)) {}
1c814c99SGuillaume Chatelet
640c8574SNick Desaulniers  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
640c8574SNick Desaulniers  uintptr_t offset;
1c814c99SGuillaume Chatelet};
1c814c99SGuillaume Chatelet
3153aa4cSdoshimiliLIBC_INLINE void prefetch_for_write(CPtr dst) {
3153aa4cSdoshimili  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
3153aa4cSdoshimili}
3153aa4cSdoshimili
3153aa4cSdoshimiliLIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
3153aa4cSdoshimili  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
3153aa4cSdoshimili}
3153aa4cSdoshimili
5ff3ff33SPetr Hosek} // namespace LIBC_NAMESPACE_DECL
85314e9bSGuillaume Chatelet
270547f3SGuillaume Chatelet#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H