1bfd94882SGuillaume Chatelet //===-- Implementations for platform with mandatory aligned memory access -===// 2bfd94882SGuillaume Chatelet // 3bfd94882SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bfd94882SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information. 5bfd94882SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bfd94882SGuillaume Chatelet // 7bfd94882SGuillaume Chatelet //===----------------------------------------------------------------------===// 8bfd94882SGuillaume Chatelet // For some platforms, unaligned loads and stores are either illegal or very 9bfd94882SGuillaume Chatelet // slow. The implementations in this file make sure all loads and stores are 10bfd94882SGuillaume Chatelet // always aligned. 11bfd94882SGuillaume Chatelet //===----------------------------------------------------------------------===// 12bfd94882SGuillaume Chatelet 13bfd94882SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H 14bfd94882SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H 15bfd94882SGuillaume Chatelet 16*2cc97951Sc8ef #include "src/__support/macros/attributes.h" // LIBC_INLINE 17bfd94882SGuillaume Chatelet #include "src/string/memory_utils/generic/byte_per_byte.h" 18bfd94882SGuillaume Chatelet #include "src/string/memory_utils/op_generic.h" // generic::splat 19bfd94882SGuillaume Chatelet #include "src/string/memory_utils/utils.h" // Ptr, CPtr 20bfd94882SGuillaume Chatelet 21bfd94882SGuillaume Chatelet #include <stddef.h> // size_t 22bfd94882SGuillaume Chatelet 235ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 24bfd94882SGuillaume Chatelet 25bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE uint32_t load32_aligned(CPtr ptr, size_t offset, 26bfd94882SGuillaume Chatelet size_t alignment) { 27bfd94882SGuillaume Chatelet if (alignment == 0) 28bfd94882SGuillaume Chatelet return load32_aligned<uint32_t>(ptr, offset); 29bfd94882SGuillaume Chatelet else if (alignment == 2) 30bfd94882SGuillaume Chatelet return load32_aligned<uint16_t, uint16_t>(ptr, offset); 31bdac9720SGuillaume Chatelet else // 1, 3 32bfd94882SGuillaume Chatelet return load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset); 33bfd94882SGuillaume Chatelet } 34bfd94882SGuillaume Chatelet 35bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE uint64_t load64_aligned(CPtr ptr, size_t offset, 36bfd94882SGuillaume Chatelet size_t alignment) { 37bfd94882SGuillaume Chatelet if (alignment == 0) 38bfd94882SGuillaume Chatelet return load64_aligned<uint64_t>(ptr, offset); 39bfd94882SGuillaume Chatelet else if (alignment == 4) 40bfd94882SGuillaume Chatelet return load64_aligned<uint32_t, uint32_t>(ptr, offset); 41bdac9720SGuillaume Chatelet else if (alignment == 6) 42bdac9720SGuillaume Chatelet return load64_aligned<uint16_t, uint32_t, uint16_t>(ptr, offset); 43bfd94882SGuillaume Chatelet else if (alignment == 2) 44bfd94882SGuillaume Chatelet return load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(ptr, offset); 45bdac9720SGuillaume Chatelet else // 1, 3, 5, 7 46bfd94882SGuillaume Chatelet return load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>( 47bfd94882SGuillaume Chatelet ptr, offset); 48bfd94882SGuillaume Chatelet } 49bfd94882SGuillaume Chatelet 50bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 51bfd94882SGuillaume Chatelet // memcpy 52bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 53bfd94882SGuillaume Chatelet 54bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE void 55bfd94882SGuillaume Chatelet inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src, 56bfd94882SGuillaume Chatelet size_t count) { 57bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint32_t); 58bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 59bfd94882SGuillaume Chatelet return inline_memcpy_byte_per_byte(dst, src, count); 60bfd94882SGuillaume Chatelet size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); 61bfd94882SGuillaume Chatelet inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); 62bfd94882SGuillaume Chatelet size_t offset = bytes_to_dst_align; 63bfd94882SGuillaume Chatelet size_t src_alignment = distance_to_align_down<kAlign>(src + offset); 64bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 65bfd94882SGuillaume Chatelet uint32_t value = load32_aligned(src, offset, src_alignment); 66bfd94882SGuillaume Chatelet store32_aligned<uint32_t>(value, dst, offset); 67bfd94882SGuillaume Chatelet } 68bfd94882SGuillaume Chatelet // remainder 69bfd94882SGuillaume Chatelet inline_memcpy_byte_per_byte(dst, src, count, offset); 70bfd94882SGuillaume Chatelet } 71bfd94882SGuillaume Chatelet 72bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE void 73bfd94882SGuillaume Chatelet inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src, 74bfd94882SGuillaume Chatelet size_t count) { 75bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint64_t); 76bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 77bfd94882SGuillaume Chatelet return inline_memcpy_byte_per_byte(dst, src, count); 78bfd94882SGuillaume Chatelet size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); 79bfd94882SGuillaume Chatelet inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); 80bfd94882SGuillaume Chatelet size_t offset = bytes_to_dst_align; 81bfd94882SGuillaume Chatelet size_t src_alignment = distance_to_align_down<kAlign>(src + offset); 82bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 83bfd94882SGuillaume Chatelet uint64_t value = load64_aligned(src, offset, src_alignment); 84bfd94882SGuillaume Chatelet store64_aligned<uint64_t>(value, dst, offset); 85bfd94882SGuillaume Chatelet } 86bfd94882SGuillaume Chatelet // remainder 87bfd94882SGuillaume Chatelet inline_memcpy_byte_per_byte(dst, src, count, offset); 88bfd94882SGuillaume Chatelet } 89bfd94882SGuillaume Chatelet 90bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 91bfd94882SGuillaume Chatelet // memset 92bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 93bfd94882SGuillaume Chatelet 94bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE static void 95bfd94882SGuillaume Chatelet inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) { 96bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint32_t); 97bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 98bfd94882SGuillaume Chatelet return inline_memset_byte_per_byte(dst, value, count); 99bfd94882SGuillaume Chatelet size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); 100bfd94882SGuillaume Chatelet inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); 101bfd94882SGuillaume Chatelet size_t offset = bytes_to_dst_align; 102bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) 103bfd94882SGuillaume Chatelet store32_aligned<uint32_t>(generic::splat<uint32_t>(value), dst, offset); 104bfd94882SGuillaume Chatelet inline_memset_byte_per_byte(dst, value, count, offset); 105bfd94882SGuillaume Chatelet } 106bfd94882SGuillaume Chatelet 107bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE static void 108bfd94882SGuillaume Chatelet inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) { 109bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint64_t); 110bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 111bfd94882SGuillaume Chatelet return inline_memset_byte_per_byte(dst, value, count); 112bfd94882SGuillaume Chatelet size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); 113bfd94882SGuillaume Chatelet inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); 114bfd94882SGuillaume Chatelet size_t offset = bytes_to_dst_align; 115bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) 116bfd94882SGuillaume Chatelet store64_aligned<uint64_t>(generic::splat<uint64_t>(value), dst, offset); 117bfd94882SGuillaume Chatelet inline_memset_byte_per_byte(dst, value, count, offset); 118bfd94882SGuillaume Chatelet } 119bfd94882SGuillaume Chatelet 120bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 121bfd94882SGuillaume Chatelet // bcmp 122bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 123bfd94882SGuillaume Chatelet 124bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE BcmpReturnType 125bfd94882SGuillaume Chatelet inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) { 126bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint32_t); 127bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 128bfd94882SGuillaume Chatelet return inline_bcmp_byte_per_byte(p1, p2, count); 129bfd94882SGuillaume Chatelet size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); 130bfd94882SGuillaume Chatelet if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) 131bfd94882SGuillaume Chatelet return value; 132bfd94882SGuillaume Chatelet size_t offset = bytes_to_p1_align; 133bfd94882SGuillaume Chatelet size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); 134bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 135bfd94882SGuillaume Chatelet uint32_t a = load32_aligned<uint32_t>(p1, offset); 136bfd94882SGuillaume Chatelet uint32_t b = load32_aligned(p2, offset, p2_alignment); 137bfd94882SGuillaume Chatelet if (a != b) 1386f8d826bSNick Desaulniers return BcmpReturnType::nonzero(); 139bfd94882SGuillaume Chatelet } 140bfd94882SGuillaume Chatelet return inline_bcmp_byte_per_byte(p1, p2, count, offset); 141bfd94882SGuillaume Chatelet } 142bfd94882SGuillaume Chatelet 143bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE BcmpReturnType 144bfd94882SGuillaume Chatelet inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) { 145bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint64_t); 146bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 147bfd94882SGuillaume Chatelet return inline_bcmp_byte_per_byte(p1, p2, count); 148bfd94882SGuillaume Chatelet size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); 149bfd94882SGuillaume Chatelet if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) 150bfd94882SGuillaume Chatelet return value; 151bfd94882SGuillaume Chatelet size_t offset = bytes_to_p1_align; 152bfd94882SGuillaume Chatelet size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); 153bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 154bfd94882SGuillaume Chatelet uint64_t a = load64_aligned<uint64_t>(p1, offset); 155bfd94882SGuillaume Chatelet uint64_t b = load64_aligned(p2, offset, p2_alignment); 156bfd94882SGuillaume Chatelet if (a != b) 1576f8d826bSNick Desaulniers return BcmpReturnType::nonzero(); 158bfd94882SGuillaume Chatelet } 159bfd94882SGuillaume Chatelet return inline_bcmp_byte_per_byte(p1, p2, count, offset); 160bfd94882SGuillaume Chatelet } 161bfd94882SGuillaume Chatelet 162bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 163bfd94882SGuillaume Chatelet // memcmp 164bfd94882SGuillaume Chatelet /////////////////////////////////////////////////////////////////////////////// 165bfd94882SGuillaume Chatelet 166bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE MemcmpReturnType 167bfd94882SGuillaume Chatelet inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) { 168bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint32_t); 169bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 170bfd94882SGuillaume Chatelet return inline_memcmp_byte_per_byte(p1, p2, count); 171bfd94882SGuillaume Chatelet size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); 172bfd94882SGuillaume Chatelet if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) 173bfd94882SGuillaume Chatelet return value; 174bfd94882SGuillaume Chatelet size_t offset = bytes_to_p1_align; 175bfd94882SGuillaume Chatelet size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); 176bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 177bfd94882SGuillaume Chatelet uint32_t a = load32_aligned<uint32_t>(p1, offset); 178bfd94882SGuillaume Chatelet uint32_t b = load32_aligned(p2, offset, p2_alignment); 179bfd94882SGuillaume Chatelet if (a != b) 180bfd94882SGuillaume Chatelet return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b)); 181bfd94882SGuillaume Chatelet } 182bfd94882SGuillaume Chatelet return inline_memcmp_byte_per_byte(p1, p2, count, offset); 183bfd94882SGuillaume Chatelet } 184bfd94882SGuillaume Chatelet 185bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE MemcmpReturnType 186bfd94882SGuillaume Chatelet inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) { 187bfd94882SGuillaume Chatelet constexpr size_t kAlign = sizeof(uint64_t); 188bfd94882SGuillaume Chatelet if (count <= 2 * kAlign) 189bfd94882SGuillaume Chatelet return inline_memcmp_byte_per_byte(p1, p2, count); 190bfd94882SGuillaume Chatelet size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); 191bfd94882SGuillaume Chatelet if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) 192bfd94882SGuillaume Chatelet return value; 193bfd94882SGuillaume Chatelet size_t offset = bytes_to_p1_align; 194bfd94882SGuillaume Chatelet size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); 195bfd94882SGuillaume Chatelet for (; offset < count - kAlign; offset += kAlign) { 196bfd94882SGuillaume Chatelet uint64_t a = load64_aligned<uint64_t>(p1, offset); 197bfd94882SGuillaume Chatelet uint64_t b = load64_aligned(p2, offset, p2_alignment); 198bfd94882SGuillaume Chatelet if (a != b) 199bfd94882SGuillaume Chatelet return cmp_neq_uint64_t(Endian::to_big_endian(a), 200bfd94882SGuillaume Chatelet Endian::to_big_endian(b)); 201bfd94882SGuillaume Chatelet } 202bfd94882SGuillaume Chatelet return inline_memcmp_byte_per_byte(p1, p2, count, offset); 203bfd94882SGuillaume Chatelet } 204bfd94882SGuillaume Chatelet 2055ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 206bfd94882SGuillaume Chatelet 207bfd94882SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H 208