//===-- Implementations for platforms with mandatory aligned memory access ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// For some platforms, unaligned loads and stores are either illegal or very
// slow. The implementations in this file make sure all loads and stores are
// always aligned.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H

#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
#include "src/string/memory_utils/generic/byte_per_byte.h"
#include "src/string/memory_utils/op_generic.h" // generic::splat
#include "src/string/memory_utils/utils.h"      // Ptr, CPtr

#include <stddef.h> // size_t
#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t

namespace LIBC_NAMESPACE_DECL {

[[maybe_unused]] LIBC_INLINE uint32_t load32_aligned(CPtr ptr, size_t offset,
                                                     size_t alignment) {
  if (alignment == 0)
    return load32_aligned<uint32_t>(ptr, offset);
  else if (alignment == 2)
    return load32_aligned<uint16_t, uint16_t>(ptr, offset);
  else // 1, 3
    return load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset);
}

[[maybe_unused]] LIBC_INLINE uint64_t load64_aligned(CPtr ptr, size_t offset,
                                                     size_t alignment) {
  if (alignment == 0)
    return load64_aligned<uint64_t>(ptr, offset);
  else if (alignment == 4)
    return load64_aligned<uint32_t, uint32_t>(ptr, offset);
  else if (alignment == 6)
    return load64_aligned<uint16_t, uint32_t, uint16_t>(ptr, offset);
  else if (alignment == 2)
    return load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(ptr, offset);
  else // 1, 3, 5, 7
    return load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
        ptr, offset);
}

///////////////////////////////////////////////////////////////////////////////
// memcpy
///////////////////////////////////////////////////////////////////////////////

[[maybe_unused]] LIBC_INLINE void
inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src,
                                   size_t count) {
  constexpr size_t kAlign = sizeof(uint32_t);
  if (count <= 2 * kAlign)
    return inline_memcpy_byte_per_byte(dst, src, count);
  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
  size_t offset = bytes_to_dst_align;
  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
  for (; offset < count - kAlign; offset += kAlign) {
    uint32_t value = load32_aligned(src, offset, src_alignment);
    store32_aligned<uint32_t>(value, dst, offset);
  }
  // remainder
  inline_memcpy_byte_per_byte(dst, src, count, offset);
}

[[maybe_unused]] LIBC_INLINE void
inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src,
                                   size_t count) {
  constexpr size_t kAlign = sizeof(uint64_t);
  if (count <= 2 * kAlign)
    return inline_memcpy_byte_per_byte(dst, src, count);
  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
  size_t offset = bytes_to_dst_align;
  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
  for (; offset < count - kAlign; offset += kAlign) {
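    // `src` does not necessarily share `dst`'s alignment, so the load is
    // decomposed into smaller aligned accesses according to `src_alignment`,
    // while the store is a single aligned 64-bit access (`dst` was aligned
    // up front above).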
    uint64_t value = load64_aligned(src, offset, src_alignment);
    store64_aligned<uint64_t>(value, dst, offset);
  }
  // remainder
  inline_memcpy_byte_per_byte(dst, src, count, offset);
}

///////////////////////////////////////////////////////////////////////////////
// memset
///////////////////////////////////////////////////////////////////////////////

[[maybe_unused]] LIBC_INLINE static void
inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) {
  constexpr size_t kAlign = sizeof(uint32_t);
  if (count <= 2 * kAlign)
    return inline_memset_byte_per_byte(dst, value, count);
  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
  size_t offset = bytes_to_dst_align;
  for (; offset < count - kAlign; offset += kAlign)
    store32_aligned<uint32_t>(generic::splat<uint32_t>(value), dst, offset);
  inline_memset_byte_per_byte(dst, value, count, offset);
}

[[maybe_unused]] LIBC_INLINE static void
inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) {
  constexpr size_t kAlign = sizeof(uint64_t);
  if (count <= 2 * kAlign)
    return inline_memset_byte_per_byte(dst, value, count);
  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
  size_t offset = bytes_to_dst_align;
  for (; offset < count - kAlign; offset += kAlign)
    store64_aligned<uint64_t>(generic::splat<uint64_t>(value), dst, offset);
  inline_memset_byte_per_byte(dst, value, count, offset);
}

///////////////////////////////////////////////////////////////////////////////
// bcmp
///////////////////////////////////////////////////////////////////////////////

[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
  constexpr size_t kAlign = sizeof(uint32_t);
  if (count <= 2 * kAlign)
    return inline_bcmp_byte_per_byte(p1, p2, count);
  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
    return value;
  size_t offset = bytes_to_p1_align;
  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
  for (; offset < count - kAlign; offset += kAlign) {
    uint32_t a = load32_aligned<uint32_t>(p1, offset);
    uint32_t b = load32_aligned(p2, offset, p2_alignment);
    if (a != b)
      return BcmpReturnType::nonzero();
  }
  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
}

[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
  constexpr size_t kAlign = sizeof(uint64_t);
  if (count <= 2 * kAlign)
    return inline_bcmp_byte_per_byte(p1, p2, count);
  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
    return value;
  size_t offset = bytes_to_p1_align;
  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
  for (; offset < count - kAlign; offset += kAlign) {
    uint64_t a = load64_aligned<uint64_t>(p1, offset);
    uint64_t b = load64_aligned(p2, offset, p2_alignment);
    if (a != b)
      return BcmpReturnType::nonzero();
  }
  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
}

///////////////////////////////////////////////////////////////////////////////
// memcmp
///////////////////////////////////////////////////////////////////////////////
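//
// Unlike bcmp, memcmp must order the buffers by their first differing byte.
// Converting both words to big-endian before comparing makes the integer
// comparison agree with that byte-wise (lexicographic) ordering.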

[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
  constexpr size_t kAlign = sizeof(uint32_t);
  if (count <= 2 * kAlign)
    return inline_memcmp_byte_per_byte(p1, p2, count);
  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
    return value;
  size_t offset = bytes_to_p1_align;
  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
  for (; offset < count - kAlign; offset += kAlign) {
    uint32_t a = load32_aligned<uint32_t>(p1, offset);
    uint32_t b = load32_aligned(p2, offset, p2_alignment);
    if (a != b)
      return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b));
  }
  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
}

[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
  constexpr size_t kAlign = sizeof(uint64_t);
  if (count <= 2 * kAlign)
    return inline_memcmp_byte_per_byte(p1, p2, count);
  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
    return value;
  size_t offset = bytes_to_p1_align;
  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
  for (; offset < count - kAlign; offset += kAlign) {
    uint64_t a = load64_aligned<uint64_t>(p1, offset);
    uint64_t b = load64_aligned(p2, offset, p2_alignment);
    if (a != b)
      return cmp_neq_uint64_t(Endian::to_big_endian(a),
                              Endian::to_big_endian(b));
  }
  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H