xref: /llvm-project/libc/src/string/memory_utils/generic/aligned_access.h (revision 2cc97951400ca2ab79d6bdeccffa6e431882a86e)
1bfd94882SGuillaume Chatelet //===-- Implementations for platform with mandatory aligned memory access -===//
2bfd94882SGuillaume Chatelet //
3bfd94882SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bfd94882SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information.
5bfd94882SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bfd94882SGuillaume Chatelet //
7bfd94882SGuillaume Chatelet //===----------------------------------------------------------------------===//
8bfd94882SGuillaume Chatelet // For some platforms, unaligned loads and stores are either illegal or very
9bfd94882SGuillaume Chatelet // slow. The implementations in this file make sure all loads and stores are
10bfd94882SGuillaume Chatelet // always aligned.
11bfd94882SGuillaume Chatelet //===----------------------------------------------------------------------===//
12bfd94882SGuillaume Chatelet 
13bfd94882SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
14bfd94882SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
15bfd94882SGuillaume Chatelet 
16*2cc97951Sc8ef #include "src/__support/macros/attributes.h" // LIBC_INLINE
17bfd94882SGuillaume Chatelet #include "src/string/memory_utils/generic/byte_per_byte.h"
18bfd94882SGuillaume Chatelet #include "src/string/memory_utils/op_generic.h" // generic::splat
19bfd94882SGuillaume Chatelet #include "src/string/memory_utils/utils.h"      // Ptr, CPtr
20bfd94882SGuillaume Chatelet 
21bfd94882SGuillaume Chatelet #include <stddef.h> // size_t
22bfd94882SGuillaume Chatelet 
235ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
24bfd94882SGuillaume Chatelet 
25bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE uint32_t load32_aligned(CPtr ptr, size_t offset,
26bfd94882SGuillaume Chatelet                                                      size_t alignment) {
27bfd94882SGuillaume Chatelet   if (alignment == 0)
28bfd94882SGuillaume Chatelet     return load32_aligned<uint32_t>(ptr, offset);
29bfd94882SGuillaume Chatelet   else if (alignment == 2)
30bfd94882SGuillaume Chatelet     return load32_aligned<uint16_t, uint16_t>(ptr, offset);
31bdac9720SGuillaume Chatelet   else // 1, 3
32bfd94882SGuillaume Chatelet     return load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset);
33bfd94882SGuillaume Chatelet }
34bfd94882SGuillaume Chatelet 
35bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE uint64_t load64_aligned(CPtr ptr, size_t offset,
36bfd94882SGuillaume Chatelet                                                      size_t alignment) {
37bfd94882SGuillaume Chatelet   if (alignment == 0)
38bfd94882SGuillaume Chatelet     return load64_aligned<uint64_t>(ptr, offset);
39bfd94882SGuillaume Chatelet   else if (alignment == 4)
40bfd94882SGuillaume Chatelet     return load64_aligned<uint32_t, uint32_t>(ptr, offset);
41bdac9720SGuillaume Chatelet   else if (alignment == 6)
42bdac9720SGuillaume Chatelet     return load64_aligned<uint16_t, uint32_t, uint16_t>(ptr, offset);
43bfd94882SGuillaume Chatelet   else if (alignment == 2)
44bfd94882SGuillaume Chatelet     return load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(ptr, offset);
45bdac9720SGuillaume Chatelet   else // 1, 3, 5, 7
46bfd94882SGuillaume Chatelet     return load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
47bfd94882SGuillaume Chatelet         ptr, offset);
48bfd94882SGuillaume Chatelet }
49bfd94882SGuillaume Chatelet 
50bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
51bfd94882SGuillaume Chatelet // memcpy
52bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
53bfd94882SGuillaume Chatelet 
54bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE void
55bfd94882SGuillaume Chatelet inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src,
56bfd94882SGuillaume Chatelet                                    size_t count) {
57bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint32_t);
58bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
59bfd94882SGuillaume Chatelet     return inline_memcpy_byte_per_byte(dst, src, count);
60bfd94882SGuillaume Chatelet   size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
61bfd94882SGuillaume Chatelet   inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
62bfd94882SGuillaume Chatelet   size_t offset = bytes_to_dst_align;
63bfd94882SGuillaume Chatelet   size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
64bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
65bfd94882SGuillaume Chatelet     uint32_t value = load32_aligned(src, offset, src_alignment);
66bfd94882SGuillaume Chatelet     store32_aligned<uint32_t>(value, dst, offset);
67bfd94882SGuillaume Chatelet   }
68bfd94882SGuillaume Chatelet   // remainder
69bfd94882SGuillaume Chatelet   inline_memcpy_byte_per_byte(dst, src, count, offset);
70bfd94882SGuillaume Chatelet }
71bfd94882SGuillaume Chatelet 
72bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE void
73bfd94882SGuillaume Chatelet inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src,
74bfd94882SGuillaume Chatelet                                    size_t count) {
75bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint64_t);
76bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
77bfd94882SGuillaume Chatelet     return inline_memcpy_byte_per_byte(dst, src, count);
78bfd94882SGuillaume Chatelet   size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
79bfd94882SGuillaume Chatelet   inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
80bfd94882SGuillaume Chatelet   size_t offset = bytes_to_dst_align;
81bfd94882SGuillaume Chatelet   size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
82bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
83bfd94882SGuillaume Chatelet     uint64_t value = load64_aligned(src, offset, src_alignment);
84bfd94882SGuillaume Chatelet     store64_aligned<uint64_t>(value, dst, offset);
85bfd94882SGuillaume Chatelet   }
86bfd94882SGuillaume Chatelet   // remainder
87bfd94882SGuillaume Chatelet   inline_memcpy_byte_per_byte(dst, src, count, offset);
88bfd94882SGuillaume Chatelet }
89bfd94882SGuillaume Chatelet 
90bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
91bfd94882SGuillaume Chatelet // memset
92bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
93bfd94882SGuillaume Chatelet 
94bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE static void
95bfd94882SGuillaume Chatelet inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) {
96bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint32_t);
97bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
98bfd94882SGuillaume Chatelet     return inline_memset_byte_per_byte(dst, value, count);
99bfd94882SGuillaume Chatelet   size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
100bfd94882SGuillaume Chatelet   inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
101bfd94882SGuillaume Chatelet   size_t offset = bytes_to_dst_align;
102bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign)
103bfd94882SGuillaume Chatelet     store32_aligned<uint32_t>(generic::splat<uint32_t>(value), dst, offset);
104bfd94882SGuillaume Chatelet   inline_memset_byte_per_byte(dst, value, count, offset);
105bfd94882SGuillaume Chatelet }
106bfd94882SGuillaume Chatelet 
107bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE static void
108bfd94882SGuillaume Chatelet inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) {
109bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint64_t);
110bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
111bfd94882SGuillaume Chatelet     return inline_memset_byte_per_byte(dst, value, count);
112bfd94882SGuillaume Chatelet   size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
113bfd94882SGuillaume Chatelet   inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
114bfd94882SGuillaume Chatelet   size_t offset = bytes_to_dst_align;
115bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign)
116bfd94882SGuillaume Chatelet     store64_aligned<uint64_t>(generic::splat<uint64_t>(value), dst, offset);
117bfd94882SGuillaume Chatelet   inline_memset_byte_per_byte(dst, value, count, offset);
118bfd94882SGuillaume Chatelet }
119bfd94882SGuillaume Chatelet 
120bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
121bfd94882SGuillaume Chatelet // bcmp
122bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
123bfd94882SGuillaume Chatelet 
124bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE BcmpReturnType
125bfd94882SGuillaume Chatelet inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
126bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint32_t);
127bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
128bfd94882SGuillaume Chatelet     return inline_bcmp_byte_per_byte(p1, p2, count);
129bfd94882SGuillaume Chatelet   size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
130bfd94882SGuillaume Chatelet   if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
131bfd94882SGuillaume Chatelet     return value;
132bfd94882SGuillaume Chatelet   size_t offset = bytes_to_p1_align;
133bfd94882SGuillaume Chatelet   size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
134bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
135bfd94882SGuillaume Chatelet     uint32_t a = load32_aligned<uint32_t>(p1, offset);
136bfd94882SGuillaume Chatelet     uint32_t b = load32_aligned(p2, offset, p2_alignment);
137bfd94882SGuillaume Chatelet     if (a != b)
1386f8d826bSNick Desaulniers       return BcmpReturnType::nonzero();
139bfd94882SGuillaume Chatelet   }
140bfd94882SGuillaume Chatelet   return inline_bcmp_byte_per_byte(p1, p2, count, offset);
141bfd94882SGuillaume Chatelet }
142bfd94882SGuillaume Chatelet 
143bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE BcmpReturnType
144bfd94882SGuillaume Chatelet inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
145bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint64_t);
146bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
147bfd94882SGuillaume Chatelet     return inline_bcmp_byte_per_byte(p1, p2, count);
148bfd94882SGuillaume Chatelet   size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
149bfd94882SGuillaume Chatelet   if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
150bfd94882SGuillaume Chatelet     return value;
151bfd94882SGuillaume Chatelet   size_t offset = bytes_to_p1_align;
152bfd94882SGuillaume Chatelet   size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
153bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
154bfd94882SGuillaume Chatelet     uint64_t a = load64_aligned<uint64_t>(p1, offset);
155bfd94882SGuillaume Chatelet     uint64_t b = load64_aligned(p2, offset, p2_alignment);
156bfd94882SGuillaume Chatelet     if (a != b)
1576f8d826bSNick Desaulniers       return BcmpReturnType::nonzero();
158bfd94882SGuillaume Chatelet   }
159bfd94882SGuillaume Chatelet   return inline_bcmp_byte_per_byte(p1, p2, count, offset);
160bfd94882SGuillaume Chatelet }
161bfd94882SGuillaume Chatelet 
162bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
163bfd94882SGuillaume Chatelet // memcmp
164bfd94882SGuillaume Chatelet ///////////////////////////////////////////////////////////////////////////////
165bfd94882SGuillaume Chatelet 
166bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE MemcmpReturnType
167bfd94882SGuillaume Chatelet inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
168bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint32_t);
169bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
170bfd94882SGuillaume Chatelet     return inline_memcmp_byte_per_byte(p1, p2, count);
171bfd94882SGuillaume Chatelet   size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
172bfd94882SGuillaume Chatelet   if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
173bfd94882SGuillaume Chatelet     return value;
174bfd94882SGuillaume Chatelet   size_t offset = bytes_to_p1_align;
175bfd94882SGuillaume Chatelet   size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
176bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
177bfd94882SGuillaume Chatelet     uint32_t a = load32_aligned<uint32_t>(p1, offset);
178bfd94882SGuillaume Chatelet     uint32_t b = load32_aligned(p2, offset, p2_alignment);
179bfd94882SGuillaume Chatelet     if (a != b)
180bfd94882SGuillaume Chatelet       return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b));
181bfd94882SGuillaume Chatelet   }
182bfd94882SGuillaume Chatelet   return inline_memcmp_byte_per_byte(p1, p2, count, offset);
183bfd94882SGuillaume Chatelet }
184bfd94882SGuillaume Chatelet 
185bfd94882SGuillaume Chatelet [[maybe_unused]] LIBC_INLINE MemcmpReturnType
186bfd94882SGuillaume Chatelet inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
187bfd94882SGuillaume Chatelet   constexpr size_t kAlign = sizeof(uint64_t);
188bfd94882SGuillaume Chatelet   if (count <= 2 * kAlign)
189bfd94882SGuillaume Chatelet     return inline_memcmp_byte_per_byte(p1, p2, count);
190bfd94882SGuillaume Chatelet   size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
191bfd94882SGuillaume Chatelet   if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
192bfd94882SGuillaume Chatelet     return value;
193bfd94882SGuillaume Chatelet   size_t offset = bytes_to_p1_align;
194bfd94882SGuillaume Chatelet   size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
195bfd94882SGuillaume Chatelet   for (; offset < count - kAlign; offset += kAlign) {
196bfd94882SGuillaume Chatelet     uint64_t a = load64_aligned<uint64_t>(p1, offset);
197bfd94882SGuillaume Chatelet     uint64_t b = load64_aligned(p2, offset, p2_alignment);
198bfd94882SGuillaume Chatelet     if (a != b)
199bfd94882SGuillaume Chatelet       return cmp_neq_uint64_t(Endian::to_big_endian(a),
200bfd94882SGuillaume Chatelet                               Endian::to_big_endian(b));
201bfd94882SGuillaume Chatelet   }
202bfd94882SGuillaume Chatelet   return inline_memcmp_byte_per_byte(p1, p2, count, offset);
203bfd94882SGuillaume Chatelet }
204bfd94882SGuillaume Chatelet 
2055ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
206bfd94882SGuillaume Chatelet 
207bfd94882SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
208