//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types, the maximum size would be 8
// (corresponding to uint64_t). On this platform, a requested size of 32 would
// be treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with support for AVX, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

namespace __llvm_libc {
// Compiler types using the vector attributes.
using uint8x1_t = uint8_t __attribute__((__vector_size__(1)));
using uint8x2_t = uint8_t __attribute__((__vector_size__(2)));
using uint8x4_t = uint8_t __attribute__((__vector_size__(4)));
using uint8x8_t = uint8_t __attribute__((__vector_size__(8)));
using uint8x16_t = uint8_t __attribute__((__vector_size__(16)));
using uint8x32_t = uint8_t __attribute__((__vector_size__(32)));
using uint8x64_t = uint8_t __attribute__((__vector_size__(64)));
} // namespace __llvm_libc

namespace __llvm_libc::generic {
// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types
// - vector : compiler types using the vector attributes
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.
template <typename T>
constexpr bool is_scalar_v = cpp::is_integral_v<T> && cpp::is_unsigned_v<T>;

template <typename T>
constexpr bool is_vector_v =
    cpp::details::is_unqualified_any_of<T, uint8x1_t, uint8x2_t, uint8x4_t,
                                        uint8x8_t, uint8x16_t, uint8x32_t,
                                        uint8x64_t>();

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;

template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;

// Helper to retrieve the number of elements of a cpp::array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
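
// For illustration, the following assertions all hold given the definitions
// above (example only, not part of the interface):
//   static_assert(is_scalar_v<uint32_t>);
//   static_assert(is_vector_v<uint8x16_t>);
//   static_assert(is_array_v<cpp::array<uint64_t, 4>>);
//   static_assert(is_element_type_v<cpp::array<uint64_t, 4>>);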

// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::__llvm_libc::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T Value;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      Value[I] = load<value_type>(src + (I * sizeof(value_type)));
    return Value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::__llvm_libc::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      store<value_type>(dst + (I * sizeof(value_type)), value[I]);
  }
}

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T Out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      Out[i] = value;
    return Out;
  }
}
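
// A worked example of the scalar path (illustrative): for T == uint32_t,
//   splat<uint32_t>(0xAB) == uint32_t(~0) / 0xFF * 0xAB
//                         == 0x01010101u * 0xAB
//                         == 0xABABABABu
// i.e. the byte is broadcast to every byte lane of the scalar.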

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
#define LLVM_LIBC_HAS_UINT64
#endif

namespace details {
// Checks that the types are sorted in strictly decreasing order of size,
// i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last).
template <typename First> constexpr bool is_decreasing_size() {
  return sizeof(First) == 1;
}
template <typename First, typename Second, typename... Next>
constexpr bool is_decreasing_size() {
  if constexpr (sizeof...(Next) > 0)
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Next...>();
  else
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Second>();
}

template <size_t Size, typename... Ts> struct Largest;
template <size_t Size> struct Largest<Size> : cpp::type_identity<uint8_t> {};
template <size_t Size, typename T, typename... Ts>
struct Largest<Size, T, Ts...> {
  using next = Largest<Size, Ts...>;
  using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>;
};

} // namespace details

// 'SupportedTypes' holds a list of natively supported types.
// The types are instantiations of ScalarType or VectorType.
// They should be ordered in strictly decreasing order of size.
// The 'TypeFor<Size>' alias retrieves the largest supported type that can
// handle 'Size' bytes. e.g.
//
// using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
// using Type = ST::TypeFor<10>;
// static_assert(cpp::is_same_v<Type, ScalarType<uint16_t>>);

template <typename First, typename... Ts> struct SupportedTypes {
  static_assert(details::is_decreasing_size<First, Ts...>());

  using MaxType = First;

  template <size_t Size>
  using TypeFor = typename details::Largest<Size, First, Ts...>::type;
};

// Map from sizes to structures offering static load, store and splat methods.
// Note: On platforms lacking vector support, we use an array type below and
// decompose the operation into smaller pieces.

// Lists the generic native types to use for Memset and Memmove operations.
// TODO: Inject the native types within Memset and Memmove depending on the
// target architecture and derive MaxSize from it.
using NativeTypeMap = SupportedTypes<uint8x64_t, //
                                     uint8x32_t, //
                                     uint8x16_t,
#if defined(LLVM_LIBC_HAS_UINT64)
                                     uint64_t, // Not available on 32bit
#endif
                                     uint32_t, //
                                     uint16_t, //
                                     uint8_t>;

namespace details {

// Helper to test if a type is void.
template <typename T> inline constexpr bool is_void_v = cpp::is_same_v<T, void>;

// In case 'Size' is not supported we can fall back to a sequence of smaller
// operations using the largest natively supported type.
template <size_t Size, size_t MaxSize> static constexpr bool useArrayType() {
  return (Size > MaxSize) && ((Size % MaxSize) == 0) &&
         !details::is_void_v<NativeTypeMap::TypeFor<MaxSize>>;
}

// Compute the type to handle an operation of 'Size' bytes knowing that the
// underlying platform only supports native types up to MaxSize bytes.
template <size_t Size, size_t MaxSize>
using getTypeFor = cpp::conditional_t<
    useArrayType<Size, MaxSize>(),
    cpp::array<NativeTypeMap::TypeFor<MaxSize>, Size / MaxSize>,
    NativeTypeMap::TypeFor<Size>>;

} // namespace details
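
// For instance (illustrative; assumes LLVM_LIBC_HAS_UINT64 is defined so the
// largest scalar is uint64_t): a 32-byte operation with MaxSize == 8 falls
// back to an array of four uint64_t, while a natively supported size maps
// directly to a single type:
//   static_assert(cpp::is_same_v<details::getTypeFor<32, 8>,
//                                cpp::array<uint64_t, 4>>);
//   static_assert(cpp::is_same_v<details::getTypeFor<16, 16>, uint8x16_t>);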

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memset {
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    static_assert(is_element_type_v<T>);
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t I = 0; I < array_size_v<T>; ++I)
        store<value_type>(dst + (I * sizeof(value_type)), Splat);
    }
  }

  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }
};

template <typename T, typename... TS> struct MemsetSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    Memset<T>::block(dst, value);
    if constexpr (sizeof...(TS)) {
      return MemsetSequence<TS...>::block(dst + sizeof(T), value);
    }
  }
};
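
// A minimal usage sketch (illustrative only, not the actual dispatch logic):
// a memset for a 16-byte-capable target could select a strategy from 'count':
//   if (count <= 32)
//     return Memset<uint8x16_t>::head_tail(dst, value, count); // 16 <= count
//   return Memset<uint8x16_t>::loop_and_tail(dst, value, count);
// 'head_tail' covers counts in [SIZE, 2 x SIZE] with two possibly overlapping
// blocks; 'loop_and_tail' covers larger counts with an overlapping tail block.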

///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigation is needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }

  // Align forward, suitable when dst < src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward, suitable when dst > src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Move forward, suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Move backward, suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
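
// A direction-selection sketch (illustrative): with overlapping buffers a
// memmove must copy forward when dst < src and backward when dst > src, e.g.
//   if (dst < src)
//     Memmove<uint8x16_t>::loop_and_tail_forward(dst, src, count);
//   else if (dst > src)
//     Memmove<uint8x16_t>::loop_and_tail_backward(dst, src, count);
// optionally preceded by align_forward / align_backward to align one of the
// two pointers before entering the loop.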

///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Bcmp {
  static constexpr size_t SIZE = Size;
  static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
                                        ? sizeof(uint64_t)
                                        : sizeof(uint32_t);

  // XORs the two loaded values; the result is non-zero iff they differ. Only
  // valid for types that fit in the uint32_t return value.
  template <typename T>
  LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) {
    static_assert(sizeof(T) <= sizeof(uint32_t));
    return load<T>(p1) ^ load<T>(p2);
  }

  // Compares the two loaded values; returns 1 if they differ, 0 otherwise.
  template <typename T>
  LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) {
    return load<T>(p1) != load<T>(p2);
  }

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if constexpr (Size == 1) {
      return load_xor<uint8_t>(p1, p2);
    } else if constexpr (Size == 2) {
      return load_xor<uint16_t>(p1, p2);
    } else if constexpr (Size == 4) {
      return load_xor<uint32_t>(p1, p2);
    } else if constexpr (Size == 8) {
      return load_not_equal<uint64_t>(p1, p2);
    } else if constexpr (details::useArrayType<Size, MaxSize>()) {
      for (size_t offset = 0; offset < Size; offset += MaxSize)
        if (auto value = Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
          return value;
    } else {
      deferred_static_assert("Unimplemented Size");
    }
    return BcmpReturnType::ZERO();
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1 + count - SIZE, p2 + count - SIZE);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1, p2) | tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    static_assert(Size > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      if (auto value = block(p1 + offset, p2 + offset))
        return value;
      offset += SIZE;
    } while (offset < count - SIZE);
    return tail(p1, p2, count);
  }
};
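
// A usage sketch (illustrative): bcmp only reports equality, so a dispatcher
// can cover a range of counts with one fixed-size block and an overlapping
// tail, e.g. for 8 <= count <= 16:
//   return Bcmp<8>::head_tail(p1, p2, count);
// 'head_tail' ORs the two (possibly overlapping) block comparisons since only
// zero / non-zero matters, not the sign of the difference.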

///////////////////////////////////////////////////////////////////////////////
// Memcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Memcmp {
  static constexpr size_t SIZE = Size;
  static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
                                        ? sizeof(uint64_t)
                                        : sizeof(uint32_t);

  // Loads a value and converts it to big-endian so that unsigned numeric
  // comparison matches lexicographic byte comparison.
  template <typename T> LIBC_INLINE static T load_be(CPtr ptr) {
    return Endian::to_big_endian(load<T>(ptr));
  }

  // For small types the difference fits the return type without overflow and
  // preserves the comparison order.
  template <typename T>
  LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) {
    return load_be<T>(p1) - load_be<T>(p2);
  }

  // For wide types we compare explicitly instead of subtracting, as the
  // difference may not fit the return type.
  template <typename T>
  LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) {
    const auto la = load_be<T>(p1);
    const auto lb = load_be<T>(p2);
    return la > lb ? 1 : la < lb ? -1 : 0;
  }

  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    if constexpr (Size == 1) {
      return load_be_diff<uint8_t>(p1, p2);
    } else if constexpr (Size == 2) {
      return load_be_diff<uint16_t>(p1, p2);
    } else if constexpr (Size == 4) {
      return load_be_cmp<uint32_t>(p1, p2);
    } else if constexpr (Size == 8) {
      return load_be_cmp<uint64_t>(p1, p2);
    } else if constexpr (details::useArrayType<Size, MaxSize>()) {
      for (size_t offset = 0; offset < Size; offset += MaxSize)
        if (Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
          return Memcmp<MaxSize>::block(p1 + offset, p2 + offset);
      return MemcmpReturnType::ZERO();
    } else if constexpr (Size == 3) {
      if (auto value = Memcmp<2>::block(p1, p2))
        return value;
      return Memcmp<1>::block(p1 + 2, p2 + 2);
    } else {
      deferred_static_assert("Unimplemented Size");
    }
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1 + count - SIZE, p2 + count - SIZE);
  }

  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if (auto value = block(p1, p2))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    static_assert(Size > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      if (auto value = block(p1 + offset, p2 + offset))
        return value;
      offset += SIZE;
    } while (offset < count - SIZE);
    return tail(p1, p2, count);
  }
};

} // namespace __llvm_libc::generic

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H