//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types the maximum size would be 8
// (corresponding to uint64_t). On this platform, if we request the size 32, it
// would be treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with support for AVX, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

namespace __llvm_libc {
// Compiler types using the vector attributes.
using uint8x1_t = uint8_t __attribute__((__vector_size__(1)));
using uint8x2_t = uint8_t __attribute__((__vector_size__(2)));
using uint8x4_t = uint8_t __attribute__((__vector_size__(4)));
using uint8x8_t = uint8_t __attribute__((__vector_size__(8)));
using uint8x16_t = uint8_t __attribute__((__vector_size__(16)));
using uint8x32_t = uint8_t __attribute__((__vector_size__(32)));
using uint8x64_t = uint8_t __attribute__((__vector_size__(64)));
} // namespace __llvm_libc

namespace __llvm_libc::generic {
// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types,
// - vector : compiler types using the vector attributes,
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.
template <typename T>
constexpr bool is_scalar_v = cpp::is_integral_v<T> && cpp::is_unsigned_v<T>;

template <typename T>
constexpr bool is_vector_v =
    cpp::details::is_unqualified_any_of<T, uint8x1_t, uint8x2_t, uint8x4_t,
                                        uint8x8_t, uint8x16_t, uint8x32_t,
                                        uint8x64_t>();

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;

template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;

// Helper to retrieve the number of elements of a cpp::array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
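
// For exposition only, a few illustrative checks of the traits above. These
// asserts are examples added here, not part of the original interface:
//
//   static_assert(is_scalar_v<uint32_t>);               // unsigned integral
//   static_assert(!is_scalar_v<int>);                   // signed is rejected
//   static_assert(is_vector_v<uint8x16_t>);             // compiler vector
//   static_assert(is_array_v<cpp::array<uint64_t, 4>>); // array of scalars
//   static_assert(array_size_v<cpp::array<uint64_t, 4>> == 4);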
// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::__llvm_libc::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T Value;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      Value[I] = load<value_type>(src + (I * sizeof(value_type)));
    return Value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::__llvm_libc::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      store<value_type>(dst + (I * sizeof(value_type)), value[I]);
  }
}

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T Out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      Out[i] = value;
    return Out;
  }
}

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
#define LLVM_LIBC_HAS_UINT64
#endif

namespace details {
// Checks that the types are sorted in strictly decreasing order of size,
// i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last).
template <typename First> constexpr bool is_decreasing_size() {
  return sizeof(First) == 1;
}
template <typename First, typename Second, typename... Next>
constexpr bool is_decreasing_size() {
  if constexpr (sizeof...(Next) > 0)
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Next...>();
  else
    return sizeof(First) > sizeof(Second) && is_decreasing_size<Second>();
}

template <size_t Size, typename... Ts> struct Largest;
template <size_t Size> struct Largest<Size> : cpp::type_identity<uint8_t> {};
template <size_t Size, typename T, typename... Ts>
struct Largest<Size, T, Ts...> {
  using next = Largest<Size, Ts...>;
  using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>;
};

} // namespace details

// 'SupportedTypes' holds a list of natively supported types.
// The types are instantiations of ScalarType or VectorType.
// They should be ordered in strictly decreasing order of size.
// The 'TypeFor<Size>' alias retrieves the largest supported type that can
// handle 'Size' bytes. e.g.
//
//   using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
//   using Type = ST::TypeFor<10>;
//   static_assert(cpp::is_same_v<Type, ScalarType<uint16_t>>);

template <typename First, typename... Ts> struct SupportedTypes {
  static_assert(details::is_decreasing_size<First, Ts...>());

  using MaxType = First;

  template <size_t Size>
  using TypeFor = typename details::Largest<Size, First, Ts...>::type;
};

// Returns the sum of the sizeof of all the TS types.
template <typename... TS> static constexpr size_t sum_sizeof() {
  return (... + sizeof(TS));
}
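
// A minimal concrete illustration of the mechanism (added for exposition,
// using the plain scalar types from this file):
//
//   using ST = SupportedTypes<uint32_t, uint16_t, uint8_t>;
//   static_assert(cpp::is_same_v<ST::MaxType, uint32_t>);
//   static_assert(cpp::is_same_v<ST::TypeFor<3>, uint16_t>); // largest <= 3
//   static_assert(sum_sizeof<uint32_t, uint16_t>() == 6);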
// Map from sizes to structures offering static load, store and splat methods.
// Note: On platforms lacking vector support, we use the ArrayType below and
// decompose the operation into smaller pieces.

// Lists the generic native types to use for Memset and Memmove operations.
// TODO: Inject the native types within Memset and Memmove depending on the
// target architecture and derive MaxSize from it.
using NativeTypeMap = SupportedTypes<uint8x64_t, //
                                     uint8x32_t, //
                                     uint8x16_t,
#if defined(LLVM_LIBC_HAS_UINT64)
                                     uint64_t, // Not available on 32-bit
#endif
                                     uint32_t, //
                                     uint16_t, //
                                     uint8_t>;

namespace details {

// Helper to test if a type is void.
template <typename T> inline constexpr bool is_void_v = cpp::is_same_v<T, void>;

// In case the 'Size' is not supported we can fall back to a sequence of
// smaller operations using the largest natively supported type.
template <size_t Size, size_t MaxSize> static constexpr bool useArrayType() {
  return (Size > MaxSize) && ((Size % MaxSize) == 0) &&
         !details::is_void_v<NativeTypeMap::TypeFor<MaxSize>>;
}

// Compute the type to handle an operation of 'Size' bytes knowing that the
// underlying platform only supports native types up to MaxSize bytes.
template <size_t Size, size_t MaxSize>
using getTypeFor = cpp::conditional_t<
    useArrayType<Size, MaxSize>(),
    cpp::array<NativeTypeMap::TypeFor<MaxSize>, Size / MaxSize>,
    NativeTypeMap::TypeFor<Size>>;

} // namespace details

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T, typename... TS> struct Memset {
  static constexpr size_t SIZE = sum_sizeof<T, TS...>();

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    static_assert(is_element_type_v<T>);
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t I = 0; I < array_size_v<T>; ++I)
        store<value_type>(dst + (I * sizeof(value_type)), Splat);
    }
    if constexpr (sizeof...(TS))
      Memset<TS...>::block(dst + sizeof(T), value);
  }

  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }
};
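
// Illustrative usage (added for exposition; hypothetical call sites, not part
// of this file). A fixed 35-byte memset can chain heterogeneous blocks, while
// a runtime count in [16, 32] is covered by one overlapping head_tail:
//
//   Memset<uint8x32_t, uint16_t, uint8_t>::block(dst, value); // 32 + 2 + 1
//   Memset<uint8x16_t>::head_tail(dst, value, count); // 16 <= count <= 32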
///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static constexpr size_t SIZE = sum_sizeof<T>();

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigation is needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }

  // Align forward: suitable when dst < src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward: suitable when dst > src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }
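
  // Illustrative composition (added for exposition; sketch of a hypothetical
  // caller, not part of this file). A memmove for dst < src could first reach
  // alignment with align_forward, then run the loop defined below:
  //
  //   align_forward<Arg::Src>(dst, src, count); // one head_tail + adjust
  //   loop_and_tail_forward(dst, src, count);   // aligned bulk, then tail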
  // Move forward: suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Move backward: suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
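
// Illustrative dispatch (added for exposition; hypothetical, not part of this
// file). The forward and backward variants are selected from pointer order,
// since each variant only overwrites bytes it has already loaded:
//
//   if (dst < src)
//     Memmove<uint8x16_t>::loop_and_tail_forward(dst, src, count);
//   else if (dst > src)
//     Memmove<uint8x16_t>::loop_and_tail_backward(dst, src, count);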
///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Bcmp {
  static constexpr size_t SIZE = Size;
  static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
                                        ? sizeof(uint64_t)
                                        : sizeof(uint32_t);

  template <typename T> LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) {
    static_assert(sizeof(T) <= sizeof(uint32_t));
    return load<T>(p1) ^ load<T>(p2);
  }

  template <typename T>
  LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) {
    return load<T>(p1) != load<T>(p2);
  }

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if constexpr (Size == 1) {
      return load_xor<uint8_t>(p1, p2);
    } else if constexpr (Size == 2) {
      return load_xor<uint16_t>(p1, p2);
    } else if constexpr (Size == 4) {
      return load_xor<uint32_t>(p1, p2);
    } else if constexpr (Size == 8) {
      return load_not_equal<uint64_t>(p1, p2);
    } else if constexpr (details::useArrayType<Size, MaxSize>()) {
      for (size_t offset = 0; offset < Size; offset += MaxSize)
        if (auto value = Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
          return value;
    } else {
      deferred_static_assert("Unimplemented Size");
    }
    return BcmpReturnType::ZERO();
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1 + count - SIZE, p2 + count - SIZE);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1, p2) | tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    static_assert(Size > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      if (auto value = block(p1 + offset, p2 + offset))
        return value;
      offset += SIZE;
    } while (offset < count - SIZE);
    return tail(p1, p2, count);
  }
};
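
// For exposition (added sketch, not part of this file): with MaxSize == 8,
// Bcmp<16>::block takes the array branch above and is equivalent to:
//
//   if (auto value = Bcmp<8>::block(p1, p2))
//     return value;
//   return Bcmp<8>::block(p1 + 8, p2 + 8);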
///////////////////////////////////////////////////////////////////////////////
// Memcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Memcmp {
  static constexpr size_t SIZE = Size;
  static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
                                        ? sizeof(uint64_t)
                                        : sizeof(uint32_t);

  template <typename T> LIBC_INLINE static T load_be(CPtr ptr) {
    return Endian::to_big_endian(load<T>(ptr));
  }

  template <typename T>
  LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) {
    return load_be<T>(p1) - load_be<T>(p2);
  }

  template <typename T>
  LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) {
    const auto la = load_be<T>(p1);
    const auto lb = load_be<T>(p2);
    return la > lb ? 1 : la < lb ? -1 : 0;
  }

  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    if constexpr (Size == 1) {
      return load_be_diff<uint8_t>(p1, p2);
    } else if constexpr (Size == 2) {
      return load_be_diff<uint16_t>(p1, p2);
    } else if constexpr (Size == 4) {
      return load_be_cmp<uint32_t>(p1, p2);
    } else if constexpr (Size == 8) {
      return load_be_cmp<uint64_t>(p1, p2);
    } else if constexpr (details::useArrayType<Size, MaxSize>()) {
      for (size_t offset = 0; offset < Size; offset += MaxSize)
        if (Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
          return Memcmp<MaxSize>::block(p1 + offset, p2 + offset);
      return MemcmpReturnType::ZERO();
    } else if constexpr (Size == 3) {
      if (auto value = Memcmp<2>::block(p1, p2))
        return value;
      return Memcmp<1>::block(p1 + 2, p2 + 2);
    } else {
      deferred_static_assert("Unimplemented Size");
    }
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block(p1 + count - SIZE, p2 + count - SIZE);
  }

  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if (auto value = block(p1, p2))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    static_assert(Size > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      if (auto value = block(p1 + offset, p2 + offset))
        return value;
      offset += SIZE;
    } while (offset < count - SIZE);
    return tail(p1, p2, count);
  }
};

} // namespace __llvm_libc::generic

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H