1 //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 10 11 #include "src/__support/macros/attributes.h" // LIBC_INLINE 12 #include "src/__support/macros/config.h" 13 #include "src/string/memory_utils/op_generic.h" 14 #include "src/string/memory_utils/op_x86.h" 15 #include "src/string/memory_utils/utils.h" // Ptr, CPtr 16 17 #include <stddef.h> // size_t 18 19 namespace LIBC_NAMESPACE_DECL { 20 namespace x86 { 21 // Size of one cache line for software prefetching 22 LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64; 23 LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE = 24 K_ONE_CACHELINE_SIZE * 2; 25 LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE = 26 K_ONE_CACHELINE_SIZE * 5; 27 28 LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET = 29 LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING); 30 31 } // namespace x86 32 33 #if defined(__AVX512F__) 34 using uint128_t = generic_v128; 35 using uint256_t = generic_v256; 36 using uint512_t = generic_v512; 37 #elif defined(__AVX__) 38 using uint128_t = generic_v128; 39 using uint256_t = generic_v256; 40 using uint512_t = cpp::array<generic_v256, 2>; 41 #elif defined(__SSE2__) 42 using uint128_t = generic_v128; 43 using uint256_t = cpp::array<generic_v128, 2>; 44 using uint512_t = cpp::array<generic_v128, 4>; 45 #else 46 using uint128_t = cpp::array<uint64_t, 2>; 47 using uint256_t = cpp::array<uint64_t, 4>; 48 using uint512_t = cpp::array<uint64_t, 8>; 49 #endif 50 51 [[maybe_unused]] LIBC_INLINE static void 52 inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) { 53 constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE; 54 constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE; 55 constexpr size_t SIZE = sizeof(uint256_t); 56 // Prefetch one cache line 57 prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE); 58 if (count <= 128) 59 return generic::Memset<uint512_t>::head_tail(dst, value, count); 60 // Prefetch the second cache line 61 prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE); 62 // Aligned loop 63 generic::Memset<uint256_t>::block(dst, value); 64 align_to_next_boundary<32>(dst, count); 65 if (count <= 192) { 66 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count); 67 } else { 68 generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value); 69 size_t offset = 96; 70 while (offset + PREFETCH_DEGREE + SIZE <= count) { 71 prefetch_for_write(dst + offset + PREFETCH_DISTANCE); 72 prefetch_for_write(dst + offset + PREFETCH_DISTANCE + 73 x86::K_ONE_CACHELINE_SIZE); 74 for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE) 75 generic::Memset<uint256_t>::block(dst + offset, value); 76 } 77 generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset); 78 } 79 } 80 81 [[maybe_unused]] LIBC_INLINE static void 82 inline_memset_x86(Ptr dst, uint8_t value, size_t count) { 83 if (count == 0) 84 return; 85 if (count == 1) 86 return generic::Memset<uint8_t>::block(dst, value); 87 if (count == 2) 88 return generic::Memset<uint16_t>::block(dst, value); 89 if (count == 3) 90 return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value); 91 if (count <= 8) 92 return generic::Memset<uint32_t>::head_tail(dst, value, count); 93 if (count <= 16) 94 return generic::Memset<uint64_t>::head_tail(dst, value, count); 95 if (count <= 32) 96 return generic::Memset<uint128_t>::head_tail(dst, value, count); 97 if (count <= 64) 98 return generic::Memset<uint256_t>::head_tail(dst, value, count); 99 if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET) 100 return inline_memset_x86_gt64_sw_prefetching(dst, value, count); 101 if (count <= 128) 102 return generic::Memset<uint512_t>::head_tail(dst, value, count); 103 // Aligned loop 104 generic::Memset<uint256_t>::block(dst, value); 105 align_to_next_boundary<32>(dst, count); 106 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count); 107 } 108 } // namespace LIBC_NAMESPACE_DECL 109 110 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 111