xref: /llvm-project/libc/src/string/memory_utils/x86_64/inline_memset.h (revision 5ff3ff33ff930e4ec49da7910612d8a41eb068cb)
//===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
101f578347SGuillaume Chatelet 
111f578347SGuillaume Chatelet #include "src/__support/macros/attributes.h" // LIBC_INLINE
12*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
131f578347SGuillaume Chatelet #include "src/string/memory_utils/op_generic.h"
141f578347SGuillaume Chatelet #include "src/string/memory_utils/op_x86.h"
151f578347SGuillaume Chatelet #include "src/string/memory_utils/utils.h" // Ptr, CPtr
161f578347SGuillaume Chatelet 
171f578347SGuillaume Chatelet #include <stddef.h> // size_t
181f578347SGuillaume Chatelet 
19*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
203153aa4cSdoshimili namespace x86 {
213153aa4cSdoshimili // Size of one cache line for software prefetching
2288d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64;
2388d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE =
2488d82b74SNick Desaulniers     K_ONE_CACHELINE_SIZE * 2;
2588d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE =
2688d82b74SNick Desaulniers     K_ONE_CACHELINE_SIZE * 5;
271f578347SGuillaume Chatelet 
2888d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET =
293153aa4cSdoshimili     LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING);
303153aa4cSdoshimili 
313153aa4cSdoshimili } // namespace x86
323153aa4cSdoshimili 
331f578347SGuillaume Chatelet #if defined(__AVX512F__)
341f578347SGuillaume Chatelet using uint128_t = generic_v128;
351f578347SGuillaume Chatelet using uint256_t = generic_v256;
361f578347SGuillaume Chatelet using uint512_t = generic_v512;
371f578347SGuillaume Chatelet #elif defined(__AVX__)
381f578347SGuillaume Chatelet using uint128_t = generic_v128;
391f578347SGuillaume Chatelet using uint256_t = generic_v256;
401f578347SGuillaume Chatelet using uint512_t = cpp::array<generic_v256, 2>;
411f578347SGuillaume Chatelet #elif defined(__SSE2__)
421f578347SGuillaume Chatelet using uint128_t = generic_v128;
431f578347SGuillaume Chatelet using uint256_t = cpp::array<generic_v128, 2>;
441f578347SGuillaume Chatelet using uint512_t = cpp::array<generic_v128, 4>;
451f578347SGuillaume Chatelet #else
461f578347SGuillaume Chatelet using uint128_t = cpp::array<uint64_t, 2>;
471f578347SGuillaume Chatelet using uint256_t = cpp::array<uint64_t, 4>;
481f578347SGuillaume Chatelet using uint512_t = cpp::array<uint64_t, 8>;
491f578347SGuillaume Chatelet #endif
501f578347SGuillaume Chatelet 
513153aa4cSdoshimili [[maybe_unused]] LIBC_INLINE static void
523153aa4cSdoshimili inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
5388d82b74SNick Desaulniers   constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE;
5488d82b74SNick Desaulniers   constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE;
553153aa4cSdoshimili   constexpr size_t SIZE = sizeof(uint256_t);
563153aa4cSdoshimili   // Prefetch one cache line
5788d82b74SNick Desaulniers   prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE);
583153aa4cSdoshimili   if (count <= 128)
593153aa4cSdoshimili     return generic::Memset<uint512_t>::head_tail(dst, value, count);
603153aa4cSdoshimili   // Prefetch the second cache line
6188d82b74SNick Desaulniers   prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE);
623153aa4cSdoshimili   // Aligned loop
633153aa4cSdoshimili   generic::Memset<uint256_t>::block(dst, value);
643153aa4cSdoshimili   align_to_next_boundary<32>(dst, count);
653153aa4cSdoshimili   if (count <= 192) {
663153aa4cSdoshimili     return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
673153aa4cSdoshimili   } else {
683153aa4cSdoshimili     generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value);
693153aa4cSdoshimili     size_t offset = 96;
703153aa4cSdoshimili     while (offset + PREFETCH_DEGREE + SIZE <= count) {
713153aa4cSdoshimili       prefetch_for_write(dst + offset + PREFETCH_DISTANCE);
723153aa4cSdoshimili       prefetch_for_write(dst + offset + PREFETCH_DISTANCE +
7388d82b74SNick Desaulniers                          x86::K_ONE_CACHELINE_SIZE);
743153aa4cSdoshimili       for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE)
753153aa4cSdoshimili         generic::Memset<uint256_t>::block(dst + offset, value);
763153aa4cSdoshimili     }
773153aa4cSdoshimili     generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset);
783153aa4cSdoshimili   }
793153aa4cSdoshimili }
803153aa4cSdoshimili 
813153aa4cSdoshimili [[maybe_unused]] LIBC_INLINE static void
823153aa4cSdoshimili inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
831f578347SGuillaume Chatelet   if (count == 0)
841f578347SGuillaume Chatelet     return;
851f578347SGuillaume Chatelet   if (count == 1)
861f578347SGuillaume Chatelet     return generic::Memset<uint8_t>::block(dst, value);
871f578347SGuillaume Chatelet   if (count == 2)
881f578347SGuillaume Chatelet     return generic::Memset<uint16_t>::block(dst, value);
891f578347SGuillaume Chatelet   if (count == 3)
901f578347SGuillaume Chatelet     return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
911f578347SGuillaume Chatelet   if (count <= 8)
921f578347SGuillaume Chatelet     return generic::Memset<uint32_t>::head_tail(dst, value, count);
931f578347SGuillaume Chatelet   if (count <= 16)
941f578347SGuillaume Chatelet     return generic::Memset<uint64_t>::head_tail(dst, value, count);
951f578347SGuillaume Chatelet   if (count <= 32)
961f578347SGuillaume Chatelet     return generic::Memset<uint128_t>::head_tail(dst, value, count);
971f578347SGuillaume Chatelet   if (count <= 64)
981f578347SGuillaume Chatelet     return generic::Memset<uint256_t>::head_tail(dst, value, count);
9988d82b74SNick Desaulniers   if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET)
1003153aa4cSdoshimili     return inline_memset_x86_gt64_sw_prefetching(dst, value, count);
1011f578347SGuillaume Chatelet   if (count <= 128)
1021f578347SGuillaume Chatelet     return generic::Memset<uint512_t>::head_tail(dst, value, count);
1031f578347SGuillaume Chatelet   // Aligned loop
1041f578347SGuillaume Chatelet   generic::Memset<uint256_t>::block(dst, value);
1051f578347SGuillaume Chatelet   align_to_next_boundary<32>(dst, count);
1061f578347SGuillaume Chatelet   return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
1071f578347SGuillaume Chatelet }
108*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
1091f578347SGuillaume Chatelet 
110270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
111