11f578347SGuillaume Chatelet //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===// 21f578347SGuillaume Chatelet // 31f578347SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41f578347SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information. 51f578347SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61f578347SGuillaume Chatelet // 71f578347SGuillaume Chatelet //===----------------------------------------------------------------------===// 8270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 9270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 101f578347SGuillaume Chatelet 111f578347SGuillaume Chatelet #include "src/__support/macros/attributes.h" // LIBC_INLINE 12*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 131f578347SGuillaume Chatelet #include "src/string/memory_utils/op_generic.h" 141f578347SGuillaume Chatelet #include "src/string/memory_utils/op_x86.h" 151f578347SGuillaume Chatelet #include "src/string/memory_utils/utils.h" // Ptr, CPtr 161f578347SGuillaume Chatelet 171f578347SGuillaume Chatelet #include <stddef.h> // size_t 181f578347SGuillaume Chatelet 19*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 203153aa4cSdoshimili namespace x86 { 213153aa4cSdoshimili // Size of one cache line for software prefetching 2288d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64; 2388d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE = 2488d82b74SNick Desaulniers K_ONE_CACHELINE_SIZE * 2; 2588d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE = 2688d82b74SNick Desaulniers K_ONE_CACHELINE_SIZE * 5; 271f578347SGuillaume Chatelet 2888d82b74SNick Desaulniers LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET = 293153aa4cSdoshimili LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING); 303153aa4cSdoshimili 313153aa4cSdoshimili } // namespace x86 323153aa4cSdoshimili 331f578347SGuillaume Chatelet #if defined(__AVX512F__) 341f578347SGuillaume Chatelet using uint128_t = generic_v128; 351f578347SGuillaume Chatelet using uint256_t = generic_v256; 361f578347SGuillaume Chatelet using uint512_t = generic_v512; 371f578347SGuillaume Chatelet #elif defined(__AVX__) 381f578347SGuillaume Chatelet using uint128_t = generic_v128; 391f578347SGuillaume Chatelet using uint256_t = generic_v256; 401f578347SGuillaume Chatelet using uint512_t = cpp::array<generic_v256, 2>; 411f578347SGuillaume Chatelet #elif defined(__SSE2__) 421f578347SGuillaume Chatelet using uint128_t = generic_v128; 431f578347SGuillaume Chatelet using uint256_t = cpp::array<generic_v128, 2>; 441f578347SGuillaume Chatelet using uint512_t = cpp::array<generic_v128, 4>; 451f578347SGuillaume Chatelet #else 461f578347SGuillaume Chatelet using uint128_t = cpp::array<uint64_t, 2>; 471f578347SGuillaume Chatelet using uint256_t = cpp::array<uint64_t, 4>; 481f578347SGuillaume Chatelet using uint512_t = cpp::array<uint64_t, 8>; 491f578347SGuillaume Chatelet #endif 501f578347SGuillaume Chatelet 513153aa4cSdoshimili [[maybe_unused]] LIBC_INLINE static void 523153aa4cSdoshimili inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) { 5388d82b74SNick Desaulniers constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE; 5488d82b74SNick Desaulniers constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE; 553153aa4cSdoshimili constexpr size_t SIZE = sizeof(uint256_t); 563153aa4cSdoshimili // Prefetch one cache line 5788d82b74SNick Desaulniers prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE); 583153aa4cSdoshimili if (count <= 128) 593153aa4cSdoshimili return generic::Memset<uint512_t>::head_tail(dst, value, count); 603153aa4cSdoshimili // Prefetch the second cache line 6188d82b74SNick Desaulniers prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE); 623153aa4cSdoshimili // Aligned loop 633153aa4cSdoshimili generic::Memset<uint256_t>::block(dst, value); 643153aa4cSdoshimili align_to_next_boundary<32>(dst, count); 653153aa4cSdoshimili if (count <= 192) { 663153aa4cSdoshimili return generic::Memset<uint256_t>::loop_and_tail(dst, value, count); 673153aa4cSdoshimili } else { 683153aa4cSdoshimili generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value); 693153aa4cSdoshimili size_t offset = 96; 703153aa4cSdoshimili while (offset + PREFETCH_DEGREE + SIZE <= count) { 713153aa4cSdoshimili prefetch_for_write(dst + offset + PREFETCH_DISTANCE); 723153aa4cSdoshimili prefetch_for_write(dst + offset + PREFETCH_DISTANCE + 7388d82b74SNick Desaulniers x86::K_ONE_CACHELINE_SIZE); 743153aa4cSdoshimili for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE) 753153aa4cSdoshimili generic::Memset<uint256_t>::block(dst + offset, value); 763153aa4cSdoshimili } 773153aa4cSdoshimili generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset); 783153aa4cSdoshimili } 793153aa4cSdoshimili } 803153aa4cSdoshimili 813153aa4cSdoshimili [[maybe_unused]] LIBC_INLINE static void 823153aa4cSdoshimili inline_memset_x86(Ptr dst, uint8_t value, size_t count) { 831f578347SGuillaume Chatelet if (count == 0) 841f578347SGuillaume Chatelet return; 851f578347SGuillaume Chatelet if (count == 1) 861f578347SGuillaume Chatelet return generic::Memset<uint8_t>::block(dst, value); 871f578347SGuillaume Chatelet if (count == 2) 881f578347SGuillaume Chatelet return generic::Memset<uint16_t>::block(dst, value); 891f578347SGuillaume Chatelet if (count == 3) 901f578347SGuillaume Chatelet return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value); 911f578347SGuillaume Chatelet if (count <= 8) 921f578347SGuillaume Chatelet return generic::Memset<uint32_t>::head_tail(dst, value, count); 931f578347SGuillaume Chatelet if (count <= 16) 941f578347SGuillaume Chatelet return generic::Memset<uint64_t>::head_tail(dst, value, count); 951f578347SGuillaume Chatelet if (count <= 32) 961f578347SGuillaume Chatelet return generic::Memset<uint128_t>::head_tail(dst, value, count); 971f578347SGuillaume Chatelet if (count <= 64) 981f578347SGuillaume Chatelet return generic::Memset<uint256_t>::head_tail(dst, value, count); 9988d82b74SNick Desaulniers if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET) 1003153aa4cSdoshimili return inline_memset_x86_gt64_sw_prefetching(dst, value, count); 1011f578347SGuillaume Chatelet if (count <= 128) 1021f578347SGuillaume Chatelet return generic::Memset<uint512_t>::head_tail(dst, value, count); 1031f578347SGuillaume Chatelet // Aligned loop 1041f578347SGuillaume Chatelet generic::Memset<uint256_t>::block(dst, value); 1051f578347SGuillaume Chatelet align_to_next_boundary<32>(dst, count); 1061f578347SGuillaume Chatelet return generic::Memset<uint256_t>::loop_and_tail(dst, value, count); 1071f578347SGuillaume Chatelet } 108*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 1091f578347SGuillaume Chatelet 110270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H 111