1 //===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H 9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H 10 11 #include "src/__support/macros/attributes.h" // LIBC_INLINE 12 #include "src/__support/macros/config.h" 13 #include "src/string/memory_utils/op_generic.h" 14 #include "src/string/memory_utils/op_x86.h" 15 #include "src/string/memory_utils/utils.h" // Ptr, CPtr 16 17 #include <stddef.h> // size_t 18 19 namespace LIBC_NAMESPACE_DECL { 20 21 [[maybe_unused]] LIBC_INLINE BcmpReturnType 22 inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) { 23 return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count); 24 } 25 26 #if defined(__SSE4_1__) 27 [[maybe_unused]] LIBC_INLINE BcmpReturnType 28 inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) { 29 if (count <= 32) 30 return generic::branchless_head_tail_neq<__m128i>(p1, p2, count); 31 return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count); 32 } 33 #endif // __SSE4_1__ 34 35 #if defined(__AVX__) 36 [[maybe_unused]] LIBC_INLINE BcmpReturnType 37 inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) { 38 if (count <= 32) 39 return generic::branchless_head_tail_neq<__m128i>(p1, p2, count); 40 if (count <= 64) 41 return generic::branchless_head_tail_neq<__m256i>(p1, p2, count); 42 return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count); 43 } 44 #endif // __AVX__ 45 46 #if defined(__AVX512BW__) 47 [[maybe_unused]] LIBC_INLINE BcmpReturnType 48 inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) { 49 if (count <= 32) 50 return generic::branchless_head_tail_neq<__m128i>(p1, p2, count); 51 if (count <= 64) 52 return generic::branchless_head_tail_neq<__m256i>(p1, p2, count); 53 if (count <= 128) 54 return generic::branchless_head_tail_neq<__m512i>(p1, p2, count); 55 return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count); 56 } 57 #endif // __AVX512BW__ 58 59 [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2, 60 size_t count) { 61 if (count == 0) 62 return BcmpReturnType::zero(); 63 if (count == 1) 64 return generic::Bcmp<uint8_t>::block(p1, p2); 65 if (count <= 4) 66 return generic::branchless_head_tail_neq<uint16_t>(p1, p2, count); 67 if (count <= 8) 68 return generic::branchless_head_tail_neq<uint32_t>(p1, p2, count); 69 if (count <= 16) 70 return generic::branchless_head_tail_neq<uint64_t>(p1, p2, count); 71 #if defined(__AVX512BW__) 72 return inline_bcmp_x86_avx512bw_gt16(p1, p2, count); 73 #elif defined(__AVX__) 74 return inline_bcmp_x86_avx_gt16(p1, p2, count); 75 #elif defined(__SSE4_1__) 76 return inline_bcmp_x86_sse41_gt16(p1, p2, count); 77 #else 78 return inline_bcmp_generic_gt16(p1, p2, count); 79 #endif 80 } 81 82 } // namespace LIBC_NAMESPACE_DECL 83 84 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H 85