1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file crc32_arm64.h
6 /// \brief CRC32 calculation with ARM64 optimization
7 //
8 // Authors: Chenxi Mao
9 // Jia Tan
10 // Hans Jansen
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13
14 #ifndef LZMA_CRC32_ARM64_H
15 #define LZMA_CRC32_ARM64_H
16
// MSVC always has the CRC intrinsics available when building for ARM64,
// so there is no need to include any header files.
19 #ifndef _MSC_VER
20 # include <arm_acle.h>
21 #endif
22
23 // If both versions are going to be built, we need runtime detection
24 // to check if the instructions are supported.
25 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
26 # if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
27 # include <sys/auxv.h>
28 # elif defined(_WIN32)
29 # include <processthreadsapi.h>
30 # elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
31 # include <sys/sysctl.h>
32 # endif
33 #endif
34
// crc_attr_target is placed in front of a function definition so that
// the CRC extension can be used inside that function.
//
// Some EDG-based compilers support ARM64 and define __GNUC__
// (such as Nvidia's nvcc), but do not support function attributes.
// For those compilers, and for compilers that define neither __GNUC__
// nor __clang__, the macro expands to nothing.
//
// NOTE: Build systems check for this too, keep them in sync with this.
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
#	define crc_attr_target __attribute__((__target__("+crc")))
#else
#	define crc_attr_target
#endif
44
45
46 crc_attr_target
47 static uint32_t
crc32_arch_optimized(const uint8_t * buf,size_t size,uint32_t crc)48 crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
49 {
50 crc = ~crc;
51
52 // Align the input buffer because this was shown to be
53 // significantly faster than unaligned accesses.
54 const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
55
56 for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
57 crc = __crc32b(crc, *buf);
58
59 size -= align_amount;
60
61 // Process 8 bytes at a time. The end point is determined by
62 // ignoring the least significant three bits of size to ensure
63 // we do not process past the bounds of the buffer. This guarantees
64 // that limit is a multiple of 8 and is strictly less than size.
65 for (const uint8_t *limit = buf + (size & ~(size_t)7);
66 buf < limit; buf += 8)
67 crc = __crc32d(crc, aligned_read64le(buf));
68
69 // Process the remaining bytes that are not 8 byte aligned.
70 for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
71 crc = __crc32b(crc, *buf);
72
73 return ~crc;
74 }
75
76
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
// Runtime check for the CRC32 instructions. This is only built when
// both CRC32_GENERIC and CRC32_ARCH_OPTIMIZED are defined, that is,
// when both versions are built and one must be chosen at runtime.
//
// Returns true if the platform-specific query reports that the CRC32
// instructions are available. Returns false if it reports that they
// are not, or (with elf_aux_info() and sysctlbyname()) if the query
// itself fails.
static inline bool
is_arch_extension_supported(void)
{
#if defined(HAVE_GETAUXVAL)
	return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;

#elif defined(HAVE_ELF_AUX_INFO)
	unsigned long feature_flags;

	// feature_flags is only read if elf_aux_info() succeeded.
	if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0)
		return false;

	return (feature_flags & HWCAP_CRC32) != 0;

#elif defined(_WIN32)
	return IsProcessorFeaturePresent(
			PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);

#elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
	int has_crc32 = 0;
	size_t size = sizeof(has_crc32);

	// The sysctlbyname() function requires a string identifier for the
	// CPU feature it tests. The Apple documentation lists the string
	// "hw.optional.armv8_crc32", which can be found here:
	// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619
	if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32,
			&size, NULL, 0) != 0)
		return false;

	return has_crc32;

#else
	// If a runtime detection method cannot be found, then this must
	// be a compile time error. The checks in crc_common.h should ensure
	// a runtime detection method is always found if this function is
	// built. It would be possible to just return false here, but this
	// is inefficient for binary size and runtime since only the generic
	// method could ever be used.
#	error Runtime detection method unavailable.
#endif
}
#endif
121
122 #endif // LZMA_CRC32_ARM64_H
123