xref: /netbsd-src/external/bsd/zstd/dist/lib/common/bits.h (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #ifndef ZSTD_BITS_H
12*3117ece4Schristos #define ZSTD_BITS_H
13*3117ece4Schristos 
14*3117ece4Schristos #include "mem.h"
15*3117ece4Schristos 
16*3117ece4Schristos MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17*3117ece4Schristos {
18*3117ece4Schristos     assert(val != 0);
19*3117ece4Schristos     {
20*3117ece4Schristos         static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21*3117ece4Schristos                                                 30, 22, 20, 15, 25, 17, 4, 8,
22*3117ece4Schristos                                                 31, 27, 13, 23, 21, 19, 16, 7,
23*3117ece4Schristos                                                 26, 12, 18, 6, 11, 5, 10, 9};
24*3117ece4Schristos         return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25*3117ece4Schristos     }
26*3117ece4Schristos }
27*3117ece4Schristos 
28*3117ece4Schristos MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29*3117ece4Schristos {
30*3117ece4Schristos     assert(val != 0);
31*3117ece4Schristos #   if defined(_MSC_VER)
32*3117ece4Schristos #       if STATIC_BMI2 == 1
33*3117ece4Schristos             return (unsigned)_tzcnt_u32(val);
34*3117ece4Schristos #       else
35*3117ece4Schristos             if (val != 0) {
36*3117ece4Schristos                 unsigned long r;
37*3117ece4Schristos                 _BitScanForward(&r, val);
38*3117ece4Schristos                 return (unsigned)r;
39*3117ece4Schristos             } else {
40*3117ece4Schristos                 /* Should not reach this code path */
41*3117ece4Schristos                 __assume(0);
42*3117ece4Schristos             }
43*3117ece4Schristos #       endif
44*3117ece4Schristos #   elif defined(__GNUC__) && (__GNUC__ >= 4)
45*3117ece4Schristos         return (unsigned)__builtin_ctz(val);
46*3117ece4Schristos #   else
47*3117ece4Schristos         return ZSTD_countTrailingZeros32_fallback(val);
48*3117ece4Schristos #   endif
49*3117ece4Schristos }
50*3117ece4Schristos 
51*3117ece4Schristos MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52*3117ece4Schristos     assert(val != 0);
53*3117ece4Schristos     {
54*3117ece4Schristos         static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55*3117ece4Schristos                                             11, 14, 16, 18, 22, 25, 3, 30,
56*3117ece4Schristos                                             8, 12, 20, 28, 15, 17, 24, 7,
57*3117ece4Schristos                                             19, 27, 23, 6, 26, 5, 4, 31};
58*3117ece4Schristos         val |= val >> 1;
59*3117ece4Schristos         val |= val >> 2;
60*3117ece4Schristos         val |= val >> 4;
61*3117ece4Schristos         val |= val >> 8;
62*3117ece4Schristos         val |= val >> 16;
63*3117ece4Schristos         return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64*3117ece4Schristos     }
65*3117ece4Schristos }
66*3117ece4Schristos 
67*3117ece4Schristos MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68*3117ece4Schristos {
69*3117ece4Schristos     assert(val != 0);
70*3117ece4Schristos #   if defined(_MSC_VER)
71*3117ece4Schristos #       if STATIC_BMI2 == 1
72*3117ece4Schristos             return (unsigned)_lzcnt_u32(val);
73*3117ece4Schristos #       else
74*3117ece4Schristos             if (val != 0) {
75*3117ece4Schristos                 unsigned long r;
76*3117ece4Schristos                 _BitScanReverse(&r, val);
77*3117ece4Schristos                 return (unsigned)(31 - r);
78*3117ece4Schristos             } else {
79*3117ece4Schristos                 /* Should not reach this code path */
80*3117ece4Schristos                 __assume(0);
81*3117ece4Schristos             }
82*3117ece4Schristos #       endif
83*3117ece4Schristos #   elif defined(__GNUC__) && (__GNUC__ >= 4)
84*3117ece4Schristos         return (unsigned)__builtin_clz(val);
85*3117ece4Schristos #   else
86*3117ece4Schristos         return ZSTD_countLeadingZeros32_fallback(val);
87*3117ece4Schristos #   endif
88*3117ece4Schristos }
89*3117ece4Schristos 
90*3117ece4Schristos MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91*3117ece4Schristos {
92*3117ece4Schristos     assert(val != 0);
93*3117ece4Schristos #   if defined(_MSC_VER) && defined(_WIN64)
94*3117ece4Schristos #       if STATIC_BMI2 == 1
95*3117ece4Schristos             return (unsigned)_tzcnt_u64(val);
96*3117ece4Schristos #       else
97*3117ece4Schristos             if (val != 0) {
98*3117ece4Schristos                 unsigned long r;
99*3117ece4Schristos                 _BitScanForward64(&r, val);
100*3117ece4Schristos                 return (unsigned)r;
101*3117ece4Schristos             } else {
102*3117ece4Schristos                 /* Should not reach this code path */
103*3117ece4Schristos                 __assume(0);
104*3117ece4Schristos             }
105*3117ece4Schristos #       endif
106*3117ece4Schristos #   elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107*3117ece4Schristos         return (unsigned)__builtin_ctzll(val);
108*3117ece4Schristos #   else
109*3117ece4Schristos         {
110*3117ece4Schristos             U32 mostSignificantWord = (U32)(val >> 32);
111*3117ece4Schristos             U32 leastSignificantWord = (U32)val;
112*3117ece4Schristos             if (leastSignificantWord == 0) {
113*3117ece4Schristos                 return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114*3117ece4Schristos             } else {
115*3117ece4Schristos                 return ZSTD_countTrailingZeros32(leastSignificantWord);
116*3117ece4Schristos             }
117*3117ece4Schristos         }
118*3117ece4Schristos #   endif
119*3117ece4Schristos }
120*3117ece4Schristos 
121*3117ece4Schristos MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122*3117ece4Schristos {
123*3117ece4Schristos     assert(val != 0);
124*3117ece4Schristos #   if defined(_MSC_VER) && defined(_WIN64)
125*3117ece4Schristos #       if STATIC_BMI2 == 1
126*3117ece4Schristos             return (unsigned)_lzcnt_u64(val);
127*3117ece4Schristos #       else
128*3117ece4Schristos             if (val != 0) {
129*3117ece4Schristos                 unsigned long r;
130*3117ece4Schristos                 _BitScanReverse64(&r, val);
131*3117ece4Schristos                 return (unsigned)(63 - r);
132*3117ece4Schristos             } else {
133*3117ece4Schristos                 /* Should not reach this code path */
134*3117ece4Schristos                 __assume(0);
135*3117ece4Schristos             }
136*3117ece4Schristos #       endif
137*3117ece4Schristos #   elif defined(__GNUC__) && (__GNUC__ >= 4)
138*3117ece4Schristos         return (unsigned)(__builtin_clzll(val));
139*3117ece4Schristos #   else
140*3117ece4Schristos         {
141*3117ece4Schristos             U32 mostSignificantWord = (U32)(val >> 32);
142*3117ece4Schristos             U32 leastSignificantWord = (U32)val;
143*3117ece4Schristos             if (mostSignificantWord == 0) {
144*3117ece4Schristos                 return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145*3117ece4Schristos             } else {
146*3117ece4Schristos                 return ZSTD_countLeadingZeros32(mostSignificantWord);
147*3117ece4Schristos             }
148*3117ece4Schristos         }
149*3117ece4Schristos #   endif
150*3117ece4Schristos }
151*3117ece4Schristos 
152*3117ece4Schristos MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153*3117ece4Schristos {
154*3117ece4Schristos     if (MEM_isLittleEndian()) {
155*3117ece4Schristos         if (MEM_64bits()) {
156*3117ece4Schristos             return ZSTD_countTrailingZeros64((U64)val) >> 3;
157*3117ece4Schristos         } else {
158*3117ece4Schristos             return ZSTD_countTrailingZeros32((U32)val) >> 3;
159*3117ece4Schristos         }
160*3117ece4Schristos     } else {  /* Big Endian CPU */
161*3117ece4Schristos         if (MEM_64bits()) {
162*3117ece4Schristos             return ZSTD_countLeadingZeros64((U64)val) >> 3;
163*3117ece4Schristos         } else {
164*3117ece4Schristos             return ZSTD_countLeadingZeros32((U32)val) >> 3;
165*3117ece4Schristos         }
166*3117ece4Schristos     }
167*3117ece4Schristos }
168*3117ece4Schristos 
169*3117ece4Schristos MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
170*3117ece4Schristos {
171*3117ece4Schristos     assert(val != 0);
172*3117ece4Schristos     return 31 - ZSTD_countLeadingZeros32(val);
173*3117ece4Schristos }
174*3117ece4Schristos 
175*3117ece4Schristos /* ZSTD_rotateRight_*():
176*3117ece4Schristos  * Rotates a bitfield to the right by "count" bits.
177*3117ece4Schristos  * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178*3117ece4Schristos  */
179*3117ece4Schristos MEM_STATIC
180*3117ece4Schristos U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181*3117ece4Schristos     assert(count < 64);
182*3117ece4Schristos     count &= 0x3F; /* for fickle pattern recognition */
183*3117ece4Schristos     return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184*3117ece4Schristos }
185*3117ece4Schristos 
186*3117ece4Schristos MEM_STATIC
187*3117ece4Schristos U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188*3117ece4Schristos     assert(count < 32);
189*3117ece4Schristos     count &= 0x1F; /* for fickle pattern recognition */
190*3117ece4Schristos     return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191*3117ece4Schristos }
192*3117ece4Schristos 
193*3117ece4Schristos MEM_STATIC
194*3117ece4Schristos U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195*3117ece4Schristos     assert(count < 16);
196*3117ece4Schristos     count &= 0x0F; /* for fickle pattern recognition */
197*3117ece4Schristos     return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198*3117ece4Schristos }
199*3117ece4Schristos 
200*3117ece4Schristos #endif /* ZSTD_BITS_H */
201