xref: /netbsd-src/external/bsd/zstd/dist/lib/common/bitstream.h (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /* ******************************************************************
2*3117ece4Schristos  * bitstream
3*3117ece4Schristos  * Part of FSE library
4*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
5*3117ece4Schristos  *
6*3117ece4Schristos  * You can contact the author at :
7*3117ece4Schristos  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*3117ece4Schristos  *
9*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
10*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
12*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
13*3117ece4Schristos ****************************************************************** */
14*3117ece4Schristos #ifndef BITSTREAM_H_MODULE
15*3117ece4Schristos #define BITSTREAM_H_MODULE
16*3117ece4Schristos 
17*3117ece4Schristos #if defined (__cplusplus)
18*3117ece4Schristos extern "C" {
19*3117ece4Schristos #endif
20*3117ece4Schristos /*
21*3117ece4Schristos *  This API consists of small unitary functions, which must be inlined for best performance.
22*3117ece4Schristos *  Since link-time-optimization is not available for all compilers,
23*3117ece4Schristos *  these functions are defined into a .h to be included.
24*3117ece4Schristos */
25*3117ece4Schristos 
26*3117ece4Schristos /*-****************************************
27*3117ece4Schristos *  Dependencies
28*3117ece4Schristos ******************************************/
29*3117ece4Schristos #include "mem.h"            /* unaligned access routines */
30*3117ece4Schristos #include "compiler.h"       /* UNLIKELY() */
31*3117ece4Schristos #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
32*3117ece4Schristos #include "error_private.h"  /* error codes and messages */
33*3117ece4Schristos #include "bits.h"           /* ZSTD_highbit32 */
34*3117ece4Schristos 
35*3117ece4Schristos 
36*3117ece4Schristos /*=========================================
37*3117ece4Schristos *  Target specific
38*3117ece4Schristos =========================================*/
39*3117ece4Schristos #ifndef ZSTD_NO_INTRINSICS
40*3117ece4Schristos #  if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41*3117ece4Schristos #    include <immintrin.h>   /* support for bextr (experimental)/bzhi */
42*3117ece4Schristos #  elif defined(__ICCARM__)
43*3117ece4Schristos #    include <intrinsics.h>
44*3117ece4Schristos #  endif
45*3117ece4Schristos #endif
46*3117ece4Schristos 
47*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN_32  25
48*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN_64  57
49*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
50*3117ece4Schristos 
51*3117ece4Schristos 
52*3117ece4Schristos /*-******************************************
53*3117ece4Schristos *  bitStream encoding API (write forward)
54*3117ece4Schristos ********************************************/
55*3117ece4Schristos /* bitStream can mix input from multiple sources.
56*3117ece4Schristos  * A critical property of these streams is that they encode and decode in **reverse** direction.
57*3117ece4Schristos  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
58*3117ece4Schristos  */
59*3117ece4Schristos typedef struct {
60*3117ece4Schristos     size_t bitContainer;
61*3117ece4Schristos     unsigned bitPos;
62*3117ece4Schristos     char*  startPtr;
63*3117ece4Schristos     char*  ptr;
64*3117ece4Schristos     char*  endPtr;
65*3117ece4Schristos } BIT_CStream_t;
66*3117ece4Schristos 
67*3117ece4Schristos MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
68*3117ece4Schristos MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
69*3117ece4Schristos MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
70*3117ece4Schristos MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
71*3117ece4Schristos 
72*3117ece4Schristos /* Start with initCStream, providing the size of buffer to write into.
73*3117ece4Schristos *  bitStream will never write outside of this buffer.
74*3117ece4Schristos *  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
75*3117ece4Schristos *
76*3117ece4Schristos *  bits are first added to a local register.
77*3117ece4Schristos *  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
78*3117ece4Schristos *  Writing data into memory is an explicit operation, performed by the flushBits function.
79*3117ece4Schristos *  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
80*3117ece4Schristos *  After a flushBits, a maximum of 7 bits might still be stored into local register.
81*3117ece4Schristos *
82*3117ece4Schristos *  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
83*3117ece4Schristos *
84*3117ece4Schristos *  Last operation is to close the bitStream.
85*3117ece4Schristos *  The function returns the final size of CStream in bytes.
86*3117ece4Schristos *  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
87*3117ece4Schristos */
88*3117ece4Schristos 
89*3117ece4Schristos 
90*3117ece4Schristos /*-********************************************
91*3117ece4Schristos *  bitStream decoding API (read backward)
92*3117ece4Schristos **********************************************/
93*3117ece4Schristos typedef size_t BitContainerType;
94*3117ece4Schristos typedef struct {
95*3117ece4Schristos     BitContainerType bitContainer;
96*3117ece4Schristos     unsigned bitsConsumed;
97*3117ece4Schristos     const char* ptr;
98*3117ece4Schristos     const char* start;
99*3117ece4Schristos     const char* limitPtr;
100*3117ece4Schristos } BIT_DStream_t;
101*3117ece4Schristos 
102*3117ece4Schristos typedef enum { BIT_DStream_unfinished = 0,  /* fully refilled */
103*3117ece4Schristos                BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
104*3117ece4Schristos                BIT_DStream_completed = 2,   /* bitstream entirely consumed, bit-exact */
105*3117ece4Schristos                BIT_DStream_overflow = 3     /* user requested more bits than present in bitstream */
106*3117ece4Schristos     } BIT_DStream_status;  /* result of BIT_reloadDStream() */
107*3117ece4Schristos 
108*3117ece4Schristos MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
109*3117ece4Schristos MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
110*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
111*3117ece4Schristos MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
112*3117ece4Schristos 
113*3117ece4Schristos 
114*3117ece4Schristos /* Start by invoking BIT_initDStream().
115*3117ece4Schristos *  A chunk of the bitStream is then stored into a local register.
116*3117ece4Schristos *  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
117*3117ece4Schristos *  You can then retrieve bitFields stored into the local register, **in reverse order**.
118*3117ece4Schristos *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
119*3117ece4Schristos *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
120*3117ece4Schristos *  Otherwise, it can be less than that, so proceed accordingly.
121*3117ece4Schristos *  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
122*3117ece4Schristos */
123*3117ece4Schristos 
124*3117ece4Schristos 
125*3117ece4Schristos /*-****************************************
126*3117ece4Schristos *  unsafe API
127*3117ece4Schristos ******************************************/
128*3117ece4Schristos MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
129*3117ece4Schristos /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
130*3117ece4Schristos 
131*3117ece4Schristos MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
132*3117ece4Schristos /* unsafe version; does not check buffer overflow */
133*3117ece4Schristos 
134*3117ece4Schristos MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
135*3117ece4Schristos /* faster, but works only if nbBits >= 1 */
136*3117ece4Schristos 
137*3117ece4Schristos /*=====    Local Constants   =====*/
138*3117ece4Schristos static const unsigned BIT_mask[] = {
139*3117ece4Schristos     0,          1,         3,         7,         0xF,       0x1F,
140*3117ece4Schristos     0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
141*3117ece4Schristos     0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
142*3117ece4Schristos     0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
143*3117ece4Schristos     0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
144*3117ece4Schristos     0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
145*3117ece4Schristos #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
146*3117ece4Schristos 
147*3117ece4Schristos /*-**************************************************************
148*3117ece4Schristos *  bitStream encoding
149*3117ece4Schristos ****************************************************************/
150*3117ece4Schristos /*! BIT_initCStream() :
151*3117ece4Schristos  *  `dstCapacity` must be > sizeof(size_t)
152*3117ece4Schristos  *  @return : 0 if success,
153*3117ece4Schristos  *            otherwise an error code (can be tested using ERR_isError()) */
154*3117ece4Schristos MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
155*3117ece4Schristos                                   void* startPtr, size_t dstCapacity)
156*3117ece4Schristos {
157*3117ece4Schristos     bitC->bitContainer = 0;
158*3117ece4Schristos     bitC->bitPos = 0;
159*3117ece4Schristos     bitC->startPtr = (char*)startPtr;
160*3117ece4Schristos     bitC->ptr = bitC->startPtr;
161*3117ece4Schristos     bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
162*3117ece4Schristos     if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
163*3117ece4Schristos     return 0;
164*3117ece4Schristos }
165*3117ece4Schristos 
166*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
167*3117ece4Schristos {
168*3117ece4Schristos #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
169*3117ece4Schristos     return  _bzhi_u64(bitContainer, nbBits);
170*3117ece4Schristos #else
171*3117ece4Schristos     assert(nbBits < BIT_MASK_SIZE);
172*3117ece4Schristos     return bitContainer & BIT_mask[nbBits];
173*3117ece4Schristos #endif
174*3117ece4Schristos }
175*3117ece4Schristos 
176*3117ece4Schristos /*! BIT_addBits() :
177*3117ece4Schristos  *  can add up to 31 bits into `bitC`.
178*3117ece4Schristos  *  Note : does not check for register overflow ! */
179*3117ece4Schristos MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
180*3117ece4Schristos                             size_t value, unsigned nbBits)
181*3117ece4Schristos {
182*3117ece4Schristos     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
183*3117ece4Schristos     assert(nbBits < BIT_MASK_SIZE);
184*3117ece4Schristos     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
185*3117ece4Schristos     bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
186*3117ece4Schristos     bitC->bitPos += nbBits;
187*3117ece4Schristos }
188*3117ece4Schristos 
189*3117ece4Schristos /*! BIT_addBitsFast() :
190*3117ece4Schristos  *  works only if `value` is _clean_,
191*3117ece4Schristos  *  meaning all high bits above nbBits are 0 */
192*3117ece4Schristos MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
193*3117ece4Schristos                                 size_t value, unsigned nbBits)
194*3117ece4Schristos {
195*3117ece4Schristos     assert((value>>nbBits) == 0);
196*3117ece4Schristos     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
197*3117ece4Schristos     bitC->bitContainer |= value << bitC->bitPos;
198*3117ece4Schristos     bitC->bitPos += nbBits;
199*3117ece4Schristos }
200*3117ece4Schristos 
201*3117ece4Schristos /*! BIT_flushBitsFast() :
202*3117ece4Schristos  *  assumption : bitContainer has not overflowed
203*3117ece4Schristos  *  unsafe version; does not check buffer overflow */
204*3117ece4Schristos MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
205*3117ece4Schristos {
206*3117ece4Schristos     size_t const nbBytes = bitC->bitPos >> 3;
207*3117ece4Schristos     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
208*3117ece4Schristos     assert(bitC->ptr <= bitC->endPtr);
209*3117ece4Schristos     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
210*3117ece4Schristos     bitC->ptr += nbBytes;
211*3117ece4Schristos     bitC->bitPos &= 7;
212*3117ece4Schristos     bitC->bitContainer >>= nbBytes*8;
213*3117ece4Schristos }
214*3117ece4Schristos 
215*3117ece4Schristos /*! BIT_flushBits() :
216*3117ece4Schristos  *  assumption : bitContainer has not overflowed
217*3117ece4Schristos  *  safe version; check for buffer overflow, and prevents it.
218*3117ece4Schristos  *  note : does not signal buffer overflow.
219*3117ece4Schristos  *  overflow will be revealed later on using BIT_closeCStream() */
220*3117ece4Schristos MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
221*3117ece4Schristos {
222*3117ece4Schristos     size_t const nbBytes = bitC->bitPos >> 3;
223*3117ece4Schristos     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
224*3117ece4Schristos     assert(bitC->ptr <= bitC->endPtr);
225*3117ece4Schristos     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
226*3117ece4Schristos     bitC->ptr += nbBytes;
227*3117ece4Schristos     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
228*3117ece4Schristos     bitC->bitPos &= 7;
229*3117ece4Schristos     bitC->bitContainer >>= nbBytes*8;
230*3117ece4Schristos }
231*3117ece4Schristos 
232*3117ece4Schristos /*! BIT_closeCStream() :
233*3117ece4Schristos  *  @return : size of CStream, in bytes,
234*3117ece4Schristos  *            or 0 if it could not fit into dstBuffer */
235*3117ece4Schristos MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
236*3117ece4Schristos {
237*3117ece4Schristos     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
238*3117ece4Schristos     BIT_flushBits(bitC);
239*3117ece4Schristos     if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
240*3117ece4Schristos     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
241*3117ece4Schristos }
242*3117ece4Schristos 
243*3117ece4Schristos 
244*3117ece4Schristos /*-********************************************************
245*3117ece4Schristos *  bitStream decoding
246*3117ece4Schristos **********************************************************/
247*3117ece4Schristos /*! BIT_initDStream() :
248*3117ece4Schristos  *  Initialize a BIT_DStream_t.
249*3117ece4Schristos  * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
250*3117ece4Schristos  * `srcSize` must be the *exact* size of the bitStream, in bytes.
251*3117ece4Schristos  * @return : size of stream (== srcSize), or an errorCode if a problem is detected
252*3117ece4Schristos  */
253*3117ece4Schristos MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
254*3117ece4Schristos {
255*3117ece4Schristos     if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
256*3117ece4Schristos 
257*3117ece4Schristos     bitD->start = (const char*)srcBuffer;
258*3117ece4Schristos     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
259*3117ece4Schristos 
260*3117ece4Schristos     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
261*3117ece4Schristos         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
262*3117ece4Schristos         bitD->bitContainer = MEM_readLEST(bitD->ptr);
263*3117ece4Schristos         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
264*3117ece4Schristos           bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
265*3117ece4Schristos           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
266*3117ece4Schristos     } else {
267*3117ece4Schristos         bitD->ptr   = bitD->start;
268*3117ece4Schristos         bitD->bitContainer = *(const BYTE*)(bitD->start);
269*3117ece4Schristos         switch(srcSize)
270*3117ece4Schristos         {
271*3117ece4Schristos         case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
272*3117ece4Schristos                 ZSTD_FALLTHROUGH;
273*3117ece4Schristos 
274*3117ece4Schristos         case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
275*3117ece4Schristos                 ZSTD_FALLTHROUGH;
276*3117ece4Schristos 
277*3117ece4Schristos         case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
278*3117ece4Schristos                 ZSTD_FALLTHROUGH;
279*3117ece4Schristos 
280*3117ece4Schristos         case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
281*3117ece4Schristos                 ZSTD_FALLTHROUGH;
282*3117ece4Schristos 
283*3117ece4Schristos         case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
284*3117ece4Schristos                 ZSTD_FALLTHROUGH;
285*3117ece4Schristos 
286*3117ece4Schristos         case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) <<  8;
287*3117ece4Schristos                 ZSTD_FALLTHROUGH;
288*3117ece4Schristos 
289*3117ece4Schristos         default: break;
290*3117ece4Schristos         }
291*3117ece4Schristos         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
292*3117ece4Schristos             bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
293*3117ece4Schristos             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
294*3117ece4Schristos         }
295*3117ece4Schristos         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
296*3117ece4Schristos     }
297*3117ece4Schristos 
298*3117ece4Schristos     return srcSize;
299*3117ece4Schristos }
300*3117ece4Schristos 
301*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
302*3117ece4Schristos {
303*3117ece4Schristos     return bitContainer >> start;
304*3117ece4Schristos }
305*3117ece4Schristos 
306*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
307*3117ece4Schristos {
308*3117ece4Schristos     U32 const regMask = sizeof(bitContainer)*8 - 1;
309*3117ece4Schristos     /* if start > regMask, bitstream is corrupted, and result is undefined */
310*3117ece4Schristos     assert(nbBits < BIT_MASK_SIZE);
311*3117ece4Schristos     /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
312*3117ece4Schristos      * than accessing memory. When bmi2 instruction is not present, we consider
313*3117ece4Schristos      * such cpus old (pre-Haswell, 2013) and their performance is not of that
314*3117ece4Schristos      * importance.
315*3117ece4Schristos      */
316*3117ece4Schristos #if defined(__x86_64__) || defined(_M_X86)
317*3117ece4Schristos     return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
318*3117ece4Schristos #else
319*3117ece4Schristos     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
320*3117ece4Schristos #endif
321*3117ece4Schristos }
322*3117ece4Schristos 
323*3117ece4Schristos /*! BIT_lookBits() :
324*3117ece4Schristos  *  Provides next n bits from local register.
325*3117ece4Schristos  *  local register is not modified.
326*3117ece4Schristos  *  On 32-bits, maxNbBits==24.
327*3117ece4Schristos  *  On 64-bits, maxNbBits==56.
328*3117ece4Schristos  * @return : value extracted */
329*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
330*3117ece4Schristos {
331*3117ece4Schristos     /* arbitrate between double-shift and shift+mask */
332*3117ece4Schristos #if 1
333*3117ece4Schristos     /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
334*3117ece4Schristos      * bitstream is likely corrupted, and result is undefined */
335*3117ece4Schristos     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
336*3117ece4Schristos #else
337*3117ece4Schristos     /* this code path is slower on my os-x laptop */
338*3117ece4Schristos     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
339*3117ece4Schristos     return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
340*3117ece4Schristos #endif
341*3117ece4Schristos }
342*3117ece4Schristos 
343*3117ece4Schristos /*! BIT_lookBitsFast() :
344*3117ece4Schristos  *  unsafe version; only works if nbBits >= 1 */
345*3117ece4Schristos MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
346*3117ece4Schristos {
347*3117ece4Schristos     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
348*3117ece4Schristos     assert(nbBits >= 1);
349*3117ece4Schristos     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
350*3117ece4Schristos }
351*3117ece4Schristos 
352*3117ece4Schristos FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
353*3117ece4Schristos {
354*3117ece4Schristos     bitD->bitsConsumed += nbBits;
355*3117ece4Schristos }
356*3117ece4Schristos 
357*3117ece4Schristos /*! BIT_readBits() :
358*3117ece4Schristos  *  Read (consume) next n bits from local register and update.
359*3117ece4Schristos  *  Pay attention to not read more than nbBits contained into local register.
360*3117ece4Schristos  * @return : extracted value. */
361*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
362*3117ece4Schristos {
363*3117ece4Schristos     size_t const value = BIT_lookBits(bitD, nbBits);
364*3117ece4Schristos     BIT_skipBits(bitD, nbBits);
365*3117ece4Schristos     return value;
366*3117ece4Schristos }
367*3117ece4Schristos 
368*3117ece4Schristos /*! BIT_readBitsFast() :
369*3117ece4Schristos  *  unsafe version; only works if nbBits >= 1 */
370*3117ece4Schristos MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
371*3117ece4Schristos {
372*3117ece4Schristos     size_t const value = BIT_lookBitsFast(bitD, nbBits);
373*3117ece4Schristos     assert(nbBits >= 1);
374*3117ece4Schristos     BIT_skipBits(bitD, nbBits);
375*3117ece4Schristos     return value;
376*3117ece4Schristos }
377*3117ece4Schristos 
378*3117ece4Schristos /*! BIT_reloadDStream_internal() :
379*3117ece4Schristos  *  Simple variant of BIT_reloadDStream(), with two conditions:
380*3117ece4Schristos  *  1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
381*3117ece4Schristos  *  2. look window is valid after shifted down : bitD->ptr >= bitD->start
382*3117ece4Schristos  */
383*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
384*3117ece4Schristos {
385*3117ece4Schristos     assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
386*3117ece4Schristos     bitD->ptr -= bitD->bitsConsumed >> 3;
387*3117ece4Schristos     assert(bitD->ptr >= bitD->start);
388*3117ece4Schristos     bitD->bitsConsumed &= 7;
389*3117ece4Schristos     bitD->bitContainer = MEM_readLEST(bitD->ptr);
390*3117ece4Schristos     return BIT_DStream_unfinished;
391*3117ece4Schristos }
392*3117ece4Schristos 
393*3117ece4Schristos /*! BIT_reloadDStreamFast() :
394*3117ece4Schristos  *  Similar to BIT_reloadDStream(), but with two differences:
395*3117ece4Schristos  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
396*3117ece4Schristos  *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
397*3117ece4Schristos  *     point you must use BIT_reloadDStream() to reload.
398*3117ece4Schristos  */
399*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
400*3117ece4Schristos {
401*3117ece4Schristos     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
402*3117ece4Schristos         return BIT_DStream_overflow;
403*3117ece4Schristos     return BIT_reloadDStream_internal(bitD);
404*3117ece4Schristos }
405*3117ece4Schristos 
406*3117ece4Schristos /*! BIT_reloadDStream() :
407*3117ece4Schristos  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
408*3117ece4Schristos  *  This function is safe, it guarantees it will not never beyond src buffer.
409*3117ece4Schristos  * @return : status of `BIT_DStream_t` internal register.
410*3117ece4Schristos  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
411*3117ece4Schristos FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
412*3117ece4Schristos {
413*3117ece4Schristos     /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
414*3117ece4Schristos     if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
415*3117ece4Schristos         static const BitContainerType zeroFilled = 0;
416*3117ece4Schristos         bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
417*3117ece4Schristos         /* overflow detected, erroneous scenario or end of stream: no update */
418*3117ece4Schristos         return BIT_DStream_overflow;
419*3117ece4Schristos     }
420*3117ece4Schristos 
421*3117ece4Schristos     assert(bitD->ptr >= bitD->start);
422*3117ece4Schristos 
423*3117ece4Schristos     if (bitD->ptr >= bitD->limitPtr) {
424*3117ece4Schristos         return BIT_reloadDStream_internal(bitD);
425*3117ece4Schristos     }
426*3117ece4Schristos     if (bitD->ptr == bitD->start) {
427*3117ece4Schristos         /* reached end of bitStream => no update */
428*3117ece4Schristos         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
429*3117ece4Schristos         return BIT_DStream_completed;
430*3117ece4Schristos     }
431*3117ece4Schristos     /* start < ptr < limitPtr => cautious update */
432*3117ece4Schristos     {   U32 nbBytes = bitD->bitsConsumed >> 3;
433*3117ece4Schristos         BIT_DStream_status result = BIT_DStream_unfinished;
434*3117ece4Schristos         if (bitD->ptr - nbBytes < bitD->start) {
435*3117ece4Schristos             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
436*3117ece4Schristos             result = BIT_DStream_endOfBuffer;
437*3117ece4Schristos         }
438*3117ece4Schristos         bitD->ptr -= nbBytes;
439*3117ece4Schristos         bitD->bitsConsumed -= nbBytes*8;
440*3117ece4Schristos         bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
441*3117ece4Schristos         return result;
442*3117ece4Schristos     }
443*3117ece4Schristos }
444*3117ece4Schristos 
445*3117ece4Schristos /*! BIT_endOfDStream() :
446*3117ece4Schristos  * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
447*3117ece4Schristos  */
448*3117ece4Schristos MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
449*3117ece4Schristos {
450*3117ece4Schristos     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
451*3117ece4Schristos }
452*3117ece4Schristos 
453*3117ece4Schristos #if defined (__cplusplus)
454*3117ece4Schristos }
455*3117ece4Schristos #endif
456*3117ece4Schristos 
457*3117ece4Schristos #endif /* BITSTREAM_H_MODULE */
458