1*3117ece4Schristos /* ****************************************************************** 2*3117ece4Schristos * bitstream 3*3117ece4Schristos * Part of FSE library 4*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 5*3117ece4Schristos * 6*3117ece4Schristos * You can contact the author at : 7*3117ece4Schristos * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 8*3117ece4Schristos * 9*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 10*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 12*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 13*3117ece4Schristos ****************************************************************** */ 14*3117ece4Schristos #ifndef BITSTREAM_H_MODULE 15*3117ece4Schristos #define BITSTREAM_H_MODULE 16*3117ece4Schristos 17*3117ece4Schristos #if defined (__cplusplus) 18*3117ece4Schristos extern "C" { 19*3117ece4Schristos #endif 20*3117ece4Schristos /* 21*3117ece4Schristos * This API consists of small unitary functions, which must be inlined for best performance. 22*3117ece4Schristos * Since link-time-optimization is not available for all compilers, 23*3117ece4Schristos * these functions are defined into a .h to be included. 24*3117ece4Schristos */ 25*3117ece4Schristos 26*3117ece4Schristos /*-**************************************** 27*3117ece4Schristos * Dependencies 28*3117ece4Schristos ******************************************/ 29*3117ece4Schristos #include "mem.h" /* unaligned access routines */ 30*3117ece4Schristos #include "compiler.h" /* UNLIKELY() */ 31*3117ece4Schristos #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ 32*3117ece4Schristos #include "error_private.h" /* error codes and messages */ 33*3117ece4Schristos #include "bits.h" /* ZSTD_highbit32 */ 34*3117ece4Schristos 35*3117ece4Schristos 36*3117ece4Schristos /*========================================= 37*3117ece4Schristos * Target specific 38*3117ece4Schristos =========================================*/ 39*3117ece4Schristos #ifndef ZSTD_NO_INTRINSICS 40*3117ece4Schristos # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__) 41*3117ece4Schristos # include <immintrin.h> /* support for bextr (experimental)/bzhi */ 42*3117ece4Schristos # elif defined(__ICCARM__) 43*3117ece4Schristos # include <intrinsics.h> 44*3117ece4Schristos # endif 45*3117ece4Schristos #endif 46*3117ece4Schristos 47*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN_32 25 48*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN_64 57 49*3117ece4Schristos #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) 50*3117ece4Schristos 51*3117ece4Schristos 52*3117ece4Schristos /*-****************************************** 53*3117ece4Schristos * bitStream encoding API (write forward) 54*3117ece4Schristos ********************************************/ 55*3117ece4Schristos /* bitStream can mix input from multiple sources. 56*3117ece4Schristos * A critical property of these streams is that they encode and decode in **reverse** direction. 57*3117ece4Schristos * So the first bit sequence you add will be the last to be read, like a LIFO stack. 58*3117ece4Schristos */ 59*3117ece4Schristos typedef struct { 60*3117ece4Schristos size_t bitContainer; 61*3117ece4Schristos unsigned bitPos; 62*3117ece4Schristos char* startPtr; 63*3117ece4Schristos char* ptr; 64*3117ece4Schristos char* endPtr; 65*3117ece4Schristos } BIT_CStream_t; 66*3117ece4Schristos 67*3117ece4Schristos MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); 68*3117ece4Schristos MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); 69*3117ece4Schristos MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); 70*3117ece4Schristos MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); 71*3117ece4Schristos 72*3117ece4Schristos /* Start with initCStream, providing the size of buffer to write into. 73*3117ece4Schristos * bitStream will never write outside of this buffer. 74*3117ece4Schristos * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. 75*3117ece4Schristos * 76*3117ece4Schristos * bits are first added to a local register. 77*3117ece4Schristos * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. 78*3117ece4Schristos * Writing data into memory is an explicit operation, performed by the flushBits function. 79*3117ece4Schristos * Hence keep track how many bits are potentially stored into local register to avoid register overflow. 80*3117ece4Schristos * After a flushBits, a maximum of 7 bits might still be stored into local register. 81*3117ece4Schristos * 82*3117ece4Schristos * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. 83*3117ece4Schristos * 84*3117ece4Schristos * Last operation is to close the bitStream. 85*3117ece4Schristos * The function returns the final size of CStream in bytes. 86*3117ece4Schristos * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) 87*3117ece4Schristos */ 88*3117ece4Schristos 89*3117ece4Schristos 90*3117ece4Schristos /*-******************************************** 91*3117ece4Schristos * bitStream decoding API (read backward) 92*3117ece4Schristos **********************************************/ 93*3117ece4Schristos typedef size_t BitContainerType; 94*3117ece4Schristos typedef struct { 95*3117ece4Schristos BitContainerType bitContainer; 96*3117ece4Schristos unsigned bitsConsumed; 97*3117ece4Schristos const char* ptr; 98*3117ece4Schristos const char* start; 99*3117ece4Schristos const char* limitPtr; 100*3117ece4Schristos } BIT_DStream_t; 101*3117ece4Schristos 102*3117ece4Schristos typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ 103*3117ece4Schristos BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ 104*3117ece4Schristos BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ 105*3117ece4Schristos BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ 106*3117ece4Schristos } BIT_DStream_status; /* result of BIT_reloadDStream() */ 107*3117ece4Schristos 108*3117ece4Schristos MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); 109*3117ece4Schristos MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); 110*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); 111*3117ece4Schristos MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); 112*3117ece4Schristos 113*3117ece4Schristos 114*3117ece4Schristos /* Start by invoking BIT_initDStream(). 115*3117ece4Schristos * A chunk of the bitStream is then stored into a local register. 116*3117ece4Schristos * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). 117*3117ece4Schristos * You can then retrieve bitFields stored into the local register, **in reverse order**. 118*3117ece4Schristos * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. 119*3117ece4Schristos * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. 120*3117ece4Schristos * Otherwise, it can be less than that, so proceed accordingly. 121*3117ece4Schristos * Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 122*3117ece4Schristos */ 123*3117ece4Schristos 124*3117ece4Schristos 125*3117ece4Schristos /*-**************************************** 126*3117ece4Schristos * unsafe API 127*3117ece4Schristos ******************************************/ 128*3117ece4Schristos MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); 129*3117ece4Schristos /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ 130*3117ece4Schristos 131*3117ece4Schristos MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); 132*3117ece4Schristos /* unsafe version; does not check buffer overflow */ 133*3117ece4Schristos 134*3117ece4Schristos MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); 135*3117ece4Schristos /* faster, but works only if nbBits >= 1 */ 136*3117ece4Schristos 137*3117ece4Schristos /*===== Local Constants =====*/ 138*3117ece4Schristos static const unsigned BIT_mask[] = { 139*3117ece4Schristos 0, 1, 3, 7, 0xF, 0x1F, 140*3117ece4Schristos 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 141*3117ece4Schristos 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 142*3117ece4Schristos 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 143*3117ece4Schristos 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, 144*3117ece4Schristos 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ 145*3117ece4Schristos #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) 146*3117ece4Schristos 147*3117ece4Schristos /*-************************************************************** 148*3117ece4Schristos * bitStream encoding 149*3117ece4Schristos ****************************************************************/ 150*3117ece4Schristos /*! BIT_initCStream() : 151*3117ece4Schristos * `dstCapacity` must be > sizeof(size_t) 152*3117ece4Schristos * @return : 0 if success, 153*3117ece4Schristos * otherwise an error code (can be tested using ERR_isError()) */ 154*3117ece4Schristos MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, 155*3117ece4Schristos void* startPtr, size_t dstCapacity) 156*3117ece4Schristos { 157*3117ece4Schristos bitC->bitContainer = 0; 158*3117ece4Schristos bitC->bitPos = 0; 159*3117ece4Schristos bitC->startPtr = (char*)startPtr; 160*3117ece4Schristos bitC->ptr = bitC->startPtr; 161*3117ece4Schristos bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); 162*3117ece4Schristos if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); 163*3117ece4Schristos return 0; 164*3117ece4Schristos } 165*3117ece4Schristos 166*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) 167*3117ece4Schristos { 168*3117ece4Schristos #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) 169*3117ece4Schristos return _bzhi_u64(bitContainer, nbBits); 170*3117ece4Schristos #else 171*3117ece4Schristos assert(nbBits < BIT_MASK_SIZE); 172*3117ece4Schristos return bitContainer & BIT_mask[nbBits]; 173*3117ece4Schristos #endif 174*3117ece4Schristos } 175*3117ece4Schristos 176*3117ece4Schristos /*! BIT_addBits() : 177*3117ece4Schristos * can add up to 31 bits into `bitC`. 178*3117ece4Schristos * Note : does not check for register overflow ! */ 179*3117ece4Schristos MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, 180*3117ece4Schristos size_t value, unsigned nbBits) 181*3117ece4Schristos { 182*3117ece4Schristos DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); 183*3117ece4Schristos assert(nbBits < BIT_MASK_SIZE); 184*3117ece4Schristos assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); 185*3117ece4Schristos bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; 186*3117ece4Schristos bitC->bitPos += nbBits; 187*3117ece4Schristos } 188*3117ece4Schristos 189*3117ece4Schristos /*! BIT_addBitsFast() : 190*3117ece4Schristos * works only if `value` is _clean_, 191*3117ece4Schristos * meaning all high bits above nbBits are 0 */ 192*3117ece4Schristos MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, 193*3117ece4Schristos size_t value, unsigned nbBits) 194*3117ece4Schristos { 195*3117ece4Schristos assert((value>>nbBits) == 0); 196*3117ece4Schristos assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); 197*3117ece4Schristos bitC->bitContainer |= value << bitC->bitPos; 198*3117ece4Schristos bitC->bitPos += nbBits; 199*3117ece4Schristos } 200*3117ece4Schristos 201*3117ece4Schristos /*! BIT_flushBitsFast() : 202*3117ece4Schristos * assumption : bitContainer has not overflowed 203*3117ece4Schristos * unsafe version; does not check buffer overflow */ 204*3117ece4Schristos MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) 205*3117ece4Schristos { 206*3117ece4Schristos size_t const nbBytes = bitC->bitPos >> 3; 207*3117ece4Schristos assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); 208*3117ece4Schristos assert(bitC->ptr <= bitC->endPtr); 209*3117ece4Schristos MEM_writeLEST(bitC->ptr, bitC->bitContainer); 210*3117ece4Schristos bitC->ptr += nbBytes; 211*3117ece4Schristos bitC->bitPos &= 7; 212*3117ece4Schristos bitC->bitContainer >>= nbBytes*8; 213*3117ece4Schristos } 214*3117ece4Schristos 215*3117ece4Schristos /*! BIT_flushBits() : 216*3117ece4Schristos * assumption : bitContainer has not overflowed 217*3117ece4Schristos * safe version; check for buffer overflow, and prevents it. 218*3117ece4Schristos * note : does not signal buffer overflow. 219*3117ece4Schristos * overflow will be revealed later on using BIT_closeCStream() */ 220*3117ece4Schristos MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) 221*3117ece4Schristos { 222*3117ece4Schristos size_t const nbBytes = bitC->bitPos >> 3; 223*3117ece4Schristos assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); 224*3117ece4Schristos assert(bitC->ptr <= bitC->endPtr); 225*3117ece4Schristos MEM_writeLEST(bitC->ptr, bitC->bitContainer); 226*3117ece4Schristos bitC->ptr += nbBytes; 227*3117ece4Schristos if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; 228*3117ece4Schristos bitC->bitPos &= 7; 229*3117ece4Schristos bitC->bitContainer >>= nbBytes*8; 230*3117ece4Schristos } 231*3117ece4Schristos 232*3117ece4Schristos /*! BIT_closeCStream() : 233*3117ece4Schristos * @return : size of CStream, in bytes, 234*3117ece4Schristos * or 0 if it could not fit into dstBuffer */ 235*3117ece4Schristos MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) 236*3117ece4Schristos { 237*3117ece4Schristos BIT_addBitsFast(bitC, 1, 1); /* endMark */ 238*3117ece4Schristos BIT_flushBits(bitC); 239*3117ece4Schristos if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ 240*3117ece4Schristos return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); 241*3117ece4Schristos } 242*3117ece4Schristos 243*3117ece4Schristos 244*3117ece4Schristos /*-******************************************************** 245*3117ece4Schristos * bitStream decoding 246*3117ece4Schristos **********************************************************/ 247*3117ece4Schristos /*! BIT_initDStream() : 248*3117ece4Schristos * Initialize a BIT_DStream_t. 249*3117ece4Schristos * `bitD` : a pointer to an already allocated BIT_DStream_t structure. 250*3117ece4Schristos * `srcSize` must be the *exact* size of the bitStream, in bytes. 251*3117ece4Schristos * @return : size of stream (== srcSize), or an errorCode if a problem is detected 252*3117ece4Schristos */ 253*3117ece4Schristos MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) 254*3117ece4Schristos { 255*3117ece4Schristos if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } 256*3117ece4Schristos 257*3117ece4Schristos bitD->start = (const char*)srcBuffer; 258*3117ece4Schristos bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); 259*3117ece4Schristos 260*3117ece4Schristos if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ 261*3117ece4Schristos bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); 262*3117ece4Schristos bitD->bitContainer = MEM_readLEST(bitD->ptr); 263*3117ece4Schristos { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; 264*3117ece4Schristos bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ 265*3117ece4Schristos if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } 266*3117ece4Schristos } else { 267*3117ece4Schristos bitD->ptr = bitD->start; 268*3117ece4Schristos bitD->bitContainer = *(const BYTE*)(bitD->start); 269*3117ece4Schristos switch(srcSize) 270*3117ece4Schristos { 271*3117ece4Schristos case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); 272*3117ece4Schristos ZSTD_FALLTHROUGH; 273*3117ece4Schristos 274*3117ece4Schristos case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); 275*3117ece4Schristos ZSTD_FALLTHROUGH; 276*3117ece4Schristos 277*3117ece4Schristos case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); 278*3117ece4Schristos ZSTD_FALLTHROUGH; 279*3117ece4Schristos 280*3117ece4Schristos case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; 281*3117ece4Schristos ZSTD_FALLTHROUGH; 282*3117ece4Schristos 283*3117ece4Schristos case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; 284*3117ece4Schristos ZSTD_FALLTHROUGH; 285*3117ece4Schristos 286*3117ece4Schristos case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; 287*3117ece4Schristos ZSTD_FALLTHROUGH; 288*3117ece4Schristos 289*3117ece4Schristos default: break; 290*3117ece4Schristos } 291*3117ece4Schristos { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; 292*3117ece4Schristos bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; 293*3117ece4Schristos if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ 294*3117ece4Schristos } 295*3117ece4Schristos bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; 296*3117ece4Schristos } 297*3117ece4Schristos 298*3117ece4Schristos return srcSize; 299*3117ece4Schristos } 300*3117ece4Schristos 301*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start) 302*3117ece4Schristos { 303*3117ece4Schristos return bitContainer >> start; 304*3117ece4Schristos } 305*3117ece4Schristos 306*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) 307*3117ece4Schristos { 308*3117ece4Schristos U32 const regMask = sizeof(bitContainer)*8 - 1; 309*3117ece4Schristos /* if start > regMask, bitstream is corrupted, and result is undefined */ 310*3117ece4Schristos assert(nbBits < BIT_MASK_SIZE); 311*3117ece4Schristos /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better 312*3117ece4Schristos * than accessing memory. When bmi2 instruction is not present, we consider 313*3117ece4Schristos * such cpus old (pre-Haswell, 2013) and their performance is not of that 314*3117ece4Schristos * importance. 315*3117ece4Schristos */ 316*3117ece4Schristos #if defined(__x86_64__) || defined(_M_X86) 317*3117ece4Schristos return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); 318*3117ece4Schristos #else 319*3117ece4Schristos return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; 320*3117ece4Schristos #endif 321*3117ece4Schristos } 322*3117ece4Schristos 323*3117ece4Schristos /*! BIT_lookBits() : 324*3117ece4Schristos * Provides next n bits from local register. 325*3117ece4Schristos * local register is not modified. 326*3117ece4Schristos * On 32-bits, maxNbBits==24. 327*3117ece4Schristos * On 64-bits, maxNbBits==56. 328*3117ece4Schristos * @return : value extracted */ 329*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) 330*3117ece4Schristos { 331*3117ece4Schristos /* arbitrate between double-shift and shift+mask */ 332*3117ece4Schristos #if 1 333*3117ece4Schristos /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, 334*3117ece4Schristos * bitstream is likely corrupted, and result is undefined */ 335*3117ece4Schristos return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); 336*3117ece4Schristos #else 337*3117ece4Schristos /* this code path is slower on my os-x laptop */ 338*3117ece4Schristos U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; 339*3117ece4Schristos return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); 340*3117ece4Schristos #endif 341*3117ece4Schristos } 342*3117ece4Schristos 343*3117ece4Schristos /*! BIT_lookBitsFast() : 344*3117ece4Schristos * unsafe version; only works if nbBits >= 1 */ 345*3117ece4Schristos MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) 346*3117ece4Schristos { 347*3117ece4Schristos U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; 348*3117ece4Schristos assert(nbBits >= 1); 349*3117ece4Schristos return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); 350*3117ece4Schristos } 351*3117ece4Schristos 352*3117ece4Schristos FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) 353*3117ece4Schristos { 354*3117ece4Schristos bitD->bitsConsumed += nbBits; 355*3117ece4Schristos } 356*3117ece4Schristos 357*3117ece4Schristos /*! BIT_readBits() : 358*3117ece4Schristos * Read (consume) next n bits from local register and update. 359*3117ece4Schristos * Pay attention to not read more than nbBits contained into local register. 360*3117ece4Schristos * @return : extracted value. */ 361*3117ece4Schristos FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) 362*3117ece4Schristos { 363*3117ece4Schristos size_t const value = BIT_lookBits(bitD, nbBits); 364*3117ece4Schristos BIT_skipBits(bitD, nbBits); 365*3117ece4Schristos return value; 366*3117ece4Schristos } 367*3117ece4Schristos 368*3117ece4Schristos /*! BIT_readBitsFast() : 369*3117ece4Schristos * unsafe version; only works if nbBits >= 1 */ 370*3117ece4Schristos MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) 371*3117ece4Schristos { 372*3117ece4Schristos size_t const value = BIT_lookBitsFast(bitD, nbBits); 373*3117ece4Schristos assert(nbBits >= 1); 374*3117ece4Schristos BIT_skipBits(bitD, nbBits); 375*3117ece4Schristos return value; 376*3117ece4Schristos } 377*3117ece4Schristos 378*3117ece4Schristos /*! BIT_reloadDStream_internal() : 379*3117ece4Schristos * Simple variant of BIT_reloadDStream(), with two conditions: 380*3117ece4Schristos * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 381*3117ece4Schristos * 2. look window is valid after shifted down : bitD->ptr >= bitD->start 382*3117ece4Schristos */ 383*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) 384*3117ece4Schristos { 385*3117ece4Schristos assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); 386*3117ece4Schristos bitD->ptr -= bitD->bitsConsumed >> 3; 387*3117ece4Schristos assert(bitD->ptr >= bitD->start); 388*3117ece4Schristos bitD->bitsConsumed &= 7; 389*3117ece4Schristos bitD->bitContainer = MEM_readLEST(bitD->ptr); 390*3117ece4Schristos return BIT_DStream_unfinished; 391*3117ece4Schristos } 392*3117ece4Schristos 393*3117ece4Schristos /*! BIT_reloadDStreamFast() : 394*3117ece4Schristos * Similar to BIT_reloadDStream(), but with two differences: 395*3117ece4Schristos * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! 396*3117ece4Schristos * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this 397*3117ece4Schristos * point you must use BIT_reloadDStream() to reload. 398*3117ece4Schristos */ 399*3117ece4Schristos MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) 400*3117ece4Schristos { 401*3117ece4Schristos if (UNLIKELY(bitD->ptr < bitD->limitPtr)) 402*3117ece4Schristos return BIT_DStream_overflow; 403*3117ece4Schristos return BIT_reloadDStream_internal(bitD); 404*3117ece4Schristos } 405*3117ece4Schristos 406*3117ece4Schristos /*! BIT_reloadDStream() : 407*3117ece4Schristos * Refill `bitD` from buffer previously set in BIT_initDStream() . 408*3117ece4Schristos * This function is safe, it guarantees it will not never beyond src buffer. 409*3117ece4Schristos * @return : status of `BIT_DStream_t` internal register. 410*3117ece4Schristos * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ 411*3117ece4Schristos FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) 412*3117ece4Schristos { 413*3117ece4Schristos /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ 414*3117ece4Schristos if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { 415*3117ece4Schristos static const BitContainerType zeroFilled = 0; 416*3117ece4Schristos bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ 417*3117ece4Schristos /* overflow detected, erroneous scenario or end of stream: no update */ 418*3117ece4Schristos return BIT_DStream_overflow; 419*3117ece4Schristos } 420*3117ece4Schristos 421*3117ece4Schristos assert(bitD->ptr >= bitD->start); 422*3117ece4Schristos 423*3117ece4Schristos if (bitD->ptr >= bitD->limitPtr) { 424*3117ece4Schristos return BIT_reloadDStream_internal(bitD); 425*3117ece4Schristos } 426*3117ece4Schristos if (bitD->ptr == bitD->start) { 427*3117ece4Schristos /* reached end of bitStream => no update */ 428*3117ece4Schristos if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; 429*3117ece4Schristos return BIT_DStream_completed; 430*3117ece4Schristos } 431*3117ece4Schristos /* start < ptr < limitPtr => cautious update */ 432*3117ece4Schristos { U32 nbBytes = bitD->bitsConsumed >> 3; 433*3117ece4Schristos BIT_DStream_status result = BIT_DStream_unfinished; 434*3117ece4Schristos if (bitD->ptr - nbBytes < bitD->start) { 435*3117ece4Schristos nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ 436*3117ece4Schristos result = BIT_DStream_endOfBuffer; 437*3117ece4Schristos } 438*3117ece4Schristos bitD->ptr -= nbBytes; 439*3117ece4Schristos bitD->bitsConsumed -= nbBytes*8; 440*3117ece4Schristos bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ 441*3117ece4Schristos return result; 442*3117ece4Schristos } 443*3117ece4Schristos } 444*3117ece4Schristos 445*3117ece4Schristos /*! BIT_endOfDStream() : 446*3117ece4Schristos * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 447*3117ece4Schristos */ 448*3117ece4Schristos MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) 449*3117ece4Schristos { 450*3117ece4Schristos return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); 451*3117ece4Schristos } 452*3117ece4Schristos 453*3117ece4Schristos #if defined (__cplusplus) 454*3117ece4Schristos } 455*3117ece4Schristos #endif 456*3117ece4Schristos 457*3117ece4Schristos #endif /* BITSTREAM_H_MODULE */ 458