xref: /minix3/external/public-domain/xz/dist/src/liblzma/rangecoder/range_encoder.h (revision 5a645f22a86f086849945a5dd6acbf59f38c913a)
1*5a645f22SBen Gras ///////////////////////////////////////////////////////////////////////////////
2*5a645f22SBen Gras //
3*5a645f22SBen Gras /// \file       range_encoder.h
4*5a645f22SBen Gras /// \brief      Range Encoder
5*5a645f22SBen Gras ///
6*5a645f22SBen Gras //  Authors:    Igor Pavlov
7*5a645f22SBen Gras //              Lasse Collin
8*5a645f22SBen Gras //
9*5a645f22SBen Gras //  This file has been put into the public domain.
10*5a645f22SBen Gras //  You can do whatever you want with this file.
11*5a645f22SBen Gras //
12*5a645f22SBen Gras ///////////////////////////////////////////////////////////////////////////////
13*5a645f22SBen Gras 
14*5a645f22SBen Gras #ifndef LZMA_RANGE_ENCODER_H
15*5a645f22SBen Gras #define LZMA_RANGE_ENCODER_H
16*5a645f22SBen Gras 
17*5a645f22SBen Gras #include "range_common.h"
18*5a645f22SBen Gras #include "price.h"
19*5a645f22SBen Gras 
20*5a645f22SBen Gras 
21*5a645f22SBen Gras /// Maximum number of symbols that can be put pending into lzma_range_encoder
22*5a645f22SBen Gras /// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough
23*5a645f22SBen Gras /// (match with big distance and length followed by range encoder flush).
24*5a645f22SBen Gras #define RC_SYMBOLS_MAX 58
25*5a645f22SBen Gras 
26*5a645f22SBen Gras 
27*5a645f22SBen Gras typedef struct {
28*5a645f22SBen Gras 	uint64_t low;
29*5a645f22SBen Gras 	uint64_t cache_size;
30*5a645f22SBen Gras 	uint32_t range;
31*5a645f22SBen Gras 	uint8_t cache;
32*5a645f22SBen Gras 
33*5a645f22SBen Gras 	/// Number of symbols in the tables
34*5a645f22SBen Gras 	size_t count;
35*5a645f22SBen Gras 
36*5a645f22SBen Gras 	/// rc_encode()'s position in the tables
37*5a645f22SBen Gras 	size_t pos;
38*5a645f22SBen Gras 
39*5a645f22SBen Gras 	/// Symbols to encode
40*5a645f22SBen Gras 	enum {
41*5a645f22SBen Gras 		RC_BIT_0,
42*5a645f22SBen Gras 		RC_BIT_1,
43*5a645f22SBen Gras 		RC_DIRECT_0,
44*5a645f22SBen Gras 		RC_DIRECT_1,
45*5a645f22SBen Gras 		RC_FLUSH,
46*5a645f22SBen Gras 	} symbols[RC_SYMBOLS_MAX];
47*5a645f22SBen Gras 
48*5a645f22SBen Gras 	/// Probabilities associated with RC_BIT_0 or RC_BIT_1
49*5a645f22SBen Gras 	probability *probs[RC_SYMBOLS_MAX];
50*5a645f22SBen Gras 
51*5a645f22SBen Gras } lzma_range_encoder;
52*5a645f22SBen Gras 
53*5a645f22SBen Gras 
54*5a645f22SBen Gras static inline void
rc_reset(lzma_range_encoder * rc)55*5a645f22SBen Gras rc_reset(lzma_range_encoder *rc)
56*5a645f22SBen Gras {
57*5a645f22SBen Gras 	rc->low = 0;
58*5a645f22SBen Gras 	rc->cache_size = 1;
59*5a645f22SBen Gras 	rc->range = UINT32_MAX;
60*5a645f22SBen Gras 	rc->cache = 0;
61*5a645f22SBen Gras 	rc->count = 0;
62*5a645f22SBen Gras 	rc->pos = 0;
63*5a645f22SBen Gras }
64*5a645f22SBen Gras 
65*5a645f22SBen Gras 
66*5a645f22SBen Gras static inline void
rc_bit(lzma_range_encoder * rc,probability * prob,uint32_t bit)67*5a645f22SBen Gras rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
68*5a645f22SBen Gras {
69*5a645f22SBen Gras 	rc->symbols[rc->count] = bit;
70*5a645f22SBen Gras 	rc->probs[rc->count] = prob;
71*5a645f22SBen Gras 	++rc->count;
72*5a645f22SBen Gras }
73*5a645f22SBen Gras 
74*5a645f22SBen Gras 
75*5a645f22SBen Gras static inline void
rc_bittree(lzma_range_encoder * rc,probability * probs,uint32_t bit_count,uint32_t symbol)76*5a645f22SBen Gras rc_bittree(lzma_range_encoder *rc, probability *probs,
77*5a645f22SBen Gras 		uint32_t bit_count, uint32_t symbol)
78*5a645f22SBen Gras {
79*5a645f22SBen Gras 	uint32_t model_index = 1;
80*5a645f22SBen Gras 
81*5a645f22SBen Gras 	do {
82*5a645f22SBen Gras 		const uint32_t bit = (symbol >> --bit_count) & 1;
83*5a645f22SBen Gras 		rc_bit(rc, &probs[model_index], bit);
84*5a645f22SBen Gras 		model_index = (model_index << 1) + bit;
85*5a645f22SBen Gras 	} while (bit_count != 0);
86*5a645f22SBen Gras }
87*5a645f22SBen Gras 
88*5a645f22SBen Gras 
89*5a645f22SBen Gras static inline void
rc_bittree_reverse(lzma_range_encoder * rc,probability * probs,uint32_t bit_count,uint32_t symbol)90*5a645f22SBen Gras rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
91*5a645f22SBen Gras 		uint32_t bit_count, uint32_t symbol)
92*5a645f22SBen Gras {
93*5a645f22SBen Gras 	uint32_t model_index = 1;
94*5a645f22SBen Gras 
95*5a645f22SBen Gras 	do {
96*5a645f22SBen Gras 		const uint32_t bit = symbol & 1;
97*5a645f22SBen Gras 		symbol >>= 1;
98*5a645f22SBen Gras 		rc_bit(rc, &probs[model_index], bit);
99*5a645f22SBen Gras 		model_index = (model_index << 1) + bit;
100*5a645f22SBen Gras 	} while (--bit_count != 0);
101*5a645f22SBen Gras }
102*5a645f22SBen Gras 
103*5a645f22SBen Gras 
104*5a645f22SBen Gras static inline void
rc_direct(lzma_range_encoder * rc,uint32_t value,uint32_t bit_count)105*5a645f22SBen Gras rc_direct(lzma_range_encoder *rc,
106*5a645f22SBen Gras 		uint32_t value, uint32_t bit_count)
107*5a645f22SBen Gras {
108*5a645f22SBen Gras 	do {
109*5a645f22SBen Gras 		rc->symbols[rc->count++]
110*5a645f22SBen Gras 				= RC_DIRECT_0 + ((value >> --bit_count) & 1);
111*5a645f22SBen Gras 	} while (bit_count != 0);
112*5a645f22SBen Gras }
113*5a645f22SBen Gras 
114*5a645f22SBen Gras 
115*5a645f22SBen Gras static inline void
rc_flush(lzma_range_encoder * rc)116*5a645f22SBen Gras rc_flush(lzma_range_encoder *rc)
117*5a645f22SBen Gras {
118*5a645f22SBen Gras 	for (size_t i = 0; i < 5; ++i)
119*5a645f22SBen Gras 		rc->symbols[rc->count++] = RC_FLUSH;
120*5a645f22SBen Gras }
121*5a645f22SBen Gras 
122*5a645f22SBen Gras 
123*5a645f22SBen Gras static inline bool
rc_shift_low(lzma_range_encoder * rc,uint8_t * out,size_t * out_pos,size_t out_size)124*5a645f22SBen Gras rc_shift_low(lzma_range_encoder *rc,
125*5a645f22SBen Gras 		uint8_t *out, size_t *out_pos, size_t out_size)
126*5a645f22SBen Gras {
127*5a645f22SBen Gras 	if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
128*5a645f22SBen Gras 			|| (uint32_t)(rc->low >> 32) != 0) {
129*5a645f22SBen Gras 		do {
130*5a645f22SBen Gras 			if (*out_pos == out_size)
131*5a645f22SBen Gras 				return true;
132*5a645f22SBen Gras 
133*5a645f22SBen Gras 			out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
134*5a645f22SBen Gras 			++*out_pos;
135*5a645f22SBen Gras 			rc->cache = 0xFF;
136*5a645f22SBen Gras 
137*5a645f22SBen Gras 		} while (--rc->cache_size != 0);
138*5a645f22SBen Gras 
139*5a645f22SBen Gras 		rc->cache = (rc->low >> 24) & 0xFF;
140*5a645f22SBen Gras 	}
141*5a645f22SBen Gras 
142*5a645f22SBen Gras 	++rc->cache_size;
143*5a645f22SBen Gras 	rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
144*5a645f22SBen Gras 
145*5a645f22SBen Gras 	return false;
146*5a645f22SBen Gras }
147*5a645f22SBen Gras 
148*5a645f22SBen Gras 
149*5a645f22SBen Gras static inline bool
rc_encode(lzma_range_encoder * rc,uint8_t * out,size_t * out_pos,size_t out_size)150*5a645f22SBen Gras rc_encode(lzma_range_encoder *rc,
151*5a645f22SBen Gras 		uint8_t *out, size_t *out_pos, size_t out_size)
152*5a645f22SBen Gras {
153*5a645f22SBen Gras 	assert(rc->count <= RC_SYMBOLS_MAX);
154*5a645f22SBen Gras 
155*5a645f22SBen Gras 	while (rc->pos < rc->count) {
156*5a645f22SBen Gras 		// Normalize
157*5a645f22SBen Gras 		if (rc->range < RC_TOP_VALUE) {
158*5a645f22SBen Gras 			if (rc_shift_low(rc, out, out_pos, out_size))
159*5a645f22SBen Gras 				return true;
160*5a645f22SBen Gras 
161*5a645f22SBen Gras 			rc->range <<= RC_SHIFT_BITS;
162*5a645f22SBen Gras 		}
163*5a645f22SBen Gras 
164*5a645f22SBen Gras 		// Encode a bit
165*5a645f22SBen Gras 		switch (rc->symbols[rc->pos]) {
166*5a645f22SBen Gras 		case RC_BIT_0: {
167*5a645f22SBen Gras 			probability prob = *rc->probs[rc->pos];
168*5a645f22SBen Gras 			rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
169*5a645f22SBen Gras 					* prob;
170*5a645f22SBen Gras 			prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
171*5a645f22SBen Gras 			*rc->probs[rc->pos] = prob;
172*5a645f22SBen Gras 			break;
173*5a645f22SBen Gras 		}
174*5a645f22SBen Gras 
175*5a645f22SBen Gras 		case RC_BIT_1: {
176*5a645f22SBen Gras 			probability prob = *rc->probs[rc->pos];
177*5a645f22SBen Gras 			const uint32_t bound = prob * (rc->range
178*5a645f22SBen Gras 					>> RC_BIT_MODEL_TOTAL_BITS);
179*5a645f22SBen Gras 			rc->low += bound;
180*5a645f22SBen Gras 			rc->range -= bound;
181*5a645f22SBen Gras 			prob -= prob >> RC_MOVE_BITS;
182*5a645f22SBen Gras 			*rc->probs[rc->pos] = prob;
183*5a645f22SBen Gras 			break;
184*5a645f22SBen Gras 		}
185*5a645f22SBen Gras 
186*5a645f22SBen Gras 		case RC_DIRECT_0:
187*5a645f22SBen Gras 			rc->range >>= 1;
188*5a645f22SBen Gras 			break;
189*5a645f22SBen Gras 
190*5a645f22SBen Gras 		case RC_DIRECT_1:
191*5a645f22SBen Gras 			rc->range >>= 1;
192*5a645f22SBen Gras 			rc->low += rc->range;
193*5a645f22SBen Gras 			break;
194*5a645f22SBen Gras 
195*5a645f22SBen Gras 		case RC_FLUSH:
196*5a645f22SBen Gras 			// Prevent further normalizations.
197*5a645f22SBen Gras 			rc->range = UINT32_MAX;
198*5a645f22SBen Gras 
199*5a645f22SBen Gras 			// Flush the last five bytes (see rc_flush()).
200*5a645f22SBen Gras 			do {
201*5a645f22SBen Gras 				if (rc_shift_low(rc, out, out_pos, out_size))
202*5a645f22SBen Gras 					return true;
203*5a645f22SBen Gras 			} while (++rc->pos < rc->count);
204*5a645f22SBen Gras 
205*5a645f22SBen Gras 			// Reset the range encoder so we are ready to continue
206*5a645f22SBen Gras 			// encoding if we weren't finishing the stream.
207*5a645f22SBen Gras 			rc_reset(rc);
208*5a645f22SBen Gras 			return false;
209*5a645f22SBen Gras 
210*5a645f22SBen Gras 		default:
211*5a645f22SBen Gras 			assert(0);
212*5a645f22SBen Gras 			break;
213*5a645f22SBen Gras 		}
214*5a645f22SBen Gras 
215*5a645f22SBen Gras 		++rc->pos;
216*5a645f22SBen Gras 	}
217*5a645f22SBen Gras 
218*5a645f22SBen Gras 	rc->count = 0;
219*5a645f22SBen Gras 	rc->pos = 0;
220*5a645f22SBen Gras 
221*5a645f22SBen Gras 	return false;
222*5a645f22SBen Gras }
223*5a645f22SBen Gras 
224*5a645f22SBen Gras 
225*5a645f22SBen Gras static inline uint64_t
rc_pending(const lzma_range_encoder * rc)226*5a645f22SBen Gras rc_pending(const lzma_range_encoder *rc)
227*5a645f22SBen Gras {
228*5a645f22SBen Gras 	return rc->cache_size + 5 - 1;
229*5a645f22SBen Gras }
230*5a645f22SBen Gras 
231*5a645f22SBen Gras #endif
232