xref: /plan9-contrib/sys/src/cmd/lzip/encoder.h (revision 13d37d7716a3e781f408392d7869dff5927c6669)
1 /*  Clzip - LZMA lossless data compressor
2     Copyright (C) 2010-2017 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 typedef struct Len_prices Len_prices;
18 struct Len_prices {
19 	struct Len_model *lm;
20 	int	len_syms;
21 	int	count;
22 	int	prices[pos_states][max_len_syms];
23 	int	counters[pos_states];		/* may decrement below 0 */
24 };
25 
26 static void
Lp_update_low_mid_prices(Len_prices * lp,int pos_state)27 Lp_update_low_mid_prices(Len_prices *lp, int pos_state)
28 {
29 	int	*pps = lp->prices[pos_state];
30 	int	tmp = price0(lp->lm->choice1);
31 	int	len = 0;
32 	for (; len < len_low_syms && len < lp->len_syms; ++len)
33 		pps[len] = tmp + price_symbol3(lp->lm->bm_low[pos_state], len);
34 	if (len >= lp->len_syms)
35 		return;
36 	tmp = price1(lp->lm->choice1) + price0(lp->lm->choice2);
37 	for (; len < len_low_syms + len_mid_syms && len < lp->len_syms; ++len)
38 		pps[len] = tmp +
39 		    price_symbol3(lp->lm->bm_mid[pos_state], len - len_low_syms);
40 }
41 
42 static void
Lp_update_high_prices(Len_prices * lp)43 Lp_update_high_prices(Len_prices *lp)
44 {
45 	int tmp = price1(lp->lm->choice1) + price1(lp->lm->choice2);
46 	int	len;
47 	for (len = len_low_syms + len_mid_syms; len < lp->len_syms; ++len)
48 		/* using 4 slots per value makes "Lp_price" faster */
49 		lp->prices[3][len] = lp->prices[2][len] =
50 		    lp->prices[1][len] = lp->prices[0][len] = tmp +
51 		    price_symbol8(lp->lm->bm_high, len - len_low_syms - len_mid_syms);
52 }
53 
54 static void
Lp_reset(Len_prices * lp)55 Lp_reset(Len_prices *lp)
56 {
57 	int	i;
58 	for (i = 0; i < pos_states; ++i)
59 		lp->counters[i] = 0;
60 }
61 
62 static void
Lp_init(Len_prices * lp,Len_model * lm,int match_len_limit)63 Lp_init(Len_prices *lp, Len_model *lm, int match_len_limit)
64 {
65 	lp->lm = lm;
66 	lp->len_syms = match_len_limit + 1 - min_match_len;
67 	lp->count = (match_len_limit > 12) ? 1 : lp->len_syms;
68 	Lp_reset(lp);
69 }
70 
71 static void
Lp_decr_counter(Len_prices * lp,int pos_state)72 Lp_decr_counter(Len_prices *lp, int pos_state)
73 {
74 	--lp->counters[pos_state];
75 }
76 
77 static void
Lp_update_prices(Len_prices * lp)78 Lp_update_prices(Len_prices *lp)
79 {
80 	int	pos_state;
81 	bool high_pending = false;
82 
83 	for (pos_state = 0; pos_state < pos_states; ++pos_state)
84 		if (lp->counters[pos_state] <= 0) {
85 			lp->counters[pos_state] = lp->count;
86 			Lp_update_low_mid_prices(lp, pos_state);
87 			high_pending = true;
88 		}
89 	if (high_pending && lp->len_syms > len_low_syms + len_mid_syms)
90 		Lp_update_high_prices(lp);
91 }
92 
93 typedef struct Pair Pair;
94 struct Pair {			/* distance-length pair */
95 	int	dis;
96 	int	len;
97 };
98 
99 enum {
100 	infinite_price = 0x0FFFFFFF,
101 	max_num_trials = 1 << 13,
102 	single_step_trial = -2,
103 	dual_step_trial = -1
104 };
105 
106 typedef struct Trial Trial;
107 struct Trial {
108 	State	state;
109 	int	price;	/* dual use var; cumulative price, match length */
110 	int	dis4;	/* -1 for literal, or rep, or match distance + 4 */
111 	int	prev_index;	/* index of prev trial in trials[] */
112 	int	prev_index2;	/*   -2  trial is single step */
113 	/*   -1  literal + rep0 */
114 	/* >= 0  (rep or match) + literal + rep0 */
115 	int	reps[num_rep_distances];
116 };
117 
118 static void
Tr_update2(Trial * trial,int pr,int p_i)119 Tr_update2(Trial *trial, int pr, int p_i)
120 {
121 	if (pr < trial->price) {
122 		trial->price = pr;
123 		trial->dis4 = 0;
124 		trial->prev_index = p_i;
125 		trial->prev_index2 = dual_step_trial;
126 	}
127 }
128 
129 static void
Tr_update3(Trial * trial,int pr,int distance4,int p_i,int p_i2)130 Tr_update3(Trial *trial, int pr, int distance4, int p_i, int p_i2)
131 {
132 	if (pr < trial->price) {
133 		trial->price = pr;
134 		trial->dis4 = distance4;
135 		trial->prev_index = p_i;
136 		trial->prev_index2 = p_i2;
137 	}
138 }
139 
140 typedef struct LZ_encoder LZ_encoder;
141 struct LZ_encoder {
142 	LZ_encoder_base eb;
143 	int	cycles;
144 	int	match_len_limit;
145 	Len_prices match_len_prices;
146 	Len_prices rep_len_prices;
147 	int	pending_num_pairs;
148 	Pair	pairs[max_match_len+1];
149 	Trial	trials[max_num_trials];
150 
151 	int	dis_slot_prices[len_states][2*max_dict_bits];
152 	int	dis_prices[len_states][modeled_distances];
153 	int	align_prices[dis_align_size];
154 	int	num_dis_slots;
155 };
156 
157 static bool
Mb_dec_pos(struct Matchfinder_base * mb,int ahead)158 Mb_dec_pos(struct Matchfinder_base *mb, int ahead)
159 {
160 	if (ahead < 0 || mb->pos < ahead)
161 		return false;
162 	mb->pos -= ahead;
163 	if (mb->cyclic_pos < ahead)
164 		mb->cyclic_pos += mb->dict_size + 1;
165 	mb->cyclic_pos -= ahead;
166 	return true;
167 }
168 
169 int	LZe_get_match_pairs(struct LZ_encoder *e, struct Pair *pairs);
170 
171 /* move-to-front dis in/into reps; do nothing if(dis4 <= 0) */
172 static void
mtf_reps(int dis4,int reps[num_rep_distances])173 mtf_reps(int dis4, int reps[num_rep_distances])
174 {
175 	if (dis4 >= num_rep_distances)			/* match */ {
176 		reps[3] = reps[2];
177 		reps[2] = reps[1];
178 		reps[1] = reps[0];
179 		reps[0] = dis4 - num_rep_distances;
180 	} else if (dis4 > 0)				/* repeated match */ {
181 		int distance = reps[dis4];
182 		int	i;
183 		for (i = dis4; i > 0; --i)
184 			reps[i] = reps[i-1];
185 		reps[0] = distance;
186 	}
187 }
188 
189 static int
LZeb_price_shortrep(struct LZ_encoder_base * eb,State state,int pos_state)190 LZeb_price_shortrep(struct LZ_encoder_base *eb, State state, int pos_state)
191 {
192 	return price0(eb->bm_rep0[state]) + price0(eb->bm_len[state][pos_state]);
193 }
194 
195 static int
LZeb_price_rep(struct LZ_encoder_base * eb,int rep,State state,int pos_state)196 LZeb_price_rep(struct LZ_encoder_base *eb, int rep, State state, int pos_state)
197 {
198 	int	price;
199 	if (rep == 0)
200 		return price0(eb->bm_rep0[state]) +
201 		    price1(eb->bm_len[state][pos_state]);
202 	price = price1(eb->bm_rep0[state]);
203 	if (rep == 1)
204 		price += price0(eb->bm_rep1[state]);
205 	else {
206 		price += price1(eb->bm_rep1[state]);
207 		price += price_bit(eb->bm_rep2[state], rep - 2);
208 	}
209 	return price;
210 }
211 
212 static int
LZe_price_rep0_len(struct LZ_encoder * e,int len,State state,int pos_state)213 LZe_price_rep0_len(struct LZ_encoder *e, int len, State state, int pos_state)
214 {
215 	return LZeb_price_rep(&e->eb, 0, state, pos_state) +
216 	    Lp_price(&e->rep_len_prices, len, pos_state);
217 }
218 
219 static int
LZe_price_pair(struct LZ_encoder * e,int dis,int len,int pos_state)220 LZe_price_pair(struct LZ_encoder *e, int dis, int len, int pos_state)
221 {
222 	int price = Lp_price(&e->match_len_prices, len, pos_state);
223 	int len_state = get_len_state(len);
224 	if (dis < modeled_distances)
225 		return price + e->dis_prices[len_state][dis];
226 	else
227 		return price + e->dis_slot_prices[len_state][get_slot(dis)] +
228 		    e->align_prices[dis & (dis_align_size - 1)];
229 }
230 
231 static int
LZe_read_match_distances(struct LZ_encoder * e)232 LZe_read_match_distances(struct LZ_encoder *e)
233 {
234 	int num_pairs = LZe_get_match_pairs(e, e->pairs);
235 	if (num_pairs > 0) {
236 		int len = e->pairs[num_pairs-1].len;
237 		if (len == e->match_len_limit && len < max_match_len)
238 			e->pairs[num_pairs-1].len =
239 			    Mb_true_match_len(&e->eb.mb, len, e->pairs[num_pairs-1].dis + 1);
240 	}
241 	return num_pairs;
242 }
243 
244 static void
LZe_move_and_update(struct LZ_encoder * e,int n)245 LZe_move_and_update(struct LZ_encoder *e, int n)
246 {
247 	while (true) {
248 		Mb_move_pos(&e->eb.mb);
249 		if (--n <= 0)
250 			break;
251 		LZe_get_match_pairs(e, 0);
252 	}
253 }
254 
255 static void
LZe_backward(struct LZ_encoder * e,int cur)256 LZe_backward(struct LZ_encoder *e, int cur)
257 {
258 	int	dis4 = e->trials[cur].dis4;
259 	while (cur > 0) {
260 		int prev_index = e->trials[cur].prev_index;
261 		struct Trial *prev_trial = &e->trials[prev_index];
262 
263 		if (e->trials[cur].prev_index2 != single_step_trial) {
264 			prev_trial->dis4 = -1;					/* literal */
265 			prev_trial->prev_index = prev_index - 1;
266 			prev_trial->prev_index2 = single_step_trial;
267 			if (e->trials[cur].prev_index2 >= 0) {
268 				struct Trial *prev_trial2 = &e->trials[prev_index-1];
269 				prev_trial2->dis4 = dis4;
270 				dis4 = 0;			/* rep0 */
271 				prev_trial2->prev_index = e->trials[cur].prev_index2;
272 				prev_trial2->prev_index2 = single_step_trial;
273 			}
274 		}
275 		prev_trial->price = cur - prev_index;			/* len */
276 		cur = dis4;
277 		dis4 = prev_trial->dis4;
278 		prev_trial->dis4 = cur;
279 		cur = prev_index;
280 	}
281 }
282 
283 enum {
284 	Nprevpos3 = 1 << 16,
285 	Nprevpos2 = 1 << 10
286 };
287 
288 static bool
LZe_init(struct LZ_encoder * e,int dict_size,int len_limit,int ifd,int outfd)289 LZe_init(struct LZ_encoder *e, int dict_size, int len_limit, int ifd, int outfd)
290 {
291 	enum {
292 		before = max_num_trials,
293 		/* bytes to keep in buffer after pos */
294 		after_size = (2 *max_match_len) + 1,
295 		dict_factor = 2,
296 		Nprevpos23 = Nprevpos2 + Nprevpos3,
297 		pos_array_factor = 2
298 	};
299 
300 	if (!LZeb_init(&e->eb, before, dict_size, after_size, dict_factor,
301 	    Nprevpos23, pos_array_factor, ifd, outfd))
302 		return false;
303 	e->cycles = (len_limit < max_match_len) ? 16 + (len_limit / 2) : 256;
304 	e->match_len_limit = len_limit;
305 	Lp_init(&e->match_len_prices, &e->eb.match_len_model, e->match_len_limit);
306 	Lp_init(&e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit);
307 	e->pending_num_pairs = 0;
308 	e->num_dis_slots = 2 * real_bits(e->eb.mb.dict_size - 1);
309 	e->trials[1].prev_index = 0;
310 	e->trials[1].prev_index2 = single_step_trial;
311 	return true;
312 }
313 
314 static void
LZe_reset(struct LZ_encoder * e)315 LZe_reset(struct LZ_encoder *e)
316 {
317 	LZeb_reset(&e->eb);
318 	Lp_reset(&e->match_len_prices);
319 	Lp_reset(&e->rep_len_prices);
320 	e->pending_num_pairs = 0;
321 }
322 
323 bool	LZe_encode_member(struct LZ_encoder *e, uvlong member_size);
324