xref: /plan9-contrib/sys/src/cmd/lzip/encoder.c (revision 13d37d7716a3e781f408392d7869dff5927c6669)
1 /*  Clzip - LZMA lossless data compressor
2     Copyright (C) 2010-2017 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include "lzip.h"
18 #include "encoder_base.h"
19 #include "encoder.h"
20 
21 CRC32 crc32;
22 
23 /*
24  * starting at data[len] and data[len-delta], what's the longest match
25  * up to len_limit?
26  */
27 int
maxmatch(uchar * data,int delta,int len,int len_limit)28 maxmatch(uchar *data, int delta, int len, int len_limit)
29 {
30 	uchar *pdel, *p;
31 
32 	p = &data[len];
33 	pdel = p - delta;
34 	while (*pdel++ == *p++ && len < len_limit)
35 		++len;
36 	return len;
37 }
38 
39 static int
findpairmaxlen(LZ_encoder * e,Pair ** pairsp,int * npairsp,int maxlen,int pos1,int len_limit,int min_pos,uchar * data,int np2,int np3)40 findpairmaxlen(LZ_encoder *e, Pair **pairsp, int *npairsp, int maxlen, int pos1,
41 	int len_limit, int min_pos, uchar *data, int np2, int np3)
42 {
43 	int num_pairs;
44 	Pair *pairs;
45 
46 	pairs = *pairsp;
47 	num_pairs = *npairsp;
48 	if (np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0]) {
49 		pairs[0].dis = e->eb.mb.pos - np2;
50 		pairs[0].len = maxlen = 2;
51 		num_pairs = 1;
52 	}
53 	if (np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0]) {
54 		maxlen = 3;
55 		np2 = np3;
56 		pairs[num_pairs].dis = e->eb.mb.pos - np2;
57 		++num_pairs;
58 	}
59 	if (num_pairs > 0) {
60 		maxlen = maxmatch(data, pos1 - np2, maxlen, len_limit);
61 		pairs[num_pairs-1].len = maxlen;
62 		if (maxlen >= len_limit)
63 			*pairsp = nil;	/* done. now just skip */
64 	}
65 	if (maxlen < 3)
66 		maxlen = 3;
67 	*npairsp = num_pairs;
68 	return maxlen;
69 }
70 
71 int
LZe_get_match_pairs(LZ_encoder * e,Pair * pairs)72 LZe_get_match_pairs(LZ_encoder *e, Pair *pairs)
73 {
74 	int len = 0, len0, len1, maxlen, num_pairs, len_limit, avail;
75 	int pos1, min_pos, cyclic_pos, delta, count, key2, key3, key4, newpos1;
76 	int32_t *ptr0, *ptr1, *newptr, *prevpos;
77 	uchar *data;
78 	uchar *p;
79 	unsigned tmp;
80 
81 	len_limit = e->match_len_limit;
82 	avail = Mb_avail_bytes(&e->eb.mb);
83 	if (len_limit > avail) {
84 		len_limit = avail;
85 		if (len_limit < 4)
86 			return 0;
87 	}
88 
89 	data = Mb_ptr_to_current_pos(&e->eb.mb);
90 	tmp = crc32[data[0]] ^ data[1];
91 	key2 = tmp & (Nprevpos2 - 1);
92 	tmp ^= (unsigned)data[2] << 8;
93 	key3 = Nprevpos2 + (tmp & (Nprevpos3 - 1));
94 	key4 = Nprevpos2 + Nprevpos3 +
95 	    ((tmp ^ (crc32[data[3]] << 5)) & e->eb.mb.key4_mask);
96 
97 	min_pos = (e->eb.mb.pos > e->eb.mb.dict_size) ?
98 	    e->eb.mb.pos - e->eb.mb.dict_size : 0;
99 	pos1 = e->eb.mb.pos + 1;
100 	prevpos = e->eb.mb.prev_positions;
101 	maxlen = 0;
102 	num_pairs = 0;
103 	if (pairs)
104 		maxlen = findpairmaxlen(e, &pairs, &num_pairs, maxlen, pos1,
105 			len_limit, min_pos, data, prevpos[key2], prevpos[key3]);
106 	newpos1 = prevpos[key4];
107 	prevpos[key2] = prevpos[key3] = prevpos[key4] = pos1;
108 
109 	cyclic_pos = e->eb.mb.cyclic_pos;
110 	ptr0 = e->eb.mb.pos_array + (cyclic_pos << 1);
111 	ptr1 = ptr0 + 1;
112 	len0 = len1 = 0;
113 	for (count = e->cycles; ;) {
114 		if (newpos1 <= min_pos || --count < 0) {
115 			*ptr0 = *ptr1 = 0;
116 			break;
117 		}
118 
119 		delta = pos1 - newpos1;
120 		newptr = e->eb.mb.pos_array + ((cyclic_pos - delta +
121 		    (cyclic_pos >= delta? 0: e->eb.mb.dict_size + 1)) << 1);
122 		p = &data[len];
123 		if (p[-delta] == *p) {
124 			len = maxmatch(data, delta, len + 1, len_limit);
125 			if (pairs && maxlen < len) {
126 				pairs[num_pairs].dis = delta - 1;
127 				pairs[num_pairs].len = maxlen = len;
128 				++num_pairs;
129 			}
130 			if (len >= len_limit) {
131 				*ptr0 = newptr[0];
132 				*ptr1 = newptr[1];
133 				break;
134 			}
135 			p = &data[len];
136 		}
137 		if (p[-delta] < *p) {
138 			*ptr0 = newpos1;
139 			ptr0 = newptr + 1;
140 			newpos1 = *ptr0;
141 			len0 = len;
142 			if (len1 < len)
143 				len = len1;
144 		} else {
145 			*ptr1 = newpos1;
146 			ptr1 = newptr;
147 			newpos1 = *ptr1;
148 			len1 = len;
149 			if (len0 < len)
150 				len = len0;
151 		}
152 	}
153 	return num_pairs;
154 }
155 
156 static void
LZe_update_distance_prices(LZ_encoder * e)157 LZe_update_distance_prices(LZ_encoder *e)
158 {
159 	int dis, len_state;
160 
161 	for (dis = start_dis_model; dis < modeled_distances; ++dis) {
162 		int dis_slot = dis_slots[dis];
163 		int direct_bits = (dis_slot >> 1) - 1;
164 		int base = (2 | (dis_slot & 1)) << direct_bits;
165 		int price = price_symbol_reversed(e->eb.bm_dis + (base - dis_slot),
166 		    dis - base, direct_bits);
167 
168 		for (len_state = 0; len_state < len_states; ++len_state)
169 			e->dis_prices[len_state][dis] = price;
170 	}
171 
172 	for (len_state = 0; len_state < len_states; ++len_state) {
173 		int *dsp = e->dis_slot_prices[len_state];
174 		int *dp = e->dis_prices[len_state];
175 		Bit_model * bmds = e->eb.bm_dis_slot[len_state];
176 		int slot = 0;
177 
178 		for (; slot < end_dis_model; ++slot)
179 			dsp[slot] = price_symbol6(bmds, slot);
180 		for (; slot < e->num_dis_slots; ++slot)
181 			dsp[slot] = price_symbol6(bmds, slot) +
182 			    ((((slot >> 1) - 1) - dis_align_bits) << price_shift_bits);
183 
184 		for (dis = 0; dis < start_dis_model; ++dis)
185 			dp[dis] = dsp[dis];
186 		for (; dis < modeled_distances; ++dis)
187 			dp[dis] += dsp[dis_slots[dis]];
188 	}
189 }
190 
191 static int
pricestate2(LZ_encoder * e,int price,int * ps2p,State * st2p,int len2)192 pricestate2(LZ_encoder *e, int price, int *ps2p, State *st2p, int len2)
193 {
194 	int pos_state2;
195 	State state2;
196 
197 	state2 = *st2p;
198 	pos_state2 = *ps2p;
199 
200 	pos_state2 = (pos_state2 + 1) & pos_state_mask;
201 	state2 = St_set_char(state2);
202 	price += price1(e->eb.bm_match[state2][pos_state2]) +
203 		price1(e->eb.bm_rep[state2]) +
204 		LZe_price_rep0_len(e, len2, state2, pos_state2);
205 
206 	*ps2p = pos_state2;
207 	*st2p = state2;
208 	return price;
209 }
210 
211 static int
encinit(LZ_encoder * e,int reps[num_rep_distances],int replens[num_rep_distances],State state,int main_len,int num_pairs,int rep_index,int * ntrialsp)212 encinit(LZ_encoder *e, int reps[num_rep_distances],
213 	int replens[num_rep_distances], State state, int main_len,
214 	int num_pairs, int rep_index, int *ntrialsp)
215 {
216 	int i, rep, num_trials, len;
217 	int pos_state = Mb_data_position(&e->eb.mb) & pos_state_mask;
218 	int match_price = price1(e->eb.bm_match[state][pos_state]);
219 	int rep_match_price = match_price + price1(e->eb.bm_rep[state]);
220 	uchar prev_byte = Mb_peek(&e->eb.mb, 1);
221 	uchar cur_byte = Mb_peek(&e->eb.mb, 0);
222 	uchar match_byte = Mb_peek(&e->eb.mb, reps[0] + 1);
223 
224 	e->trials[1].price = price0(e->eb.bm_match[state][pos_state]);
225 	if (St_is_char(state))
226 		e->trials[1].price += LZeb_price_literal(&e->eb,
227 			prev_byte, cur_byte);
228 	else
229 		e->trials[1].price += LZeb_price_matched(&e->eb,
230 			prev_byte, cur_byte, match_byte);
231 	e->trials[1].dis4 = -1;				/* literal */
232 
233 	if (match_byte == cur_byte)
234 		Tr_update(&e->trials[1], rep_match_price +
235 		    LZeb_price_shortrep(&e->eb, state, pos_state), 0, 0);
236 	num_trials = replens[rep_index];
237 	if (num_trials < main_len)
238 		num_trials = main_len;
239 	*ntrialsp = num_trials;
240 	if (num_trials < min_match_len) {
241 		e->trials[0].price = 1;
242 		e->trials[0].dis4 = e->trials[1].dis4;
243 		Mb_move_pos(&e->eb.mb);
244 		return 1;
245 	}
246 
247 	e->trials[0].state = state;
248 	for (i = 0; i < num_rep_distances; ++i)
249 		e->trials[0].reps[i] = reps[i];
250 
251 	for (len = min_match_len; len <= num_trials; ++len)
252 		e->trials[len].price = infinite_price;
253 
254 	for (rep = 0; rep < num_rep_distances; ++rep) {
255 		int price, replen;
256 
257 		if (replens[rep] < min_match_len)
258 			continue;
259 		price = rep_match_price + LZeb_price_rep(&e->eb, rep,
260 			state, pos_state);
261 		replen = replens[rep];
262 		for (len = min_match_len; len <= replen; ++len)
263 			Tr_update(&e->trials[len], price +
264 			    Lp_price(&e->rep_len_prices, len, pos_state), rep, 0);
265 	}
266 
267 	if (main_len > replens[0]) {
268 		int dis, normal_match_price = match_price +
269 			price0(e->eb.bm_rep[state]);
270 		int replp1 = replens[0] + 1;
271 		int i = 0, len = max(replp1, min_match_len);
272 
273 		while (len > e->pairs[i].len)
274 			++i;
275 		for (;;) {
276 			dis = e->pairs[i].dis;
277 			Tr_update(&e->trials[len], normal_match_price +
278 			    LZe_price_pair(e, dis, len, pos_state),
279 			    dis + num_rep_distances, 0);
280 			if (++len > e->pairs[i].len && ++i >= num_pairs)
281 				break;
282 		}
283 	}
284 	return 0;
285 }
286 
287 static void
finalvalues(LZ_encoder * e,int cur,Trial * cur_trial,State * cstatep)288 finalvalues(LZ_encoder *e, int cur, Trial *cur_trial, State *cstatep)
289 {
290 	int i;
291 	int dis4 = cur_trial->dis4;
292 	int prev_index = cur_trial->prev_index;
293 	int prev_index2 = cur_trial->prev_index2;
294 	State cur_state;
295 
296 	if (prev_index2 == single_step_trial) {
297 		cur_state = e->trials[prev_index].state;
298 		if (prev_index + 1 == cur) {	/* len == 1 */
299 			if (dis4 == 0)
300 				cur_state = St_set_short_rep(cur_state);
301 			else
302 				cur_state = St_set_char(cur_state);	/* literal */
303 		} else if (dis4 < num_rep_distances)
304 			cur_state = St_set_rep(cur_state);
305 		else
306 			cur_state = St_set_match(cur_state);
307 	} else {
308 		if (prev_index2 == dual_step_trial)	/* dis4 == 0 (rep0) */
309 			--prev_index;
310 		else	/* prev_index2 >= 0 */
311 			prev_index = prev_index2;
312 		cur_state = 8;		/* St_set_char_rep(); */
313 	}
314 	cur_trial->state = cur_state;
315 	for (i = 0; i < num_rep_distances; ++i)
316 		cur_trial->reps[i] = e->trials[prev_index].reps[i];
317 	mtf_reps(dis4, cur_trial->reps);	/* literal is ignored */
318 	*cstatep = cur_state;
319 }
320 
321 static int
litrep0(LZ_encoder * e,State cur_state,int cur,Trial * cur_trial,int num_trials,int triable_bytes,int pos_state,int next_price)322 litrep0(LZ_encoder *e, State cur_state, int cur, Trial *cur_trial,
323 	int num_trials, int triable_bytes, int pos_state, int next_price)
324 {
325 	int len = 1, endtrials, limit, mlpl1, dis;
326 	uchar *data = Mb_ptr_to_current_pos(&e->eb.mb);
327 
328 	dis = cur_trial->reps[0] + 1;
329 	mlpl1 = e->match_len_limit + 1;
330 	limit = min(mlpl1, triable_bytes);
331 	len = maxmatch(data, dis, len, limit);
332 	if (--len >= min_match_len) {
333 		int pos_state2, price;
334 		State state2;
335 
336 		pos_state2 = (pos_state + 1) & pos_state_mask;
337 		state2 = St_set_char(cur_state);
338 		price = next_price + price1(e->eb.bm_match[state2][pos_state2])+
339 			price1(e->eb.bm_rep[state2]) +
340 			LZe_price_rep0_len(e, len, state2, pos_state2);
341 		endtrials = cur + 1 + len;
342 		while (num_trials < endtrials)
343 			e->trials[++num_trials].price = infinite_price;
344 		Tr_update2(&e->trials[endtrials], price, cur + 1);
345 	}
346 	return num_trials;
347 }
348 
349 static int
repdists(LZ_encoder * e,State cur_state,int cur,Trial * cur_trial,int num_trials,int triable_bytes,int pos_state,int rep_match_price,int len_limit,int * stlenp)350 repdists(LZ_encoder *e, State cur_state, int cur, Trial *cur_trial,
351 	int num_trials, int triable_bytes, int pos_state,
352 	int rep_match_price, int len_limit, int *stlenp)
353 {
354 	int i, rep, len, price, dis, start_len;
355 
356 	start_len = *stlenp;
357 	for (rep = 0; rep < num_rep_distances; ++rep) {
358 		uchar *data = Mb_ptr_to_current_pos(&e->eb.mb);
359 
360 		dis = cur_trial->reps[rep] + 1;
361 		if (data[0-dis] != data[0] || data[1-dis] != data[1])
362 			continue;
363 		len = maxmatch(data, dis, min_match_len, len_limit);
364 		while (num_trials < cur + len)
365 			e->trials[++num_trials].price = infinite_price;
366 		price = rep_match_price + LZeb_price_rep(&e->eb, rep,
367 			cur_state, pos_state);
368 		for (i = min_match_len; i <= len; ++i)
369 			Tr_update(&e->trials[cur+i], price +
370 			    Lp_price(&e->rep_len_prices, i, pos_state), rep, cur);
371 
372 		if (rep == 0)
373 			start_len = len + 1; /* discard shorter matches */
374 
375 		/* try rep + literal + rep0 */
376 		{
377 			int pos_state2, endtrials, limit, mlpl2, len2;
378 			State state2;
379 
380 			len2 = len + 1;
381 			mlpl2 = e->match_len_limit + len2;
382 			limit = min(mlpl2, triable_bytes);
383 			len2 = maxmatch(data, dis, len2, limit);
384 			len2 -= len + 1;
385 			if (len2 < min_match_len)
386 				continue;
387 
388 			pos_state2 = (pos_state + len) & pos_state_mask;
389 			state2 = St_set_rep(cur_state);
390 			price += Lp_price(&e->rep_len_prices, len, pos_state) +
391 			    price0(e->eb.bm_match[state2][pos_state2]) +
392 			    LZeb_price_matched(&e->eb, data[len-1],
393 				data[len], data[len-dis]);
394 			price = pricestate2(e, price, &pos_state2,
395 				&state2, len2);
396 			endtrials = cur + len + 1 + len2;
397 			while (num_trials < endtrials)
398 				e->trials[++num_trials].price = infinite_price;
399 			Tr_update3(&e->trials[endtrials], price, rep,
400 				endtrials - len2, cur);
401 		}
402 	}
403 	*stlenp = start_len;
404 	return num_trials;
405 }
406 
407 static int
trymatches(LZ_encoder * e,State cur_state,int cur,int num_trials,int triable_bytes,int pos_state,int num_pairs,int normal_match_price,int start_len)408 trymatches(LZ_encoder *e, State cur_state, int cur, int num_trials,
409 	int triable_bytes, int pos_state, int num_pairs,
410 	int normal_match_price, int start_len)
411 {
412 	int i, dis, len, price;
413 
414 	i = 0;
415 	while (e->pairs[i].len < start_len)
416 		++i;
417 	dis = e->pairs[i].dis;
418 	for (len = start_len; ; ++len) {
419 		price = normal_match_price + LZe_price_pair(e, dis, len, pos_state);
420 		Tr_update(&e->trials[cur+len], price, dis + num_rep_distances, cur);
421 
422 		/* try match + literal + rep0 */
423 		if (len == e->pairs[i].len) {
424 			uchar *data = Mb_ptr_to_current_pos(&e->eb.mb);
425 			int endtrials, mlpl2, limit;
426 			int dis2 = dis + 1, len2 = len + 1;
427 
428 			mlpl2 = e->match_len_limit + len2;
429 			limit = min(mlpl2, triable_bytes);
430 			len2 = maxmatch(data, dis2, len2, limit);
431 			len2 -= len + 1;
432 			if (len2 >= min_match_len) {
433 				int pos_state2 = (pos_state + len) &pos_state_mask;
434 				State state2 = St_set_match(cur_state);
435 
436 	 			price += price0(e->eb.bm_match[state2][pos_state2]) +
437 				    LZeb_price_matched(&e->eb, data[len-1], data[len], data[len-dis2]);
438 				price = pricestate2(e, price,
439 				    &pos_state2, &state2, len2);
440 				endtrials = cur + len + 1 + len2;
441 				while (num_trials < endtrials)
442 					e->trials[++num_trials].price = infinite_price;
443 				Tr_update3(&e->trials[endtrials],
444 					price, dis +
445 					num_rep_distances,
446 					endtrials - len2, cur);
447 			}
448 			if (++i >= num_pairs)
449 				break;
450 			dis = e->pairs[i].dis;
451 		}
452 	}
453 	return num_trials;
454 }
455 
456 /*
457  * Returns the number of bytes advanced (ahead).
458    trials[0]..trials[ahead-1] contain the steps to encode.
459    (trials[0].dis4 == -1) means literal.
460    A match/rep longer or equal than match_len_limit finishes the sequence.
461  */
462 static int
LZe_sequence_optimizer(LZ_encoder * e,int reps[num_rep_distances],State state)463 LZe_sequence_optimizer(LZ_encoder *e, int reps[num_rep_distances], State state)
464 {
465 	int main_len, num_pairs, i, num_trials;
466 	int rep_index = 0, cur = 0;
467 	int replens[num_rep_distances];
468 
469 	if (e->pending_num_pairs > 0) {		/* from previous call */
470 		num_pairs = e->pending_num_pairs;
471 		e->pending_num_pairs = 0;
472 	} else
473 		num_pairs = LZe_read_match_distances(e);
474 	main_len = (num_pairs > 0) ? e->pairs[num_pairs-1].len : 0;
475 
476 	for (i = 0; i < num_rep_distances; ++i) {
477 		replens[i] = Mb_true_match_len(&e->eb.mb, 0, reps[i] + 1);
478 		if (replens[i] > replens[rep_index])
479 			rep_index = i;
480 	}
481 	if (replens[rep_index] >= e->match_len_limit) {
482 		e->trials[0].price = replens[rep_index];
483 		e->trials[0].dis4 = rep_index;
484 		LZe_move_and_update(e, replens[rep_index]);
485 		return replens[rep_index];
486 	}
487 
488 	if (main_len >= e->match_len_limit) {
489 		e->trials[0].price = main_len;
490 		e->trials[0].dis4 = e->pairs[num_pairs-1].dis + num_rep_distances;
491 		LZe_move_and_update(e, main_len);
492 		return main_len;
493 	}
494 
495 	if (encinit(e, reps, replens, state, main_len, num_pairs, rep_index,
496 	    &num_trials) > 0)
497 		return 1;
498 
499 	/*
500 	 * Optimize price.
501 	 */
502 	for (;;) {
503 		Trial *cur_trial, *next_trial;
504 		int newlen, pos_state, triable_bytes, len_limit;
505 		int next_price, match_price, rep_match_price;
506 		int start_len = min_match_len;
507 		State cur_state;
508 		uchar prev_byte, cur_byte, match_byte;
509 
510 		Mb_move_pos(&e->eb.mb);
511 		if (++cur >= num_trials) {	/* no more initialized trials */
512 			LZe_backward(e, cur);
513 			return cur;
514 		}
515 
516 		num_pairs = LZe_read_match_distances(e);
517 		newlen = num_pairs > 0? e->pairs[num_pairs-1].len: 0;
518 		if (newlen >= e->match_len_limit) {
519 			e->pending_num_pairs = num_pairs;
520 			LZe_backward(e, cur);
521 			return cur;
522 		}
523 
524 		/* give final values to current trial */
525 		cur_trial = &e->trials[cur];
526 		finalvalues(e, cur, cur_trial, &cur_state);
527 
528 		pos_state = Mb_data_position(&e->eb.mb) & pos_state_mask;
529 		prev_byte = Mb_peek(&e->eb.mb, 1);
530 		cur_byte = Mb_peek(&e->eb.mb, 0);
531 		match_byte = Mb_peek(&e->eb.mb, cur_trial->reps[0] + 1);
532 
533 		next_price = cur_trial->price +
534 		    price0(e->eb.bm_match[cur_state][pos_state]);
535 		if (St_is_char(cur_state))
536 			next_price += LZeb_price_literal(&e->eb, prev_byte, cur_byte);
537 		else
538 			next_price += LZeb_price_matched(&e->eb, prev_byte,
539 				cur_byte, match_byte);
540 
541 		/* try last updates to next trial */
542 		next_trial = &e->trials[cur+1];
543 
544 		Tr_update(next_trial, next_price, -1, cur);	/* literal */
545 
546 		match_price = cur_trial->price +
547 			price1(e->eb.bm_match[cur_state][pos_state]);
548 		rep_match_price = match_price + price1(e->eb.bm_rep[cur_state]);
549 
550 		if (match_byte == cur_byte && next_trial->dis4 != 0 &&
551 		    next_trial->prev_index2 == single_step_trial) {
552 			int price = rep_match_price +
553 			    LZeb_price_shortrep(&e->eb, cur_state, pos_state);
554 			if (price <= next_trial->price) {
555 				next_trial->price = price;
556 				next_trial->dis4 = 0;		/* rep0 */
557 				next_trial->prev_index = cur;
558 			}
559 		}
560 
561 		int trm1mcur = max_num_trials - 1 - cur;
562 
563 		triable_bytes = Mb_avail_bytes(&e->eb.mb);
564 		if (triable_bytes > trm1mcur)
565 			triable_bytes = trm1mcur;
566 		if (triable_bytes < min_match_len)
567 			continue;
568 
569 		len_limit = min(e->match_len_limit, triable_bytes);
570 
571 		/* try literal + rep0 */
572 		if (match_byte != cur_byte && next_trial->prev_index != cur)
573 			num_trials = litrep0(e, cur_state, cur, cur_trial,
574 				num_trials, triable_bytes, pos_state, next_price);
575 
576 		/* try rep distances */
577 		num_trials = repdists(e, cur_state, cur, cur_trial,
578 			num_trials, triable_bytes, pos_state,
579 			rep_match_price, len_limit, &start_len);
580 
581 		/* try matches */
582 		if (newlen >= start_len && newlen <= len_limit) {
583 			int normal_match_price = match_price +
584 			    price0(e->eb.bm_rep[cur_state]);
585 
586 			while (num_trials < cur + newlen)
587 				e->trials[++num_trials].price = infinite_price;
588 
589 			num_trials = trymatches(e, cur_state, cur, num_trials,
590 				triable_bytes, pos_state, num_pairs,
591 				normal_match_price, start_len);
592 		}
593 	}
594 }
595 
596 static int
encrepmatch(LZ_encoder * e,State state,int len,int dis,int pos_state)597 encrepmatch(LZ_encoder *e, State state, int len, int dis, int pos_state)
598 {
599 	int bit = (dis == 0);
600 
601 	Re_encode_bit(&e->eb.renc, &e->eb.bm_rep0[state], !bit);
602 	if (bit)
603 		Re_encode_bit(&e->eb.renc, &e->eb.bm_len[state][pos_state],
604 			len > 1);
605 	else {
606 		Re_encode_bit(&e->eb.renc, &e->eb.bm_rep1[state], dis > 1);
607 		if (dis > 1)
608 			Re_encode_bit(&e->eb.renc, &e->eb.bm_rep2[state],
609 				dis > 2);
610 	}
611 	if (len == 1)
612 		state = St_set_short_rep(state);
613 	else {
614 		Re_encode_len(&e->eb.renc, &e->eb.rep_len_model, len, pos_state);
615 		Lp_decr_counter(&e->rep_len_prices, pos_state);
616 		state = St_set_rep(state);
617 	}
618 	return state;
619 }
620 
621 bool
LZe_encode_member(LZ_encoder * e,uvlong member_size)622 LZe_encode_member(LZ_encoder *e, uvlong member_size)
623 {
624 	uvlong member_size_limit = member_size - Ft_size - max_marker_size;
625 	bool best = (e->match_len_limit > 12);
626 	int dis_price_count = best? 1: 512;
627 	int align_price_count = best? 1: dis_align_size;
628 	int price_count = (e->match_len_limit > 36? 1013 : 4093);
629 	int price_counter = 0;		/* counters may decrement below 0 */
630 	int dis_price_counter = 0;
631 	int align_price_counter = 0;
632 	int ahead, i;
633 	int reps[num_rep_distances];
634 	State state = 0;
635 
636 	for (i = 0; i < num_rep_distances; ++i)
637 		reps[i] = 0;
638 
639 	if (Mb_data_position(&e->eb.mb) != 0 ||
640 	    Re_member_position(&e->eb.renc) != Fh_size)
641 		return false;			/* can be called only once */
642 
643 	if (!Mb_data_finished(&e->eb.mb)) {	/* encode first byte */
644 		uchar prev_byte = 0;
645 		uchar cur_byte = Mb_peek(&e->eb.mb, 0);
646 
647 		Re_encode_bit(&e->eb.renc, &e->eb.bm_match[state][0], 0);
648 		LZeb_encode_literal(&e->eb, prev_byte, cur_byte);
649 		CRC32_update_byte(&e->eb.crc, cur_byte);
650 		LZe_get_match_pairs(e, 0);
651 		Mb_move_pos(&e->eb.mb);
652 	}
653 
654 	while (!Mb_data_finished(&e->eb.mb)) {
655 		if (price_counter <= 0 && e->pending_num_pairs == 0) {
656 			/* recalculate prices every these many bytes */
657 			price_counter = price_count;
658 			if (dis_price_counter <= 0) {
659 				dis_price_counter = dis_price_count;
660 				LZe_update_distance_prices(e);
661 			}
662 			if (align_price_counter <= 0) {
663 				align_price_counter = align_price_count;
664 				for (i = 0; i < dis_align_size; ++i)
665 					e->align_prices[i] = price_symbol_reversed(
666 						e->eb.bm_align, i, dis_align_bits);
667 			}
668 			Lp_update_prices(&e->match_len_prices);
669 			Lp_update_prices(&e->rep_len_prices);
670 		}
671 
672 		ahead = LZe_sequence_optimizer(e, reps, state);
673 		price_counter -= ahead;
674 
675 		for (i = 0; ahead > 0;) {
676 			int pos_state = (Mb_data_position(&e->eb.mb) - ahead) &
677 				pos_state_mask;
678 			int len = e->trials[i].price;
679 			int dis = e->trials[i].dis4;
680 			bool bit = (dis < 0);
681 
682 			Re_encode_bit(&e->eb.renc, &e->eb.bm_match[state][pos_state],
683 				!bit);
684 			if (bit) {			/* literal byte */
685 				uchar prev_byte = Mb_peek(&e->eb.mb, ahead+1);
686 				uchar cur_byte = Mb_peek(&e->eb.mb, ahead);
687 
688 				CRC32_update_byte(&e->eb.crc, cur_byte);
689 				if (St_is_char(state))
690 					LZeb_encode_literal(&e->eb, prev_byte,
691 						cur_byte);
692 				else {
693 					uchar match_byte = Mb_peek(&e->eb.mb,
694 						ahead + reps[0] + 1);
695 
696 					LZeb_encode_matched(&e->eb, prev_byte,
697 						cur_byte, match_byte);
698 				}
699 				state = St_set_char(state);
700 			} else {		/* match or repeated match */
701 				CRC32_update_buf(&e->eb.crc,
702 					Mb_ptr_to_current_pos(&e->eb.mb) - ahead,
703 					len);
704 				mtf_reps(dis, reps);
705 				bit = (dis < num_rep_distances);
706 				Re_encode_bit(&e->eb.renc, &e->eb.bm_rep[state],
707 					bit);
708 				if (bit) 		 /* repeated match */
709 					state = encrepmatch(e, state, len, dis,
710 						pos_state);
711 				else {			/* match */
712 					dis -= num_rep_distances;
713 					LZeb_encode_pair(&e->eb, dis, len,
714 						pos_state);
715 					if (dis >= modeled_distances)
716 						--align_price_counter;
717 					--dis_price_counter;
718 					Lp_decr_counter(
719 						&e->match_len_prices, pos_state);
720 					state = St_set_match(state);
721 				}
722 			}
723 			ahead -= len;
724 			i += len;
725 			if (Re_member_position(&e->eb.renc) >= member_size_limit) {
726 				if (!Mb_dec_pos(&e->eb.mb, ahead))
727 					return false;
728 				LZeb_full_flush(&e->eb, state);
729 				return true;
730 			}
731 		}
732 	}
733 	LZeb_full_flush(&e->eb, state);
734 	return true;
735 }
736