1*13d37d77SDavid du Colombier /* Clzip - LZMA lossless data compressor
2*13d37d77SDavid du Colombier Copyright (C) 2010-2017 Antonio Diaz Diaz.
3*13d37d77SDavid du Colombier
4*13d37d77SDavid du Colombier This program is free software: you can redistribute it and/or modify
5*13d37d77SDavid du Colombier it under the terms of the GNU General Public License as published by
6*13d37d77SDavid du Colombier the Free Software Foundation, either version 2 of the License, or
7*13d37d77SDavid du Colombier (at your option) any later version.
8*13d37d77SDavid du Colombier
9*13d37d77SDavid du Colombier This program is distributed in the hope that it will be useful,
10*13d37d77SDavid du Colombier but WITHOUT ANY WARRANTY; without even the implied warranty of
11*13d37d77SDavid du Colombier MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*13d37d77SDavid du Colombier GNU General Public License for more details.
13*13d37d77SDavid du Colombier
14*13d37d77SDavid du Colombier You should have received a copy of the GNU General Public License
15*13d37d77SDavid du Colombier along with this program. If not, see <http://www.gnu.org/licenses/>. */
16*13d37d77SDavid du Colombier
17*13d37d77SDavid du Colombier typedef struct Len_prices Len_prices;
18*13d37d77SDavid du Colombier struct Len_prices {
19*13d37d77SDavid du Colombier struct Len_model *lm;
20*13d37d77SDavid du Colombier int len_syms;
21*13d37d77SDavid du Colombier int count;
22*13d37d77SDavid du Colombier int prices[pos_states][max_len_syms];
23*13d37d77SDavid du Colombier int counters[pos_states]; /* may decrement below 0 */
24*13d37d77SDavid du Colombier };
25*13d37d77SDavid du Colombier
26*13d37d77SDavid du Colombier static void
Lp_update_low_mid_prices(Len_prices * lp,int pos_state)27*13d37d77SDavid du Colombier Lp_update_low_mid_prices(Len_prices *lp, int pos_state)
28*13d37d77SDavid du Colombier {
29*13d37d77SDavid du Colombier int *pps = lp->prices[pos_state];
30*13d37d77SDavid du Colombier int tmp = price0(lp->lm->choice1);
31*13d37d77SDavid du Colombier int len = 0;
32*13d37d77SDavid du Colombier for (; len < len_low_syms && len < lp->len_syms; ++len)
33*13d37d77SDavid du Colombier pps[len] = tmp + price_symbol3(lp->lm->bm_low[pos_state], len);
34*13d37d77SDavid du Colombier if (len >= lp->len_syms)
35*13d37d77SDavid du Colombier return;
36*13d37d77SDavid du Colombier tmp = price1(lp->lm->choice1) + price0(lp->lm->choice2);
37*13d37d77SDavid du Colombier for (; len < len_low_syms + len_mid_syms && len < lp->len_syms; ++len)
38*13d37d77SDavid du Colombier pps[len] = tmp +
39*13d37d77SDavid du Colombier price_symbol3(lp->lm->bm_mid[pos_state], len - len_low_syms);
40*13d37d77SDavid du Colombier }
41*13d37d77SDavid du Colombier
42*13d37d77SDavid du Colombier static void
Lp_update_high_prices(Len_prices * lp)43*13d37d77SDavid du Colombier Lp_update_high_prices(Len_prices *lp)
44*13d37d77SDavid du Colombier {
45*13d37d77SDavid du Colombier int tmp = price1(lp->lm->choice1) + price1(lp->lm->choice2);
46*13d37d77SDavid du Colombier int len;
47*13d37d77SDavid du Colombier for (len = len_low_syms + len_mid_syms; len < lp->len_syms; ++len)
48*13d37d77SDavid du Colombier /* using 4 slots per value makes "Lp_price" faster */
49*13d37d77SDavid du Colombier lp->prices[3][len] = lp->prices[2][len] =
50*13d37d77SDavid du Colombier lp->prices[1][len] = lp->prices[0][len] = tmp +
51*13d37d77SDavid du Colombier price_symbol8(lp->lm->bm_high, len - len_low_syms - len_mid_syms);
52*13d37d77SDavid du Colombier }
53*13d37d77SDavid du Colombier
54*13d37d77SDavid du Colombier static void
Lp_reset(Len_prices * lp)55*13d37d77SDavid du Colombier Lp_reset(Len_prices *lp)
56*13d37d77SDavid du Colombier {
57*13d37d77SDavid du Colombier int i;
58*13d37d77SDavid du Colombier for (i = 0; i < pos_states; ++i)
59*13d37d77SDavid du Colombier lp->counters[i] = 0;
60*13d37d77SDavid du Colombier }
61*13d37d77SDavid du Colombier
62*13d37d77SDavid du Colombier static void
Lp_init(Len_prices * lp,Len_model * lm,int match_len_limit)63*13d37d77SDavid du Colombier Lp_init(Len_prices *lp, Len_model *lm, int match_len_limit)
64*13d37d77SDavid du Colombier {
65*13d37d77SDavid du Colombier lp->lm = lm;
66*13d37d77SDavid du Colombier lp->len_syms = match_len_limit + 1 - min_match_len;
67*13d37d77SDavid du Colombier lp->count = (match_len_limit > 12) ? 1 : lp->len_syms;
68*13d37d77SDavid du Colombier Lp_reset(lp);
69*13d37d77SDavid du Colombier }
70*13d37d77SDavid du Colombier
71*13d37d77SDavid du Colombier static void
Lp_decr_counter(Len_prices * lp,int pos_state)72*13d37d77SDavid du Colombier Lp_decr_counter(Len_prices *lp, int pos_state)
73*13d37d77SDavid du Colombier {
74*13d37d77SDavid du Colombier --lp->counters[pos_state];
75*13d37d77SDavid du Colombier }
76*13d37d77SDavid du Colombier
77*13d37d77SDavid du Colombier static void
Lp_update_prices(Len_prices * lp)78*13d37d77SDavid du Colombier Lp_update_prices(Len_prices *lp)
79*13d37d77SDavid du Colombier {
80*13d37d77SDavid du Colombier int pos_state;
81*13d37d77SDavid du Colombier bool high_pending = false;
82*13d37d77SDavid du Colombier
83*13d37d77SDavid du Colombier for (pos_state = 0; pos_state < pos_states; ++pos_state)
84*13d37d77SDavid du Colombier if (lp->counters[pos_state] <= 0) {
85*13d37d77SDavid du Colombier lp->counters[pos_state] = lp->count;
86*13d37d77SDavid du Colombier Lp_update_low_mid_prices(lp, pos_state);
87*13d37d77SDavid du Colombier high_pending = true;
88*13d37d77SDavid du Colombier }
89*13d37d77SDavid du Colombier if (high_pending && lp->len_syms > len_low_syms + len_mid_syms)
90*13d37d77SDavid du Colombier Lp_update_high_prices(lp);
91*13d37d77SDavid du Colombier }
92*13d37d77SDavid du Colombier
93*13d37d77SDavid du Colombier typedef struct Pair Pair;
94*13d37d77SDavid du Colombier struct Pair { /* distance-length pair */
95*13d37d77SDavid du Colombier int dis;
96*13d37d77SDavid du Colombier int len;
97*13d37d77SDavid du Colombier };
98*13d37d77SDavid du Colombier
99*13d37d77SDavid du Colombier enum {
100*13d37d77SDavid du Colombier infinite_price = 0x0FFFFFFF,
101*13d37d77SDavid du Colombier max_num_trials = 1 << 13,
102*13d37d77SDavid du Colombier single_step_trial = -2,
103*13d37d77SDavid du Colombier dual_step_trial = -1
104*13d37d77SDavid du Colombier };
105*13d37d77SDavid du Colombier
106*13d37d77SDavid du Colombier typedef struct Trial Trial;
107*13d37d77SDavid du Colombier struct Trial {
108*13d37d77SDavid du Colombier State state;
109*13d37d77SDavid du Colombier int price; /* dual use var; cumulative price, match length */
110*13d37d77SDavid du Colombier int dis4; /* -1 for literal, or rep, or match distance + 4 */
111*13d37d77SDavid du Colombier int prev_index; /* index of prev trial in trials[] */
112*13d37d77SDavid du Colombier int prev_index2; /* -2 trial is single step */
113*13d37d77SDavid du Colombier /* -1 literal + rep0 */
114*13d37d77SDavid du Colombier /* >= 0 (rep or match) + literal + rep0 */
115*13d37d77SDavid du Colombier int reps[num_rep_distances];
116*13d37d77SDavid du Colombier };
117*13d37d77SDavid du Colombier
118*13d37d77SDavid du Colombier static void
Tr_update2(Trial * trial,int pr,int p_i)119*13d37d77SDavid du Colombier Tr_update2(Trial *trial, int pr, int p_i)
120*13d37d77SDavid du Colombier {
121*13d37d77SDavid du Colombier if (pr < trial->price) {
122*13d37d77SDavid du Colombier trial->price = pr;
123*13d37d77SDavid du Colombier trial->dis4 = 0;
124*13d37d77SDavid du Colombier trial->prev_index = p_i;
125*13d37d77SDavid du Colombier trial->prev_index2 = dual_step_trial;
126*13d37d77SDavid du Colombier }
127*13d37d77SDavid du Colombier }
128*13d37d77SDavid du Colombier
129*13d37d77SDavid du Colombier static void
Tr_update3(Trial * trial,int pr,int distance4,int p_i,int p_i2)130*13d37d77SDavid du Colombier Tr_update3(Trial *trial, int pr, int distance4, int p_i, int p_i2)
131*13d37d77SDavid du Colombier {
132*13d37d77SDavid du Colombier if (pr < trial->price) {
133*13d37d77SDavid du Colombier trial->price = pr;
134*13d37d77SDavid du Colombier trial->dis4 = distance4;
135*13d37d77SDavid du Colombier trial->prev_index = p_i;
136*13d37d77SDavid du Colombier trial->prev_index2 = p_i2;
137*13d37d77SDavid du Colombier }
138*13d37d77SDavid du Colombier }
139*13d37d77SDavid du Colombier
140*13d37d77SDavid du Colombier typedef struct LZ_encoder LZ_encoder;
141*13d37d77SDavid du Colombier struct LZ_encoder {
142*13d37d77SDavid du Colombier LZ_encoder_base eb;
143*13d37d77SDavid du Colombier int cycles;
144*13d37d77SDavid du Colombier int match_len_limit;
145*13d37d77SDavid du Colombier Len_prices match_len_prices;
146*13d37d77SDavid du Colombier Len_prices rep_len_prices;
147*13d37d77SDavid du Colombier int pending_num_pairs;
148*13d37d77SDavid du Colombier Pair pairs[max_match_len+1];
149*13d37d77SDavid du Colombier Trial trials[max_num_trials];
150*13d37d77SDavid du Colombier
151*13d37d77SDavid du Colombier int dis_slot_prices[len_states][2*max_dict_bits];
152*13d37d77SDavid du Colombier int dis_prices[len_states][modeled_distances];
153*13d37d77SDavid du Colombier int align_prices[dis_align_size];
154*13d37d77SDavid du Colombier int num_dis_slots;
155*13d37d77SDavid du Colombier };
156*13d37d77SDavid du Colombier
157*13d37d77SDavid du Colombier static bool
Mb_dec_pos(struct Matchfinder_base * mb,int ahead)158*13d37d77SDavid du Colombier Mb_dec_pos(struct Matchfinder_base *mb, int ahead)
159*13d37d77SDavid du Colombier {
160*13d37d77SDavid du Colombier if (ahead < 0 || mb->pos < ahead)
161*13d37d77SDavid du Colombier return false;
162*13d37d77SDavid du Colombier mb->pos -= ahead;
163*13d37d77SDavid du Colombier if (mb->cyclic_pos < ahead)
164*13d37d77SDavid du Colombier mb->cyclic_pos += mb->dict_size + 1;
165*13d37d77SDavid du Colombier mb->cyclic_pos -= ahead;
166*13d37d77SDavid du Colombier return true;
167*13d37d77SDavid du Colombier }
168*13d37d77SDavid du Colombier
169*13d37d77SDavid du Colombier int LZe_get_match_pairs(struct LZ_encoder *e, struct Pair *pairs);
170*13d37d77SDavid du Colombier
171*13d37d77SDavid du Colombier /* move-to-front dis in/into reps; do nothing if(dis4 <= 0) */
172*13d37d77SDavid du Colombier static void
mtf_reps(int dis4,int reps[num_rep_distances])173*13d37d77SDavid du Colombier mtf_reps(int dis4, int reps[num_rep_distances])
174*13d37d77SDavid du Colombier {
175*13d37d77SDavid du Colombier if (dis4 >= num_rep_distances) /* match */ {
176*13d37d77SDavid du Colombier reps[3] = reps[2];
177*13d37d77SDavid du Colombier reps[2] = reps[1];
178*13d37d77SDavid du Colombier reps[1] = reps[0];
179*13d37d77SDavid du Colombier reps[0] = dis4 - num_rep_distances;
180*13d37d77SDavid du Colombier } else if (dis4 > 0) /* repeated match */ {
181*13d37d77SDavid du Colombier int distance = reps[dis4];
182*13d37d77SDavid du Colombier int i;
183*13d37d77SDavid du Colombier for (i = dis4; i > 0; --i)
184*13d37d77SDavid du Colombier reps[i] = reps[i-1];
185*13d37d77SDavid du Colombier reps[0] = distance;
186*13d37d77SDavid du Colombier }
187*13d37d77SDavid du Colombier }
188*13d37d77SDavid du Colombier
189*13d37d77SDavid du Colombier static int
LZeb_price_shortrep(struct LZ_encoder_base * eb,State state,int pos_state)190*13d37d77SDavid du Colombier LZeb_price_shortrep(struct LZ_encoder_base *eb, State state, int pos_state)
191*13d37d77SDavid du Colombier {
192*13d37d77SDavid du Colombier return price0(eb->bm_rep0[state]) + price0(eb->bm_len[state][pos_state]);
193*13d37d77SDavid du Colombier }
194*13d37d77SDavid du Colombier
195*13d37d77SDavid du Colombier static int
LZeb_price_rep(struct LZ_encoder_base * eb,int rep,State state,int pos_state)196*13d37d77SDavid du Colombier LZeb_price_rep(struct LZ_encoder_base *eb, int rep, State state, int pos_state)
197*13d37d77SDavid du Colombier {
198*13d37d77SDavid du Colombier int price;
199*13d37d77SDavid du Colombier if (rep == 0)
200*13d37d77SDavid du Colombier return price0(eb->bm_rep0[state]) +
201*13d37d77SDavid du Colombier price1(eb->bm_len[state][pos_state]);
202*13d37d77SDavid du Colombier price = price1(eb->bm_rep0[state]);
203*13d37d77SDavid du Colombier if (rep == 1)
204*13d37d77SDavid du Colombier price += price0(eb->bm_rep1[state]);
205*13d37d77SDavid du Colombier else {
206*13d37d77SDavid du Colombier price += price1(eb->bm_rep1[state]);
207*13d37d77SDavid du Colombier price += price_bit(eb->bm_rep2[state], rep - 2);
208*13d37d77SDavid du Colombier }
209*13d37d77SDavid du Colombier return price;
210*13d37d77SDavid du Colombier }
211*13d37d77SDavid du Colombier
212*13d37d77SDavid du Colombier static int
LZe_price_rep0_len(struct LZ_encoder * e,int len,State state,int pos_state)213*13d37d77SDavid du Colombier LZe_price_rep0_len(struct LZ_encoder *e, int len, State state, int pos_state)
214*13d37d77SDavid du Colombier {
215*13d37d77SDavid du Colombier return LZeb_price_rep(&e->eb, 0, state, pos_state) +
216*13d37d77SDavid du Colombier Lp_price(&e->rep_len_prices, len, pos_state);
217*13d37d77SDavid du Colombier }
218*13d37d77SDavid du Colombier
219*13d37d77SDavid du Colombier static int
LZe_price_pair(struct LZ_encoder * e,int dis,int len,int pos_state)220*13d37d77SDavid du Colombier LZe_price_pair(struct LZ_encoder *e, int dis, int len, int pos_state)
221*13d37d77SDavid du Colombier {
222*13d37d77SDavid du Colombier int price = Lp_price(&e->match_len_prices, len, pos_state);
223*13d37d77SDavid du Colombier int len_state = get_len_state(len);
224*13d37d77SDavid du Colombier if (dis < modeled_distances)
225*13d37d77SDavid du Colombier return price + e->dis_prices[len_state][dis];
226*13d37d77SDavid du Colombier else
227*13d37d77SDavid du Colombier return price + e->dis_slot_prices[len_state][get_slot(dis)] +
228*13d37d77SDavid du Colombier e->align_prices[dis & (dis_align_size - 1)];
229*13d37d77SDavid du Colombier }
230*13d37d77SDavid du Colombier
231*13d37d77SDavid du Colombier static int
LZe_read_match_distances(struct LZ_encoder * e)232*13d37d77SDavid du Colombier LZe_read_match_distances(struct LZ_encoder *e)
233*13d37d77SDavid du Colombier {
234*13d37d77SDavid du Colombier int num_pairs = LZe_get_match_pairs(e, e->pairs);
235*13d37d77SDavid du Colombier if (num_pairs > 0) {
236*13d37d77SDavid du Colombier int len = e->pairs[num_pairs-1].len;
237*13d37d77SDavid du Colombier if (len == e->match_len_limit && len < max_match_len)
238*13d37d77SDavid du Colombier e->pairs[num_pairs-1].len =
239*13d37d77SDavid du Colombier Mb_true_match_len(&e->eb.mb, len, e->pairs[num_pairs-1].dis + 1);
240*13d37d77SDavid du Colombier }
241*13d37d77SDavid du Colombier return num_pairs;
242*13d37d77SDavid du Colombier }
243*13d37d77SDavid du Colombier
244*13d37d77SDavid du Colombier static void
LZe_move_and_update(struct LZ_encoder * e,int n)245*13d37d77SDavid du Colombier LZe_move_and_update(struct LZ_encoder *e, int n)
246*13d37d77SDavid du Colombier {
247*13d37d77SDavid du Colombier while (true) {
248*13d37d77SDavid du Colombier Mb_move_pos(&e->eb.mb);
249*13d37d77SDavid du Colombier if (--n <= 0)
250*13d37d77SDavid du Colombier break;
251*13d37d77SDavid du Colombier LZe_get_match_pairs(e, 0);
252*13d37d77SDavid du Colombier }
253*13d37d77SDavid du Colombier }
254*13d37d77SDavid du Colombier
255*13d37d77SDavid du Colombier static void
LZe_backward(struct LZ_encoder * e,int cur)256*13d37d77SDavid du Colombier LZe_backward(struct LZ_encoder *e, int cur)
257*13d37d77SDavid du Colombier {
258*13d37d77SDavid du Colombier int dis4 = e->trials[cur].dis4;
259*13d37d77SDavid du Colombier while (cur > 0) {
260*13d37d77SDavid du Colombier int prev_index = e->trials[cur].prev_index;
261*13d37d77SDavid du Colombier struct Trial *prev_trial = &e->trials[prev_index];
262*13d37d77SDavid du Colombier
263*13d37d77SDavid du Colombier if (e->trials[cur].prev_index2 != single_step_trial) {
264*13d37d77SDavid du Colombier prev_trial->dis4 = -1; /* literal */
265*13d37d77SDavid du Colombier prev_trial->prev_index = prev_index - 1;
266*13d37d77SDavid du Colombier prev_trial->prev_index2 = single_step_trial;
267*13d37d77SDavid du Colombier if (e->trials[cur].prev_index2 >= 0) {
268*13d37d77SDavid du Colombier struct Trial *prev_trial2 = &e->trials[prev_index-1];
269*13d37d77SDavid du Colombier prev_trial2->dis4 = dis4;
270*13d37d77SDavid du Colombier dis4 = 0; /* rep0 */
271*13d37d77SDavid du Colombier prev_trial2->prev_index = e->trials[cur].prev_index2;
272*13d37d77SDavid du Colombier prev_trial2->prev_index2 = single_step_trial;
273*13d37d77SDavid du Colombier }
274*13d37d77SDavid du Colombier }
275*13d37d77SDavid du Colombier prev_trial->price = cur - prev_index; /* len */
276*13d37d77SDavid du Colombier cur = dis4;
277*13d37d77SDavid du Colombier dis4 = prev_trial->dis4;
278*13d37d77SDavid du Colombier prev_trial->dis4 = cur;
279*13d37d77SDavid du Colombier cur = prev_index;
280*13d37d77SDavid du Colombier }
281*13d37d77SDavid du Colombier }
282*13d37d77SDavid du Colombier
283*13d37d77SDavid du Colombier enum {
284*13d37d77SDavid du Colombier Nprevpos3 = 1 << 16,
285*13d37d77SDavid du Colombier Nprevpos2 = 1 << 10
286*13d37d77SDavid du Colombier };
287*13d37d77SDavid du Colombier
288*13d37d77SDavid du Colombier static bool
LZe_init(struct LZ_encoder * e,int dict_size,int len_limit,int ifd,int outfd)289*13d37d77SDavid du Colombier LZe_init(struct LZ_encoder *e, int dict_size, int len_limit, int ifd, int outfd)
290*13d37d77SDavid du Colombier {
291*13d37d77SDavid du Colombier enum {
292*13d37d77SDavid du Colombier before = max_num_trials,
293*13d37d77SDavid du Colombier /* bytes to keep in buffer after pos */
294*13d37d77SDavid du Colombier after_size = (2 *max_match_len) + 1,
295*13d37d77SDavid du Colombier dict_factor = 2,
296*13d37d77SDavid du Colombier Nprevpos23 = Nprevpos2 + Nprevpos3,
297*13d37d77SDavid du Colombier pos_array_factor = 2
298*13d37d77SDavid du Colombier };
299*13d37d77SDavid du Colombier
300*13d37d77SDavid du Colombier if (!LZeb_init(&e->eb, before, dict_size, after_size, dict_factor,
301*13d37d77SDavid du Colombier Nprevpos23, pos_array_factor, ifd, outfd))
302*13d37d77SDavid du Colombier return false;
303*13d37d77SDavid du Colombier e->cycles = (len_limit < max_match_len) ? 16 + (len_limit / 2) : 256;
304*13d37d77SDavid du Colombier e->match_len_limit = len_limit;
305*13d37d77SDavid du Colombier Lp_init(&e->match_len_prices, &e->eb.match_len_model, e->match_len_limit);
306*13d37d77SDavid du Colombier Lp_init(&e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit);
307*13d37d77SDavid du Colombier e->pending_num_pairs = 0;
308*13d37d77SDavid du Colombier e->num_dis_slots = 2 * real_bits(e->eb.mb.dict_size - 1);
309*13d37d77SDavid du Colombier e->trials[1].prev_index = 0;
310*13d37d77SDavid du Colombier e->trials[1].prev_index2 = single_step_trial;
311*13d37d77SDavid du Colombier return true;
312*13d37d77SDavid du Colombier }
313*13d37d77SDavid du Colombier
314*13d37d77SDavid du Colombier static void
LZe_reset(struct LZ_encoder * e)315*13d37d77SDavid du Colombier LZe_reset(struct LZ_encoder *e)
316*13d37d77SDavid du Colombier {
317*13d37d77SDavid du Colombier LZeb_reset(&e->eb);
318*13d37d77SDavid du Colombier Lp_reset(&e->match_len_prices);
319*13d37d77SDavid du Colombier Lp_reset(&e->rep_len_prices);
320*13d37d77SDavid du Colombier e->pending_num_pairs = 0;
321*13d37d77SDavid du Colombier }
322*13d37d77SDavid du Colombier
323*13d37d77SDavid du Colombier bool LZe_encode_member(struct LZ_encoder *e, uvlong member_size);
324