1 /* Clzip - LZMA lossless data compressor
2 Copyright (C) 2010-2017 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 typedef struct Len_prices Len_prices;
18 struct Len_prices {
19 struct Len_model *lm;
20 int len_syms;
21 int count;
22 int prices[pos_states][max_len_syms];
23 int counters[pos_states]; /* may decrement below 0 */
24 };
25
26 static void
Lp_update_low_mid_prices(Len_prices * lp,int pos_state)27 Lp_update_low_mid_prices(Len_prices *lp, int pos_state)
28 {
29 int *pps = lp->prices[pos_state];
30 int tmp = price0(lp->lm->choice1);
31 int len = 0;
32 for (; len < len_low_syms && len < lp->len_syms; ++len)
33 pps[len] = tmp + price_symbol3(lp->lm->bm_low[pos_state], len);
34 if (len >= lp->len_syms)
35 return;
36 tmp = price1(lp->lm->choice1) + price0(lp->lm->choice2);
37 for (; len < len_low_syms + len_mid_syms && len < lp->len_syms; ++len)
38 pps[len] = tmp +
39 price_symbol3(lp->lm->bm_mid[pos_state], len - len_low_syms);
40 }
41
42 static void
Lp_update_high_prices(Len_prices * lp)43 Lp_update_high_prices(Len_prices *lp)
44 {
45 int tmp = price1(lp->lm->choice1) + price1(lp->lm->choice2);
46 int len;
47 for (len = len_low_syms + len_mid_syms; len < lp->len_syms; ++len)
48 /* using 4 slots per value makes "Lp_price" faster */
49 lp->prices[3][len] = lp->prices[2][len] =
50 lp->prices[1][len] = lp->prices[0][len] = tmp +
51 price_symbol8(lp->lm->bm_high, len - len_low_syms - len_mid_syms);
52 }
53
54 static void
Lp_reset(Len_prices * lp)55 Lp_reset(Len_prices *lp)
56 {
57 int i;
58 for (i = 0; i < pos_states; ++i)
59 lp->counters[i] = 0;
60 }
61
62 static void
Lp_init(Len_prices * lp,Len_model * lm,int match_len_limit)63 Lp_init(Len_prices *lp, Len_model *lm, int match_len_limit)
64 {
65 lp->lm = lm;
66 lp->len_syms = match_len_limit + 1 - min_match_len;
67 lp->count = (match_len_limit > 12) ? 1 : lp->len_syms;
68 Lp_reset(lp);
69 }
70
71 static void
Lp_decr_counter(Len_prices * lp,int pos_state)72 Lp_decr_counter(Len_prices *lp, int pos_state)
73 {
74 --lp->counters[pos_state];
75 }
76
77 static void
Lp_update_prices(Len_prices * lp)78 Lp_update_prices(Len_prices *lp)
79 {
80 int pos_state;
81 bool high_pending = false;
82
83 for (pos_state = 0; pos_state < pos_states; ++pos_state)
84 if (lp->counters[pos_state] <= 0) {
85 lp->counters[pos_state] = lp->count;
86 Lp_update_low_mid_prices(lp, pos_state);
87 high_pending = true;
88 }
89 if (high_pending && lp->len_syms > len_low_syms + len_mid_syms)
90 Lp_update_high_prices(lp);
91 }
92
93 typedef struct Pair Pair;
94 struct Pair { /* distance-length pair */
95 int dis;
96 int len;
97 };
98
99 enum {
100 infinite_price = 0x0FFFFFFF,
101 max_num_trials = 1 << 13,
102 single_step_trial = -2,
103 dual_step_trial = -1
104 };
105
106 typedef struct Trial Trial;
107 struct Trial {
108 State state;
109 int price; /* dual use var; cumulative price, match length */
110 int dis4; /* -1 for literal, or rep, or match distance + 4 */
111 int prev_index; /* index of prev trial in trials[] */
112 int prev_index2; /* -2 trial is single step */
113 /* -1 literal + rep0 */
114 /* >= 0 (rep or match) + literal + rep0 */
115 int reps[num_rep_distances];
116 };
117
118 static void
Tr_update2(Trial * trial,int pr,int p_i)119 Tr_update2(Trial *trial, int pr, int p_i)
120 {
121 if (pr < trial->price) {
122 trial->price = pr;
123 trial->dis4 = 0;
124 trial->prev_index = p_i;
125 trial->prev_index2 = dual_step_trial;
126 }
127 }
128
129 static void
Tr_update3(Trial * trial,int pr,int distance4,int p_i,int p_i2)130 Tr_update3(Trial *trial, int pr, int distance4, int p_i, int p_i2)
131 {
132 if (pr < trial->price) {
133 trial->price = pr;
134 trial->dis4 = distance4;
135 trial->prev_index = p_i;
136 trial->prev_index2 = p_i2;
137 }
138 }
139
140 typedef struct LZ_encoder LZ_encoder;
141 struct LZ_encoder {
142 LZ_encoder_base eb;
143 int cycles;
144 int match_len_limit;
145 Len_prices match_len_prices;
146 Len_prices rep_len_prices;
147 int pending_num_pairs;
148 Pair pairs[max_match_len+1];
149 Trial trials[max_num_trials];
150
151 int dis_slot_prices[len_states][2*max_dict_bits];
152 int dis_prices[len_states][modeled_distances];
153 int align_prices[dis_align_size];
154 int num_dis_slots;
155 };
156
157 static bool
Mb_dec_pos(struct Matchfinder_base * mb,int ahead)158 Mb_dec_pos(struct Matchfinder_base *mb, int ahead)
159 {
160 if (ahead < 0 || mb->pos < ahead)
161 return false;
162 mb->pos -= ahead;
163 if (mb->cyclic_pos < ahead)
164 mb->cyclic_pos += mb->dict_size + 1;
165 mb->cyclic_pos -= ahead;
166 return true;
167 }
168
169 int LZe_get_match_pairs(struct LZ_encoder *e, struct Pair *pairs);
170
171 /* move-to-front dis in/into reps; do nothing if(dis4 <= 0) */
172 static void
mtf_reps(int dis4,int reps[num_rep_distances])173 mtf_reps(int dis4, int reps[num_rep_distances])
174 {
175 if (dis4 >= num_rep_distances) /* match */ {
176 reps[3] = reps[2];
177 reps[2] = reps[1];
178 reps[1] = reps[0];
179 reps[0] = dis4 - num_rep_distances;
180 } else if (dis4 > 0) /* repeated match */ {
181 int distance = reps[dis4];
182 int i;
183 for (i = dis4; i > 0; --i)
184 reps[i] = reps[i-1];
185 reps[0] = distance;
186 }
187 }
188
189 static int
LZeb_price_shortrep(struct LZ_encoder_base * eb,State state,int pos_state)190 LZeb_price_shortrep(struct LZ_encoder_base *eb, State state, int pos_state)
191 {
192 return price0(eb->bm_rep0[state]) + price0(eb->bm_len[state][pos_state]);
193 }
194
195 static int
LZeb_price_rep(struct LZ_encoder_base * eb,int rep,State state,int pos_state)196 LZeb_price_rep(struct LZ_encoder_base *eb, int rep, State state, int pos_state)
197 {
198 int price;
199 if (rep == 0)
200 return price0(eb->bm_rep0[state]) +
201 price1(eb->bm_len[state][pos_state]);
202 price = price1(eb->bm_rep0[state]);
203 if (rep == 1)
204 price += price0(eb->bm_rep1[state]);
205 else {
206 price += price1(eb->bm_rep1[state]);
207 price += price_bit(eb->bm_rep2[state], rep - 2);
208 }
209 return price;
210 }
211
212 static int
LZe_price_rep0_len(struct LZ_encoder * e,int len,State state,int pos_state)213 LZe_price_rep0_len(struct LZ_encoder *e, int len, State state, int pos_state)
214 {
215 return LZeb_price_rep(&e->eb, 0, state, pos_state) +
216 Lp_price(&e->rep_len_prices, len, pos_state);
217 }
218
219 static int
LZe_price_pair(struct LZ_encoder * e,int dis,int len,int pos_state)220 LZe_price_pair(struct LZ_encoder *e, int dis, int len, int pos_state)
221 {
222 int price = Lp_price(&e->match_len_prices, len, pos_state);
223 int len_state = get_len_state(len);
224 if (dis < modeled_distances)
225 return price + e->dis_prices[len_state][dis];
226 else
227 return price + e->dis_slot_prices[len_state][get_slot(dis)] +
228 e->align_prices[dis & (dis_align_size - 1)];
229 }
230
231 static int
LZe_read_match_distances(struct LZ_encoder * e)232 LZe_read_match_distances(struct LZ_encoder *e)
233 {
234 int num_pairs = LZe_get_match_pairs(e, e->pairs);
235 if (num_pairs > 0) {
236 int len = e->pairs[num_pairs-1].len;
237 if (len == e->match_len_limit && len < max_match_len)
238 e->pairs[num_pairs-1].len =
239 Mb_true_match_len(&e->eb.mb, len, e->pairs[num_pairs-1].dis + 1);
240 }
241 return num_pairs;
242 }
243
244 static void
LZe_move_and_update(struct LZ_encoder * e,int n)245 LZe_move_and_update(struct LZ_encoder *e, int n)
246 {
247 while (true) {
248 Mb_move_pos(&e->eb.mb);
249 if (--n <= 0)
250 break;
251 LZe_get_match_pairs(e, 0);
252 }
253 }
254
255 static void
LZe_backward(struct LZ_encoder * e,int cur)256 LZe_backward(struct LZ_encoder *e, int cur)
257 {
258 int dis4 = e->trials[cur].dis4;
259 while (cur > 0) {
260 int prev_index = e->trials[cur].prev_index;
261 struct Trial *prev_trial = &e->trials[prev_index];
262
263 if (e->trials[cur].prev_index2 != single_step_trial) {
264 prev_trial->dis4 = -1; /* literal */
265 prev_trial->prev_index = prev_index - 1;
266 prev_trial->prev_index2 = single_step_trial;
267 if (e->trials[cur].prev_index2 >= 0) {
268 struct Trial *prev_trial2 = &e->trials[prev_index-1];
269 prev_trial2->dis4 = dis4;
270 dis4 = 0; /* rep0 */
271 prev_trial2->prev_index = e->trials[cur].prev_index2;
272 prev_trial2->prev_index2 = single_step_trial;
273 }
274 }
275 prev_trial->price = cur - prev_index; /* len */
276 cur = dis4;
277 dis4 = prev_trial->dis4;
278 prev_trial->dis4 = cur;
279 cur = prev_index;
280 }
281 }
282
283 enum {
284 Nprevpos3 = 1 << 16,
285 Nprevpos2 = 1 << 10
286 };
287
288 static bool
LZe_init(struct LZ_encoder * e,int dict_size,int len_limit,int ifd,int outfd)289 LZe_init(struct LZ_encoder *e, int dict_size, int len_limit, int ifd, int outfd)
290 {
291 enum {
292 before = max_num_trials,
293 /* bytes to keep in buffer after pos */
294 after_size = (2 *max_match_len) + 1,
295 dict_factor = 2,
296 Nprevpos23 = Nprevpos2 + Nprevpos3,
297 pos_array_factor = 2
298 };
299
300 if (!LZeb_init(&e->eb, before, dict_size, after_size, dict_factor,
301 Nprevpos23, pos_array_factor, ifd, outfd))
302 return false;
303 e->cycles = (len_limit < max_match_len) ? 16 + (len_limit / 2) : 256;
304 e->match_len_limit = len_limit;
305 Lp_init(&e->match_len_prices, &e->eb.match_len_model, e->match_len_limit);
306 Lp_init(&e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit);
307 e->pending_num_pairs = 0;
308 e->num_dis_slots = 2 * real_bits(e->eb.mb.dict_size - 1);
309 e->trials[1].prev_index = 0;
310 e->trials[1].prev_index2 = single_step_trial;
311 return true;
312 }
313
314 static void
LZe_reset(struct LZ_encoder * e)315 LZe_reset(struct LZ_encoder *e)
316 {
317 LZeb_reset(&e->eb);
318 Lp_reset(&e->match_len_prices);
319 Lp_reset(&e->rep_len_prices);
320 e->pending_num_pairs = 0;
321 }
322
323 bool LZe_encode_member(struct LZ_encoder *e, uvlong member_size);
324