1*13d37d77SDavid du Colombier /* Clzip - LZMA lossless data compressor
2*13d37d77SDavid du Colombier Copyright (C) 2010-2017 Antonio Diaz Diaz.
3*13d37d77SDavid du Colombier
4*13d37d77SDavid du Colombier This program is free software: you can redistribute it and/or modify
5*13d37d77SDavid du Colombier it under the terms of the GNU General Public License as published by
6*13d37d77SDavid du Colombier the Free Software Foundation, either version 2 of the License, or
7*13d37d77SDavid du Colombier (at your option) any later version.
8*13d37d77SDavid du Colombier
9*13d37d77SDavid du Colombier This program is distributed in the hope that it will be useful,
10*13d37d77SDavid du Colombier but WITHOUT ANY WARRANTY; without even the implied warranty of
11*13d37d77SDavid du Colombier MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*13d37d77SDavid du Colombier GNU General Public License for more details.
13*13d37d77SDavid du Colombier
14*13d37d77SDavid du Colombier You should have received a copy of the GNU General Public License
15*13d37d77SDavid du Colombier along with this program. If not, see <http://www.gnu.org/licenses/>. */
16*13d37d77SDavid du Colombier
17*13d37d77SDavid du Colombier #include "lzip.h"
18*13d37d77SDavid du Colombier #include "encoder_base.h"
19*13d37d77SDavid du Colombier #include "fast_encoder.h"
20*13d37d77SDavid du Colombier
21*13d37d77SDavid du Colombier int
FLZe_longest_match_len(FLZ_encoder * fe,int * distance)22*13d37d77SDavid du Colombier FLZe_longest_match_len(FLZ_encoder *fe, int *distance)
23*13d37d77SDavid du Colombier {
24*13d37d77SDavid du Colombier enum { len_limit = 16 };
25*13d37d77SDavid du Colombier uchar *data = Mb_ptr_to_current_pos(&fe->eb.mb);
26*13d37d77SDavid du Colombier int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
27*13d37d77SDavid du Colombier int pos1 = fe->eb.mb.pos + 1;
28*13d37d77SDavid du Colombier int maxlen = 0, newpos1, count;
29*13d37d77SDavid du Colombier int available = min(Mb_avail_bytes(&fe->eb.mb), max_match_len);
30*13d37d77SDavid du Colombier
31*13d37d77SDavid du Colombier if (available < len_limit)
32*13d37d77SDavid du Colombier return 0;
33*13d37d77SDavid du Colombier
34*13d37d77SDavid du Colombier fe->key4 = ((fe->key4 << 4) ^ data[3]) & fe->eb.mb.key4_mask;
35*13d37d77SDavid du Colombier newpos1 = fe->eb.mb.prev_positions[fe->key4];
36*13d37d77SDavid du Colombier fe->eb.mb.prev_positions[fe->key4] = pos1;
37*13d37d77SDavid du Colombier
38*13d37d77SDavid du Colombier for (count = 4; ;) {
39*13d37d77SDavid du Colombier int32_t * newptr;
40*13d37d77SDavid du Colombier int delta;
41*13d37d77SDavid du Colombier
42*13d37d77SDavid du Colombier if (newpos1 <= 0 || --count < 0 ||
43*13d37d77SDavid du Colombier (delta = pos1 - newpos1) > fe->eb.mb.dict_size) {
44*13d37d77SDavid du Colombier *ptr0 = 0;
45*13d37d77SDavid du Colombier break;
46*13d37d77SDavid du Colombier }
47*13d37d77SDavid du Colombier newptr = fe->eb.mb.pos_array +
48*13d37d77SDavid du Colombier (fe->eb.mb.cyclic_pos - delta +
49*13d37d77SDavid du Colombier ((fe->eb.mb.cyclic_pos >= delta) ? 0 : fe->eb.mb.dict_size + 1));
50*13d37d77SDavid du Colombier
51*13d37d77SDavid du Colombier if (data[maxlen-delta] == data[maxlen]) {
52*13d37d77SDavid du Colombier int len = 0;
53*13d37d77SDavid du Colombier while (len < available && data[len-delta] == data[len])
54*13d37d77SDavid du Colombier ++len;
55*13d37d77SDavid du Colombier if (maxlen < len) {
56*13d37d77SDavid du Colombier maxlen = len;
57*13d37d77SDavid du Colombier *distance = delta - 1;
58*13d37d77SDavid du Colombier if (maxlen >= len_limit) {
59*13d37d77SDavid du Colombier *ptr0 = *newptr;
60*13d37d77SDavid du Colombier break;
61*13d37d77SDavid du Colombier }
62*13d37d77SDavid du Colombier }
63*13d37d77SDavid du Colombier }
64*13d37d77SDavid du Colombier
65*13d37d77SDavid du Colombier *ptr0 = newpos1;
66*13d37d77SDavid du Colombier ptr0 = newptr;
67*13d37d77SDavid du Colombier newpos1 = *ptr0;
68*13d37d77SDavid du Colombier }
69*13d37d77SDavid du Colombier return maxlen;
70*13d37d77SDavid du Colombier }
71*13d37d77SDavid du Colombier
72*13d37d77SDavid du Colombier bool
FLZe_encode_member(FLZ_encoder * fe,uvlong member_size)73*13d37d77SDavid du Colombier FLZe_encode_member(FLZ_encoder *fe, uvlong member_size)
74*13d37d77SDavid du Colombier {
75*13d37d77SDavid du Colombier uvlong member_size_limit = member_size - Ft_size - max_marker_size;
76*13d37d77SDavid du Colombier int rep = 0, i;
77*13d37d77SDavid du Colombier int reps[num_rep_distances];
78*13d37d77SDavid du Colombier State state = 0;
79*13d37d77SDavid du Colombier
80*13d37d77SDavid du Colombier for (i = 0; i < num_rep_distances; ++i)
81*13d37d77SDavid du Colombier reps[i] = 0;
82*13d37d77SDavid du Colombier
83*13d37d77SDavid du Colombier if (Mb_data_position(&fe->eb.mb) != 0 ||
84*13d37d77SDavid du Colombier Re_member_position(&fe->eb.renc) != Fh_size)
85*13d37d77SDavid du Colombier return false; /* can be called only once */
86*13d37d77SDavid du Colombier
87*13d37d77SDavid du Colombier if (!Mb_data_finished(&fe->eb.mb)) /* encode first byte */ {
88*13d37d77SDavid du Colombier uchar prev_byte = 0;
89*13d37d77SDavid du Colombier uchar cur_byte = Mb_peek(&fe->eb.mb, 0);
90*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][0], 0);
91*13d37d77SDavid du Colombier LZeb_encode_literal(&fe->eb, prev_byte, cur_byte);
92*13d37d77SDavid du Colombier CRC32_update_byte(&fe->eb.crc, cur_byte);
93*13d37d77SDavid du Colombier FLZe_reset_key4(fe);
94*13d37d77SDavid du Colombier FLZe_update_and_move(fe, 1);
95*13d37d77SDavid du Colombier }
96*13d37d77SDavid du Colombier
97*13d37d77SDavid du Colombier while (!Mb_data_finished(&fe->eb.mb) &&
98*13d37d77SDavid du Colombier Re_member_position(&fe->eb.renc) < member_size_limit) {
99*13d37d77SDavid du Colombier int match_distance;
100*13d37d77SDavid du Colombier int main_len = FLZe_longest_match_len(fe, &match_distance);
101*13d37d77SDavid du Colombier int pos_state = Mb_data_position(&fe->eb.mb) & pos_state_mask;
102*13d37d77SDavid du Colombier int len = 0;
103*13d37d77SDavid du Colombier
104*13d37d77SDavid du Colombier for (i = 0; i < num_rep_distances; ++i) {
105*13d37d77SDavid du Colombier int tlen = Mb_true_match_len(&fe->eb.mb, 0, reps[i] + 1);
106*13d37d77SDavid du Colombier if (tlen > len) {
107*13d37d77SDavid du Colombier len = tlen;
108*13d37d77SDavid du Colombier rep = i;
109*13d37d77SDavid du Colombier }
110*13d37d77SDavid du Colombier }
111*13d37d77SDavid du Colombier if (len > min_match_len && len + 3 > main_len) {
112*13d37d77SDavid du Colombier CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), len);
113*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
114*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1);
115*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], rep != 0);
116*13d37d77SDavid du Colombier if (rep == 0)
117*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 1);
118*13d37d77SDavid du Colombier else {
119*13d37d77SDavid du Colombier int distance;
120*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep1[state], rep > 1);
121*13d37d77SDavid du Colombier if (rep > 1)
122*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep2[state], rep > 2);
123*13d37d77SDavid du Colombier distance = reps[rep];
124*13d37d77SDavid du Colombier for (i = rep; i > 0; --i)
125*13d37d77SDavid du Colombier reps[i] = reps[i-1];
126*13d37d77SDavid du Colombier reps[0] = distance;
127*13d37d77SDavid du Colombier }
128*13d37d77SDavid du Colombier state = St_set_rep(state);
129*13d37d77SDavid du Colombier Re_encode_len(&fe->eb.renc, &fe->eb.rep_len_model, len, pos_state);
130*13d37d77SDavid du Colombier Mb_move_pos(&fe->eb.mb);
131*13d37d77SDavid du Colombier FLZe_update_and_move(fe, len - 1);
132*13d37d77SDavid du Colombier continue;
133*13d37d77SDavid du Colombier }
134*13d37d77SDavid du Colombier
135*13d37d77SDavid du Colombier if (main_len > min_match_len) {
136*13d37d77SDavid du Colombier CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), main_len);
137*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
138*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 0);
139*13d37d77SDavid du Colombier state = St_set_match(state);
140*13d37d77SDavid du Colombier for (i = num_rep_distances - 1; i > 0; --i)
141*13d37d77SDavid du Colombier reps[i] = reps[i-1];
142*13d37d77SDavid du Colombier reps[0] = match_distance;
143*13d37d77SDavid du Colombier LZeb_encode_pair(&fe->eb, match_distance, main_len, pos_state);
144*13d37d77SDavid du Colombier Mb_move_pos(&fe->eb.mb);
145*13d37d77SDavid du Colombier FLZe_update_and_move(fe, main_len - 1);
146*13d37d77SDavid du Colombier continue;
147*13d37d77SDavid du Colombier }
148*13d37d77SDavid du Colombier
149*13d37d77SDavid du Colombier {
150*13d37d77SDavid du Colombier uchar prev_byte = Mb_peek(&fe->eb.mb, 1);
151*13d37d77SDavid du Colombier uchar cur_byte = Mb_peek(&fe->eb.mb, 0);
152*13d37d77SDavid du Colombier uchar match_byte = Mb_peek(&fe->eb.mb, reps[0] + 1);
153*13d37d77SDavid du Colombier Mb_move_pos(&fe->eb.mb);
154*13d37d77SDavid du Colombier CRC32_update_byte(&fe->eb.crc, cur_byte);
155*13d37d77SDavid du Colombier
156*13d37d77SDavid du Colombier if (match_byte == cur_byte) {
157*13d37d77SDavid du Colombier int short_rep_price = price1(fe->eb.bm_match[state][pos_state]) +
158*13d37d77SDavid du Colombier price1(fe->eb.bm_rep[state]) +
159*13d37d77SDavid du Colombier price0(fe->eb.bm_rep0[state]) +
160*13d37d77SDavid du Colombier price0(fe->eb.bm_len[state][pos_state]);
161*13d37d77SDavid du Colombier int price = price0(fe->eb.bm_match[state][pos_state]);
162*13d37d77SDavid du Colombier if (St_is_char(state))
163*13d37d77SDavid du Colombier price += LZeb_price_literal(&fe->eb, prev_byte, cur_byte);
164*13d37d77SDavid du Colombier else
165*13d37d77SDavid du Colombier price += LZeb_price_matched(&fe->eb, prev_byte, cur_byte, match_byte);
166*13d37d77SDavid du Colombier if (short_rep_price < price) {
167*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
168*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1);
169*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], 0);
170*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0);
171*13d37d77SDavid du Colombier state = St_set_short_rep(state);
172*13d37d77SDavid du Colombier continue;
173*13d37d77SDavid du Colombier }
174*13d37d77SDavid du Colombier }
175*13d37d77SDavid du Colombier
176*13d37d77SDavid du Colombier /* literal byte */
177*13d37d77SDavid du Colombier Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 0);
178*13d37d77SDavid du Colombier if (St_is_char(state))
179*13d37d77SDavid du Colombier LZeb_encode_literal(&fe->eb, prev_byte, cur_byte);
180*13d37d77SDavid du Colombier else
181*13d37d77SDavid du Colombier LZeb_encode_matched(&fe->eb, prev_byte, cur_byte, match_byte);
182*13d37d77SDavid du Colombier state = St_set_char(state);
183*13d37d77SDavid du Colombier }
184*13d37d77SDavid du Colombier }
185*13d37d77SDavid du Colombier
186*13d37d77SDavid du Colombier LZeb_full_flush(&fe->eb, state);
187*13d37d77SDavid du Colombier return true;
188*13d37d77SDavid du Colombier }
189