1 /* Clzip - LZMA lossless data compressor
2 Copyright (C) 2010-2017 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include "lzip.h"
18 #include "encoder_base.h"
19 #include "fast_encoder.h"
20
21 int
FLZe_longest_match_len(FLZ_encoder * fe,int * distance)22 FLZe_longest_match_len(FLZ_encoder *fe, int *distance)
23 {
24 enum { len_limit = 16 };
25 uchar *data = Mb_ptr_to_current_pos(&fe->eb.mb);
26 int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
27 int pos1 = fe->eb.mb.pos + 1;
28 int maxlen = 0, newpos1, count;
29 int available = min(Mb_avail_bytes(&fe->eb.mb), max_match_len);
30
31 if (available < len_limit)
32 return 0;
33
34 fe->key4 = ((fe->key4 << 4) ^ data[3]) & fe->eb.mb.key4_mask;
35 newpos1 = fe->eb.mb.prev_positions[fe->key4];
36 fe->eb.mb.prev_positions[fe->key4] = pos1;
37
38 for (count = 4; ;) {
39 int32_t * newptr;
40 int delta;
41
42 if (newpos1 <= 0 || --count < 0 ||
43 (delta = pos1 - newpos1) > fe->eb.mb.dict_size) {
44 *ptr0 = 0;
45 break;
46 }
47 newptr = fe->eb.mb.pos_array +
48 (fe->eb.mb.cyclic_pos - delta +
49 ((fe->eb.mb.cyclic_pos >= delta) ? 0 : fe->eb.mb.dict_size + 1));
50
51 if (data[maxlen-delta] == data[maxlen]) {
52 int len = 0;
53 while (len < available && data[len-delta] == data[len])
54 ++len;
55 if (maxlen < len) {
56 maxlen = len;
57 *distance = delta - 1;
58 if (maxlen >= len_limit) {
59 *ptr0 = *newptr;
60 break;
61 }
62 }
63 }
64
65 *ptr0 = newpos1;
66 ptr0 = newptr;
67 newpos1 = *ptr0;
68 }
69 return maxlen;
70 }
71
72 bool
FLZe_encode_member(FLZ_encoder * fe,uvlong member_size)73 FLZe_encode_member(FLZ_encoder *fe, uvlong member_size)
74 {
75 uvlong member_size_limit = member_size - Ft_size - max_marker_size;
76 int rep = 0, i;
77 int reps[num_rep_distances];
78 State state = 0;
79
80 for (i = 0; i < num_rep_distances; ++i)
81 reps[i] = 0;
82
83 if (Mb_data_position(&fe->eb.mb) != 0 ||
84 Re_member_position(&fe->eb.renc) != Fh_size)
85 return false; /* can be called only once */
86
87 if (!Mb_data_finished(&fe->eb.mb)) /* encode first byte */ {
88 uchar prev_byte = 0;
89 uchar cur_byte = Mb_peek(&fe->eb.mb, 0);
90 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][0], 0);
91 LZeb_encode_literal(&fe->eb, prev_byte, cur_byte);
92 CRC32_update_byte(&fe->eb.crc, cur_byte);
93 FLZe_reset_key4(fe);
94 FLZe_update_and_move(fe, 1);
95 }
96
97 while (!Mb_data_finished(&fe->eb.mb) &&
98 Re_member_position(&fe->eb.renc) < member_size_limit) {
99 int match_distance;
100 int main_len = FLZe_longest_match_len(fe, &match_distance);
101 int pos_state = Mb_data_position(&fe->eb.mb) & pos_state_mask;
102 int len = 0;
103
104 for (i = 0; i < num_rep_distances; ++i) {
105 int tlen = Mb_true_match_len(&fe->eb.mb, 0, reps[i] + 1);
106 if (tlen > len) {
107 len = tlen;
108 rep = i;
109 }
110 }
111 if (len > min_match_len && len + 3 > main_len) {
112 CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), len);
113 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
114 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1);
115 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], rep != 0);
116 if (rep == 0)
117 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 1);
118 else {
119 int distance;
120 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep1[state], rep > 1);
121 if (rep > 1)
122 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep2[state], rep > 2);
123 distance = reps[rep];
124 for (i = rep; i > 0; --i)
125 reps[i] = reps[i-1];
126 reps[0] = distance;
127 }
128 state = St_set_rep(state);
129 Re_encode_len(&fe->eb.renc, &fe->eb.rep_len_model, len, pos_state);
130 Mb_move_pos(&fe->eb.mb);
131 FLZe_update_and_move(fe, len - 1);
132 continue;
133 }
134
135 if (main_len > min_match_len) {
136 CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), main_len);
137 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
138 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 0);
139 state = St_set_match(state);
140 for (i = num_rep_distances - 1; i > 0; --i)
141 reps[i] = reps[i-1];
142 reps[0] = match_distance;
143 LZeb_encode_pair(&fe->eb, match_distance, main_len, pos_state);
144 Mb_move_pos(&fe->eb.mb);
145 FLZe_update_and_move(fe, main_len - 1);
146 continue;
147 }
148
149 {
150 uchar prev_byte = Mb_peek(&fe->eb.mb, 1);
151 uchar cur_byte = Mb_peek(&fe->eb.mb, 0);
152 uchar match_byte = Mb_peek(&fe->eb.mb, reps[0] + 1);
153 Mb_move_pos(&fe->eb.mb);
154 CRC32_update_byte(&fe->eb.crc, cur_byte);
155
156 if (match_byte == cur_byte) {
157 int short_rep_price = price1(fe->eb.bm_match[state][pos_state]) +
158 price1(fe->eb.bm_rep[state]) +
159 price0(fe->eb.bm_rep0[state]) +
160 price0(fe->eb.bm_len[state][pos_state]);
161 int price = price0(fe->eb.bm_match[state][pos_state]);
162 if (St_is_char(state))
163 price += LZeb_price_literal(&fe->eb, prev_byte, cur_byte);
164 else
165 price += LZeb_price_matched(&fe->eb, prev_byte, cur_byte, match_byte);
166 if (short_rep_price < price) {
167 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1);
168 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1);
169 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], 0);
170 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0);
171 state = St_set_short_rep(state);
172 continue;
173 }
174 }
175
176 /* literal byte */
177 Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 0);
178 if (St_is_char(state))
179 LZeb_encode_literal(&fe->eb, prev_byte, cur_byte);
180 else
181 LZeb_encode_matched(&fe->eb, prev_byte, cur_byte, match_byte);
182 state = St_set_char(state);
183 }
184 }
185
186 LZeb_full_flush(&fe->eb, state);
187 return true;
188 }
189