xref: /plan9-contrib/sys/src/cmd/lzip/encoder_base.c (revision 13d37d7716a3e781f408392d7869dff5927c6669)
1 /*  Clzip - LZMA lossless data compressor
2     Copyright (C) 2010-2017 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include "lzip.h"
18 #include "encoder_base.h"
19 
/* File-scope definitions of two global tables.  Their types (Dis_slots,
   Prob_prices) are declared outside this file, presumably in
   encoder_base.h -- TODO confirm.  No function visible in this file reads
   or initialises them, so whoever fills them in lives elsewhere. */
20 Dis_slots dis_slots;
21 Prob_prices prob_prices;
22 
23 bool
Mb_read_block(Matchfinder_base * mb)24 Mb_read_block(Matchfinder_base *mb)
25 {
26 	if (!mb->at_stream_end && mb->stream_pos < mb->buffer_size) {
27 		int size = mb->buffer_size - mb->stream_pos;
28 		int rd = readblock(mb->infd, mb->buffer + mb->stream_pos, size);
29 
30 		mb->stream_pos += rd;
31 		if (rd != size && errno) {
32 			show_error( "Read error", errno, false );
33 			cleanup_and_fail(1);
34 		}
35 		if (rd < size) {
36 			mb->at_stream_end = true;
37 			mb->pos_limit = mb->buffer_size;
38 		}
39 	}
40 	return mb->pos < mb->stream_pos;
41 }
42 
43 void
Mb_normalize_pos(Matchfinder_base * mb)44 Mb_normalize_pos(Matchfinder_base *mb)
45 {
46 	if (mb->pos > mb->stream_pos)
47 		internal_error( "pos > stream_pos in Mb_normalize_pos." );
48 	if (!mb->at_stream_end) {
49 		int i, offset = mb->pos - mb->before_size - mb->dict_size;
50 		int size = mb->stream_pos - offset;
51 
52 		memmove(mb->buffer, mb->buffer + offset, size);
53 		mb->partial_data_pos += offset;
54 		mb->pos -= offset;	/* pos = before_size + dict_size */
55 		mb->stream_pos -= offset;
56 		for (i = 0; i < mb->num_prev_positions; ++i)
57 			if (mb->prev_positions[i] < offset)
58 				mb->prev_positions[i] = 0;
59 			else
60 				mb->prev_positions[i] -= offset;
61 		for (i = 0; i < mb->pos_array_size; ++i)
62 			if (mb->pos_array[i] < offset)
63 				mb->pos_array[i] = 0;
64 			else
65 				mb->pos_array[i] -= offset;
66 		Mb_read_block(mb);
67 	}
68 }
69 
70 bool
Mb_init(Matchfinder_base * mb,int before,int dict_size,int after_size,int dict_factor,int num_prev_positions23,int pos_array_factor,int ifd)71 Mb_init(Matchfinder_base *mb, int before, int dict_size, int after_size, int dict_factor, int num_prev_positions23, int pos_array_factor, int ifd)
72 {
73 	int buffer_size_limit = (dict_factor * dict_size) + before + after_size;
74 	unsigned size;
75 	int i;
76 
77 	mb->partial_data_pos = 0;
78 	mb->before_size = before;
79 	mb->pos = 0;
80 	mb->cyclic_pos = 0;
81 	mb->stream_pos = 0;
82 	mb->infd = ifd;
83 	mb->at_stream_end = false;
84 
85 	mb->buffer_size = max(65536, dict_size);
86 	mb->buffer = (uchar *)malloc(mb->buffer_size);
87 	if (!mb->buffer)
88 		return false;
89 	if (Mb_read_block(mb) && !mb->at_stream_end &&
90 	    mb->buffer_size < buffer_size_limit) {
91 		uchar * tmp;
92 		mb->buffer_size = buffer_size_limit;
93 		tmp = (uchar *)realloc(mb->buffer, mb->buffer_size);
94 		if (!tmp) {
95 			free(mb->buffer);
96 			return false;
97 		}
98 		mb->buffer = tmp;
99 		Mb_read_block(mb);
100 	}
101 	if (mb->at_stream_end && mb->stream_pos < dict_size)
102 		mb->dict_size = max(min_dict_size, mb->stream_pos);
103 	else
104 		mb->dict_size = dict_size;
105 	mb->pos_limit = mb->buffer_size;
106 	if (!mb->at_stream_end)
107 		mb->pos_limit -= after_size;
108 	size = real_bits(mb->dict_size - 1) - 2;
109 	if (size < 16)
110 		size = 16;
111 	size = 1 << size;
112 //	if (mb->dict_size > (1 << 26))		/* 64 MiB */
113 //		size >>= 1;
114 	mb->key4_mask = size - 1;
115 	size += num_prev_positions23;
116 
117 	mb->num_prev_positions = size;
118 	mb->pos_array_size = pos_array_factor * (mb->dict_size + 1);
119 	size += mb->pos_array_size;
120 	if (size * sizeof mb->prev_positions[0] <= size)
121 		mb->prev_positions = 0;
122 	else
123 		mb->prev_positions =
124 		    (int32_t *)malloc(size * sizeof mb->prev_positions[0]);
125 	if (!mb->prev_positions) {
126 		free(mb->buffer);
127 		return false;
128 	}
129 	mb->pos_array = mb->prev_positions + mb->num_prev_positions;
130 	for (i = 0; i < mb->num_prev_positions; ++i)
131 		mb->prev_positions[i] = 0;
132 	return true;
133 }
134 
135 void
Mb_reset(Matchfinder_base * mb)136 Mb_reset(Matchfinder_base *mb)
137 {
138 	int	i;
139 
140 	if (mb->stream_pos > mb->pos)
141 		memmove(mb->buffer, mb->buffer + mb->pos, mb->stream_pos - mb->pos);
142 	mb->partial_data_pos = 0;
143 	mb->stream_pos -= mb->pos;
144 	mb->pos = 0;
145 	mb->cyclic_pos = 0;
146 	for (i = 0; i < mb->num_prev_positions; ++i)
147 		mb->prev_positions[i] = 0;
148 	Mb_read_block(mb);
149 }
150 
151 void
Re_flush_data(Range_encoder * renc)152 Re_flush_data(Range_encoder *renc)
153 {
154 	if (renc->pos > 0) {
155 		if (renc->outfd >= 0 &&
156 		    writeblock(renc->outfd, renc->buffer, renc->pos) != renc->pos) {
157 			show_error( "Write error", errno, false );
158 			cleanup_and_fail(1);
159 		}
160 		renc->partial_member_pos += renc->pos;
161 		renc->pos = 0;
162 		show_progress(0, 0, 0, 0);
163 	}
164 }
165 
166 /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
167 void
LZeb_full_flush(LZ_encoder_base * eb,State state)168 LZeb_full_flush(LZ_encoder_base *eb, State state)
169 {
170 	int	i;
171 	int pos_state = Mb_data_position(&eb->mb) & pos_state_mask;
172 	File_trailer trailer;
173 	Re_encode_bit(&eb->renc, &eb->bm_match[state][pos_state], 1);
174 	Re_encode_bit(&eb->renc, &eb->bm_rep[state], 0);
175 	LZeb_encode_pair(eb, 0xFFFFFFFFU, min_match_len, pos_state);
176 	Re_flush(&eb->renc);
177 	Ft_set_data_crc(trailer, LZeb_crc(eb));
178 	Ft_set_data_size(trailer, Mb_data_position(&eb->mb));
179 	Ft_set_member_size(trailer, Re_member_position(&eb->renc) + Ft_size);
180 	for (i = 0; i < Ft_size; ++i)
181 		Re_put_byte(&eb->renc, trailer[i]);
182 	Re_flush_data(&eb->renc);
183 }
184 
185 void
LZeb_reset(LZ_encoder_base * eb)186 LZeb_reset(LZ_encoder_base *eb)
187 {
188 	Mb_reset(&eb->mb);
189 	eb->crc = 0xFFFFFFFFU;
190 	Bm_array_init(eb->bm_literal[0], (1 << literal_context_bits) * 0x300);
191 	Bm_array_init(eb->bm_match[0], states * pos_states);
192 	Bm_array_init(eb->bm_rep, states);
193 	Bm_array_init(eb->bm_rep0, states);
194 	Bm_array_init(eb->bm_rep1, states);
195 	Bm_array_init(eb->bm_rep2, states);
196 	Bm_array_init(eb->bm_len[0], states * pos_states);
197 	Bm_array_init(eb->bm_dis_slot[0], len_states * (1 << dis_slot_bits));
198 	Bm_array_init(eb->bm_dis, modeled_distances - end_dis_model + 1);
199 	Bm_array_init(eb->bm_align, dis_align_size);
200 	Lm_init(&eb->match_len_model);
201 	Lm_init(&eb->rep_len_model);
202 	Re_reset(&eb->renc);
203 }
204