1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19
20 #include "blocksplitter.h"
21
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "deflate.h"
27 #include "lz77.h"
28 #include "squeeze.h"
29 #include "tree.h"
30 #include "util.h"
31
32 /*
33 The "f" for the FindMinimum function below.
34 i: the current parameter of f(i)
35 context: for your implementation
36 */
37 typedef double FindMinimumFun(size_t i, void* context);
38
39 /*
40 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41 double, i is in range start-end (excluding end).
42 */
FindMinimum(FindMinimumFun f,void * context,size_t start,size_t end)43 static size_t FindMinimum(FindMinimumFun f, void* context,
44 size_t start, size_t end) {
45 if (end - start < 1024) {
46 double best = ZOPFLI_LARGE_FLOAT;
47 size_t result = start;
48 size_t i;
49 for (i = start; i < end; i++) {
50 double v = f(i, context);
51 if (v < best) {
52 best = v;
53 result = i;
54 }
55 }
56 return result;
57 } else {
58 /* Try to find minimum faster by recursively checking multiple points. */
59 #define NUM 9 /* Good value: 9. */
60 size_t i;
61 size_t p[NUM];
62 double vp[NUM];
63 size_t besti;
64 double best;
65 double lastbest = ZOPFLI_LARGE_FLOAT;
66 size_t pos = start;
67
68 for (;;) {
69 if (end - start <= NUM) break;
70
71 for (i = 0; i < NUM; i++) {
72 p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73 vp[i] = f(p[i], context);
74 }
75 besti = 0;
76 best = vp[0];
77 for (i = 1; i < NUM; i++) {
78 if (vp[i] < best) {
79 best = vp[i];
80 besti = i;
81 }
82 }
83 if (best > lastbest) break;
84
85 start = besti == 0 ? start : p[besti - 1];
86 end = besti == NUM - 1 ? end : p[besti + 1];
87
88 pos = p[besti];
89 lastbest = best;
90 }
91 return pos;
92 #undef NUM
93 }
94 }
95
96 /*
97 Returns estimated cost of a block in bits. It includes the size to encode the
98 tree and the size to encode all literal, length and distance symbols and their
99 extra bits.
100
101 litlens: lz77 lit/lengths
102 dists: ll77 distances
103 lstart: start of block
104 lend: end of block (not inclusive)
105 */
EstimateCost(const unsigned short * litlens,const unsigned short * dists,size_t lstart,size_t lend)106 static double EstimateCost(const unsigned short* litlens,
107 const unsigned short* dists,
108 size_t lstart, size_t lend) {
109 return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110 }
111
112 typedef struct SplitCostContext {
113 const unsigned short* litlens;
114 const unsigned short* dists;
115 size_t llsize;
116 size_t start;
117 size_t end;
118 } SplitCostContext;
119
120
121 /*
122 Gets the cost which is the sum of the cost of the left and the right section
123 of the data.
124 type: FindMinimumFun
125 */
SplitCost(size_t i,void * context)126 static double SplitCost(size_t i, void* context) {
127 SplitCostContext* c = (SplitCostContext*)context;
128 return EstimateCost(c->litlens, c->dists, c->start, i) +
129 EstimateCost(c->litlens, c->dists, i, c->end);
130 }
131
AddSorted(size_t value,size_t ** out,size_t * outsize)132 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133 size_t i;
134 ZOPFLI_APPEND_DATA(value, out, outsize);
135 if (*outsize > 0) {
136 for (i = 0; i < *outsize - 1; i++) {
137 if ((*out)[i] > value) {
138 size_t j;
139 for (j = *outsize - 1; j > i; j--) {
140 (*out)[j] = (*out)[j - 1];
141 }
142 (*out)[i] = value;
143 break;
144 }
145 }
146 }
147 }
148
149 /*
150 Prints the block split points as decimal and hex values in the terminal.
151 */
PrintBlockSplitPoints(const unsigned short * litlens,const unsigned short * dists,size_t llsize,const size_t * lz77splitpoints,size_t nlz77points)152 static void PrintBlockSplitPoints(const unsigned short* litlens,
153 const unsigned short* dists,
154 size_t llsize, const size_t* lz77splitpoints,
155 size_t nlz77points) {
156 size_t* splitpoints = 0;
157 size_t npoints = 0;
158 size_t i;
159 /* The input is given as lz77 indices, but we want to see the uncompressed
160 index values. */
161 size_t pos = 0;
162 if (nlz77points > 0) {
163 for (i = 0; i < llsize; i++) {
164 size_t length = dists[i] == 0 ? 1 : litlens[i];
165 if (lz77splitpoints[npoints] == i) {
166 ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
167 if (npoints == nlz77points) break;
168 }
169 pos += length;
170 }
171 }
172 assert(npoints == nlz77points);
173
174 fprintf(stderr, "block split points: ");
175 for (i = 0; i < npoints; i++) {
176 fprintf(stderr, "%d ", (int)splitpoints[i]);
177 }
178 fprintf(stderr, "(hex:");
179 for (i = 0; i < npoints; i++) {
180 fprintf(stderr, " %x", (int)splitpoints[i]);
181 }
182 fprintf(stderr, ")\n");
183
184 free(splitpoints);
185 }
186
187 /*
188 Finds next block to try to split, the largest of the available ones.
189 The largest is chosen to make sure that if only a limited amount of blocks is
190 requested, their sizes are spread evenly.
191 llsize: the size of the LL77 data, which is the size of the done array here.
192 done: array indicating which blocks starting at that position are no longer
193 splittable (splitting them increases rather than decreases cost).
194 splitpoints: the splitpoints found so far.
195 npoints: the amount of splitpoints found so far.
196 lstart: output variable, giving start of block.
197 lend: output variable, giving end of block.
198 returns 1 if a block was found, 0 if no block found (all are done).
199 */
FindLargestSplittableBlock(size_t llsize,const unsigned char * done,const size_t * splitpoints,size_t npoints,size_t * lstart,size_t * lend)200 static int FindLargestSplittableBlock(
201 size_t llsize, const unsigned char* done,
202 const size_t* splitpoints, size_t npoints,
203 size_t* lstart, size_t* lend) {
204 size_t longest = 0;
205 int found = 0;
206 size_t i;
207 for (i = 0; i <= npoints; i++) {
208 size_t start = i == 0 ? 0 : splitpoints[i - 1];
209 size_t end = i == npoints ? llsize - 1 : splitpoints[i];
210 if (!done[start] && end - start > longest) {
211 *lstart = start;
212 *lend = end;
213 found = 1;
214 longest = end - start;
215 }
216 }
217 return found;
218 }
219
ZopfliBlockSplitLZ77(const ZopfliOptions * options,const unsigned short * litlens,const unsigned short * dists,size_t llsize,size_t maxblocks,size_t ** splitpoints,size_t * npoints)220 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
221 const unsigned short* litlens,
222 const unsigned short* dists,
223 size_t llsize, size_t maxblocks,
224 size_t** splitpoints, size_t* npoints) {
225 size_t lstart, lend;
226 size_t i;
227 size_t llpos = 0;
228 size_t numblocks = 1;
229 unsigned char* done;
230 double splitcost, origcost;
231
232 if (llsize < 10) return; /* This code fails on tiny files. */
233
234 done = (unsigned char*)malloc(llsize);
235 if (!done) exit(-1); /* Allocation failed. */
236 for (i = 0; i < llsize; i++) done[i] = 0;
237
238 lstart = 0;
239 lend = llsize;
240 for (;;) {
241 SplitCostContext c;
242
243 if (maxblocks > 0 && numblocks >= maxblocks) {
244 break;
245 }
246
247 c.litlens = litlens;
248 c.dists = dists;
249 c.llsize = llsize;
250 c.start = lstart;
251 c.end = lend;
252 assert(lstart < lend);
253 llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
254
255 assert(llpos > lstart);
256 assert(llpos < lend);
257
258 splitcost = EstimateCost(litlens, dists, lstart, llpos) +
259 EstimateCost(litlens, dists, llpos, lend);
260 origcost = EstimateCost(litlens, dists, lstart, lend);
261
262 if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
263 done[lstart] = 1;
264 } else {
265 AddSorted(llpos, splitpoints, npoints);
266 numblocks++;
267 }
268
269 if (!FindLargestSplittableBlock(
270 llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
271 break; /* No further split will probably reduce compression. */
272 }
273
274 if (lend - lstart < 10) {
275 break;
276 }
277 }
278
279 if (options->verbose) {
280 PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
281 }
282
283 free(done);
284 }
285
ZopfliBlockSplit(const ZopfliOptions * options,const unsigned char * in,size_t instart,size_t inend,size_t maxblocks,size_t ** splitpoints,size_t * npoints)286 void ZopfliBlockSplit(const ZopfliOptions* options,
287 const unsigned char* in, size_t instart, size_t inend,
288 size_t maxblocks, size_t** splitpoints, size_t* npoints) {
289 size_t pos = 0;
290 size_t i;
291 ZopfliBlockState s;
292 size_t* lz77splitpoints = 0;
293 size_t nlz77points = 0;
294 ZopfliLZ77Store store;
295
296 ZopfliInitLZ77Store(&store);
297
298 s.options = options;
299 s.blockstart = instart;
300 s.blockend = inend;
301 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
302 s.lmc = 0;
303 #endif
304
305 *npoints = 0;
306 *splitpoints = 0;
307
308 /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
309 results in better blocks. */
310 ZopfliLZ77Greedy(&s, in, instart, inend, &store);
311
312 ZopfliBlockSplitLZ77(options,
313 store.litlens, store.dists, store.size, maxblocks,
314 &lz77splitpoints, &nlz77points);
315
316 /* Convert LZ77 positions to positions in the uncompressed input. */
317 pos = instart;
318 if (nlz77points > 0) {
319 for (i = 0; i < store.size; i++) {
320 size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
321 if (lz77splitpoints[*npoints] == i) {
322 ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
323 if (*npoints == nlz77points) break;
324 }
325 pos += length;
326 }
327 }
328 assert(*npoints == nlz77points);
329
330 free(lz77splitpoints);
331 ZopfliCleanLZ77Store(&store);
332 }
333
ZopfliBlockSplitSimple(const unsigned char * in,size_t instart,size_t inend,size_t blocksize,size_t ** splitpoints,size_t * npoints)334 void ZopfliBlockSplitSimple(const unsigned char* in,
335 size_t instart, size_t inend,
336 size_t blocksize,
337 size_t** splitpoints, size_t* npoints) {
338 size_t i = instart;
339 while (i < inend) {
340 ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
341 i += blocksize;
342 }
343 (void)in;
344 }
345