xref: /netbsd-src/external/zlib/pigz/dist/zopfli/blocksplitter.c (revision 3e407a68a64115001ff3f1b658def92a1368b7e6)
1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "blocksplitter.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "deflate.h"
27 #include "lz77.h"
28 #include "squeeze.h"
29 #include "tree.h"
30 #include "util.h"
31 
32 /*
33 The "f" for the FindMinimum function below.
34 i: the current parameter of f(i)
35 context: for your implementation
36 */
37 typedef double FindMinimumFun(size_t i, void* context);
38 
39 /*
40 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41 double, i is in range start-end (excluding end).
42 */
FindMinimum(FindMinimumFun f,void * context,size_t start,size_t end)43 static size_t FindMinimum(FindMinimumFun f, void* context,
44                           size_t start, size_t end) {
45   if (end - start < 1024) {
46     double best = ZOPFLI_LARGE_FLOAT;
47     size_t result = start;
48     size_t i;
49     for (i = start; i < end; i++) {
50       double v = f(i, context);
51       if (v < best) {
52         best = v;
53         result = i;
54       }
55     }
56     return result;
57   } else {
58     /* Try to find minimum faster by recursively checking multiple points. */
59 #define NUM 9  /* Good value: 9. */
60     size_t i;
61     size_t p[NUM];
62     double vp[NUM];
63     size_t besti;
64     double best;
65     double lastbest = ZOPFLI_LARGE_FLOAT;
66     size_t pos = start;
67 
68     for (;;) {
69       if (end - start <= NUM) break;
70 
71       for (i = 0; i < NUM; i++) {
72         p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73         vp[i] = f(p[i], context);
74       }
75       besti = 0;
76       best = vp[0];
77       for (i = 1; i < NUM; i++) {
78         if (vp[i] < best) {
79           best = vp[i];
80           besti = i;
81         }
82       }
83       if (best > lastbest) break;
84 
85       start = besti == 0 ? start : p[besti - 1];
86       end = besti == NUM - 1 ? end : p[besti + 1];
87 
88       pos = p[besti];
89       lastbest = best;
90     }
91     return pos;
92 #undef NUM
93   }
94 }
95 
96 /*
97 Returns estimated cost of a block in bits.  It includes the size to encode the
98 tree and the size to encode all literal, length and distance symbols and their
99 extra bits.
100 
101 litlens: lz77 lit/lengths
102 dists: ll77 distances
103 lstart: start of block
104 lend: end of block (not inclusive)
105 */
EstimateCost(const unsigned short * litlens,const unsigned short * dists,size_t lstart,size_t lend)106 static double EstimateCost(const unsigned short* litlens,
107                            const unsigned short* dists,
108                            size_t lstart, size_t lend) {
109   return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110 }
111 
112 typedef struct SplitCostContext {
113   const unsigned short* litlens;
114   const unsigned short* dists;
115   size_t llsize;
116   size_t start;
117   size_t end;
118 } SplitCostContext;
119 
120 
121 /*
122 Gets the cost which is the sum of the cost of the left and the right section
123 of the data.
124 type: FindMinimumFun
125 */
SplitCost(size_t i,void * context)126 static double SplitCost(size_t i, void* context) {
127   SplitCostContext* c = (SplitCostContext*)context;
128   return EstimateCost(c->litlens, c->dists, c->start, i) +
129       EstimateCost(c->litlens, c->dists, i, c->end);
130 }
131 
AddSorted(size_t value,size_t ** out,size_t * outsize)132 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133   size_t i;
134   ZOPFLI_APPEND_DATA(value, out, outsize);
135   if (*outsize > 0) {
136     for (i = 0; i < *outsize - 1; i++) {
137       if ((*out)[i] > value) {
138         size_t j;
139         for (j = *outsize - 1; j > i; j--) {
140           (*out)[j] = (*out)[j - 1];
141         }
142         (*out)[i] = value;
143         break;
144       }
145     }
146   }
147 }
148 
149 /*
150 Prints the block split points as decimal and hex values in the terminal.
151 */
PrintBlockSplitPoints(const unsigned short * litlens,const unsigned short * dists,size_t llsize,const size_t * lz77splitpoints,size_t nlz77points)152 static void PrintBlockSplitPoints(const unsigned short* litlens,
153                                   const unsigned short* dists,
154                                   size_t llsize, const size_t* lz77splitpoints,
155                                   size_t nlz77points) {
156   size_t* splitpoints = 0;
157   size_t npoints = 0;
158   size_t i;
159   /* The input is given as lz77 indices, but we want to see the uncompressed
160   index values. */
161   size_t pos = 0;
162   if (nlz77points > 0) {
163     for (i = 0; i < llsize; i++) {
164       size_t length = dists[i] == 0 ? 1 : litlens[i];
165       if (lz77splitpoints[npoints] == i) {
166         ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
167         if (npoints == nlz77points) break;
168       }
169       pos += length;
170     }
171   }
172   assert(npoints == nlz77points);
173 
174   fprintf(stderr, "block split points: ");
175   for (i = 0; i < npoints; i++) {
176     fprintf(stderr, "%d ", (int)splitpoints[i]);
177   }
178   fprintf(stderr, "(hex:");
179   for (i = 0; i < npoints; i++) {
180     fprintf(stderr, " %x", (int)splitpoints[i]);
181   }
182   fprintf(stderr, ")\n");
183 
184   free(splitpoints);
185 }
186 
187 /*
188 Finds next block to try to split, the largest of the available ones.
189 The largest is chosen to make sure that if only a limited amount of blocks is
190 requested, their sizes are spread evenly.
191 llsize: the size of the LL77 data, which is the size of the done array here.
192 done: array indicating which blocks starting at that position are no longer
193     splittable (splitting them increases rather than decreases cost).
194 splitpoints: the splitpoints found so far.
195 npoints: the amount of splitpoints found so far.
196 lstart: output variable, giving start of block.
197 lend: output variable, giving end of block.
198 returns 1 if a block was found, 0 if no block found (all are done).
199 */
FindLargestSplittableBlock(size_t llsize,const unsigned char * done,const size_t * splitpoints,size_t npoints,size_t * lstart,size_t * lend)200 static int FindLargestSplittableBlock(
201     size_t llsize, const unsigned char* done,
202     const size_t* splitpoints, size_t npoints,
203     size_t* lstart, size_t* lend) {
204   size_t longest = 0;
205   int found = 0;
206   size_t i;
207   for (i = 0; i <= npoints; i++) {
208     size_t start = i == 0 ? 0 : splitpoints[i - 1];
209     size_t end = i == npoints ? llsize - 1 : splitpoints[i];
210     if (!done[start] && end - start > longest) {
211       *lstart = start;
212       *lend = end;
213       found = 1;
214       longest = end - start;
215     }
216   }
217   return found;
218 }
219 
ZopfliBlockSplitLZ77(const ZopfliOptions * options,const unsigned short * litlens,const unsigned short * dists,size_t llsize,size_t maxblocks,size_t ** splitpoints,size_t * npoints)220 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
221                           const unsigned short* litlens,
222                           const unsigned short* dists,
223                           size_t llsize, size_t maxblocks,
224                           size_t** splitpoints, size_t* npoints) {
225   size_t lstart, lend;
226   size_t i;
227   size_t llpos = 0;
228   size_t numblocks = 1;
229   unsigned char* done;
230   double splitcost, origcost;
231 
232   if (llsize < 10) return;  /* This code fails on tiny files. */
233 
234   done = (unsigned char*)malloc(llsize);
235   if (!done) exit(-1); /* Allocation failed. */
236   for (i = 0; i < llsize; i++) done[i] = 0;
237 
238   lstart = 0;
239   lend = llsize;
240   for (;;) {
241     SplitCostContext c;
242 
243     if (maxblocks > 0 && numblocks >= maxblocks) {
244       break;
245     }
246 
247     c.litlens = litlens;
248     c.dists = dists;
249     c.llsize = llsize;
250     c.start = lstart;
251     c.end = lend;
252     assert(lstart < lend);
253     llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
254 
255     assert(llpos > lstart);
256     assert(llpos < lend);
257 
258     splitcost = EstimateCost(litlens, dists, lstart, llpos) +
259         EstimateCost(litlens, dists, llpos, lend);
260     origcost = EstimateCost(litlens, dists, lstart, lend);
261 
262     if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
263       done[lstart] = 1;
264     } else {
265       AddSorted(llpos, splitpoints, npoints);
266       numblocks++;
267     }
268 
269     if (!FindLargestSplittableBlock(
270         llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
271       break;  /* No further split will probably reduce compression. */
272     }
273 
274     if (lend - lstart < 10) {
275       break;
276     }
277   }
278 
279   if (options->verbose) {
280     PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
281   }
282 
283   free(done);
284 }
285 
ZopfliBlockSplit(const ZopfliOptions * options,const unsigned char * in,size_t instart,size_t inend,size_t maxblocks,size_t ** splitpoints,size_t * npoints)286 void ZopfliBlockSplit(const ZopfliOptions* options,
287                       const unsigned char* in, size_t instart, size_t inend,
288                       size_t maxblocks, size_t** splitpoints, size_t* npoints) {
289   size_t pos = 0;
290   size_t i;
291   ZopfliBlockState s;
292   size_t* lz77splitpoints = 0;
293   size_t nlz77points = 0;
294   ZopfliLZ77Store store;
295 
296   ZopfliInitLZ77Store(&store);
297 
298   s.options = options;
299   s.blockstart = instart;
300   s.blockend = inend;
301 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
302   s.lmc = 0;
303 #endif
304 
305   *npoints = 0;
306   *splitpoints = 0;
307 
308   /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
309   results in better blocks. */
310   ZopfliLZ77Greedy(&s, in, instart, inend, &store);
311 
312   ZopfliBlockSplitLZ77(options,
313                        store.litlens, store.dists, store.size, maxblocks,
314                        &lz77splitpoints, &nlz77points);
315 
316   /* Convert LZ77 positions to positions in the uncompressed input. */
317   pos = instart;
318   if (nlz77points > 0) {
319     for (i = 0; i < store.size; i++) {
320       size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
321       if (lz77splitpoints[*npoints] == i) {
322         ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
323         if (*npoints == nlz77points) break;
324       }
325       pos += length;
326     }
327   }
328   assert(*npoints == nlz77points);
329 
330   free(lz77splitpoints);
331   ZopfliCleanLZ77Store(&store);
332 }
333 
ZopfliBlockSplitSimple(const unsigned char * in,size_t instart,size_t inend,size_t blocksize,size_t ** splitpoints,size_t * npoints)334 void ZopfliBlockSplitSimple(const unsigned char* in,
335                             size_t instart, size_t inend,
336                             size_t blocksize,
337                             size_t** splitpoints, size_t* npoints) {
338   size_t i = instart;
339   while (i < inend) {
340     ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
341     i += blocksize;
342   }
343   (void)in;
344 }
345