1*59cc4ca5SDavid du Colombier
2*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
3*59cc4ca5SDavid du Colombier /*--- Block sorting machinery ---*/
4*59cc4ca5SDavid du Colombier /*--- blocksort.c ---*/
5*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
6*59cc4ca5SDavid du Colombier
7*59cc4ca5SDavid du Colombier /*--
8*59cc4ca5SDavid du Colombier This file is a part of bzip2 and/or libbzip2, a program and
9*59cc4ca5SDavid du Colombier library for lossless, block-sorting data compression.
10*59cc4ca5SDavid du Colombier
11*59cc4ca5SDavid du Colombier Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
12*59cc4ca5SDavid du Colombier
13*59cc4ca5SDavid du Colombier Redistribution and use in source and binary forms, with or without
14*59cc4ca5SDavid du Colombier modification, are permitted provided that the following conditions
15*59cc4ca5SDavid du Colombier are met:
16*59cc4ca5SDavid du Colombier
17*59cc4ca5SDavid du Colombier 1. Redistributions of source code must retain the above copyright
18*59cc4ca5SDavid du Colombier notice, this list of conditions and the following disclaimer.
19*59cc4ca5SDavid du Colombier
20*59cc4ca5SDavid du Colombier 2. The origin of this software must not be misrepresented; you must
21*59cc4ca5SDavid du Colombier not claim that you wrote the original software. If you use this
22*59cc4ca5SDavid du Colombier software in a product, an acknowledgment in the product
23*59cc4ca5SDavid du Colombier documentation would be appreciated but is not required.
24*59cc4ca5SDavid du Colombier
25*59cc4ca5SDavid du Colombier 3. Altered source versions must be plainly marked as such, and must
26*59cc4ca5SDavid du Colombier not be misrepresented as being the original software.
27*59cc4ca5SDavid du Colombier
28*59cc4ca5SDavid du Colombier 4. The name of the author may not be used to endorse or promote
29*59cc4ca5SDavid du Colombier products derived from this software without specific prior written
30*59cc4ca5SDavid du Colombier permission.
31*59cc4ca5SDavid du Colombier
32*59cc4ca5SDavid du Colombier THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33*59cc4ca5SDavid du Colombier OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34*59cc4ca5SDavid du Colombier WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35*59cc4ca5SDavid du Colombier ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36*59cc4ca5SDavid du Colombier DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37*59cc4ca5SDavid du Colombier DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38*59cc4ca5SDavid du Colombier GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39*59cc4ca5SDavid du Colombier INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40*59cc4ca5SDavid du Colombier WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41*59cc4ca5SDavid du Colombier NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42*59cc4ca5SDavid du Colombier SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43*59cc4ca5SDavid du Colombier
44*59cc4ca5SDavid du Colombier Julian Seward, Cambridge, UK.
45*59cc4ca5SDavid du Colombier jseward@acm.org
46*59cc4ca5SDavid du Colombier bzip2/libbzip2 version 1.0 of 21 March 2000
47*59cc4ca5SDavid du Colombier
48*59cc4ca5SDavid du Colombier This program is based on (at least) the work of:
49*59cc4ca5SDavid du Colombier Mike Burrows
50*59cc4ca5SDavid du Colombier David Wheeler
51*59cc4ca5SDavid du Colombier Peter Fenwick
52*59cc4ca5SDavid du Colombier Alistair Moffat
53*59cc4ca5SDavid du Colombier Radford Neal
54*59cc4ca5SDavid du Colombier Ian H. Witten
55*59cc4ca5SDavid du Colombier Robert Sedgewick
56*59cc4ca5SDavid du Colombier Jon L. Bentley
57*59cc4ca5SDavid du Colombier
58*59cc4ca5SDavid du Colombier For more information on these sources, see the manual.
59*59cc4ca5SDavid du Colombier
60*59cc4ca5SDavid du Colombier To get some idea how the block sorting algorithms in this file
61*59cc4ca5SDavid du Colombier work, read my paper
62*59cc4ca5SDavid du Colombier On the Performance of BWT Sorting Algorithms
63*59cc4ca5SDavid du Colombier in Proceedings of the IEEE Data Compression Conference 2000,
64*59cc4ca5SDavid du Colombier Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
65*59cc4ca5SDavid du Colombier file implements the algorithm called cache in the paper.
66*59cc4ca5SDavid du Colombier --*/
67*59cc4ca5SDavid du Colombier
68*59cc4ca5SDavid du Colombier #include "os.h"
69*59cc4ca5SDavid du Colombier #include "bzlib.h"
70*59cc4ca5SDavid du Colombier #include "bzlib_private.h"
71*59cc4ca5SDavid du Colombier
72*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
73*59cc4ca5SDavid du Colombier /*--- Fallback O(N log(N)^2) sorting ---*/
74*59cc4ca5SDavid du Colombier /*--- algorithm, for repetitive blocks ---*/
75*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
76*59cc4ca5SDavid du Colombier
77*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
78*59cc4ca5SDavid du Colombier static
79*59cc4ca5SDavid du Colombier __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)80*59cc4ca5SDavid du Colombier void fallbackSimpleSort ( UInt32* fmap,
81*59cc4ca5SDavid du Colombier UInt32* eclass,
82*59cc4ca5SDavid du Colombier Int32 lo,
83*59cc4ca5SDavid du Colombier Int32 hi )
84*59cc4ca5SDavid du Colombier {
85*59cc4ca5SDavid du Colombier Int32 i, j, tmp;
86*59cc4ca5SDavid du Colombier UInt32 ec_tmp;
87*59cc4ca5SDavid du Colombier
88*59cc4ca5SDavid du Colombier if (lo == hi) return;
89*59cc4ca5SDavid du Colombier
90*59cc4ca5SDavid du Colombier if (hi - lo > 3) {
91*59cc4ca5SDavid du Colombier for ( i = hi-4; i >= lo; i-- ) {
92*59cc4ca5SDavid du Colombier tmp = fmap[i];
93*59cc4ca5SDavid du Colombier ec_tmp = eclass[tmp];
94*59cc4ca5SDavid du Colombier for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
95*59cc4ca5SDavid du Colombier fmap[j-4] = fmap[j];
96*59cc4ca5SDavid du Colombier fmap[j-4] = tmp;
97*59cc4ca5SDavid du Colombier }
98*59cc4ca5SDavid du Colombier }
99*59cc4ca5SDavid du Colombier
100*59cc4ca5SDavid du Colombier for ( i = hi-1; i >= lo; i-- ) {
101*59cc4ca5SDavid du Colombier tmp = fmap[i];
102*59cc4ca5SDavid du Colombier ec_tmp = eclass[tmp];
103*59cc4ca5SDavid du Colombier for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
104*59cc4ca5SDavid du Colombier fmap[j-1] = fmap[j];
105*59cc4ca5SDavid du Colombier fmap[j-1] = tmp;
106*59cc4ca5SDavid du Colombier }
107*59cc4ca5SDavid du Colombier }
108*59cc4ca5SDavid du Colombier
109*59cc4ca5SDavid du Colombier
110*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
111*59cc4ca5SDavid du Colombier #define fswap(zz1, zz2) \
112*59cc4ca5SDavid du Colombier { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
113*59cc4ca5SDavid du Colombier
114*59cc4ca5SDavid du Colombier #define fvswap(zzp1, zzp2, zzn) \
115*59cc4ca5SDavid du Colombier { \
116*59cc4ca5SDavid du Colombier Int32 yyp1 = (zzp1); \
117*59cc4ca5SDavid du Colombier Int32 yyp2 = (zzp2); \
118*59cc4ca5SDavid du Colombier Int32 yyn = (zzn); \
119*59cc4ca5SDavid du Colombier while (yyn > 0) { \
120*59cc4ca5SDavid du Colombier fswap(fmap[yyp1], fmap[yyp2]); \
121*59cc4ca5SDavid du Colombier yyp1++; yyp2++; yyn--; \
122*59cc4ca5SDavid du Colombier } \
123*59cc4ca5SDavid du Colombier }
124*59cc4ca5SDavid du Colombier
125*59cc4ca5SDavid du Colombier
126*59cc4ca5SDavid du Colombier #define fmin(a,b) ((a) < (b)) ? (a) : (b)
127*59cc4ca5SDavid du Colombier
128*59cc4ca5SDavid du Colombier #define fpush(lz,hz) { stackLo[sp] = lz; \
129*59cc4ca5SDavid du Colombier stackHi[sp] = hz; \
130*59cc4ca5SDavid du Colombier sp++; }
131*59cc4ca5SDavid du Colombier
132*59cc4ca5SDavid du Colombier #define fpop(lz,hz) { sp--; \
133*59cc4ca5SDavid du Colombier lz = stackLo[sp]; \
134*59cc4ca5SDavid du Colombier hz = stackHi[sp]; }
135*59cc4ca5SDavid du Colombier
136*59cc4ca5SDavid du Colombier #define FALLBACK_QSORT_SMALL_THRESH 10
137*59cc4ca5SDavid du Colombier #define FALLBACK_QSORT_STACK_SIZE 100
138*59cc4ca5SDavid du Colombier
139*59cc4ca5SDavid du Colombier
140*59cc4ca5SDavid du Colombier static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)141*59cc4ca5SDavid du Colombier void fallbackQSort3 ( UInt32* fmap,
142*59cc4ca5SDavid du Colombier UInt32* eclass,
143*59cc4ca5SDavid du Colombier Int32 loSt,
144*59cc4ca5SDavid du Colombier Int32 hiSt )
145*59cc4ca5SDavid du Colombier {
146*59cc4ca5SDavid du Colombier Int32 unLo, unHi, ltLo, gtHi, n, m;
147*59cc4ca5SDavid du Colombier Int32 sp, lo, hi;
148*59cc4ca5SDavid du Colombier UInt32 med, r, r3;
149*59cc4ca5SDavid du Colombier Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
150*59cc4ca5SDavid du Colombier Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
151*59cc4ca5SDavid du Colombier
152*59cc4ca5SDavid du Colombier r = 0;
153*59cc4ca5SDavid du Colombier
154*59cc4ca5SDavid du Colombier sp = 0;
155*59cc4ca5SDavid du Colombier fpush ( loSt, hiSt );
156*59cc4ca5SDavid du Colombier
157*59cc4ca5SDavid du Colombier while (sp > 0) {
158*59cc4ca5SDavid du Colombier
159*59cc4ca5SDavid du Colombier AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
160*59cc4ca5SDavid du Colombier
161*59cc4ca5SDavid du Colombier fpop ( lo, hi );
162*59cc4ca5SDavid du Colombier if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
163*59cc4ca5SDavid du Colombier fallbackSimpleSort ( fmap, eclass, lo, hi );
164*59cc4ca5SDavid du Colombier continue;
165*59cc4ca5SDavid du Colombier }
166*59cc4ca5SDavid du Colombier
167*59cc4ca5SDavid du Colombier /* Random partitioning. Median of 3 sometimes fails to
168*59cc4ca5SDavid du Colombier avoid bad cases. Median of 9 seems to help but
169*59cc4ca5SDavid du Colombier looks rather expensive. This too seems to work but
170*59cc4ca5SDavid du Colombier is cheaper. Guidance for the magic constants
171*59cc4ca5SDavid du Colombier 7621 and 32768 is taken from Sedgewick's algorithms
172*59cc4ca5SDavid du Colombier book, chapter 35.
173*59cc4ca5SDavid du Colombier */
174*59cc4ca5SDavid du Colombier r = ((r * 7621) + 1) % 32768;
175*59cc4ca5SDavid du Colombier r3 = r % 3;
176*59cc4ca5SDavid du Colombier if (r3 == 0) med = eclass[fmap[lo]]; else
177*59cc4ca5SDavid du Colombier if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
178*59cc4ca5SDavid du Colombier med = eclass[fmap[hi]];
179*59cc4ca5SDavid du Colombier
180*59cc4ca5SDavid du Colombier unLo = ltLo = lo;
181*59cc4ca5SDavid du Colombier unHi = gtHi = hi;
182*59cc4ca5SDavid du Colombier
183*59cc4ca5SDavid du Colombier while (1) {
184*59cc4ca5SDavid du Colombier while (1) {
185*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
186*59cc4ca5SDavid du Colombier n = (Int32)eclass[fmap[unLo]] - (Int32)med;
187*59cc4ca5SDavid du Colombier if (n == 0) {
188*59cc4ca5SDavid du Colombier fswap(fmap[unLo], fmap[ltLo]);
189*59cc4ca5SDavid du Colombier ltLo++; unLo++;
190*59cc4ca5SDavid du Colombier continue;
191*59cc4ca5SDavid du Colombier };
192*59cc4ca5SDavid du Colombier if (n > 0) break;
193*59cc4ca5SDavid du Colombier unLo++;
194*59cc4ca5SDavid du Colombier }
195*59cc4ca5SDavid du Colombier while (1) {
196*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
197*59cc4ca5SDavid du Colombier n = (Int32)eclass[fmap[unHi]] - (Int32)med;
198*59cc4ca5SDavid du Colombier if (n == 0) {
199*59cc4ca5SDavid du Colombier fswap(fmap[unHi], fmap[gtHi]);
200*59cc4ca5SDavid du Colombier gtHi--; unHi--;
201*59cc4ca5SDavid du Colombier continue;
202*59cc4ca5SDavid du Colombier };
203*59cc4ca5SDavid du Colombier if (n < 0) break;
204*59cc4ca5SDavid du Colombier unHi--;
205*59cc4ca5SDavid du Colombier }
206*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
207*59cc4ca5SDavid du Colombier fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
208*59cc4ca5SDavid du Colombier }
209*59cc4ca5SDavid du Colombier
210*59cc4ca5SDavid du Colombier AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
211*59cc4ca5SDavid du Colombier
212*59cc4ca5SDavid du Colombier if (gtHi < ltLo) continue;
213*59cc4ca5SDavid du Colombier
214*59cc4ca5SDavid du Colombier n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
215*59cc4ca5SDavid du Colombier m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
216*59cc4ca5SDavid du Colombier
217*59cc4ca5SDavid du Colombier n = lo + unLo - ltLo - 1;
218*59cc4ca5SDavid du Colombier m = hi - (gtHi - unHi) + 1;
219*59cc4ca5SDavid du Colombier
220*59cc4ca5SDavid du Colombier if (n - lo > hi - m) {
221*59cc4ca5SDavid du Colombier fpush ( lo, n );
222*59cc4ca5SDavid du Colombier fpush ( m, hi );
223*59cc4ca5SDavid du Colombier } else {
224*59cc4ca5SDavid du Colombier fpush ( m, hi );
225*59cc4ca5SDavid du Colombier fpush ( lo, n );
226*59cc4ca5SDavid du Colombier }
227*59cc4ca5SDavid du Colombier }
228*59cc4ca5SDavid du Colombier }
229*59cc4ca5SDavid du Colombier
230*59cc4ca5SDavid du Colombier #undef fmin
231*59cc4ca5SDavid du Colombier #undef fpush
232*59cc4ca5SDavid du Colombier #undef fpop
233*59cc4ca5SDavid du Colombier #undef fswap
234*59cc4ca5SDavid du Colombier #undef fvswap
235*59cc4ca5SDavid du Colombier #undef FALLBACK_QSORT_SMALL_THRESH
236*59cc4ca5SDavid du Colombier #undef FALLBACK_QSORT_STACK_SIZE
237*59cc4ca5SDavid du Colombier
238*59cc4ca5SDavid du Colombier
239*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
240*59cc4ca5SDavid du Colombier /* Pre:
241*59cc4ca5SDavid du Colombier nblock > 0
242*59cc4ca5SDavid du Colombier eclass exists for [0 .. nblock-1]
243*59cc4ca5SDavid du Colombier ((UChar*)eclass) [0 .. nblock-1] holds block
244*59cc4ca5SDavid du Colombier ptr exists for [0 .. nblock-1]
245*59cc4ca5SDavid du Colombier
246*59cc4ca5SDavid du Colombier Post:
247*59cc4ca5SDavid du Colombier ((UChar*)eclass) [0 .. nblock-1] holds block
248*59cc4ca5SDavid du Colombier All other areas of eclass destroyed
249*59cc4ca5SDavid du Colombier fmap [0 .. nblock-1] holds sorted order
250*59cc4ca5SDavid du Colombier bhtab [ 0 .. 2+(nblock/32) ] destroyed
251*59cc4ca5SDavid du Colombier */
252*59cc4ca5SDavid du Colombier
253*59cc4ca5SDavid du Colombier #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
254*59cc4ca5SDavid du Colombier #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
255*59cc4ca5SDavid du Colombier #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
256*59cc4ca5SDavid du Colombier #define WORD_BH(zz) bhtab[(zz) >> 5]
257*59cc4ca5SDavid du Colombier #define UNALIGNED_BH(zz) ((zz) & 0x01f)
258*59cc4ca5SDavid du Colombier
259*59cc4ca5SDavid du Colombier static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)260*59cc4ca5SDavid du Colombier void fallbackSort ( UInt32* fmap,
261*59cc4ca5SDavid du Colombier UInt32* eclass,
262*59cc4ca5SDavid du Colombier UInt32* bhtab,
263*59cc4ca5SDavid du Colombier Int32 nblock,
264*59cc4ca5SDavid du Colombier Int32 verb )
265*59cc4ca5SDavid du Colombier {
266*59cc4ca5SDavid du Colombier Int32 ftab[257];
267*59cc4ca5SDavid du Colombier Int32 ftabCopy[256];
268*59cc4ca5SDavid du Colombier Int32 H, i, j, k, l, r, cc, cc1;
269*59cc4ca5SDavid du Colombier Int32 nNotDone;
270*59cc4ca5SDavid du Colombier Int32 nBhtab;
271*59cc4ca5SDavid du Colombier UChar* eclass8 = (UChar*)eclass;
272*59cc4ca5SDavid du Colombier
273*59cc4ca5SDavid du Colombier /*--
274*59cc4ca5SDavid du Colombier Initial 1-char radix sort to generate
275*59cc4ca5SDavid du Colombier initial fmap and initial BH bits.
276*59cc4ca5SDavid du Colombier --*/
277*59cc4ca5SDavid du Colombier if (verb >= 4)
278*59cc4ca5SDavid du Colombier VPrintf0 ( " bucket sorting ...\n" );
279*59cc4ca5SDavid du Colombier for (i = 0; i < 257; i++) ftab[i] = 0;
280*59cc4ca5SDavid du Colombier for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
281*59cc4ca5SDavid du Colombier for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
282*59cc4ca5SDavid du Colombier for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
283*59cc4ca5SDavid du Colombier
284*59cc4ca5SDavid du Colombier for (i = 0; i < nblock; i++) {
285*59cc4ca5SDavid du Colombier j = eclass8[i];
286*59cc4ca5SDavid du Colombier k = ftab[j] - 1;
287*59cc4ca5SDavid du Colombier ftab[j] = k;
288*59cc4ca5SDavid du Colombier fmap[k] = i;
289*59cc4ca5SDavid du Colombier }
290*59cc4ca5SDavid du Colombier
291*59cc4ca5SDavid du Colombier nBhtab = 2 + (nblock / 32);
292*59cc4ca5SDavid du Colombier for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
293*59cc4ca5SDavid du Colombier for (i = 0; i < 256; i++) SET_BH(ftab[i]);
294*59cc4ca5SDavid du Colombier
295*59cc4ca5SDavid du Colombier /*--
296*59cc4ca5SDavid du Colombier Inductively refine the buckets. Kind-of an
297*59cc4ca5SDavid du Colombier "exponential radix sort" (!), inspired by the
298*59cc4ca5SDavid du Colombier Manber-Myers suffix array construction algorithm.
299*59cc4ca5SDavid du Colombier --*/
300*59cc4ca5SDavid du Colombier
301*59cc4ca5SDavid du Colombier /*-- set sentinel bits for block-end detection --*/
302*59cc4ca5SDavid du Colombier for (i = 0; i < 32; i++) {
303*59cc4ca5SDavid du Colombier SET_BH(nblock + 2*i);
304*59cc4ca5SDavid du Colombier CLEAR_BH(nblock + 2*i + 1);
305*59cc4ca5SDavid du Colombier }
306*59cc4ca5SDavid du Colombier
307*59cc4ca5SDavid du Colombier /*-- the log(N) loop --*/
308*59cc4ca5SDavid du Colombier H = 1;
309*59cc4ca5SDavid du Colombier while (1) {
310*59cc4ca5SDavid du Colombier
311*59cc4ca5SDavid du Colombier if (verb >= 4)
312*59cc4ca5SDavid du Colombier VPrintf1 ( " depth %6d has ", H );
313*59cc4ca5SDavid du Colombier
314*59cc4ca5SDavid du Colombier j = 0;
315*59cc4ca5SDavid du Colombier for (i = 0; i < nblock; i++) {
316*59cc4ca5SDavid du Colombier if (ISSET_BH(i)) j = i;
317*59cc4ca5SDavid du Colombier k = fmap[i] - H; if (k < 0) k += nblock;
318*59cc4ca5SDavid du Colombier eclass[k] = j;
319*59cc4ca5SDavid du Colombier }
320*59cc4ca5SDavid du Colombier
321*59cc4ca5SDavid du Colombier nNotDone = 0;
322*59cc4ca5SDavid du Colombier r = -1;
323*59cc4ca5SDavid du Colombier while (1) {
324*59cc4ca5SDavid du Colombier
325*59cc4ca5SDavid du Colombier /*-- find the next non-singleton bucket --*/
326*59cc4ca5SDavid du Colombier k = r + 1;
327*59cc4ca5SDavid du Colombier while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
328*59cc4ca5SDavid du Colombier if (ISSET_BH(k)) {
329*59cc4ca5SDavid du Colombier while (WORD_BH(k) == 0xffffffff) k += 32;
330*59cc4ca5SDavid du Colombier while (ISSET_BH(k)) k++;
331*59cc4ca5SDavid du Colombier }
332*59cc4ca5SDavid du Colombier l = k - 1;
333*59cc4ca5SDavid du Colombier if (l >= nblock) break;
334*59cc4ca5SDavid du Colombier while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
335*59cc4ca5SDavid du Colombier if (!ISSET_BH(k)) {
336*59cc4ca5SDavid du Colombier while (WORD_BH(k) == 0x00000000) k += 32;
337*59cc4ca5SDavid du Colombier while (!ISSET_BH(k)) k++;
338*59cc4ca5SDavid du Colombier }
339*59cc4ca5SDavid du Colombier r = k - 1;
340*59cc4ca5SDavid du Colombier if (r >= nblock) break;
341*59cc4ca5SDavid du Colombier
342*59cc4ca5SDavid du Colombier /*-- now [l, r] bracket current bucket --*/
343*59cc4ca5SDavid du Colombier if (r > l) {
344*59cc4ca5SDavid du Colombier nNotDone += (r - l + 1);
345*59cc4ca5SDavid du Colombier fallbackQSort3 ( fmap, eclass, l, r );
346*59cc4ca5SDavid du Colombier
347*59cc4ca5SDavid du Colombier /*-- scan bucket and generate header bits-- */
348*59cc4ca5SDavid du Colombier cc = -1;
349*59cc4ca5SDavid du Colombier for (i = l; i <= r; i++) {
350*59cc4ca5SDavid du Colombier cc1 = eclass[fmap[i]];
351*59cc4ca5SDavid du Colombier if (cc != cc1) { SET_BH(i); cc = cc1; };
352*59cc4ca5SDavid du Colombier }
353*59cc4ca5SDavid du Colombier }
354*59cc4ca5SDavid du Colombier }
355*59cc4ca5SDavid du Colombier
356*59cc4ca5SDavid du Colombier if (verb >= 4)
357*59cc4ca5SDavid du Colombier VPrintf1 ( "%6d unresolved strings\n", nNotDone );
358*59cc4ca5SDavid du Colombier
359*59cc4ca5SDavid du Colombier H *= 2;
360*59cc4ca5SDavid du Colombier if (H > nblock || nNotDone == 0) break;
361*59cc4ca5SDavid du Colombier }
362*59cc4ca5SDavid du Colombier
363*59cc4ca5SDavid du Colombier /*--
364*59cc4ca5SDavid du Colombier Reconstruct the original block in
365*59cc4ca5SDavid du Colombier eclass8 [0 .. nblock-1], since the
366*59cc4ca5SDavid du Colombier previous phase destroyed it.
367*59cc4ca5SDavid du Colombier --*/
368*59cc4ca5SDavid du Colombier if (verb >= 4)
369*59cc4ca5SDavid du Colombier VPrintf0 ( " reconstructing block ...\n" );
370*59cc4ca5SDavid du Colombier j = 0;
371*59cc4ca5SDavid du Colombier for (i = 0; i < nblock; i++) {
372*59cc4ca5SDavid du Colombier while (ftabCopy[j] == 0) j++;
373*59cc4ca5SDavid du Colombier ftabCopy[j]--;
374*59cc4ca5SDavid du Colombier eclass8[fmap[i]] = (UChar)j;
375*59cc4ca5SDavid du Colombier }
376*59cc4ca5SDavid du Colombier AssertH ( j < 256, 1005 );
377*59cc4ca5SDavid du Colombier }
378*59cc4ca5SDavid du Colombier
379*59cc4ca5SDavid du Colombier #undef SET_BH
380*59cc4ca5SDavid du Colombier #undef CLEAR_BH
381*59cc4ca5SDavid du Colombier #undef ISSET_BH
382*59cc4ca5SDavid du Colombier #undef WORD_BH
383*59cc4ca5SDavid du Colombier #undef UNALIGNED_BH
384*59cc4ca5SDavid du Colombier
385*59cc4ca5SDavid du Colombier
386*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
387*59cc4ca5SDavid du Colombier /*--- The main, O(N^2 log(N)) sorting ---*/
388*59cc4ca5SDavid du Colombier /*--- algorithm. Faster for "normal" ---*/
389*59cc4ca5SDavid du Colombier /*--- non-repetitive blocks. ---*/
390*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
391*59cc4ca5SDavid du Colombier
392*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
393*59cc4ca5SDavid du Colombier static
394*59cc4ca5SDavid du Colombier __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)395*59cc4ca5SDavid du Colombier Bool mainGtU ( UInt32 i1,
396*59cc4ca5SDavid du Colombier UInt32 i2,
397*59cc4ca5SDavid du Colombier UChar* block,
398*59cc4ca5SDavid du Colombier UInt16* quadrant,
399*59cc4ca5SDavid du Colombier UInt32 nblock,
400*59cc4ca5SDavid du Colombier Int32* budget )
401*59cc4ca5SDavid du Colombier {
402*59cc4ca5SDavid du Colombier Int32 k;
403*59cc4ca5SDavid du Colombier UChar c1, c2;
404*59cc4ca5SDavid du Colombier UInt16 s1, s2;
405*59cc4ca5SDavid du Colombier
406*59cc4ca5SDavid du Colombier AssertD ( i1 != i2, "mainGtU" );
407*59cc4ca5SDavid du Colombier /* 1 */
408*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
409*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
410*59cc4ca5SDavid du Colombier i1++; i2++;
411*59cc4ca5SDavid du Colombier /* 2 */
412*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
413*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
414*59cc4ca5SDavid du Colombier i1++; i2++;
415*59cc4ca5SDavid du Colombier /* 3 */
416*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
417*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
418*59cc4ca5SDavid du Colombier i1++; i2++;
419*59cc4ca5SDavid du Colombier /* 4 */
420*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
421*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
422*59cc4ca5SDavid du Colombier i1++; i2++;
423*59cc4ca5SDavid du Colombier /* 5 */
424*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
425*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
426*59cc4ca5SDavid du Colombier i1++; i2++;
427*59cc4ca5SDavid du Colombier /* 6 */
428*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
429*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
430*59cc4ca5SDavid du Colombier i1++; i2++;
431*59cc4ca5SDavid du Colombier /* 7 */
432*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
433*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
434*59cc4ca5SDavid du Colombier i1++; i2++;
435*59cc4ca5SDavid du Colombier /* 8 */
436*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
437*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
438*59cc4ca5SDavid du Colombier i1++; i2++;
439*59cc4ca5SDavid du Colombier /* 9 */
440*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
441*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
442*59cc4ca5SDavid du Colombier i1++; i2++;
443*59cc4ca5SDavid du Colombier /* 10 */
444*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
445*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
446*59cc4ca5SDavid du Colombier i1++; i2++;
447*59cc4ca5SDavid du Colombier /* 11 */
448*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
449*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
450*59cc4ca5SDavid du Colombier i1++; i2++;
451*59cc4ca5SDavid du Colombier /* 12 */
452*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
453*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
454*59cc4ca5SDavid du Colombier i1++; i2++;
455*59cc4ca5SDavid du Colombier
456*59cc4ca5SDavid du Colombier k = nblock + 8;
457*59cc4ca5SDavid du Colombier
458*59cc4ca5SDavid du Colombier do {
459*59cc4ca5SDavid du Colombier /* 1 */
460*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
461*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
462*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
463*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
464*59cc4ca5SDavid du Colombier i1++; i2++;
465*59cc4ca5SDavid du Colombier /* 2 */
466*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
467*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
468*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
469*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
470*59cc4ca5SDavid du Colombier i1++; i2++;
471*59cc4ca5SDavid du Colombier /* 3 */
472*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
473*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
474*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
475*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
476*59cc4ca5SDavid du Colombier i1++; i2++;
477*59cc4ca5SDavid du Colombier /* 4 */
478*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
479*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
480*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
481*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
482*59cc4ca5SDavid du Colombier i1++; i2++;
483*59cc4ca5SDavid du Colombier /* 5 */
484*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
485*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
486*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
487*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
488*59cc4ca5SDavid du Colombier i1++; i2++;
489*59cc4ca5SDavid du Colombier /* 6 */
490*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
491*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
492*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
493*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
494*59cc4ca5SDavid du Colombier i1++; i2++;
495*59cc4ca5SDavid du Colombier /* 7 */
496*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
497*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
498*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
499*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
500*59cc4ca5SDavid du Colombier i1++; i2++;
501*59cc4ca5SDavid du Colombier /* 8 */
502*59cc4ca5SDavid du Colombier c1 = block[i1]; c2 = block[i2];
503*59cc4ca5SDavid du Colombier if (c1 != c2) return (c1 > c2);
504*59cc4ca5SDavid du Colombier s1 = quadrant[i1]; s2 = quadrant[i2];
505*59cc4ca5SDavid du Colombier if (s1 != s2) return (s1 > s2);
506*59cc4ca5SDavid du Colombier i1++; i2++;
507*59cc4ca5SDavid du Colombier
508*59cc4ca5SDavid du Colombier if (i1 >= nblock) i1 -= nblock;
509*59cc4ca5SDavid du Colombier if (i2 >= nblock) i2 -= nblock;
510*59cc4ca5SDavid du Colombier
511*59cc4ca5SDavid du Colombier k -= 8;
512*59cc4ca5SDavid du Colombier (*budget)--;
513*59cc4ca5SDavid du Colombier }
514*59cc4ca5SDavid du Colombier while (k >= 0);
515*59cc4ca5SDavid du Colombier
516*59cc4ca5SDavid du Colombier return False;
517*59cc4ca5SDavid du Colombier }
518*59cc4ca5SDavid du Colombier
519*59cc4ca5SDavid du Colombier
520*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
521*59cc4ca5SDavid du Colombier /*--
522*59cc4ca5SDavid du Colombier Knuth's increments seem to work better
523*59cc4ca5SDavid du Colombier than Incerpi-Sedgewick here. Possibly
524*59cc4ca5SDavid du Colombier because the number of elems to sort is
525*59cc4ca5SDavid du Colombier usually small, typically <= 20.
526*59cc4ca5SDavid du Colombier --*/
527*59cc4ca5SDavid du Colombier static
528*59cc4ca5SDavid du Colombier Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
529*59cc4ca5SDavid du Colombier 9841, 29524, 88573, 265720,
530*59cc4ca5SDavid du Colombier 797161, 2391484 };
531*59cc4ca5SDavid du Colombier
532*59cc4ca5SDavid du Colombier static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)533*59cc4ca5SDavid du Colombier void mainSimpleSort ( UInt32* ptr,
534*59cc4ca5SDavid du Colombier UChar* block,
535*59cc4ca5SDavid du Colombier UInt16* quadrant,
536*59cc4ca5SDavid du Colombier Int32 nblock,
537*59cc4ca5SDavid du Colombier Int32 lo,
538*59cc4ca5SDavid du Colombier Int32 hi,
539*59cc4ca5SDavid du Colombier Int32 d,
540*59cc4ca5SDavid du Colombier Int32* budget )
541*59cc4ca5SDavid du Colombier {
542*59cc4ca5SDavid du Colombier Int32 i, j, h, bigN, hp;
543*59cc4ca5SDavid du Colombier UInt32 v;
544*59cc4ca5SDavid du Colombier
545*59cc4ca5SDavid du Colombier bigN = hi - lo + 1;
546*59cc4ca5SDavid du Colombier if (bigN < 2) return;
547*59cc4ca5SDavid du Colombier
548*59cc4ca5SDavid du Colombier hp = 0;
549*59cc4ca5SDavid du Colombier while (incs[hp] < bigN) hp++;
550*59cc4ca5SDavid du Colombier hp--;
551*59cc4ca5SDavid du Colombier
552*59cc4ca5SDavid du Colombier for (; hp >= 0; hp--) {
553*59cc4ca5SDavid du Colombier h = incs[hp];
554*59cc4ca5SDavid du Colombier
555*59cc4ca5SDavid du Colombier i = lo + h;
556*59cc4ca5SDavid du Colombier while (True) {
557*59cc4ca5SDavid du Colombier
558*59cc4ca5SDavid du Colombier /*-- copy 1 --*/
559*59cc4ca5SDavid du Colombier if (i > hi) break;
560*59cc4ca5SDavid du Colombier v = ptr[i];
561*59cc4ca5SDavid du Colombier j = i;
562*59cc4ca5SDavid du Colombier while ( mainGtU (
563*59cc4ca5SDavid du Colombier ptr[j-h]+d, v+d, block, quadrant, nblock, budget
564*59cc4ca5SDavid du Colombier ) ) {
565*59cc4ca5SDavid du Colombier ptr[j] = ptr[j-h];
566*59cc4ca5SDavid du Colombier j = j - h;
567*59cc4ca5SDavid du Colombier if (j <= (lo + h - 1)) break;
568*59cc4ca5SDavid du Colombier }
569*59cc4ca5SDavid du Colombier ptr[j] = v;
570*59cc4ca5SDavid du Colombier i++;
571*59cc4ca5SDavid du Colombier
572*59cc4ca5SDavid du Colombier /*-- copy 2 --*/
573*59cc4ca5SDavid du Colombier if (i > hi) break;
574*59cc4ca5SDavid du Colombier v = ptr[i];
575*59cc4ca5SDavid du Colombier j = i;
576*59cc4ca5SDavid du Colombier while ( mainGtU (
577*59cc4ca5SDavid du Colombier ptr[j-h]+d, v+d, block, quadrant, nblock, budget
578*59cc4ca5SDavid du Colombier ) ) {
579*59cc4ca5SDavid du Colombier ptr[j] = ptr[j-h];
580*59cc4ca5SDavid du Colombier j = j - h;
581*59cc4ca5SDavid du Colombier if (j <= (lo + h - 1)) break;
582*59cc4ca5SDavid du Colombier }
583*59cc4ca5SDavid du Colombier ptr[j] = v;
584*59cc4ca5SDavid du Colombier i++;
585*59cc4ca5SDavid du Colombier
586*59cc4ca5SDavid du Colombier /*-- copy 3 --*/
587*59cc4ca5SDavid du Colombier if (i > hi) break;
588*59cc4ca5SDavid du Colombier v = ptr[i];
589*59cc4ca5SDavid du Colombier j = i;
590*59cc4ca5SDavid du Colombier while ( mainGtU (
591*59cc4ca5SDavid du Colombier ptr[j-h]+d, v+d, block, quadrant, nblock, budget
592*59cc4ca5SDavid du Colombier ) ) {
593*59cc4ca5SDavid du Colombier ptr[j] = ptr[j-h];
594*59cc4ca5SDavid du Colombier j = j - h;
595*59cc4ca5SDavid du Colombier if (j <= (lo + h - 1)) break;
596*59cc4ca5SDavid du Colombier }
597*59cc4ca5SDavid du Colombier ptr[j] = v;
598*59cc4ca5SDavid du Colombier i++;
599*59cc4ca5SDavid du Colombier
600*59cc4ca5SDavid du Colombier if (*budget < 0) return;
601*59cc4ca5SDavid du Colombier }
602*59cc4ca5SDavid du Colombier }
603*59cc4ca5SDavid du Colombier }
604*59cc4ca5SDavid du Colombier
605*59cc4ca5SDavid du Colombier
606*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
607*59cc4ca5SDavid du Colombier /*--
608*59cc4ca5SDavid du Colombier The following is an implementation of
609*59cc4ca5SDavid du Colombier an elegant 3-way quicksort for strings,
610*59cc4ca5SDavid du Colombier described in a paper "Fast Algorithms for
611*59cc4ca5SDavid du Colombier Sorting and Searching Strings", by Robert
612*59cc4ca5SDavid du Colombier Sedgewick and Jon L. Bentley.
613*59cc4ca5SDavid du Colombier --*/
614*59cc4ca5SDavid du Colombier
615*59cc4ca5SDavid du Colombier #define mswap(zz1, zz2) \
616*59cc4ca5SDavid du Colombier { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
617*59cc4ca5SDavid du Colombier
618*59cc4ca5SDavid du Colombier #define mvswap(zzp1, zzp2, zzn) \
619*59cc4ca5SDavid du Colombier { \
620*59cc4ca5SDavid du Colombier Int32 yyp1 = (zzp1); \
621*59cc4ca5SDavid du Colombier Int32 yyp2 = (zzp2); \
622*59cc4ca5SDavid du Colombier Int32 yyn = (zzn); \
623*59cc4ca5SDavid du Colombier while (yyn > 0) { \
624*59cc4ca5SDavid du Colombier mswap(ptr[yyp1], ptr[yyp2]); \
625*59cc4ca5SDavid du Colombier yyp1++; yyp2++; yyn--; \
626*59cc4ca5SDavid du Colombier } \
627*59cc4ca5SDavid du Colombier }
628*59cc4ca5SDavid du Colombier
629*59cc4ca5SDavid du Colombier static
630*59cc4ca5SDavid du Colombier __inline__
mmed3(UChar a,UChar b,UChar c)631*59cc4ca5SDavid du Colombier UChar mmed3 ( UChar a, UChar b, UChar c )
632*59cc4ca5SDavid du Colombier {
633*59cc4ca5SDavid du Colombier UChar t;
634*59cc4ca5SDavid du Colombier if (a > b) { t = a; a = b; b = t; };
635*59cc4ca5SDavid du Colombier if (b > c) {
636*59cc4ca5SDavid du Colombier b = c;
637*59cc4ca5SDavid du Colombier if (a > b) b = a;
638*59cc4ca5SDavid du Colombier }
639*59cc4ca5SDavid du Colombier return b;
640*59cc4ca5SDavid du Colombier }
641*59cc4ca5SDavid du Colombier
642*59cc4ca5SDavid du Colombier #define mmin(a,b) ((a) < (b)) ? (a) : (b)
643*59cc4ca5SDavid du Colombier
644*59cc4ca5SDavid du Colombier #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
645*59cc4ca5SDavid du Colombier stackHi[sp] = hz; \
646*59cc4ca5SDavid du Colombier stackD [sp] = dz; \
647*59cc4ca5SDavid du Colombier sp++; }
648*59cc4ca5SDavid du Colombier
649*59cc4ca5SDavid du Colombier #define mpop(lz,hz,dz) { sp--; \
650*59cc4ca5SDavid du Colombier lz = stackLo[sp]; \
651*59cc4ca5SDavid du Colombier hz = stackHi[sp]; \
652*59cc4ca5SDavid du Colombier dz = stackD [sp]; }
653*59cc4ca5SDavid du Colombier
654*59cc4ca5SDavid du Colombier
655*59cc4ca5SDavid du Colombier #define mnextsize(az) (nextHi[az]-nextLo[az])
656*59cc4ca5SDavid du Colombier
657*59cc4ca5SDavid du Colombier #define mnextswap(az,bz) \
658*59cc4ca5SDavid du Colombier { Int32 tz; \
659*59cc4ca5SDavid du Colombier tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
660*59cc4ca5SDavid du Colombier tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
661*59cc4ca5SDavid du Colombier tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
662*59cc4ca5SDavid du Colombier
663*59cc4ca5SDavid du Colombier
664*59cc4ca5SDavid du Colombier #define MAIN_QSORT_SMALL_THRESH 20
665*59cc4ca5SDavid du Colombier #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
666*59cc4ca5SDavid du Colombier #define MAIN_QSORT_STACK_SIZE 100
667*59cc4ca5SDavid du Colombier
668*59cc4ca5SDavid du Colombier static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)669*59cc4ca5SDavid du Colombier void mainQSort3 ( UInt32* ptr,
670*59cc4ca5SDavid du Colombier UChar* block,
671*59cc4ca5SDavid du Colombier UInt16* quadrant,
672*59cc4ca5SDavid du Colombier Int32 nblock,
673*59cc4ca5SDavid du Colombier Int32 loSt,
674*59cc4ca5SDavid du Colombier Int32 hiSt,
675*59cc4ca5SDavid du Colombier Int32 dSt,
676*59cc4ca5SDavid du Colombier Int32* budget )
677*59cc4ca5SDavid du Colombier {
678*59cc4ca5SDavid du Colombier Int32 unLo, unHi, ltLo, gtHi, n, m, med;
679*59cc4ca5SDavid du Colombier Int32 sp, lo, hi, d;
680*59cc4ca5SDavid du Colombier
681*59cc4ca5SDavid du Colombier Int32 stackLo[MAIN_QSORT_STACK_SIZE];
682*59cc4ca5SDavid du Colombier Int32 stackHi[MAIN_QSORT_STACK_SIZE];
683*59cc4ca5SDavid du Colombier Int32 stackD [MAIN_QSORT_STACK_SIZE];
684*59cc4ca5SDavid du Colombier
685*59cc4ca5SDavid du Colombier Int32 nextLo[3];
686*59cc4ca5SDavid du Colombier Int32 nextHi[3];
687*59cc4ca5SDavid du Colombier Int32 nextD [3];
688*59cc4ca5SDavid du Colombier
689*59cc4ca5SDavid du Colombier sp = 0;
690*59cc4ca5SDavid du Colombier mpush ( loSt, hiSt, dSt );
691*59cc4ca5SDavid du Colombier
692*59cc4ca5SDavid du Colombier while (sp > 0) {
693*59cc4ca5SDavid du Colombier
694*59cc4ca5SDavid du Colombier AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
695*59cc4ca5SDavid du Colombier
696*59cc4ca5SDavid du Colombier mpop ( lo, hi, d );
697*59cc4ca5SDavid du Colombier if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
698*59cc4ca5SDavid du Colombier d > MAIN_QSORT_DEPTH_THRESH) {
699*59cc4ca5SDavid du Colombier mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
700*59cc4ca5SDavid du Colombier if (*budget < 0) return;
701*59cc4ca5SDavid du Colombier continue;
702*59cc4ca5SDavid du Colombier }
703*59cc4ca5SDavid du Colombier
704*59cc4ca5SDavid du Colombier med = (Int32)
705*59cc4ca5SDavid du Colombier mmed3 ( block[ptr[ lo ]+d],
706*59cc4ca5SDavid du Colombier block[ptr[ hi ]+d],
707*59cc4ca5SDavid du Colombier block[ptr[ (lo+hi)>>1 ]+d] );
708*59cc4ca5SDavid du Colombier
709*59cc4ca5SDavid du Colombier unLo = ltLo = lo;
710*59cc4ca5SDavid du Colombier unHi = gtHi = hi;
711*59cc4ca5SDavid du Colombier
712*59cc4ca5SDavid du Colombier while (True) {
713*59cc4ca5SDavid du Colombier while (True) {
714*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
715*59cc4ca5SDavid du Colombier n = ((Int32)block[ptr[unLo]+d]) - med;
716*59cc4ca5SDavid du Colombier if (n == 0) {
717*59cc4ca5SDavid du Colombier mswap(ptr[unLo], ptr[ltLo]);
718*59cc4ca5SDavid du Colombier ltLo++; unLo++; continue;
719*59cc4ca5SDavid du Colombier };
720*59cc4ca5SDavid du Colombier if (n > 0) break;
721*59cc4ca5SDavid du Colombier unLo++;
722*59cc4ca5SDavid du Colombier }
723*59cc4ca5SDavid du Colombier while (True) {
724*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
725*59cc4ca5SDavid du Colombier n = ((Int32)block[ptr[unHi]+d]) - med;
726*59cc4ca5SDavid du Colombier if (n == 0) {
727*59cc4ca5SDavid du Colombier mswap(ptr[unHi], ptr[gtHi]);
728*59cc4ca5SDavid du Colombier gtHi--; unHi--; continue;
729*59cc4ca5SDavid du Colombier };
730*59cc4ca5SDavid du Colombier if (n < 0) break;
731*59cc4ca5SDavid du Colombier unHi--;
732*59cc4ca5SDavid du Colombier }
733*59cc4ca5SDavid du Colombier if (unLo > unHi) break;
734*59cc4ca5SDavid du Colombier mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
735*59cc4ca5SDavid du Colombier }
736*59cc4ca5SDavid du Colombier
737*59cc4ca5SDavid du Colombier AssertD ( unHi == unLo-1, "mainQSort3(2)" );
738*59cc4ca5SDavid du Colombier
739*59cc4ca5SDavid du Colombier if (gtHi < ltLo) {
740*59cc4ca5SDavid du Colombier mpush(lo, hi, d+1 );
741*59cc4ca5SDavid du Colombier continue;
742*59cc4ca5SDavid du Colombier }
743*59cc4ca5SDavid du Colombier
744*59cc4ca5SDavid du Colombier n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
745*59cc4ca5SDavid du Colombier m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
746*59cc4ca5SDavid du Colombier
747*59cc4ca5SDavid du Colombier n = lo + unLo - ltLo - 1;
748*59cc4ca5SDavid du Colombier m = hi - (gtHi - unHi) + 1;
749*59cc4ca5SDavid du Colombier
750*59cc4ca5SDavid du Colombier nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
751*59cc4ca5SDavid du Colombier nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
752*59cc4ca5SDavid du Colombier nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
753*59cc4ca5SDavid du Colombier
754*59cc4ca5SDavid du Colombier if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
755*59cc4ca5SDavid du Colombier if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
756*59cc4ca5SDavid du Colombier if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
757*59cc4ca5SDavid du Colombier
758*59cc4ca5SDavid du Colombier AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
759*59cc4ca5SDavid du Colombier AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
760*59cc4ca5SDavid du Colombier
761*59cc4ca5SDavid du Colombier mpush (nextLo[0], nextHi[0], nextD[0]);
762*59cc4ca5SDavid du Colombier mpush (nextLo[1], nextHi[1], nextD[1]);
763*59cc4ca5SDavid du Colombier mpush (nextLo[2], nextHi[2], nextD[2]);
764*59cc4ca5SDavid du Colombier }
765*59cc4ca5SDavid du Colombier }
766*59cc4ca5SDavid du Colombier
767*59cc4ca5SDavid du Colombier #undef mswap
768*59cc4ca5SDavid du Colombier #undef mvswap
769*59cc4ca5SDavid du Colombier #undef mpush
770*59cc4ca5SDavid du Colombier #undef mpop
771*59cc4ca5SDavid du Colombier #undef mmin
772*59cc4ca5SDavid du Colombier #undef mnextsize
773*59cc4ca5SDavid du Colombier #undef mnextswap
774*59cc4ca5SDavid du Colombier #undef MAIN_QSORT_SMALL_THRESH
775*59cc4ca5SDavid du Colombier #undef MAIN_QSORT_DEPTH_THRESH
776*59cc4ca5SDavid du Colombier #undef MAIN_QSORT_STACK_SIZE
777*59cc4ca5SDavid du Colombier
778*59cc4ca5SDavid du Colombier
779*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
780*59cc4ca5SDavid du Colombier /* Pre:
781*59cc4ca5SDavid du Colombier nblock > N_OVERSHOOT
782*59cc4ca5SDavid du Colombier block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
783*59cc4ca5SDavid du Colombier ((UChar*)block32) [0 .. nblock-1] holds block
784*59cc4ca5SDavid du Colombier ptr exists for [0 .. nblock-1]
785*59cc4ca5SDavid du Colombier
786*59cc4ca5SDavid du Colombier Post:
787*59cc4ca5SDavid du Colombier ((UChar*)block32) [0 .. nblock-1] holds block
788*59cc4ca5SDavid du Colombier All other areas of block32 destroyed
789*59cc4ca5SDavid du Colombier ftab [0 .. 65536 ] destroyed
790*59cc4ca5SDavid du Colombier ptr [0 .. nblock-1] holds sorted order
791*59cc4ca5SDavid du Colombier if (*budget < 0), sorting was abandoned
792*59cc4ca5SDavid du Colombier */
793*59cc4ca5SDavid du Colombier
794*59cc4ca5SDavid du Colombier #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
795*59cc4ca5SDavid du Colombier #define SETMASK (1 << 21)
796*59cc4ca5SDavid du Colombier #define CLEARMASK (~(SETMASK))
797*59cc4ca5SDavid du Colombier
798*59cc4ca5SDavid du Colombier static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)799*59cc4ca5SDavid du Colombier void mainSort ( UInt32* ptr,
800*59cc4ca5SDavid du Colombier UChar* block,
801*59cc4ca5SDavid du Colombier UInt16* quadrant,
802*59cc4ca5SDavid du Colombier UInt32* ftab,
803*59cc4ca5SDavid du Colombier Int32 nblock,
804*59cc4ca5SDavid du Colombier Int32 verb,
805*59cc4ca5SDavid du Colombier Int32* budget )
806*59cc4ca5SDavid du Colombier {
807*59cc4ca5SDavid du Colombier Int32 i, j, k, ss, sb;
808*59cc4ca5SDavid du Colombier Int32 runningOrder[256];
809*59cc4ca5SDavid du Colombier Bool bigDone[256];
810*59cc4ca5SDavid du Colombier Int32 copyStart[256];
811*59cc4ca5SDavid du Colombier Int32 copyEnd [256];
812*59cc4ca5SDavid du Colombier UChar c1;
813*59cc4ca5SDavid du Colombier Int32 numQSorted;
814*59cc4ca5SDavid du Colombier UInt16 s;
815*59cc4ca5SDavid du Colombier if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
816*59cc4ca5SDavid du Colombier
817*59cc4ca5SDavid du Colombier /*-- set up the 2-byte frequency table --*/
818*59cc4ca5SDavid du Colombier for (i = 65536; i >= 0; i--) ftab[i] = 0;
819*59cc4ca5SDavid du Colombier
820*59cc4ca5SDavid du Colombier j = block[0] << 8;
821*59cc4ca5SDavid du Colombier i = nblock-1;
822*59cc4ca5SDavid du Colombier for (; i >= 3; i -= 4) {
823*59cc4ca5SDavid du Colombier quadrant[i] = 0;
824*59cc4ca5SDavid du Colombier j = (j >> 8) | ( ((UInt16)block[i]) << 8);
825*59cc4ca5SDavid du Colombier ftab[j]++;
826*59cc4ca5SDavid du Colombier quadrant[i-1] = 0;
827*59cc4ca5SDavid du Colombier j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
828*59cc4ca5SDavid du Colombier ftab[j]++;
829*59cc4ca5SDavid du Colombier quadrant[i-2] = 0;
830*59cc4ca5SDavid du Colombier j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
831*59cc4ca5SDavid du Colombier ftab[j]++;
832*59cc4ca5SDavid du Colombier quadrant[i-3] = 0;
833*59cc4ca5SDavid du Colombier j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
834*59cc4ca5SDavid du Colombier ftab[j]++;
835*59cc4ca5SDavid du Colombier }
836*59cc4ca5SDavid du Colombier for (; i >= 0; i--) {
837*59cc4ca5SDavid du Colombier quadrant[i] = 0;
838*59cc4ca5SDavid du Colombier j = (j >> 8) | ( ((UInt16)block[i]) << 8);
839*59cc4ca5SDavid du Colombier ftab[j]++;
840*59cc4ca5SDavid du Colombier }
841*59cc4ca5SDavid du Colombier
842*59cc4ca5SDavid du Colombier /*-- (emphasises close relationship of block & quadrant) --*/
843*59cc4ca5SDavid du Colombier for (i = 0; i < BZ_N_OVERSHOOT; i++) {
844*59cc4ca5SDavid du Colombier block [nblock+i] = block[i];
845*59cc4ca5SDavid du Colombier quadrant[nblock+i] = 0;
846*59cc4ca5SDavid du Colombier }
847*59cc4ca5SDavid du Colombier
848*59cc4ca5SDavid du Colombier if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
849*59cc4ca5SDavid du Colombier
850*59cc4ca5SDavid du Colombier /*-- Complete the initial radix sort --*/
851*59cc4ca5SDavid du Colombier for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
852*59cc4ca5SDavid du Colombier
853*59cc4ca5SDavid du Colombier s = block[0] << 8;
854*59cc4ca5SDavid du Colombier i = nblock-1;
855*59cc4ca5SDavid du Colombier for (; i >= 3; i -= 4) {
856*59cc4ca5SDavid du Colombier s = (s >> 8) | (block[i] << 8);
857*59cc4ca5SDavid du Colombier j = ftab[s] -1;
858*59cc4ca5SDavid du Colombier ftab[s] = j;
859*59cc4ca5SDavid du Colombier ptr[j] = i;
860*59cc4ca5SDavid du Colombier s = (s >> 8) | (block[i-1] << 8);
861*59cc4ca5SDavid du Colombier j = ftab[s] -1;
862*59cc4ca5SDavid du Colombier ftab[s] = j;
863*59cc4ca5SDavid du Colombier ptr[j] = i-1;
864*59cc4ca5SDavid du Colombier s = (s >> 8) | (block[i-2] << 8);
865*59cc4ca5SDavid du Colombier j = ftab[s] -1;
866*59cc4ca5SDavid du Colombier ftab[s] = j;
867*59cc4ca5SDavid du Colombier ptr[j] = i-2;
868*59cc4ca5SDavid du Colombier s = (s >> 8) | (block[i-3] << 8);
869*59cc4ca5SDavid du Colombier j = ftab[s] -1;
870*59cc4ca5SDavid du Colombier ftab[s] = j;
871*59cc4ca5SDavid du Colombier ptr[j] = i-3;
872*59cc4ca5SDavid du Colombier }
873*59cc4ca5SDavid du Colombier for (; i >= 0; i--) {
874*59cc4ca5SDavid du Colombier s = (s >> 8) | (block[i] << 8);
875*59cc4ca5SDavid du Colombier j = ftab[s] -1;
876*59cc4ca5SDavid du Colombier ftab[s] = j;
877*59cc4ca5SDavid du Colombier ptr[j] = i;
878*59cc4ca5SDavid du Colombier }
879*59cc4ca5SDavid du Colombier
880*59cc4ca5SDavid du Colombier /*--
881*59cc4ca5SDavid du Colombier Now ftab contains the first loc of every small bucket.
882*59cc4ca5SDavid du Colombier Calculate the running order, from smallest to largest
883*59cc4ca5SDavid du Colombier big bucket.
884*59cc4ca5SDavid du Colombier --*/
885*59cc4ca5SDavid du Colombier for (i = 0; i <= 255; i++) {
886*59cc4ca5SDavid du Colombier bigDone [i] = False;
887*59cc4ca5SDavid du Colombier runningOrder[i] = i;
888*59cc4ca5SDavid du Colombier }
889*59cc4ca5SDavid du Colombier
890*59cc4ca5SDavid du Colombier {
891*59cc4ca5SDavid du Colombier Int32 vv;
892*59cc4ca5SDavid du Colombier Int32 h = 1;
893*59cc4ca5SDavid du Colombier do h = 3 * h + 1; while (h <= 256);
894*59cc4ca5SDavid du Colombier do {
895*59cc4ca5SDavid du Colombier h = h / 3;
896*59cc4ca5SDavid du Colombier for (i = h; i <= 255; i++) {
897*59cc4ca5SDavid du Colombier vv = runningOrder[i];
898*59cc4ca5SDavid du Colombier j = i;
899*59cc4ca5SDavid du Colombier while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
900*59cc4ca5SDavid du Colombier runningOrder[j] = runningOrder[j-h];
901*59cc4ca5SDavid du Colombier j = j - h;
902*59cc4ca5SDavid du Colombier if (j <= (h - 1)) goto zero;
903*59cc4ca5SDavid du Colombier }
904*59cc4ca5SDavid du Colombier zero:
905*59cc4ca5SDavid du Colombier runningOrder[j] = vv;
906*59cc4ca5SDavid du Colombier }
907*59cc4ca5SDavid du Colombier } while (h != 1);
908*59cc4ca5SDavid du Colombier }
909*59cc4ca5SDavid du Colombier
910*59cc4ca5SDavid du Colombier /*--
911*59cc4ca5SDavid du Colombier The main sorting loop.
912*59cc4ca5SDavid du Colombier --*/
913*59cc4ca5SDavid du Colombier
914*59cc4ca5SDavid du Colombier numQSorted = 0;
915*59cc4ca5SDavid du Colombier
916*59cc4ca5SDavid du Colombier for (i = 0; i <= 255; i++) {
917*59cc4ca5SDavid du Colombier
918*59cc4ca5SDavid du Colombier /*--
919*59cc4ca5SDavid du Colombier Process big buckets, starting with the least full.
920*59cc4ca5SDavid du Colombier Basically this is a 3-step process in which we call
921*59cc4ca5SDavid du Colombier mainQSort3 to sort the small buckets [ss, j], but
922*59cc4ca5SDavid du Colombier also make a big effort to avoid the calls if we can.
923*59cc4ca5SDavid du Colombier --*/
924*59cc4ca5SDavid du Colombier ss = runningOrder[i];
925*59cc4ca5SDavid du Colombier
926*59cc4ca5SDavid du Colombier /*--
927*59cc4ca5SDavid du Colombier Step 1:
928*59cc4ca5SDavid du Colombier Complete the big bucket [ss] by quicksorting
929*59cc4ca5SDavid du Colombier any unsorted small buckets [ss, j], for j != ss.
930*59cc4ca5SDavid du Colombier Hopefully previous pointer-scanning phases have already
931*59cc4ca5SDavid du Colombier completed many of the small buckets [ss, j], so
932*59cc4ca5SDavid du Colombier we don't have to sort them at all.
933*59cc4ca5SDavid du Colombier --*/
934*59cc4ca5SDavid du Colombier for (j = 0; j <= 255; j++) {
935*59cc4ca5SDavid du Colombier if (j != ss) {
936*59cc4ca5SDavid du Colombier sb = (ss << 8) + j;
937*59cc4ca5SDavid du Colombier if ( ! (ftab[sb] & SETMASK) ) {
938*59cc4ca5SDavid du Colombier Int32 lo = ftab[sb] & CLEARMASK;
939*59cc4ca5SDavid du Colombier Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
940*59cc4ca5SDavid du Colombier if (hi > lo) {
941*59cc4ca5SDavid du Colombier if (verb >= 4)
942*59cc4ca5SDavid du Colombier VPrintf4 ( " qsort [0x%x, 0x%x] "
943*59cc4ca5SDavid du Colombier "done %d this %d\n",
944*59cc4ca5SDavid du Colombier ss, j, numQSorted, hi - lo + 1 );
945*59cc4ca5SDavid du Colombier mainQSort3 (
946*59cc4ca5SDavid du Colombier ptr, block, quadrant, nblock,
947*59cc4ca5SDavid du Colombier lo, hi, BZ_N_RADIX, budget
948*59cc4ca5SDavid du Colombier );
949*59cc4ca5SDavid du Colombier numQSorted += (hi - lo + 1);
950*59cc4ca5SDavid du Colombier if (*budget < 0) return;
951*59cc4ca5SDavid du Colombier }
952*59cc4ca5SDavid du Colombier }
953*59cc4ca5SDavid du Colombier ftab[sb] |= SETMASK;
954*59cc4ca5SDavid du Colombier }
955*59cc4ca5SDavid du Colombier }
956*59cc4ca5SDavid du Colombier
957*59cc4ca5SDavid du Colombier AssertH ( !bigDone[ss], 1006 );
958*59cc4ca5SDavid du Colombier
959*59cc4ca5SDavid du Colombier /*--
960*59cc4ca5SDavid du Colombier Step 2:
961*59cc4ca5SDavid du Colombier Now scan this big bucket [ss] so as to synthesise the
962*59cc4ca5SDavid du Colombier sorted order for small buckets [t, ss] for all t,
963*59cc4ca5SDavid du Colombier including, magically, the bucket [ss,ss] too.
964*59cc4ca5SDavid du Colombier This will avoid doing Real Work in subsequent Step 1's.
965*59cc4ca5SDavid du Colombier --*/
966*59cc4ca5SDavid du Colombier {
967*59cc4ca5SDavid du Colombier for (j = 0; j <= 255; j++) {
968*59cc4ca5SDavid du Colombier copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
969*59cc4ca5SDavid du Colombier copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
970*59cc4ca5SDavid du Colombier }
971*59cc4ca5SDavid du Colombier for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
972*59cc4ca5SDavid du Colombier k = ptr[j]-1; if (k < 0) k += nblock;
973*59cc4ca5SDavid du Colombier c1 = block[k];
974*59cc4ca5SDavid du Colombier if (!bigDone[c1])
975*59cc4ca5SDavid du Colombier ptr[ copyStart[c1]++ ] = k;
976*59cc4ca5SDavid du Colombier }
977*59cc4ca5SDavid du Colombier for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
978*59cc4ca5SDavid du Colombier k = ptr[j]-1; if (k < 0) k += nblock;
979*59cc4ca5SDavid du Colombier c1 = block[k];
980*59cc4ca5SDavid du Colombier if (!bigDone[c1])
981*59cc4ca5SDavid du Colombier ptr[ copyEnd[c1]-- ] = k;
982*59cc4ca5SDavid du Colombier }
983*59cc4ca5SDavid du Colombier }
984*59cc4ca5SDavid du Colombier
985*59cc4ca5SDavid du Colombier AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
986*59cc4ca5SDavid du Colombier
987*59cc4ca5SDavid du Colombier for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
988*59cc4ca5SDavid du Colombier
989*59cc4ca5SDavid du Colombier /*--
990*59cc4ca5SDavid du Colombier Step 3:
991*59cc4ca5SDavid du Colombier The [ss] big bucket is now done. Record this fact,
992*59cc4ca5SDavid du Colombier and update the quadrant descriptors. Remember to
993*59cc4ca5SDavid du Colombier update quadrants in the overshoot area too, if
994*59cc4ca5SDavid du Colombier necessary. The "if (i < 255)" test merely skips
995*59cc4ca5SDavid du Colombier this updating for the last bucket processed, since
996*59cc4ca5SDavid du Colombier updating for the last bucket is pointless.
997*59cc4ca5SDavid du Colombier
998*59cc4ca5SDavid du Colombier The quadrant array provides a way to incrementally
999*59cc4ca5SDavid du Colombier cache sort orderings, as they appear, so as to
1000*59cc4ca5SDavid du Colombier make subsequent comparisons in fullGtU() complete
1001*59cc4ca5SDavid du Colombier faster. For repetitive blocks this makes a big
1002*59cc4ca5SDavid du Colombier difference (but not big enough to be able to avoid
1003*59cc4ca5SDavid du Colombier the fallback sorting mechanism, exponential radix sort).
1004*59cc4ca5SDavid du Colombier
1005*59cc4ca5SDavid du Colombier The precise meaning is: at all times:
1006*59cc4ca5SDavid du Colombier
1007*59cc4ca5SDavid du Colombier for 0 <= i < nblock and 0 <= j <= nblock
1008*59cc4ca5SDavid du Colombier
1009*59cc4ca5SDavid du Colombier if block[i] != block[j],
1010*59cc4ca5SDavid du Colombier
1011*59cc4ca5SDavid du Colombier then the relative values of quadrant[i] and
1012*59cc4ca5SDavid du Colombier quadrant[j] are meaningless.
1013*59cc4ca5SDavid du Colombier
1014*59cc4ca5SDavid du Colombier else {
1015*59cc4ca5SDavid du Colombier if quadrant[i] < quadrant[j]
1016*59cc4ca5SDavid du Colombier then the string starting at i lexicographically
1017*59cc4ca5SDavid du Colombier precedes the string starting at j
1018*59cc4ca5SDavid du Colombier
1019*59cc4ca5SDavid du Colombier else if quadrant[i] > quadrant[j]
1020*59cc4ca5SDavid du Colombier then the string starting at j lexicographically
1021*59cc4ca5SDavid du Colombier precedes the string starting at i
1022*59cc4ca5SDavid du Colombier
1023*59cc4ca5SDavid du Colombier else
1024*59cc4ca5SDavid du Colombier the relative ordering of the strings starting
1025*59cc4ca5SDavid du Colombier at i and j has not yet been determined.
1026*59cc4ca5SDavid du Colombier }
1027*59cc4ca5SDavid du Colombier --*/
1028*59cc4ca5SDavid du Colombier bigDone[ss] = True;
1029*59cc4ca5SDavid du Colombier
1030*59cc4ca5SDavid du Colombier if (i < 255) {
1031*59cc4ca5SDavid du Colombier Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1032*59cc4ca5SDavid du Colombier Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1033*59cc4ca5SDavid du Colombier Int32 shifts = 0;
1034*59cc4ca5SDavid du Colombier
1035*59cc4ca5SDavid du Colombier while ((bbSize >> shifts) > 65534) shifts++;
1036*59cc4ca5SDavid du Colombier
1037*59cc4ca5SDavid du Colombier for (j = bbSize-1; j >= 0; j--) {
1038*59cc4ca5SDavid du Colombier Int32 a2update = ptr[bbStart + j];
1039*59cc4ca5SDavid du Colombier UInt16 qVal = (UInt16)(j >> shifts);
1040*59cc4ca5SDavid du Colombier quadrant[a2update] = qVal;
1041*59cc4ca5SDavid du Colombier if (a2update < BZ_N_OVERSHOOT)
1042*59cc4ca5SDavid du Colombier quadrant[a2update + nblock] = qVal;
1043*59cc4ca5SDavid du Colombier }
1044*59cc4ca5SDavid du Colombier AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1045*59cc4ca5SDavid du Colombier }
1046*59cc4ca5SDavid du Colombier
1047*59cc4ca5SDavid du Colombier }
1048*59cc4ca5SDavid du Colombier
1049*59cc4ca5SDavid du Colombier if (verb >= 4)
1050*59cc4ca5SDavid du Colombier VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1051*59cc4ca5SDavid du Colombier nblock, numQSorted, nblock - numQSorted );
1052*59cc4ca5SDavid du Colombier }
1053*59cc4ca5SDavid du Colombier
1054*59cc4ca5SDavid du Colombier #undef BIGFREQ
1055*59cc4ca5SDavid du Colombier #undef SETMASK
1056*59cc4ca5SDavid du Colombier #undef CLEARMASK
1057*59cc4ca5SDavid du Colombier
1058*59cc4ca5SDavid du Colombier
1059*59cc4ca5SDavid du Colombier /*---------------------------------------------*/
1060*59cc4ca5SDavid du Colombier /* Pre:
1061*59cc4ca5SDavid du Colombier nblock > 0
1062*59cc4ca5SDavid du Colombier arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1063*59cc4ca5SDavid du Colombier ((UChar*)arr2) [0 .. nblock-1] holds block
1064*59cc4ca5SDavid du Colombier arr1 exists for [0 .. nblock-1]
1065*59cc4ca5SDavid du Colombier
1066*59cc4ca5SDavid du Colombier Post:
1067*59cc4ca5SDavid du Colombier ((UChar*)arr2) [0 .. nblock-1] holds block
1068*59cc4ca5SDavid du Colombier All other areas of block destroyed
1069*59cc4ca5SDavid du Colombier ftab [ 0 .. 65536 ] destroyed
1070*59cc4ca5SDavid du Colombier arr1 [0 .. nblock-1] holds sorted order
1071*59cc4ca5SDavid du Colombier */
BZ2_blockSort(EState * s)1072*59cc4ca5SDavid du Colombier void BZ2_blockSort ( EState* s )
1073*59cc4ca5SDavid du Colombier {
1074*59cc4ca5SDavid du Colombier UInt32* ptr = s->ptr;
1075*59cc4ca5SDavid du Colombier UChar* block = s->block;
1076*59cc4ca5SDavid du Colombier UInt32* ftab = s->ftab;
1077*59cc4ca5SDavid du Colombier Int32 nblock = s->nblock;
1078*59cc4ca5SDavid du Colombier Int32 verb = s->verbosity;
1079*59cc4ca5SDavid du Colombier Int32 wfact = s->workFactor;
1080*59cc4ca5SDavid du Colombier UInt16* quadrant;
1081*59cc4ca5SDavid du Colombier Int32 budget;
1082*59cc4ca5SDavid du Colombier Int32 budgetInit;
1083*59cc4ca5SDavid du Colombier Int32 i;
1084*59cc4ca5SDavid du Colombier
1085*59cc4ca5SDavid du Colombier if (nblock < 10000) {
1086*59cc4ca5SDavid du Colombier fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1087*59cc4ca5SDavid du Colombier } else {
1088*59cc4ca5SDavid du Colombier /* Calculate the location for quadrant, remembering to get
1089*59cc4ca5SDavid du Colombier the alignment right. Assumes that &(block[0]) is at least
1090*59cc4ca5SDavid du Colombier 2-byte aligned -- this should be ok since block is really
1091*59cc4ca5SDavid du Colombier the first section of arr2.
1092*59cc4ca5SDavid du Colombier */
1093*59cc4ca5SDavid du Colombier i = nblock+BZ_N_OVERSHOOT;
1094*59cc4ca5SDavid du Colombier if (i & 1) i++;
1095*59cc4ca5SDavid du Colombier quadrant = (UInt16*)(&(block[i]));
1096*59cc4ca5SDavid du Colombier
1097*59cc4ca5SDavid du Colombier /* (wfact-1) / 3 puts the default-factor-30
1098*59cc4ca5SDavid du Colombier transition point at very roughly the same place as
1099*59cc4ca5SDavid du Colombier with v0.1 and v0.9.0.
1100*59cc4ca5SDavid du Colombier Not that it particularly matters any more, since the
1101*59cc4ca5SDavid du Colombier resulting compressed stream is now the same regardless
1102*59cc4ca5SDavid du Colombier of whether or not we use the main sort or fallback sort.
1103*59cc4ca5SDavid du Colombier */
1104*59cc4ca5SDavid du Colombier if (wfact < 1 ) wfact = 1;
1105*59cc4ca5SDavid du Colombier if (wfact > 100) wfact = 100;
1106*59cc4ca5SDavid du Colombier budgetInit = nblock * ((wfact-1) / 3);
1107*59cc4ca5SDavid du Colombier budget = budgetInit;
1108*59cc4ca5SDavid du Colombier
1109*59cc4ca5SDavid du Colombier mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1110*59cc4ca5SDavid du Colombier if (verb >= 3)
1111*59cc4ca5SDavid du Colombier VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1112*59cc4ca5SDavid du Colombier budgetInit - budget,
1113*59cc4ca5SDavid du Colombier nblock,
1114*59cc4ca5SDavid du Colombier (float)(budgetInit - budget) /
1115*59cc4ca5SDavid du Colombier (float)(nblock==0 ? 1 : nblock) );
1116*59cc4ca5SDavid du Colombier if (budget < 0) {
1117*59cc4ca5SDavid du Colombier if (verb >= 2)
1118*59cc4ca5SDavid du Colombier VPrintf0 ( " too repetitive; using fallback"
1119*59cc4ca5SDavid du Colombier " sorting algorithm\n" );
1120*59cc4ca5SDavid du Colombier fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1121*59cc4ca5SDavid du Colombier }
1122*59cc4ca5SDavid du Colombier }
1123*59cc4ca5SDavid du Colombier
1124*59cc4ca5SDavid du Colombier s->origPtr = -1;
1125*59cc4ca5SDavid du Colombier for (i = 0; i < s->nblock; i++)
1126*59cc4ca5SDavid du Colombier if (ptr[i] == 0)
1127*59cc4ca5SDavid du Colombier { s->origPtr = i; break; };
1128*59cc4ca5SDavid du Colombier
1129*59cc4ca5SDavid du Colombier AssertH( s->origPtr != -1, 1003 );
1130*59cc4ca5SDavid du Colombier }
1131*59cc4ca5SDavid du Colombier
1132*59cc4ca5SDavid du Colombier
1133*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
1134*59cc4ca5SDavid du Colombier /*--- end blocksort.c ---*/
1135*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
1136