1 /* $NetBSD: blocksort.c,v 1.1.1.2 2012/05/07 00:41:45 wiz Exp $ */ 2 3 4 /*-------------------------------------------------------------*/ 5 /*--- Block sorting machinery ---*/ 6 /*--- blocksort.c ---*/ 7 /*-------------------------------------------------------------*/ 8 9 /* ------------------------------------------------------------------ 10 This file is part of bzip2/libbzip2, a program and library for 11 lossless, block-sorting data compression. 12 13 bzip2/libbzip2 version 1.0.6 of 6 September 2010 14 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 15 16 Please read the WARNING, DISCLAIMER and PATENTS sections in the 17 README file. 18 19 This program is released under the terms of the license contained 20 in the file LICENSE. 21 ------------------------------------------------------------------ */ 22 23 24 #include "bzlib_private.h" 25 26 /*---------------------------------------------*/ 27 /*--- Fallback O(N log(N)^2) sorting ---*/ 28 /*--- algorithm, for repetitive blocks ---*/ 29 /*---------------------------------------------*/ 30 31 /*---------------------------------------------*/ 32 static 33 __inline__ 34 void fallbackSimpleSort ( UInt32* fmap, 35 UInt32* eclass, 36 Int32 lo, 37 Int32 hi ) 38 { 39 Int32 i, j, tmp; 40 UInt32 ec_tmp; 41 42 if (lo == hi) return; 43 44 if (hi - lo > 3) { 45 for ( i = hi-4; i >= lo; i-- ) { 46 tmp = fmap[i]; 47 ec_tmp = eclass[tmp]; 48 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 49 fmap[j-4] = fmap[j]; 50 fmap[j-4] = tmp; 51 } 52 } 53 54 for ( i = hi-1; i >= lo; i-- ) { 55 tmp = fmap[i]; 56 ec_tmp = eclass[tmp]; 57 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 58 fmap[j-1] = fmap[j]; 59 fmap[j-1] = tmp; 60 } 61 } 62 63 64 /*---------------------------------------------*/ 65 #define fswap(zz1, zz2) \ 66 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 67 68 #define fvswap(zzp1, zzp2, zzn) \ 69 { \ 70 Int32 yyp1 = (zzp1); \ 71 Int32 yyp2 = (zzp2); \ 72 Int32 yyn = (zzn); \ 73 while (yyn > 0) { \ 74 fswap(fmap[yyp1], fmap[yyp2]); \ 75 yyp1++; yyp2++; yyn--; \ 76 } \ 77 } 78 79 80 #define fmin(a,b) ((a) < (b)) ? (a) : (b) 81 82 #define fpush(lz,hz) { stackLo[sp] = lz; \ 83 stackHi[sp] = hz; \ 84 sp++; } 85 86 #define fpop(lz,hz) { sp--; \ 87 lz = stackLo[sp]; \ 88 hz = stackHi[sp]; } 89 90 #define FALLBACK_QSORT_SMALL_THRESH 10 91 #define FALLBACK_QSORT_STACK_SIZE 100 92 93 94 static 95 void fallbackQSort3 ( UInt32* fmap, 96 UInt32* eclass, 97 Int32 loSt, 98 Int32 hiSt ) 99 { 100 Int32 unLo, unHi, ltLo, gtHi, n, m; 101 Int32 sp, lo, hi; 102 UInt32 med, r, r3; 103 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 104 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 105 106 r = 0; 107 108 sp = 0; 109 fpush ( loSt, hiSt ); 110 111 while (sp > 0) { 112 113 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 114 115 fpop ( lo, hi ); 116 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 117 fallbackSimpleSort ( fmap, eclass, lo, hi ); 118 continue; 119 } 120 121 /* Random partitioning. Median of 3 sometimes fails to 122 avoid bad cases. Median of 9 seems to help but 123 looks rather expensive. This too seems to work but 124 is cheaper. Guidance for the magic constants 125 7621 and 32768 is taken from Sedgewick's algorithms 126 book, chapter 35. 127 */ 128 r = ((r * 7621) + 1) % 32768; 129 r3 = r % 3; 130 if (r3 == 0) med = eclass[fmap[lo]]; else 131 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 132 med = eclass[fmap[hi]]; 133 134 unLo = ltLo = lo; 135 unHi = gtHi = hi; 136 137 while (1) { 138 while (1) { 139 if (unLo > unHi) break; 140 n = (Int32)eclass[fmap[unLo]] - (Int32)med; 141 if (n == 0) { 142 fswap(fmap[unLo], fmap[ltLo]); 143 ltLo++; unLo++; 144 continue; 145 }; 146 if (n > 0) break; 147 unLo++; 148 } 149 while (1) { 150 if (unLo > unHi) break; 151 n = (Int32)eclass[fmap[unHi]] - (Int32)med; 152 if (n == 0) { 153 fswap(fmap[unHi], fmap[gtHi]); 154 gtHi--; unHi--; 155 continue; 156 }; 157 if (n < 0) break; 158 unHi--; 159 } 160 if (unLo > unHi) break; 161 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 162 } 163 164 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 165 166 if (gtHi < ltLo) continue; 167 168 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 169 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 170 171 n = lo + unLo - ltLo - 1; 172 m = hi - (gtHi - unHi) + 1; 173 174 if (n - lo > hi - m) { 175 fpush ( lo, n ); 176 fpush ( m, hi ); 177 } else { 178 fpush ( m, hi ); 179 fpush ( lo, n ); 180 } 181 } 182 } 183 184 #undef fmin 185 #undef fpush 186 #undef fpop 187 #undef fswap 188 #undef fvswap 189 #undef FALLBACK_QSORT_SMALL_THRESH 190 #undef FALLBACK_QSORT_STACK_SIZE 191 192 193 /*---------------------------------------------*/ 194 /* Pre: 195 nblock > 0 196 eclass exists for [0 .. nblock-1] 197 ((UChar*)eclass) [0 .. nblock-1] holds block 198 ptr exists for [0 .. nblock-1] 199 200 Post: 201 ((UChar*)eclass) [0 .. nblock-1] holds block 202 All other areas of eclass destroyed 203 fmap [0 .. nblock-1] holds sorted order 204 bhtab [ 0 .. 2+(nblock/32) ] destroyed 205 */ 206 207 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 208 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 209 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 210 #define WORD_BH(zz) bhtab[(zz) >> 5] 211 #define UNALIGNED_BH(zz) ((zz) & 0x01f) 212 213 static 214 void fallbackSort ( UInt32* fmap, 215 UInt32* eclass, 216 UInt32* bhtab, 217 Int32 nblock, 218 Int32 verb ) 219 { 220 Int32 ftab[257]; 221 Int32 ftabCopy[256]; 222 Int32 H, i, j, k, l, r, cc, cc1; 223 Int32 nNotDone; 224 Int32 nBhtab; 225 UChar* eclass8 = (UChar*)eclass; 226 227 /*-- 228 Initial 1-char radix sort to generate 229 initial fmap and initial BH bits. 230 --*/ 231 if (verb >= 4) 232 VPrintf0 ( " bucket sorting ...\n" ); 233 for (i = 0; i < 257; i++) ftab[i] = 0; 234 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 235 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 236 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 237 238 for (i = 0; i < nblock; i++) { 239 j = eclass8[i]; 240 k = ftab[j] - 1; 241 ftab[j] = k; 242 fmap[k] = i; 243 } 244 245 nBhtab = 2 + (nblock / 32); 246 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 247 for (i = 0; i < 256; i++) SET_BH(ftab[i]); 248 249 /*-- 250 Inductively refine the buckets. Kind-of an 251 "exponential radix sort" (!), inspired by the 252 Manber-Myers suffix array construction algorithm. 253 --*/ 254 255 /*-- set sentinel bits for block-end detection --*/ 256 for (i = 0; i < 32; i++) { 257 SET_BH(nblock + 2*i); 258 CLEAR_BH(nblock + 2*i + 1); 259 } 260 261 /*-- the log(N) loop --*/ 262 H = 1; 263 while (1) { 264 265 if (verb >= 4) 266 VPrintf1 ( " depth %6d has ", H ); 267 268 j = 0; 269 for (i = 0; i < nblock; i++) { 270 if (ISSET_BH(i)) j = i; 271 k = fmap[i] - H; if (k < 0) k += nblock; 272 eclass[k] = j; 273 } 274 275 nNotDone = 0; 276 r = -1; 277 while (1) { 278 279 /*-- find the next non-singleton bucket --*/ 280 k = r + 1; 281 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 282 if (ISSET_BH(k)) { 283 while (WORD_BH(k) == 0xffffffff) k += 32; 284 while (ISSET_BH(k)) k++; 285 } 286 l = k - 1; 287 if (l >= nblock) break; 288 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 289 if (!ISSET_BH(k)) { 290 while (WORD_BH(k) == 0x00000000) k += 32; 291 while (!ISSET_BH(k)) k++; 292 } 293 r = k - 1; 294 if (r >= nblock) break; 295 296 /*-- now [l, r] bracket current bucket --*/ 297 if (r > l) { 298 nNotDone += (r - l + 1); 299 fallbackQSort3 ( fmap, eclass, l, r ); 300 301 /*-- scan bucket and generate header bits-- */ 302 cc = -1; 303 for (i = l; i <= r; i++) { 304 cc1 = eclass[fmap[i]]; 305 if (cc != cc1) { SET_BH(i); cc = cc1; }; 306 } 307 } 308 } 309 310 if (verb >= 4) 311 VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 312 313 H *= 2; 314 if (H > nblock || nNotDone == 0) break; 315 } 316 317 /*-- 318 Reconstruct the original block in 319 eclass8 [0 .. nblock-1], since the 320 previous phase destroyed it. 321 --*/ 322 if (verb >= 4) 323 VPrintf0 ( " reconstructing block ...\n" ); 324 j = 0; 325 for (i = 0; i < nblock; i++) { 326 while (ftabCopy[j] == 0) j++; 327 ftabCopy[j]--; 328 eclass8[fmap[i]] = (UChar)j; 329 } 330 AssertH ( j < 256, 1005 ); 331 } 332 333 #undef SET_BH 334 #undef CLEAR_BH 335 #undef ISSET_BH 336 #undef WORD_BH 337 #undef UNALIGNED_BH 338 339 340 /*---------------------------------------------*/ 341 /*--- The main, O(N^2 log(N)) sorting ---*/ 342 /*--- algorithm. Faster for "normal" ---*/ 343 /*--- non-repetitive blocks. ---*/ 344 /*---------------------------------------------*/ 345 346 /*---------------------------------------------*/ 347 static 348 __inline__ 349 Bool mainGtU ( UInt32 i1, 350 UInt32 i2, 351 UChar* block, 352 UInt16* quadrant, 353 UInt32 nblock, 354 Int32* budget ) 355 { 356 Int32 k; 357 UChar c1, c2; 358 UInt16 s1, s2; 359 360 AssertD ( i1 != i2, "mainGtU" ); 361 /* 1 */ 362 c1 = block[i1]; c2 = block[i2]; 363 if (c1 != c2) return (c1 > c2); 364 i1++; i2++; 365 /* 2 */ 366 c1 = block[i1]; c2 = block[i2]; 367 if (c1 != c2) return (c1 > c2); 368 i1++; i2++; 369 /* 3 */ 370 c1 = block[i1]; c2 = block[i2]; 371 if (c1 != c2) return (c1 > c2); 372 i1++; i2++; 373 /* 4 */ 374 c1 = block[i1]; c2 = block[i2]; 375 if (c1 != c2) return (c1 > c2); 376 i1++; i2++; 377 /* 5 */ 378 c1 = block[i1]; c2 = block[i2]; 379 if (c1 != c2) return (c1 > c2); 380 i1++; i2++; 381 /* 6 */ 382 c1 = block[i1]; c2 = block[i2]; 383 if (c1 != c2) return (c1 > c2); 384 i1++; i2++; 385 /* 7 */ 386 c1 = block[i1]; c2 = block[i2]; 387 if (c1 != c2) return (c1 > c2); 388 i1++; i2++; 389 /* 8 */ 390 c1 = block[i1]; c2 = block[i2]; 391 if (c1 != c2) return (c1 > c2); 392 i1++; i2++; 393 /* 9 */ 394 c1 = block[i1]; c2 = block[i2]; 395 if (c1 != c2) return (c1 > c2); 396 i1++; i2++; 397 /* 10 */ 398 c1 = block[i1]; c2 = block[i2]; 399 if (c1 != c2) return (c1 > c2); 400 i1++; i2++; 401 /* 11 */ 402 c1 = block[i1]; c2 = block[i2]; 403 if (c1 != c2) return (c1 > c2); 404 i1++; i2++; 405 /* 12 */ 406 c1 = block[i1]; c2 = block[i2]; 407 if (c1 != c2) return (c1 > c2); 408 i1++; i2++; 409 410 k = nblock + 8; 411 412 do { 413 /* 1 */ 414 c1 = block[i1]; c2 = block[i2]; 415 if (c1 != c2) return (c1 > c2); 416 s1 = quadrant[i1]; s2 = quadrant[i2]; 417 if (s1 != s2) return (s1 > s2); 418 i1++; i2++; 419 /* 2 */ 420 c1 = block[i1]; c2 = block[i2]; 421 if (c1 != c2) return (c1 > c2); 422 s1 = quadrant[i1]; s2 = quadrant[i2]; 423 if (s1 != s2) return (s1 > s2); 424 i1++; i2++; 425 /* 3 */ 426 c1 = block[i1]; c2 = block[i2]; 427 if (c1 != c2) return (c1 > c2); 428 s1 = quadrant[i1]; s2 = quadrant[i2]; 429 if (s1 != s2) return (s1 > s2); 430 i1++; i2++; 431 /* 4 */ 432 c1 = block[i1]; c2 = block[i2]; 433 if (c1 != c2) return (c1 > c2); 434 s1 = quadrant[i1]; s2 = quadrant[i2]; 435 if (s1 != s2) return (s1 > s2); 436 i1++; i2++; 437 /* 5 */ 438 c1 = block[i1]; c2 = block[i2]; 439 if (c1 != c2) return (c1 > c2); 440 s1 = quadrant[i1]; s2 = quadrant[i2]; 441 if (s1 != s2) return (s1 > s2); 442 i1++; i2++; 443 /* 6 */ 444 c1 = block[i1]; c2 = block[i2]; 445 if (c1 != c2) return (c1 > c2); 446 s1 = quadrant[i1]; s2 = quadrant[i2]; 447 if (s1 != s2) return (s1 > s2); 448 i1++; i2++; 449 /* 7 */ 450 c1 = block[i1]; c2 = block[i2]; 451 if (c1 != c2) return (c1 > c2); 452 s1 = quadrant[i1]; s2 = quadrant[i2]; 453 if (s1 != s2) return (s1 > s2); 454 i1++; i2++; 455 /* 8 */ 456 c1 = block[i1]; c2 = block[i2]; 457 if (c1 != c2) return (c1 > c2); 458 s1 = quadrant[i1]; s2 = quadrant[i2]; 459 if (s1 != s2) return (s1 > s2); 460 i1++; i2++; 461 462 if (i1 >= nblock) i1 -= nblock; 463 if (i2 >= nblock) i2 -= nblock; 464 465 k -= 8; 466 (*budget)--; 467 } 468 while (k >= 0); 469 470 return False; 471 } 472 473 474 /*---------------------------------------------*/ 475 /*-- 476 Knuth's increments seem to work better 477 than Incerpi-Sedgewick here. Possibly 478 because the number of elems to sort is 479 usually small, typically <= 20. 480 --*/ 481 static 482 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 483 9841, 29524, 88573, 265720, 484 797161, 2391484 }; 485 486 static 487 void mainSimpleSort ( UInt32* ptr, 488 UChar* block, 489 UInt16* quadrant, 490 Int32 nblock, 491 Int32 lo, 492 Int32 hi, 493 Int32 d, 494 Int32* budget ) 495 { 496 Int32 i, j, h, bigN, hp; 497 UInt32 v; 498 499 bigN = hi - lo + 1; 500 if (bigN < 2) return; 501 502 hp = 0; 503 while (incs[hp] < bigN) hp++; 504 hp--; 505 506 for (; hp >= 0; hp--) { 507 h = incs[hp]; 508 509 i = lo + h; 510 while (True) { 511 512 /*-- copy 1 --*/ 513 if (i > hi) break; 514 v = ptr[i]; 515 j = i; 516 while ( mainGtU ( 517 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 518 ) ) { 519 ptr[j] = ptr[j-h]; 520 j = j - h; 521 if (j <= (lo + h - 1)) break; 522 } 523 ptr[j] = v; 524 i++; 525 526 /*-- copy 2 --*/ 527 if (i > hi) break; 528 v = ptr[i]; 529 j = i; 530 while ( mainGtU ( 531 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 532 ) ) { 533 ptr[j] = ptr[j-h]; 534 j = j - h; 535 if (j <= (lo + h - 1)) break; 536 } 537 ptr[j] = v; 538 i++; 539 540 /*-- copy 3 --*/ 541 if (i > hi) break; 542 v = ptr[i]; 543 j = i; 544 while ( mainGtU ( 545 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 546 ) ) { 547 ptr[j] = ptr[j-h]; 548 j = j - h; 549 if (j <= (lo + h - 1)) break; 550 } 551 ptr[j] = v; 552 i++; 553 554 if (*budget < 0) return; 555 } 556 } 557 } 558 559 560 /*---------------------------------------------*/ 561 /*-- 562 The following is an implementation of 563 an elegant 3-way quicksort for strings, 564 described in a paper "Fast Algorithms for 565 Sorting and Searching Strings", by Robert 566 Sedgewick and Jon L. Bentley. 567 --*/ 568 569 #define mswap(zz1, zz2) \ 570 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 571 572 #define mvswap(zzp1, zzp2, zzn) \ 573 { \ 574 Int32 yyp1 = (zzp1); \ 575 Int32 yyp2 = (zzp2); \ 576 Int32 yyn = (zzn); \ 577 while (yyn > 0) { \ 578 mswap(ptr[yyp1], ptr[yyp2]); \ 579 yyp1++; yyp2++; yyn--; \ 580 } \ 581 } 582 583 static 584 __inline__ 585 UChar mmed3 ( UChar a, UChar b, UChar c ) 586 { 587 UChar t; 588 if (a > b) { t = a; a = b; b = t; }; 589 if (b > c) { 590 b = c; 591 if (a > b) b = a; 592 } 593 return b; 594 } 595 596 #define mmin(a,b) ((a) < (b)) ? (a) : (b) 597 598 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 599 stackHi[sp] = hz; \ 600 stackD [sp] = dz; \ 601 sp++; } 602 603 #define mpop(lz,hz,dz) { sp--; \ 604 lz = stackLo[sp]; \ 605 hz = stackHi[sp]; \ 606 dz = stackD [sp]; } 607 608 609 #define mnextsize(az) (nextHi[az]-nextLo[az]) 610 611 #define mnextswap(az,bz) \ 612 { Int32 tz; \ 613 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 614 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 615 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 616 617 618 #define MAIN_QSORT_SMALL_THRESH 20 619 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 620 #define MAIN_QSORT_STACK_SIZE 100 621 622 static 623 void mainQSort3 ( UInt32* ptr, 624 UChar* block, 625 UInt16* quadrant, 626 Int32 nblock, 627 Int32 loSt, 628 Int32 hiSt, 629 Int32 dSt, 630 Int32* budget ) 631 { 632 Int32 unLo, unHi, ltLo, gtHi, n, m, med; 633 Int32 sp, lo, hi, d; 634 635 Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 636 Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 637 Int32 stackD [MAIN_QSORT_STACK_SIZE]; 638 639 Int32 nextLo[3]; 640 Int32 nextHi[3]; 641 Int32 nextD [3]; 642 643 sp = 0; 644 mpush ( loSt, hiSt, dSt ); 645 646 while (sp > 0) { 647 648 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 649 650 mpop ( lo, hi, d ); 651 if (hi - lo < MAIN_QSORT_SMALL_THRESH || 652 d > MAIN_QSORT_DEPTH_THRESH) { 653 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 654 if (*budget < 0) return; 655 continue; 656 } 657 658 med = (Int32) 659 mmed3 ( block[ptr[ lo ]+d], 660 block[ptr[ hi ]+d], 661 block[ptr[ (lo+hi)>>1 ]+d] ); 662 663 unLo = ltLo = lo; 664 unHi = gtHi = hi; 665 666 while (True) { 667 while (True) { 668 if (unLo > unHi) break; 669 n = ((Int32)block[ptr[unLo]+d]) - med; 670 if (n == 0) { 671 mswap(ptr[unLo], ptr[ltLo]); 672 ltLo++; unLo++; continue; 673 }; 674 if (n > 0) break; 675 unLo++; 676 } 677 while (True) { 678 if (unLo > unHi) break; 679 n = ((Int32)block[ptr[unHi]+d]) - med; 680 if (n == 0) { 681 mswap(ptr[unHi], ptr[gtHi]); 682 gtHi--; unHi--; continue; 683 }; 684 if (n < 0) break; 685 unHi--; 686 } 687 if (unLo > unHi) break; 688 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 689 } 690 691 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 692 693 if (gtHi < ltLo) { 694 mpush(lo, hi, d+1 ); 695 continue; 696 } 697 698 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 699 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 700 701 n = lo + unLo - ltLo - 1; 702 m = hi - (gtHi - unHi) + 1; 703 704 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 705 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 706 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 707 708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 709 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 710 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 711 712 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 713 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 714 715 mpush (nextLo[0], nextHi[0], nextD[0]); 716 mpush (nextLo[1], nextHi[1], nextD[1]); 717 mpush (nextLo[2], nextHi[2], nextD[2]); 718 } 719 } 720 721 #undef mswap 722 #undef mvswap 723 #undef mpush 724 #undef mpop 725 #undef mmin 726 #undef mnextsize 727 #undef mnextswap 728 #undef MAIN_QSORT_SMALL_THRESH 729 #undef MAIN_QSORT_DEPTH_THRESH 730 #undef MAIN_QSORT_STACK_SIZE 731 732 733 /*---------------------------------------------*/ 734 /* Pre: 735 nblock > N_OVERSHOOT 736 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 737 ((UChar*)block32) [0 .. nblock-1] holds block 738 ptr exists for [0 .. nblock-1] 739 740 Post: 741 ((UChar*)block32) [0 .. nblock-1] holds block 742 All other areas of block32 destroyed 743 ftab [0 .. 65536 ] destroyed 744 ptr [0 .. nblock-1] holds sorted order 745 if (*budget < 0), sorting was abandoned 746 */ 747 748 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 749 #define SETMASK (1 << 21) 750 #define CLEARMASK (~(SETMASK)) 751 752 static 753 void mainSort ( UInt32* ptr, 754 UChar* block, 755 UInt16* quadrant, 756 UInt32* ftab, 757 Int32 nblock, 758 Int32 verb, 759 Int32* budget ) 760 { 761 Int32 i, j, k, ss, sb; 762 Int32 runningOrder[256]; 763 Bool bigDone[256]; 764 Int32 copyStart[256]; 765 Int32 copyEnd [256]; 766 UChar c1; 767 Int32 numQSorted; 768 UInt16 s; 769 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 770 771 /*-- set up the 2-byte frequency table --*/ 772 for (i = 65536; i >= 0; i--) ftab[i] = 0; 773 774 j = block[0] << 8; 775 i = nblock-1; 776 for (; i >= 3; i -= 4) { 777 quadrant[i] = 0; 778 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 779 ftab[j]++; 780 quadrant[i-1] = 0; 781 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 782 ftab[j]++; 783 quadrant[i-2] = 0; 784 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 785 ftab[j]++; 786 quadrant[i-3] = 0; 787 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 788 ftab[j]++; 789 } 790 for (; i >= 0; i--) { 791 quadrant[i] = 0; 792 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 793 ftab[j]++; 794 } 795 796 /*-- (emphasises close relationship of block & quadrant) --*/ 797 for (i = 0; i < BZ_N_OVERSHOOT; i++) { 798 block [nblock+i] = block[i]; 799 quadrant[nblock+i] = 0; 800 } 801 802 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 803 804 /*-- Complete the initial radix sort --*/ 805 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 806 807 s = block[0] << 8; 808 i = nblock-1; 809 for (; i >= 3; i -= 4) { 810 s = (s >> 8) | (block[i] << 8); 811 j = ftab[s] -1; 812 ftab[s] = j; 813 ptr[j] = i; 814 s = (s >> 8) | (block[i-1] << 8); 815 j = ftab[s] -1; 816 ftab[s] = j; 817 ptr[j] = i-1; 818 s = (s >> 8) | (block[i-2] << 8); 819 j = ftab[s] -1; 820 ftab[s] = j; 821 ptr[j] = i-2; 822 s = (s >> 8) | (block[i-3] << 8); 823 j = ftab[s] -1; 824 ftab[s] = j; 825 ptr[j] = i-3; 826 } 827 for (; i >= 0; i--) { 828 s = (s >> 8) | (block[i] << 8); 829 j = ftab[s] -1; 830 ftab[s] = j; 831 ptr[j] = i; 832 } 833 834 /*-- 835 Now ftab contains the first loc of every small bucket. 836 Calculate the running order, from smallest to largest 837 big bucket. 838 --*/ 839 for (i = 0; i <= 255; i++) { 840 bigDone [i] = False; 841 runningOrder[i] = i; 842 } 843 844 { 845 Int32 vv; 846 Int32 h = 1; 847 do h = 3 * h + 1; while (h <= 256); 848 do { 849 h = h / 3; 850 for (i = h; i <= 255; i++) { 851 vv = runningOrder[i]; 852 j = i; 853 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 854 runningOrder[j] = runningOrder[j-h]; 855 j = j - h; 856 if (j <= (h - 1)) goto zero; 857 } 858 zero: 859 runningOrder[j] = vv; 860 } 861 } while (h != 1); 862 } 863 864 /*-- 865 The main sorting loop. 866 --*/ 867 868 numQSorted = 0; 869 870 for (i = 0; i <= 255; i++) { 871 872 /*-- 873 Process big buckets, starting with the least full. 874 Basically this is a 3-step process in which we call 875 mainQSort3 to sort the small buckets [ss, j], but 876 also make a big effort to avoid the calls if we can. 877 --*/ 878 ss = runningOrder[i]; 879 880 /*-- 881 Step 1: 882 Complete the big bucket [ss] by quicksorting 883 any unsorted small buckets [ss, j], for j != ss. 884 Hopefully previous pointer-scanning phases have already 885 completed many of the small buckets [ss, j], so 886 we don't have to sort them at all. 887 --*/ 888 for (j = 0; j <= 255; j++) { 889 if (j != ss) { 890 sb = (ss << 8) + j; 891 if ( ! (ftab[sb] & SETMASK) ) { 892 Int32 lo = ftab[sb] & CLEARMASK; 893 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 894 if (hi > lo) { 895 if (verb >= 4) 896 VPrintf4 ( " qsort [0x%x, 0x%x] " 897 "done %d this %d\n", 898 ss, j, numQSorted, hi - lo + 1 ); 899 mainQSort3 ( 900 ptr, block, quadrant, nblock, 901 lo, hi, BZ_N_RADIX, budget 902 ); 903 numQSorted += (hi - lo + 1); 904 if (*budget < 0) return; 905 } 906 } 907 ftab[sb] |= SETMASK; 908 } 909 } 910 911 AssertH ( !bigDone[ss], 1006 ); 912 913 /*-- 914 Step 2: 915 Now scan this big bucket [ss] so as to synthesise the 916 sorted order for small buckets [t, ss] for all t, 917 including, magically, the bucket [ss,ss] too. 918 This will avoid doing Real Work in subsequent Step 1's. 919 --*/ 920 { 921 for (j = 0; j <= 255; j++) { 922 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 923 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 924 } 925 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 926 k = ptr[j]-1; if (k < 0) k += nblock; 927 c1 = block[k]; 928 if (!bigDone[c1]) 929 ptr[ copyStart[c1]++ ] = k; 930 } 931 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 932 k = ptr[j]-1; if (k < 0) k += nblock; 933 c1 = block[k]; 934 if (!bigDone[c1]) 935 ptr[ copyEnd[c1]-- ] = k; 936 } 937 } 938 939 AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 940 || 941 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 942 Necessity for this case is demonstrated by compressing 943 a sequence of approximately 48.5 million of character 944 251; 1.0.0/1.0.1 will then die here. */ 945 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 946 1007 ) 947 948 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 949 950 /*-- 951 Step 3: 952 The [ss] big bucket is now done. Record this fact, 953 and update the quadrant descriptors. Remember to 954 update quadrants in the overshoot area too, if 955 necessary. The "if (i < 255)" test merely skips 956 this updating for the last bucket processed, since 957 updating for the last bucket is pointless. 958 959 The quadrant array provides a way to incrementally 960 cache sort orderings, as they appear, so as to 961 make subsequent comparisons in fullGtU() complete 962 faster. For repetitive blocks this makes a big 963 difference (but not big enough to be able to avoid 964 the fallback sorting mechanism, exponential radix sort). 965 966 The precise meaning is: at all times: 967 968 for 0 <= i < nblock and 0 <= j <= nblock 969 970 if block[i] != block[j], 971 972 then the relative values of quadrant[i] and 973 quadrant[j] are meaningless. 974 975 else { 976 if quadrant[i] < quadrant[j] 977 then the string starting at i lexicographically 978 precedes the string starting at j 979 980 else if quadrant[i] > quadrant[j] 981 then the string starting at j lexicographically 982 precedes the string starting at i 983 984 else 985 the relative ordering of the strings starting 986 at i and j has not yet been determined. 987 } 988 --*/ 989 bigDone[ss] = True; 990 991 if (i < 255) { 992 Int32 bbStart = ftab[ss << 8] & CLEARMASK; 993 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 994 Int32 shifts = 0; 995 996 while ((bbSize >> shifts) > 65534) shifts++; 997 998 for (j = bbSize-1; j >= 0; j--) { 999 Int32 a2update = ptr[bbStart + j]; 1000 UInt16 qVal = (UInt16)(j >> shifts); 1001 quadrant[a2update] = qVal; 1002 if (a2update < BZ_N_OVERSHOOT) 1003 quadrant[a2update + nblock] = qVal; 1004 } 1005 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 1006 } 1007 1008 } 1009 1010 if (verb >= 4) 1011 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 1012 nblock, numQSorted, nblock - numQSorted ); 1013 } 1014 1015 #undef BIGFREQ 1016 #undef SETMASK 1017 #undef CLEARMASK 1018 1019 1020 /*---------------------------------------------*/ 1021 /* Pre: 1022 nblock > 0 1023 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 1024 ((UChar*)arr2) [0 .. nblock-1] holds block 1025 arr1 exists for [0 .. nblock-1] 1026 1027 Post: 1028 ((UChar*)arr2) [0 .. nblock-1] holds block 1029 All other areas of block destroyed 1030 ftab [ 0 .. 65536 ] destroyed 1031 arr1 [0 .. nblock-1] holds sorted order 1032 */ 1033 void BZ2_blockSort ( EState* s ) 1034 { 1035 UInt32* ptr = s->ptr; 1036 UChar* block = s->block; 1037 UInt32* ftab = s->ftab; 1038 Int32 nblock = s->nblock; 1039 Int32 verb = s->verbosity; 1040 Int32 wfact = s->workFactor; 1041 UInt16* quadrant; 1042 Int32 budget; 1043 Int32 budgetInit; 1044 Int32 i; 1045 1046 if (nblock < 10000) { 1047 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1048 } else { 1049 /* Calculate the location for quadrant, remembering to get 1050 the alignment right. Assumes that &(block[0]) is at least 1051 2-byte aligned -- this should be ok since block is really 1052 the first section of arr2. 1053 */ 1054 i = nblock+BZ_N_OVERSHOOT; 1055 if (i & 1) i++; 1056 quadrant = (UInt16*)(&(block[i])); 1057 1058 /* (wfact-1) / 3 puts the default-factor-30 1059 transition point at very roughly the same place as 1060 with v0.1 and v0.9.0. 1061 Not that it particularly matters any more, since the 1062 resulting compressed stream is now the same regardless 1063 of whether or not we use the main sort or fallback sort. 1064 */ 1065 if (wfact < 1 ) wfact = 1; 1066 if (wfact > 100) wfact = 100; 1067 budgetInit = nblock * ((wfact-1) / 3); 1068 budget = budgetInit; 1069 1070 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 1071 if (verb >= 3) 1072 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 1073 budgetInit - budget, 1074 nblock, 1075 (float)(budgetInit - budget) / 1076 (float)(nblock==0 ? 1 : nblock) ); 1077 if (budget < 0) { 1078 if (verb >= 2) 1079 VPrintf0 ( " too repetitive; using fallback" 1080 " sorting algorithm\n" ); 1081 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1082 } 1083 } 1084 1085 s->origPtr = -1; 1086 for (i = 0; i < s->nblock; i++) 1087 if (ptr[i] == 0) 1088 { s->origPtr = i; break; }; 1089 1090 AssertH( s->origPtr != -1, 1003 ); 1091 } 1092 1093 1094 /*-------------------------------------------------------------*/ 1095 /*--- end blocksort.c ---*/ 1096 /*-------------------------------------------------------------*/ 1097