xref: /dflybsd-src/contrib/xz/src/liblzma/lzma/fastpos.h (revision b5feb3da7c498482b19d14ac6f2b1901005f7d94)
12940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
22940b44dSPeter Avalos //
32940b44dSPeter Avalos /// \file       fastpos.h
42940b44dSPeter Avalos /// \brief      Kind of two-bit version of bit scan reverse
52940b44dSPeter Avalos ///
62940b44dSPeter Avalos //  Authors:    Igor Pavlov
72940b44dSPeter Avalos //              Lasse Collin
82940b44dSPeter Avalos //
92940b44dSPeter Avalos //  This file has been put into the public domain.
102940b44dSPeter Avalos //  You can do whatever you want with this file.
112940b44dSPeter Avalos //
122940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
132940b44dSPeter Avalos 
142940b44dSPeter Avalos #ifndef LZMA_FASTPOS_H
152940b44dSPeter Avalos #define LZMA_FASTPOS_H
162940b44dSPeter Avalos 
1715ab8c86SJohn Marino // LZMA encodes match distances by storing the highest two bits using
1815ab8c86SJohn Marino // a six-bit value [0, 63], and then the missing lower bits.
1915ab8c86SJohn Marino // Dictionary size is also stored using this encoding in the .xz
202940b44dSPeter Avalos // file format header.
212940b44dSPeter Avalos //
222940b44dSPeter Avalos // fastpos.h provides a way to quickly find out the correct six-bit
232940b44dSPeter Avalos // values. The following table gives some examples of this encoding:
242940b44dSPeter Avalos //
2515ab8c86SJohn Marino //     dist   return
262940b44dSPeter Avalos //       0       0
272940b44dSPeter Avalos //       1       1
282940b44dSPeter Avalos //       2       2
292940b44dSPeter Avalos //       3       3
302940b44dSPeter Avalos //       4       4
312940b44dSPeter Avalos //       5       4
322940b44dSPeter Avalos //       6       5
332940b44dSPeter Avalos //       7       5
342940b44dSPeter Avalos //       8       6
352940b44dSPeter Avalos //      11       6
362940b44dSPeter Avalos //      12       7
372940b44dSPeter Avalos //     ...      ...
382940b44dSPeter Avalos //      15       7
392940b44dSPeter Avalos //      16       8
402940b44dSPeter Avalos //      17       8
412940b44dSPeter Avalos //     ...      ...
422940b44dSPeter Avalos //      23       8
432940b44dSPeter Avalos //      24       9
442940b44dSPeter Avalos //      25       9
452940b44dSPeter Avalos //     ...      ...
462940b44dSPeter Avalos //
472940b44dSPeter Avalos //
482940b44dSPeter Avalos // Provided functions or macros
492940b44dSPeter Avalos // ----------------------------
502940b44dSPeter Avalos //
5115ab8c86SJohn Marino // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
5215ab8c86SJohn Marino // assumes that dist >= FULL_DISTANCES, thus the result is at least
5315ab8c86SJohn Marino // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
5415ab8c86SJohn Marino // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
552940b44dSPeter Avalos // should be tiny bit faster due to the assumption being made.
562940b44dSPeter Avalos //
572940b44dSPeter Avalos //
582940b44dSPeter Avalos // Size vs. speed
592940b44dSPeter Avalos // --------------
602940b44dSPeter Avalos //
612940b44dSPeter Avalos // With some CPUs that have fast BSR (bit scan reverse) instruction, the
622940b44dSPeter Avalos // size optimized version is slightly faster than the bigger table based
632940b44dSPeter Avalos // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
642940b44dSPeter Avalos // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
652940b44dSPeter Avalos // would still have speed roughly comparable to the table version. Older
662940b44dSPeter Avalos // x86 CPUs like the original Pentium have very slow BSR; on those systems
672940b44dSPeter Avalos // the table version is a lot faster.
682940b44dSPeter Avalos //
692940b44dSPeter Avalos // On some CPUs, the table version is a lot faster when using position
702940b44dSPeter Avalos // dependent code, but with position independent code the size optimized
712940b44dSPeter Avalos // version is slightly faster. This occurs at least on 32-bit SPARC (no
722940b44dSPeter Avalos // ASM optimizations).
732940b44dSPeter Avalos //
742940b44dSPeter Avalos // I'm making the table version the default, because that has good speed
752940b44dSPeter Avalos // on all systems I have tried. The size optimized version is sometimes
762940b44dSPeter Avalos // slightly faster, but sometimes it is a lot slower.
772940b44dSPeter Avalos 
782940b44dSPeter Avalos #ifdef HAVE_SMALL
7915ab8c86SJohn Marino #	define get_dist_slot(dist) \
8015ab8c86SJohn Marino 		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
812940b44dSPeter Avalos 
822940b44dSPeter Avalos static inline uint32_t
get_dist_slot_2(uint32_t dist)8315ab8c86SJohn Marino get_dist_slot_2(uint32_t dist)
842940b44dSPeter Avalos {
8515ab8c86SJohn Marino 	const uint32_t i = bsr32(dist);
8615ab8c86SJohn Marino 	return (i + i) + ((dist >> (i - 1)) & 1);
872940b44dSPeter Avalos }
882940b44dSPeter Avalos 
892940b44dSPeter Avalos 
902940b44dSPeter Avalos #else
912940b44dSPeter Avalos 
922940b44dSPeter Avalos #define FASTPOS_BITS 13
932940b44dSPeter Avalos 
942940b44dSPeter Avalos extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
952940b44dSPeter Avalos 
962940b44dSPeter Avalos 
972940b44dSPeter Avalos #define fastpos_shift(extra, n) \
982940b44dSPeter Avalos 	((extra) + (n) * (FASTPOS_BITS - 1))
992940b44dSPeter Avalos 
1002940b44dSPeter Avalos #define fastpos_limit(extra, n) \
1012940b44dSPeter Avalos 	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
1022940b44dSPeter Avalos 
10315ab8c86SJohn Marino #define fastpos_result(dist, extra, n) \
104*e151908bSDaniel Fojt 	(uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
1052940b44dSPeter Avalos 			+ 2 * fastpos_shift(extra, n)
1062940b44dSPeter Avalos 
1072940b44dSPeter Avalos 
1082940b44dSPeter Avalos static inline uint32_t
get_dist_slot(uint32_t dist)10915ab8c86SJohn Marino get_dist_slot(uint32_t dist)
1102940b44dSPeter Avalos {
1112940b44dSPeter Avalos 	// If it is small enough, we can pick the result directly from
1122940b44dSPeter Avalos 	// the precalculated table.
11315ab8c86SJohn Marino 	if (dist < fastpos_limit(0, 0))
11415ab8c86SJohn Marino 		return lzma_fastpos[dist];
1152940b44dSPeter Avalos 
11615ab8c86SJohn Marino 	if (dist < fastpos_limit(0, 1))
11715ab8c86SJohn Marino 		return fastpos_result(dist, 0, 1);
1182940b44dSPeter Avalos 
11915ab8c86SJohn Marino 	return fastpos_result(dist, 0, 2);
1202940b44dSPeter Avalos }
1212940b44dSPeter Avalos 
1222940b44dSPeter Avalos 
1232940b44dSPeter Avalos #ifdef FULL_DISTANCES_BITS
1242940b44dSPeter Avalos static inline uint32_t
get_dist_slot_2(uint32_t dist)12515ab8c86SJohn Marino get_dist_slot_2(uint32_t dist)
1262940b44dSPeter Avalos {
12715ab8c86SJohn Marino 	assert(dist >= FULL_DISTANCES);
1282940b44dSPeter Avalos 
12915ab8c86SJohn Marino 	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
13015ab8c86SJohn Marino 		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
1312940b44dSPeter Avalos 
13215ab8c86SJohn Marino 	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
13315ab8c86SJohn Marino 		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
1342940b44dSPeter Avalos 
13515ab8c86SJohn Marino 	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
1362940b44dSPeter Avalos }
1372940b44dSPeter Avalos #endif
1382940b44dSPeter Avalos 
1392940b44dSPeter Avalos #endif
1402940b44dSPeter Avalos 
1412940b44dSPeter Avalos #endif
142