xref: /dflybsd-src/contrib/xz/src/liblzma/lz/lz_encoder_mf.c (revision b5feb3da7c498482b19d14ac6f2b1901005f7d94)
12940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
22940b44dSPeter Avalos //
32940b44dSPeter Avalos /// \file       lz_encoder_mf.c
42940b44dSPeter Avalos /// \brief      Match finders
52940b44dSPeter Avalos ///
62940b44dSPeter Avalos //  Authors:    Igor Pavlov
72940b44dSPeter Avalos //              Lasse Collin
82940b44dSPeter Avalos //
92940b44dSPeter Avalos //  This file has been put into the public domain.
102940b44dSPeter Avalos //  You can do whatever you want with this file.
112940b44dSPeter Avalos //
122940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
132940b44dSPeter Avalos 
142940b44dSPeter Avalos #include "lz_encoder.h"
152940b44dSPeter Avalos #include "lz_encoder_hash.h"
1615ab8c86SJohn Marino #include "memcmplen.h"
172940b44dSPeter Avalos 
182940b44dSPeter Avalos 
192940b44dSPeter Avalos /// \brief      Find matches starting from the current byte
202940b44dSPeter Avalos ///
212940b44dSPeter Avalos /// \return     The length of the longest match found
222940b44dSPeter Avalos extern uint32_t
lzma_mf_find(lzma_mf * mf,uint32_t * count_ptr,lzma_match * matches)232940b44dSPeter Avalos lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
242940b44dSPeter Avalos {
252940b44dSPeter Avalos 	// Call the match finder. It returns the number of length-distance
262940b44dSPeter Avalos 	// pairs found.
272940b44dSPeter Avalos 	// FIXME: Minimum count is zero, what _exactly_ is the maximum?
282940b44dSPeter Avalos 	const uint32_t count = mf->find(mf, matches);
292940b44dSPeter Avalos 
302940b44dSPeter Avalos 	// Length of the longest match; assume that no matches were found
312940b44dSPeter Avalos 	// and thus the maximum length is zero.
322940b44dSPeter Avalos 	uint32_t len_best = 0;
332940b44dSPeter Avalos 
342940b44dSPeter Avalos 	if (count > 0) {
352940b44dSPeter Avalos #ifndef NDEBUG
362940b44dSPeter Avalos 		// Validate the matches.
372940b44dSPeter Avalos 		for (uint32_t i = 0; i < count; ++i) {
382940b44dSPeter Avalos 			assert(matches[i].len <= mf->nice_len);
392940b44dSPeter Avalos 			assert(matches[i].dist < mf->read_pos);
402940b44dSPeter Avalos 			assert(memcmp(mf_ptr(mf) - 1,
412940b44dSPeter Avalos 				mf_ptr(mf) - matches[i].dist - 2,
422940b44dSPeter Avalos 				matches[i].len) == 0);
432940b44dSPeter Avalos 		}
442940b44dSPeter Avalos #endif
452940b44dSPeter Avalos 
462940b44dSPeter Avalos 		// The last used element in the array contains
472940b44dSPeter Avalos 		// the longest match.
482940b44dSPeter Avalos 		len_best = matches[count - 1].len;
492940b44dSPeter Avalos 
502940b44dSPeter Avalos 		// If a match of maximum search length was found, try to
512940b44dSPeter Avalos 		// extend the match to maximum possible length.
522940b44dSPeter Avalos 		if (len_best == mf->nice_len) {
532940b44dSPeter Avalos 			// The limit for the match length is either the
542940b44dSPeter Avalos 			// maximum match length supported by the LZ-based
552940b44dSPeter Avalos 			// encoder or the number of bytes left in the
562940b44dSPeter Avalos 			// dictionary, whichever is smaller.
572940b44dSPeter Avalos 			uint32_t limit = mf_avail(mf) + 1;
582940b44dSPeter Avalos 			if (limit > mf->match_len_max)
592940b44dSPeter Avalos 				limit = mf->match_len_max;
602940b44dSPeter Avalos 
612940b44dSPeter Avalos 			// Pointer to the byte we just ran through
622940b44dSPeter Avalos 			// the match finder.
632940b44dSPeter Avalos 			const uint8_t *p1 = mf_ptr(mf) - 1;
642940b44dSPeter Avalos 
652940b44dSPeter Avalos 			// Pointer to the beginning of the match. We need -1
662940b44dSPeter Avalos 			// here because the match distances are zero based.
672940b44dSPeter Avalos 			const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
682940b44dSPeter Avalos 
6915ab8c86SJohn Marino 			len_best = lzma_memcmplen(p1, p2, len_best, limit);
702940b44dSPeter Avalos 		}
712940b44dSPeter Avalos 	}
722940b44dSPeter Avalos 
732940b44dSPeter Avalos 	*count_ptr = count;
742940b44dSPeter Avalos 
752940b44dSPeter Avalos 	// Finally update the read position to indicate that match finder was
762940b44dSPeter Avalos 	// run for this dictionary offset.
772940b44dSPeter Avalos 	++mf->read_ahead;
782940b44dSPeter Avalos 
792940b44dSPeter Avalos 	return len_best;
802940b44dSPeter Avalos }
812940b44dSPeter Avalos 
822940b44dSPeter Avalos 
832940b44dSPeter Avalos /// Hash value to indicate unused element in the hash. Since we start the
842940b44dSPeter Avalos /// positions from dict_size + 1, zero is always too far to qualify
852940b44dSPeter Avalos /// as usable match position.
862940b44dSPeter Avalos #define EMPTY_HASH_VALUE 0
872940b44dSPeter Avalos 
882940b44dSPeter Avalos 
892940b44dSPeter Avalos /// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos
902940b44dSPeter Avalos /// reaches MUST_NORMALIZE_POS.
912940b44dSPeter Avalos #define MUST_NORMALIZE_POS UINT32_MAX
922940b44dSPeter Avalos 
932940b44dSPeter Avalos 
942940b44dSPeter Avalos /// \brief      Normalizes hash values
952940b44dSPeter Avalos ///
962940b44dSPeter Avalos /// The hash arrays store positions of match candidates. The positions are
972940b44dSPeter Avalos /// relative to an arbitrary offset that is not the same as the absolute
982940b44dSPeter Avalos /// offset in the input stream. The relative position of the current byte
992940b44dSPeter Avalos /// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are
1002940b44dSPeter Avalos /// the differences of the current read position and the position found from
1012940b44dSPeter Avalos /// the hash.
1022940b44dSPeter Avalos ///
1032940b44dSPeter Avalos /// To prevent integer overflows of the offsets stored in the hash arrays,
1042940b44dSPeter Avalos /// we need to "normalize" the stored values now and then. During the
1052940b44dSPeter Avalos /// normalization, we drop values that indicate distance greater than the
1062940b44dSPeter Avalos /// dictionary size, thus making space for new values.
1072940b44dSPeter Avalos static void
normalize(lzma_mf * mf)1082940b44dSPeter Avalos normalize(lzma_mf *mf)
1092940b44dSPeter Avalos {
1102940b44dSPeter Avalos 	assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);
1112940b44dSPeter Avalos 
1122940b44dSPeter Avalos 	// In future we may not want to touch the lowest bits, because there
1132940b44dSPeter Avalos 	// may be match finders that use larger resolution than one byte.
1142940b44dSPeter Avalos 	const uint32_t subvalue
1152940b44dSPeter Avalos 			= (MUST_NORMALIZE_POS - mf->cyclic_size);
116*e151908bSDaniel Fojt 				// & ~((UINT32_C(1) << 10) - 1);
1172940b44dSPeter Avalos 
11815ab8c86SJohn Marino 	for (uint32_t i = 0; i < mf->hash_count; ++i) {
1192940b44dSPeter Avalos 		// If the distance is greater than the dictionary size,
1202940b44dSPeter Avalos 		// we can simply mark the hash element as empty.
12115ab8c86SJohn Marino 		if (mf->hash[i] <= subvalue)
12215ab8c86SJohn Marino 			mf->hash[i] = EMPTY_HASH_VALUE;
1232940b44dSPeter Avalos 		else
12415ab8c86SJohn Marino 			mf->hash[i] -= subvalue;
12515ab8c86SJohn Marino 	}
12615ab8c86SJohn Marino 
12715ab8c86SJohn Marino 	for (uint32_t i = 0; i < mf->sons_count; ++i) {
12815ab8c86SJohn Marino 		// Do the same for mf->son.
12915ab8c86SJohn Marino 		//
13015ab8c86SJohn Marino 		// NOTE: There may be uninitialized elements in mf->son.
13115ab8c86SJohn Marino 		// Valgrind may complain that the "if" below depends on
13215ab8c86SJohn Marino 		// an uninitialized value. In this case it is safe to ignore
13315ab8c86SJohn Marino 		// the warning. See also the comments in lz_encoder_init()
13415ab8c86SJohn Marino 		// in lz_encoder.c.
13515ab8c86SJohn Marino 		if (mf->son[i] <= subvalue)
13615ab8c86SJohn Marino 			mf->son[i] = EMPTY_HASH_VALUE;
13715ab8c86SJohn Marino 		else
13815ab8c86SJohn Marino 			mf->son[i] -= subvalue;
1392940b44dSPeter Avalos 	}
1402940b44dSPeter Avalos 
1412940b44dSPeter Avalos 	// Update offset to match the new locations.
1422940b44dSPeter Avalos 	mf->offset -= subvalue;
1432940b44dSPeter Avalos 
1442940b44dSPeter Avalos 	return;
1452940b44dSPeter Avalos }
1462940b44dSPeter Avalos 
1472940b44dSPeter Avalos 
1482940b44dSPeter Avalos /// Mark the current byte as processed from point of view of the match finder.
1492940b44dSPeter Avalos static void
move_pos(lzma_mf * mf)1502940b44dSPeter Avalos move_pos(lzma_mf *mf)
1512940b44dSPeter Avalos {
1522940b44dSPeter Avalos 	if (++mf->cyclic_pos == mf->cyclic_size)
1532940b44dSPeter Avalos 		mf->cyclic_pos = 0;
1542940b44dSPeter Avalos 
1552940b44dSPeter Avalos 	++mf->read_pos;
1562940b44dSPeter Avalos 	assert(mf->read_pos <= mf->write_pos);
1572940b44dSPeter Avalos 
1582940b44dSPeter Avalos 	if (unlikely(mf->read_pos + mf->offset == UINT32_MAX))
1592940b44dSPeter Avalos 		normalize(mf);
1602940b44dSPeter Avalos }
1612940b44dSPeter Avalos 
1622940b44dSPeter Avalos 
1632940b44dSPeter Avalos /// When flushing, we cannot run the match finder unless there is nice_len
1642940b44dSPeter Avalos /// bytes available in the dictionary. Instead, we skip running the match
1652940b44dSPeter Avalos /// finder (indicating that no match was found), and count how many bytes we
1662940b44dSPeter Avalos /// have ignored this way.
1672940b44dSPeter Avalos ///
1682940b44dSPeter Avalos /// When new data is given after the flushing was completed, the match finder
1692940b44dSPeter Avalos /// is restarted by rewinding mf->read_pos backwards by mf->pending. Then
1702940b44dSPeter Avalos /// the missed bytes are added to the hash using the match finder's skip
1712940b44dSPeter Avalos /// function (with small amount of input, it may start using mf->pending
1722940b44dSPeter Avalos /// again if flushing).
1732940b44dSPeter Avalos ///
1742940b44dSPeter Avalos /// Due to this rewinding, we don't touch cyclic_pos or test for
1752940b44dSPeter Avalos /// normalization. It will be done when the match finder's skip function
1762940b44dSPeter Avalos /// catches up after a flush.
1772940b44dSPeter Avalos static void
move_pending(lzma_mf * mf)1782940b44dSPeter Avalos move_pending(lzma_mf *mf)
1792940b44dSPeter Avalos {
1802940b44dSPeter Avalos 	++mf->read_pos;
1812940b44dSPeter Avalos 	assert(mf->read_pos <= mf->write_pos);
1822940b44dSPeter Avalos 	++mf->pending;
1832940b44dSPeter Avalos }
1842940b44dSPeter Avalos 
1852940b44dSPeter Avalos 
1862940b44dSPeter Avalos /// Calculate len_limit and determine if there is enough input to run
1872940b44dSPeter Avalos /// the actual match finder code. Sets up "cur" and "pos". This macro
1882940b44dSPeter Avalos /// is used by all find functions and binary tree skip functions. Hash
1892940b44dSPeter Avalos /// chain skip function doesn't need len_limit so a simpler code is used
1902940b44dSPeter Avalos /// in them.
1912940b44dSPeter Avalos #define header(is_bt, len_min, ret_op) \
1922940b44dSPeter Avalos 	uint32_t len_limit = mf_avail(mf); \
1932940b44dSPeter Avalos 	if (mf->nice_len <= len_limit) { \
1942940b44dSPeter Avalos 		len_limit = mf->nice_len; \
1952940b44dSPeter Avalos 	} else if (len_limit < (len_min) \
1962940b44dSPeter Avalos 			|| (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \
1972940b44dSPeter Avalos 		assert(mf->action != LZMA_RUN); \
1982940b44dSPeter Avalos 		move_pending(mf); \
1992940b44dSPeter Avalos 		ret_op; \
2002940b44dSPeter Avalos 	} \
2012940b44dSPeter Avalos 	const uint8_t *cur = mf_ptr(mf); \
2022940b44dSPeter Avalos 	const uint32_t pos = mf->read_pos + mf->offset
2032940b44dSPeter Avalos 
2042940b44dSPeter Avalos 
2052940b44dSPeter Avalos /// Header for find functions. "return 0" indicates that zero matches
2062940b44dSPeter Avalos /// were found.
2072940b44dSPeter Avalos #define header_find(is_bt, len_min) \
2082940b44dSPeter Avalos 	header(is_bt, len_min, return 0); \
2092940b44dSPeter Avalos 	uint32_t matches_count = 0
2102940b44dSPeter Avalos 
2112940b44dSPeter Avalos 
2122940b44dSPeter Avalos /// Header for a loop in a skip function. "continue" tells to skip the rest
2132940b44dSPeter Avalos /// of the code in the loop.
2142940b44dSPeter Avalos #define header_skip(is_bt, len_min) \
2152940b44dSPeter Avalos 	header(is_bt, len_min, continue)
2162940b44dSPeter Avalos 
2172940b44dSPeter Avalos 
2182940b44dSPeter Avalos /// Calls hc_find_func() or bt_find_func() and calculates the total number
2192940b44dSPeter Avalos /// of matches found. Updates the dictionary position and returns the number
2202940b44dSPeter Avalos /// of matches found.
2212940b44dSPeter Avalos #define call_find(func, len_best) \
2222940b44dSPeter Avalos do { \
2232940b44dSPeter Avalos 	matches_count = func(len_limit, pos, cur, cur_match, mf->depth, \
2242940b44dSPeter Avalos 				mf->son, mf->cyclic_pos, mf->cyclic_size, \
2252940b44dSPeter Avalos 				matches + matches_count, len_best) \
2262940b44dSPeter Avalos 			- matches; \
2272940b44dSPeter Avalos 	move_pos(mf); \
2282940b44dSPeter Avalos 	return matches_count; \
2292940b44dSPeter Avalos } while (0)
2302940b44dSPeter Avalos 
2312940b44dSPeter Avalos 
2322940b44dSPeter Avalos ////////////////
2332940b44dSPeter Avalos // Hash Chain //
2342940b44dSPeter Avalos ////////////////
2352940b44dSPeter Avalos 
2362940b44dSPeter Avalos #if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4)
2372940b44dSPeter Avalos ///
2382940b44dSPeter Avalos ///
2392940b44dSPeter Avalos /// \param      len_limit       Don't look for matches longer than len_limit.
2402940b44dSPeter Avalos /// \param      pos             lzma_mf.read_pos + lzma_mf.offset
2412940b44dSPeter Avalos /// \param      cur             Pointer to current byte (mf_ptr(mf))
2422940b44dSPeter Avalos /// \param      cur_match       Start position of the current match candidate
2432940b44dSPeter Avalos /// \param      depth           Maximum length of the hash chain
2442940b44dSPeter Avalos /// \param      son             lzma_mf.son (contains the hash chain)
2452940b44dSPeter Avalos /// \param      cyclic_pos
2462940b44dSPeter Avalos /// \param      cyclic_size
2472940b44dSPeter Avalos /// \param      matches         Array to hold the matches.
2482940b44dSPeter Avalos /// \param      len_best        The length of the longest match found so far.
2492940b44dSPeter Avalos static lzma_match *
hc_find_func(const uint32_t len_limit,const uint32_t pos,const uint8_t * const cur,uint32_t cur_match,uint32_t depth,uint32_t * const son,const uint32_t cyclic_pos,const uint32_t cyclic_size,lzma_match * matches,uint32_t len_best)2502940b44dSPeter Avalos hc_find_func(
2512940b44dSPeter Avalos 		const uint32_t len_limit,
2522940b44dSPeter Avalos 		const uint32_t pos,
2532940b44dSPeter Avalos 		const uint8_t *const cur,
2542940b44dSPeter Avalos 		uint32_t cur_match,
2552940b44dSPeter Avalos 		uint32_t depth,
2562940b44dSPeter Avalos 		uint32_t *const son,
2572940b44dSPeter Avalos 		const uint32_t cyclic_pos,
2582940b44dSPeter Avalos 		const uint32_t cyclic_size,
2592940b44dSPeter Avalos 		lzma_match *matches,
2602940b44dSPeter Avalos 		uint32_t len_best)
2612940b44dSPeter Avalos {
2622940b44dSPeter Avalos 	son[cyclic_pos] = cur_match;
2632940b44dSPeter Avalos 
2642940b44dSPeter Avalos 	while (true) {
2652940b44dSPeter Avalos 		const uint32_t delta = pos - cur_match;
2662940b44dSPeter Avalos 		if (depth-- == 0 || delta >= cyclic_size)
2672940b44dSPeter Avalos 			return matches;
2682940b44dSPeter Avalos 
2692940b44dSPeter Avalos 		const uint8_t *const pb = cur - delta;
2702940b44dSPeter Avalos 		cur_match = son[cyclic_pos - delta
2712940b44dSPeter Avalos 				+ (delta > cyclic_pos ? cyclic_size : 0)];
2722940b44dSPeter Avalos 
2732940b44dSPeter Avalos 		if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
27415ab8c86SJohn Marino 			uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit);
2752940b44dSPeter Avalos 
2762940b44dSPeter Avalos 			if (len_best < len) {
2772940b44dSPeter Avalos 				len_best = len;
2782940b44dSPeter Avalos 				matches->len = len;
2792940b44dSPeter Avalos 				matches->dist = delta - 1;
2802940b44dSPeter Avalos 				++matches;
2812940b44dSPeter Avalos 
2822940b44dSPeter Avalos 				if (len == len_limit)
2832940b44dSPeter Avalos 					return matches;
2842940b44dSPeter Avalos 			}
2852940b44dSPeter Avalos 		}
2862940b44dSPeter Avalos 	}
2872940b44dSPeter Avalos }
2882940b44dSPeter Avalos 
2892940b44dSPeter Avalos 
2902940b44dSPeter Avalos #define hc_find(len_best) \
2912940b44dSPeter Avalos 	call_find(hc_find_func, len_best)
2922940b44dSPeter Avalos 
2932940b44dSPeter Avalos 
2942940b44dSPeter Avalos #define hc_skip() \
2952940b44dSPeter Avalos do { \
2962940b44dSPeter Avalos 	mf->son[mf->cyclic_pos] = cur_match; \
2972940b44dSPeter Avalos 	move_pos(mf); \
2982940b44dSPeter Avalos } while (0)
2992940b44dSPeter Avalos 
3002940b44dSPeter Avalos #endif
3012940b44dSPeter Avalos 
3022940b44dSPeter Avalos 
3032940b44dSPeter Avalos #ifdef HAVE_MF_HC3
3042940b44dSPeter Avalos extern uint32_t
lzma_mf_hc3_find(lzma_mf * mf,lzma_match * matches)3052940b44dSPeter Avalos lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
3062940b44dSPeter Avalos {
3072940b44dSPeter Avalos 	header_find(false, 3);
3082940b44dSPeter Avalos 
3092940b44dSPeter Avalos 	hash_3_calc();
3102940b44dSPeter Avalos 
3112940b44dSPeter Avalos 	const uint32_t delta2 = pos - mf->hash[hash_2_value];
3122940b44dSPeter Avalos 	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
3132940b44dSPeter Avalos 
3142940b44dSPeter Avalos 	mf->hash[hash_2_value] = pos;
3152940b44dSPeter Avalos 	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
3162940b44dSPeter Avalos 
3172940b44dSPeter Avalos 	uint32_t len_best = 2;
3182940b44dSPeter Avalos 
3192940b44dSPeter Avalos 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
32015ab8c86SJohn Marino 		len_best = lzma_memcmplen(cur - delta2, cur,
32115ab8c86SJohn Marino 				len_best, len_limit);
3222940b44dSPeter Avalos 
3232940b44dSPeter Avalos 		matches[0].len = len_best;
3242940b44dSPeter Avalos 		matches[0].dist = delta2 - 1;
3252940b44dSPeter Avalos 		matches_count = 1;
3262940b44dSPeter Avalos 
3272940b44dSPeter Avalos 		if (len_best == len_limit) {
3282940b44dSPeter Avalos 			hc_skip();
3292940b44dSPeter Avalos 			return 1; // matches_count
3302940b44dSPeter Avalos 		}
3312940b44dSPeter Avalos 	}
3322940b44dSPeter Avalos 
3332940b44dSPeter Avalos 	hc_find(len_best);
3342940b44dSPeter Avalos }
3352940b44dSPeter Avalos 
3362940b44dSPeter Avalos 
3372940b44dSPeter Avalos extern void
lzma_mf_hc3_skip(lzma_mf * mf,uint32_t amount)3382940b44dSPeter Avalos lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount)
3392940b44dSPeter Avalos {
3402940b44dSPeter Avalos 	do {
3412940b44dSPeter Avalos 		if (mf_avail(mf) < 3) {
3422940b44dSPeter Avalos 			move_pending(mf);
3432940b44dSPeter Avalos 			continue;
3442940b44dSPeter Avalos 		}
3452940b44dSPeter Avalos 
3462940b44dSPeter Avalos 		const uint8_t *cur = mf_ptr(mf);
3472940b44dSPeter Avalos 		const uint32_t pos = mf->read_pos + mf->offset;
3482940b44dSPeter Avalos 
3492940b44dSPeter Avalos 		hash_3_calc();
3502940b44dSPeter Avalos 
3512940b44dSPeter Avalos 		const uint32_t cur_match
3522940b44dSPeter Avalos 				= mf->hash[FIX_3_HASH_SIZE + hash_value];
3532940b44dSPeter Avalos 
3542940b44dSPeter Avalos 		mf->hash[hash_2_value] = pos;
3552940b44dSPeter Avalos 		mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
3562940b44dSPeter Avalos 
3572940b44dSPeter Avalos 		hc_skip();
3582940b44dSPeter Avalos 
3592940b44dSPeter Avalos 	} while (--amount != 0);
3602940b44dSPeter Avalos }
3612940b44dSPeter Avalos #endif
3622940b44dSPeter Avalos 
3632940b44dSPeter Avalos 
3642940b44dSPeter Avalos #ifdef HAVE_MF_HC4
3652940b44dSPeter Avalos extern uint32_t
lzma_mf_hc4_find(lzma_mf * mf,lzma_match * matches)3662940b44dSPeter Avalos lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
3672940b44dSPeter Avalos {
3682940b44dSPeter Avalos 	header_find(false, 4);
3692940b44dSPeter Avalos 
3702940b44dSPeter Avalos 	hash_4_calc();
3712940b44dSPeter Avalos 
3722940b44dSPeter Avalos 	uint32_t delta2 = pos - mf->hash[hash_2_value];
3732940b44dSPeter Avalos 	const uint32_t delta3
3742940b44dSPeter Avalos 			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
3752940b44dSPeter Avalos 	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
3762940b44dSPeter Avalos 
3772940b44dSPeter Avalos 	mf->hash[hash_2_value ] = pos;
3782940b44dSPeter Avalos 	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
3792940b44dSPeter Avalos 	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
3802940b44dSPeter Avalos 
3812940b44dSPeter Avalos 	uint32_t len_best = 1;
3822940b44dSPeter Avalos 
3832940b44dSPeter Avalos 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
3842940b44dSPeter Avalos 		len_best = 2;
3852940b44dSPeter Avalos 		matches[0].len = 2;
3862940b44dSPeter Avalos 		matches[0].dist = delta2 - 1;
3872940b44dSPeter Avalos 		matches_count = 1;
3882940b44dSPeter Avalos 	}
3892940b44dSPeter Avalos 
3902940b44dSPeter Avalos 	if (delta2 != delta3 && delta3 < mf->cyclic_size
3912940b44dSPeter Avalos 			&& *(cur - delta3) == *cur) {
3922940b44dSPeter Avalos 		len_best = 3;
3932940b44dSPeter Avalos 		matches[matches_count++].dist = delta3 - 1;
3942940b44dSPeter Avalos 		delta2 = delta3;
3952940b44dSPeter Avalos 	}
3962940b44dSPeter Avalos 
3972940b44dSPeter Avalos 	if (matches_count != 0) {
39815ab8c86SJohn Marino 		len_best = lzma_memcmplen(cur - delta2, cur,
39915ab8c86SJohn Marino 				len_best, len_limit);
4002940b44dSPeter Avalos 
4012940b44dSPeter Avalos 		matches[matches_count - 1].len = len_best;
4022940b44dSPeter Avalos 
4032940b44dSPeter Avalos 		if (len_best == len_limit) {
4042940b44dSPeter Avalos 			hc_skip();
4052940b44dSPeter Avalos 			return matches_count;
4062940b44dSPeter Avalos 		}
4072940b44dSPeter Avalos 	}
4082940b44dSPeter Avalos 
4092940b44dSPeter Avalos 	if (len_best < 3)
4102940b44dSPeter Avalos 		len_best = 3;
4112940b44dSPeter Avalos 
4122940b44dSPeter Avalos 	hc_find(len_best);
4132940b44dSPeter Avalos }
4142940b44dSPeter Avalos 
4152940b44dSPeter Avalos 
4162940b44dSPeter Avalos extern void
lzma_mf_hc4_skip(lzma_mf * mf,uint32_t amount)4172940b44dSPeter Avalos lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount)
4182940b44dSPeter Avalos {
4192940b44dSPeter Avalos 	do {
4202940b44dSPeter Avalos 		if (mf_avail(mf) < 4) {
4212940b44dSPeter Avalos 			move_pending(mf);
4222940b44dSPeter Avalos 			continue;
4232940b44dSPeter Avalos 		}
4242940b44dSPeter Avalos 
4252940b44dSPeter Avalos 		const uint8_t *cur = mf_ptr(mf);
4262940b44dSPeter Avalos 		const uint32_t pos = mf->read_pos + mf->offset;
4272940b44dSPeter Avalos 
4282940b44dSPeter Avalos 		hash_4_calc();
4292940b44dSPeter Avalos 
4302940b44dSPeter Avalos 		const uint32_t cur_match
4312940b44dSPeter Avalos 				= mf->hash[FIX_4_HASH_SIZE + hash_value];
4322940b44dSPeter Avalos 
4332940b44dSPeter Avalos 		mf->hash[hash_2_value] = pos;
4342940b44dSPeter Avalos 		mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
4352940b44dSPeter Avalos 		mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
4362940b44dSPeter Avalos 
4372940b44dSPeter Avalos 		hc_skip();
4382940b44dSPeter Avalos 
4392940b44dSPeter Avalos 	} while (--amount != 0);
4402940b44dSPeter Avalos }
4412940b44dSPeter Avalos #endif
4422940b44dSPeter Avalos 
4432940b44dSPeter Avalos 
4442940b44dSPeter Avalos /////////////////
4452940b44dSPeter Avalos // Binary Tree //
4462940b44dSPeter Avalos /////////////////
4472940b44dSPeter Avalos 
4482940b44dSPeter Avalos #if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4)
4492940b44dSPeter Avalos static lzma_match *
bt_find_func(const uint32_t len_limit,const uint32_t pos,const uint8_t * const cur,uint32_t cur_match,uint32_t depth,uint32_t * const son,const uint32_t cyclic_pos,const uint32_t cyclic_size,lzma_match * matches,uint32_t len_best)4502940b44dSPeter Avalos bt_find_func(
4512940b44dSPeter Avalos 		const uint32_t len_limit,
4522940b44dSPeter Avalos 		const uint32_t pos,
4532940b44dSPeter Avalos 		const uint8_t *const cur,
4542940b44dSPeter Avalos 		uint32_t cur_match,
4552940b44dSPeter Avalos 		uint32_t depth,
4562940b44dSPeter Avalos 		uint32_t *const son,
4572940b44dSPeter Avalos 		const uint32_t cyclic_pos,
4582940b44dSPeter Avalos 		const uint32_t cyclic_size,
4592940b44dSPeter Avalos 		lzma_match *matches,
4602940b44dSPeter Avalos 		uint32_t len_best)
4612940b44dSPeter Avalos {
4622940b44dSPeter Avalos 	uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
4632940b44dSPeter Avalos 	uint32_t *ptr1 = son + (cyclic_pos << 1);
4642940b44dSPeter Avalos 
4652940b44dSPeter Avalos 	uint32_t len0 = 0;
4662940b44dSPeter Avalos 	uint32_t len1 = 0;
4672940b44dSPeter Avalos 
4682940b44dSPeter Avalos 	while (true) {
4692940b44dSPeter Avalos 		const uint32_t delta = pos - cur_match;
4702940b44dSPeter Avalos 		if (depth-- == 0 || delta >= cyclic_size) {
4712940b44dSPeter Avalos 			*ptr0 = EMPTY_HASH_VALUE;
4722940b44dSPeter Avalos 			*ptr1 = EMPTY_HASH_VALUE;
4732940b44dSPeter Avalos 			return matches;
4742940b44dSPeter Avalos 		}
4752940b44dSPeter Avalos 
4762940b44dSPeter Avalos 		uint32_t *const pair = son + ((cyclic_pos - delta
4772940b44dSPeter Avalos 				+ (delta > cyclic_pos ? cyclic_size : 0))
4782940b44dSPeter Avalos 				<< 1);
4792940b44dSPeter Avalos 
4802940b44dSPeter Avalos 		const uint8_t *const pb = cur - delta;
4812940b44dSPeter Avalos 		uint32_t len = my_min(len0, len1);
4822940b44dSPeter Avalos 
4832940b44dSPeter Avalos 		if (pb[len] == cur[len]) {
48415ab8c86SJohn Marino 			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
4852940b44dSPeter Avalos 
4862940b44dSPeter Avalos 			if (len_best < len) {
4872940b44dSPeter Avalos 				len_best = len;
4882940b44dSPeter Avalos 				matches->len = len;
4892940b44dSPeter Avalos 				matches->dist = delta - 1;
4902940b44dSPeter Avalos 				++matches;
4912940b44dSPeter Avalos 
4922940b44dSPeter Avalos 				if (len == len_limit) {
4932940b44dSPeter Avalos 					*ptr1 = pair[0];
4942940b44dSPeter Avalos 					*ptr0 = pair[1];
4952940b44dSPeter Avalos 					return matches;
4962940b44dSPeter Avalos 				}
4972940b44dSPeter Avalos 			}
4982940b44dSPeter Avalos 		}
4992940b44dSPeter Avalos 
5002940b44dSPeter Avalos 		if (pb[len] < cur[len]) {
5012940b44dSPeter Avalos 			*ptr1 = cur_match;
5022940b44dSPeter Avalos 			ptr1 = pair + 1;
5032940b44dSPeter Avalos 			cur_match = *ptr1;
5042940b44dSPeter Avalos 			len1 = len;
5052940b44dSPeter Avalos 		} else {
5062940b44dSPeter Avalos 			*ptr0 = cur_match;
5072940b44dSPeter Avalos 			ptr0 = pair;
5082940b44dSPeter Avalos 			cur_match = *ptr0;
5092940b44dSPeter Avalos 			len0 = len;
5102940b44dSPeter Avalos 		}
5112940b44dSPeter Avalos 	}
5122940b44dSPeter Avalos }
5132940b44dSPeter Avalos 
5142940b44dSPeter Avalos 
5152940b44dSPeter Avalos static void
bt_skip_func(const uint32_t len_limit,const uint32_t pos,const uint8_t * const cur,uint32_t cur_match,uint32_t depth,uint32_t * const son,const uint32_t cyclic_pos,const uint32_t cyclic_size)5162940b44dSPeter Avalos bt_skip_func(
5172940b44dSPeter Avalos 		const uint32_t len_limit,
5182940b44dSPeter Avalos 		const uint32_t pos,
5192940b44dSPeter Avalos 		const uint8_t *const cur,
5202940b44dSPeter Avalos 		uint32_t cur_match,
5212940b44dSPeter Avalos 		uint32_t depth,
5222940b44dSPeter Avalos 		uint32_t *const son,
5232940b44dSPeter Avalos 		const uint32_t cyclic_pos,
5242940b44dSPeter Avalos 		const uint32_t cyclic_size)
5252940b44dSPeter Avalos {
5262940b44dSPeter Avalos 	uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
5272940b44dSPeter Avalos 	uint32_t *ptr1 = son + (cyclic_pos << 1);
5282940b44dSPeter Avalos 
5292940b44dSPeter Avalos 	uint32_t len0 = 0;
5302940b44dSPeter Avalos 	uint32_t len1 = 0;
5312940b44dSPeter Avalos 
5322940b44dSPeter Avalos 	while (true) {
5332940b44dSPeter Avalos 		const uint32_t delta = pos - cur_match;
5342940b44dSPeter Avalos 		if (depth-- == 0 || delta >= cyclic_size) {
5352940b44dSPeter Avalos 			*ptr0 = EMPTY_HASH_VALUE;
5362940b44dSPeter Avalos 			*ptr1 = EMPTY_HASH_VALUE;
5372940b44dSPeter Avalos 			return;
5382940b44dSPeter Avalos 		}
5392940b44dSPeter Avalos 
5402940b44dSPeter Avalos 		uint32_t *pair = son + ((cyclic_pos - delta
5412940b44dSPeter Avalos 				+ (delta > cyclic_pos ? cyclic_size : 0))
5422940b44dSPeter Avalos 				<< 1);
5432940b44dSPeter Avalos 		const uint8_t *pb = cur - delta;
5442940b44dSPeter Avalos 		uint32_t len = my_min(len0, len1);
5452940b44dSPeter Avalos 
5462940b44dSPeter Avalos 		if (pb[len] == cur[len]) {
54715ab8c86SJohn Marino 			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
5482940b44dSPeter Avalos 
5492940b44dSPeter Avalos 			if (len == len_limit) {
5502940b44dSPeter Avalos 				*ptr1 = pair[0];
5512940b44dSPeter Avalos 				*ptr0 = pair[1];
5522940b44dSPeter Avalos 				return;
5532940b44dSPeter Avalos 			}
5542940b44dSPeter Avalos 		}
5552940b44dSPeter Avalos 
5562940b44dSPeter Avalos 		if (pb[len] < cur[len]) {
5572940b44dSPeter Avalos 			*ptr1 = cur_match;
5582940b44dSPeter Avalos 			ptr1 = pair + 1;
5592940b44dSPeter Avalos 			cur_match = *ptr1;
5602940b44dSPeter Avalos 			len1 = len;
5612940b44dSPeter Avalos 		} else {
5622940b44dSPeter Avalos 			*ptr0 = cur_match;
5632940b44dSPeter Avalos 			ptr0 = pair;
5642940b44dSPeter Avalos 			cur_match = *ptr0;
5652940b44dSPeter Avalos 			len0 = len;
5662940b44dSPeter Avalos 		}
5672940b44dSPeter Avalos 	}
5682940b44dSPeter Avalos }
5692940b44dSPeter Avalos 
5702940b44dSPeter Avalos 
5712940b44dSPeter Avalos #define bt_find(len_best) \
5722940b44dSPeter Avalos 	call_find(bt_find_func, len_best)
5732940b44dSPeter Avalos 
5742940b44dSPeter Avalos #define bt_skip() \
5752940b44dSPeter Avalos do { \
5762940b44dSPeter Avalos 	bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \
5772940b44dSPeter Avalos 			mf->son, mf->cyclic_pos, \
5782940b44dSPeter Avalos 			mf->cyclic_size); \
5792940b44dSPeter Avalos 	move_pos(mf); \
5802940b44dSPeter Avalos } while (0)
5812940b44dSPeter Avalos 
5822940b44dSPeter Avalos #endif
5832940b44dSPeter Avalos 
5842940b44dSPeter Avalos 
5852940b44dSPeter Avalos #ifdef HAVE_MF_BT2
5862940b44dSPeter Avalos extern uint32_t
lzma_mf_bt2_find(lzma_mf * mf,lzma_match * matches)5872940b44dSPeter Avalos lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches)
5882940b44dSPeter Avalos {
5892940b44dSPeter Avalos 	header_find(true, 2);
5902940b44dSPeter Avalos 
5912940b44dSPeter Avalos 	hash_2_calc();
5922940b44dSPeter Avalos 
5932940b44dSPeter Avalos 	const uint32_t cur_match = mf->hash[hash_value];
5942940b44dSPeter Avalos 	mf->hash[hash_value] = pos;
5952940b44dSPeter Avalos 
5962940b44dSPeter Avalos 	bt_find(1);
5972940b44dSPeter Avalos }
5982940b44dSPeter Avalos 
5992940b44dSPeter Avalos 
6002940b44dSPeter Avalos extern void
lzma_mf_bt2_skip(lzma_mf * mf,uint32_t amount)6012940b44dSPeter Avalos lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount)
6022940b44dSPeter Avalos {
6032940b44dSPeter Avalos 	do {
6042940b44dSPeter Avalos 		header_skip(true, 2);
6052940b44dSPeter Avalos 
6062940b44dSPeter Avalos 		hash_2_calc();
6072940b44dSPeter Avalos 
6082940b44dSPeter Avalos 		const uint32_t cur_match = mf->hash[hash_value];
6092940b44dSPeter Avalos 		mf->hash[hash_value] = pos;
6102940b44dSPeter Avalos 
6112940b44dSPeter Avalos 		bt_skip();
6122940b44dSPeter Avalos 
6132940b44dSPeter Avalos 	} while (--amount != 0);
6142940b44dSPeter Avalos }
6152940b44dSPeter Avalos #endif
6162940b44dSPeter Avalos 
6172940b44dSPeter Avalos 
6182940b44dSPeter Avalos #ifdef HAVE_MF_BT3
6192940b44dSPeter Avalos extern uint32_t
lzma_mf_bt3_find(lzma_mf * mf,lzma_match * matches)6202940b44dSPeter Avalos lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
6212940b44dSPeter Avalos {
6222940b44dSPeter Avalos 	header_find(true, 3);
6232940b44dSPeter Avalos 
6242940b44dSPeter Avalos 	hash_3_calc();
6252940b44dSPeter Avalos 
6262940b44dSPeter Avalos 	const uint32_t delta2 = pos - mf->hash[hash_2_value];
6272940b44dSPeter Avalos 	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
6282940b44dSPeter Avalos 
6292940b44dSPeter Avalos 	mf->hash[hash_2_value] = pos;
6302940b44dSPeter Avalos 	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
6312940b44dSPeter Avalos 
6322940b44dSPeter Avalos 	uint32_t len_best = 2;
6332940b44dSPeter Avalos 
6342940b44dSPeter Avalos 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
63515ab8c86SJohn Marino 		len_best = lzma_memcmplen(
63615ab8c86SJohn Marino 				cur, cur - delta2, len_best, len_limit);
6372940b44dSPeter Avalos 
6382940b44dSPeter Avalos 		matches[0].len = len_best;
6392940b44dSPeter Avalos 		matches[0].dist = delta2 - 1;
6402940b44dSPeter Avalos 		matches_count = 1;
6412940b44dSPeter Avalos 
6422940b44dSPeter Avalos 		if (len_best == len_limit) {
6432940b44dSPeter Avalos 			bt_skip();
6442940b44dSPeter Avalos 			return 1; // matches_count
6452940b44dSPeter Avalos 		}
6462940b44dSPeter Avalos 	}
6472940b44dSPeter Avalos 
6482940b44dSPeter Avalos 	bt_find(len_best);
6492940b44dSPeter Avalos }
6502940b44dSPeter Avalos 
6512940b44dSPeter Avalos 
6522940b44dSPeter Avalos extern void
lzma_mf_bt3_skip(lzma_mf * mf,uint32_t amount)6532940b44dSPeter Avalos lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount)
6542940b44dSPeter Avalos {
6552940b44dSPeter Avalos 	do {
6562940b44dSPeter Avalos 		header_skip(true, 3);
6572940b44dSPeter Avalos 
6582940b44dSPeter Avalos 		hash_3_calc();
6592940b44dSPeter Avalos 
6602940b44dSPeter Avalos 		const uint32_t cur_match
6612940b44dSPeter Avalos 				= mf->hash[FIX_3_HASH_SIZE + hash_value];
6622940b44dSPeter Avalos 
6632940b44dSPeter Avalos 		mf->hash[hash_2_value] = pos;
6642940b44dSPeter Avalos 		mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
6652940b44dSPeter Avalos 
6662940b44dSPeter Avalos 		bt_skip();
6672940b44dSPeter Avalos 
6682940b44dSPeter Avalos 	} while (--amount != 0);
6692940b44dSPeter Avalos }
6702940b44dSPeter Avalos #endif
6712940b44dSPeter Avalos 
6722940b44dSPeter Avalos 
6732940b44dSPeter Avalos #ifdef HAVE_MF_BT4
6742940b44dSPeter Avalos extern uint32_t
lzma_mf_bt4_find(lzma_mf * mf,lzma_match * matches)6752940b44dSPeter Avalos lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
6762940b44dSPeter Avalos {
6772940b44dSPeter Avalos 	header_find(true, 4);
6782940b44dSPeter Avalos 
6792940b44dSPeter Avalos 	hash_4_calc();
6802940b44dSPeter Avalos 
6812940b44dSPeter Avalos 	uint32_t delta2 = pos - mf->hash[hash_2_value];
6822940b44dSPeter Avalos 	const uint32_t delta3
6832940b44dSPeter Avalos 			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
6842940b44dSPeter Avalos 	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
6852940b44dSPeter Avalos 
6862940b44dSPeter Avalos 	mf->hash[hash_2_value] = pos;
6872940b44dSPeter Avalos 	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
6882940b44dSPeter Avalos 	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
6892940b44dSPeter Avalos 
6902940b44dSPeter Avalos 	uint32_t len_best = 1;
6912940b44dSPeter Avalos 
6922940b44dSPeter Avalos 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
6932940b44dSPeter Avalos 		len_best = 2;
6942940b44dSPeter Avalos 		matches[0].len = 2;
6952940b44dSPeter Avalos 		matches[0].dist = delta2 - 1;
6962940b44dSPeter Avalos 		matches_count = 1;
6972940b44dSPeter Avalos 	}
6982940b44dSPeter Avalos 
6992940b44dSPeter Avalos 	if (delta2 != delta3 && delta3 < mf->cyclic_size
7002940b44dSPeter Avalos 			&& *(cur - delta3) == *cur) {
7012940b44dSPeter Avalos 		len_best = 3;
7022940b44dSPeter Avalos 		matches[matches_count++].dist = delta3 - 1;
7032940b44dSPeter Avalos 		delta2 = delta3;
7042940b44dSPeter Avalos 	}
7052940b44dSPeter Avalos 
7062940b44dSPeter Avalos 	if (matches_count != 0) {
70715ab8c86SJohn Marino 		len_best = lzma_memcmplen(
70815ab8c86SJohn Marino 				cur, cur - delta2, len_best, len_limit);
7092940b44dSPeter Avalos 
7102940b44dSPeter Avalos 		matches[matches_count - 1].len = len_best;
7112940b44dSPeter Avalos 
7122940b44dSPeter Avalos 		if (len_best == len_limit) {
7132940b44dSPeter Avalos 			bt_skip();
7142940b44dSPeter Avalos 			return matches_count;
7152940b44dSPeter Avalos 		}
7162940b44dSPeter Avalos 	}
7172940b44dSPeter Avalos 
7182940b44dSPeter Avalos 	if (len_best < 3)
7192940b44dSPeter Avalos 		len_best = 3;
7202940b44dSPeter Avalos 
7212940b44dSPeter Avalos 	bt_find(len_best);
7222940b44dSPeter Avalos }
7232940b44dSPeter Avalos 
7242940b44dSPeter Avalos 
7252940b44dSPeter Avalos extern void
lzma_mf_bt4_skip(lzma_mf * mf,uint32_t amount)7262940b44dSPeter Avalos lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount)
7272940b44dSPeter Avalos {
7282940b44dSPeter Avalos 	do {
7292940b44dSPeter Avalos 		header_skip(true, 4);
7302940b44dSPeter Avalos 
7312940b44dSPeter Avalos 		hash_4_calc();
7322940b44dSPeter Avalos 
7332940b44dSPeter Avalos 		const uint32_t cur_match
7342940b44dSPeter Avalos 				= mf->hash[FIX_4_HASH_SIZE + hash_value];
7352940b44dSPeter Avalos 
7362940b44dSPeter Avalos 		mf->hash[hash_2_value] = pos;
7372940b44dSPeter Avalos 		mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
7382940b44dSPeter Avalos 		mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
7392940b44dSPeter Avalos 
7402940b44dSPeter Avalos 		bt_skip();
7412940b44dSPeter Avalos 
7422940b44dSPeter Avalos 	} while (--amount != 0);
7432940b44dSPeter Avalos }
7442940b44dSPeter Avalos #endif
745