12940b44dSPeter Avalos /////////////////////////////////////////////////////////////////////////////// 22940b44dSPeter Avalos // 32940b44dSPeter Avalos /// \file lz_encoder_mf.c 42940b44dSPeter Avalos /// \brief Match finders 52940b44dSPeter Avalos /// 62940b44dSPeter Avalos // Authors: Igor Pavlov 72940b44dSPeter Avalos // Lasse Collin 82940b44dSPeter Avalos // 92940b44dSPeter Avalos // This file has been put into the public domain. 102940b44dSPeter Avalos // You can do whatever you want with this file. 112940b44dSPeter Avalos // 122940b44dSPeter Avalos /////////////////////////////////////////////////////////////////////////////// 132940b44dSPeter Avalos 142940b44dSPeter Avalos #include "lz_encoder.h" 152940b44dSPeter Avalos #include "lz_encoder_hash.h" 16*15ab8c86SJohn Marino #include "memcmplen.h" 172940b44dSPeter Avalos 182940b44dSPeter Avalos 192940b44dSPeter Avalos /// \brief Find matches starting from the current byte 202940b44dSPeter Avalos /// 212940b44dSPeter Avalos /// \return The length of the longest match found 222940b44dSPeter Avalos extern uint32_t 232940b44dSPeter Avalos lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) 242940b44dSPeter Avalos { 252940b44dSPeter Avalos // Call the match finder. It returns the number of length-distance 262940b44dSPeter Avalos // pairs found. 272940b44dSPeter Avalos // FIXME: Minimum count is zero, what _exactly_ is the maximum? 282940b44dSPeter Avalos const uint32_t count = mf->find(mf, matches); 292940b44dSPeter Avalos 302940b44dSPeter Avalos // Length of the longest match; assume that no matches were found 312940b44dSPeter Avalos // and thus the maximum length is zero. 322940b44dSPeter Avalos uint32_t len_best = 0; 332940b44dSPeter Avalos 342940b44dSPeter Avalos if (count > 0) { 352940b44dSPeter Avalos #ifndef NDEBUG 362940b44dSPeter Avalos // Validate the matches. 372940b44dSPeter Avalos for (uint32_t i = 0; i < count; ++i) { 382940b44dSPeter Avalos assert(matches[i].len <= mf->nice_len); 392940b44dSPeter Avalos assert(matches[i].dist < mf->read_pos); 402940b44dSPeter Avalos assert(memcmp(mf_ptr(mf) - 1, 412940b44dSPeter Avalos mf_ptr(mf) - matches[i].dist - 2, 422940b44dSPeter Avalos matches[i].len) == 0); 432940b44dSPeter Avalos } 442940b44dSPeter Avalos #endif 452940b44dSPeter Avalos 462940b44dSPeter Avalos // The last used element in the array contains 472940b44dSPeter Avalos // the longest match. 482940b44dSPeter Avalos len_best = matches[count - 1].len; 492940b44dSPeter Avalos 502940b44dSPeter Avalos // If a match of maximum search length was found, try to 512940b44dSPeter Avalos // extend the match to maximum possible length. 522940b44dSPeter Avalos if (len_best == mf->nice_len) { 532940b44dSPeter Avalos // The limit for the match length is either the 542940b44dSPeter Avalos // maximum match length supported by the LZ-based 552940b44dSPeter Avalos // encoder or the number of bytes left in the 562940b44dSPeter Avalos // dictionary, whichever is smaller. 572940b44dSPeter Avalos uint32_t limit = mf_avail(mf) + 1; 582940b44dSPeter Avalos if (limit > mf->match_len_max) 592940b44dSPeter Avalos limit = mf->match_len_max; 602940b44dSPeter Avalos 612940b44dSPeter Avalos // Pointer to the byte we just ran through 622940b44dSPeter Avalos // the match finder. 632940b44dSPeter Avalos const uint8_t *p1 = mf_ptr(mf) - 1; 642940b44dSPeter Avalos 652940b44dSPeter Avalos // Pointer to the beginning of the match. We need -1 662940b44dSPeter Avalos // here because the match distances are zero based. 672940b44dSPeter Avalos const uint8_t *p2 = p1 - matches[count - 1].dist - 1; 682940b44dSPeter Avalos 69*15ab8c86SJohn Marino len_best = lzma_memcmplen(p1, p2, len_best, limit); 702940b44dSPeter Avalos } 712940b44dSPeter Avalos } 722940b44dSPeter Avalos 732940b44dSPeter Avalos *count_ptr = count; 742940b44dSPeter Avalos 752940b44dSPeter Avalos // Finally update the read position to indicate that match finder was 762940b44dSPeter Avalos // run for this dictionary offset. 772940b44dSPeter Avalos ++mf->read_ahead; 782940b44dSPeter Avalos 792940b44dSPeter Avalos return len_best; 802940b44dSPeter Avalos } 812940b44dSPeter Avalos 822940b44dSPeter Avalos 832940b44dSPeter Avalos /// Hash value to indicate unused element in the hash. Since we start the 842940b44dSPeter Avalos /// positions from dict_size + 1, zero is always too far to qualify 852940b44dSPeter Avalos /// as usable match position. 862940b44dSPeter Avalos #define EMPTY_HASH_VALUE 0 872940b44dSPeter Avalos 882940b44dSPeter Avalos 892940b44dSPeter Avalos /// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos 902940b44dSPeter Avalos /// reaches MUST_NORMALIZE_POS. 912940b44dSPeter Avalos #define MUST_NORMALIZE_POS UINT32_MAX 922940b44dSPeter Avalos 932940b44dSPeter Avalos 942940b44dSPeter Avalos /// \brief Normalizes hash values 952940b44dSPeter Avalos /// 962940b44dSPeter Avalos /// The hash arrays store positions of match candidates. The positions are 972940b44dSPeter Avalos /// relative to an arbitrary offset that is not the same as the absolute 982940b44dSPeter Avalos /// offset in the input stream. The relative position of the current byte 992940b44dSPeter Avalos /// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are 1002940b44dSPeter Avalos /// the differences of the current read position and the position found from 1012940b44dSPeter Avalos /// the hash. 1022940b44dSPeter Avalos /// 1032940b44dSPeter Avalos /// To prevent integer overflows of the offsets stored in the hash arrays, 1042940b44dSPeter Avalos /// we need to "normalize" the stored values now and then. During the 1052940b44dSPeter Avalos /// normalization, we drop values that indicate distance greater than the 1062940b44dSPeter Avalos /// dictionary size, thus making space for new values. 1072940b44dSPeter Avalos static void 1082940b44dSPeter Avalos normalize(lzma_mf *mf) 1092940b44dSPeter Avalos { 1102940b44dSPeter Avalos assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); 1112940b44dSPeter Avalos 1122940b44dSPeter Avalos // In future we may not want to touch the lowest bits, because there 1132940b44dSPeter Avalos // may be match finders that use larger resolution than one byte. 1142940b44dSPeter Avalos const uint32_t subvalue 1152940b44dSPeter Avalos = (MUST_NORMALIZE_POS - mf->cyclic_size); 1162940b44dSPeter Avalos // & (~(UINT32_C(1) << 10) - 1); 1172940b44dSPeter Avalos 118*15ab8c86SJohn Marino for (uint32_t i = 0; i < mf->hash_count; ++i) { 1192940b44dSPeter Avalos // If the distance is greater than the dictionary size, 1202940b44dSPeter Avalos // we can simply mark the hash element as empty. 121*15ab8c86SJohn Marino if (mf->hash[i] <= subvalue) 122*15ab8c86SJohn Marino mf->hash[i] = EMPTY_HASH_VALUE; 1232940b44dSPeter Avalos else 124*15ab8c86SJohn Marino mf->hash[i] -= subvalue; 125*15ab8c86SJohn Marino } 126*15ab8c86SJohn Marino 127*15ab8c86SJohn Marino for (uint32_t i = 0; i < mf->sons_count; ++i) { 128*15ab8c86SJohn Marino // Do the same for mf->son. 129*15ab8c86SJohn Marino // 130*15ab8c86SJohn Marino // NOTE: There may be uninitialized elements in mf->son. 131*15ab8c86SJohn Marino // Valgrind may complain that the "if" below depends on 132*15ab8c86SJohn Marino // an uninitialized value. In this case it is safe to ignore 133*15ab8c86SJohn Marino // the warning. See also the comments in lz_encoder_init() 134*15ab8c86SJohn Marino // in lz_encoder.c. 135*15ab8c86SJohn Marino if (mf->son[i] <= subvalue) 136*15ab8c86SJohn Marino mf->son[i] = EMPTY_HASH_VALUE; 137*15ab8c86SJohn Marino else 138*15ab8c86SJohn Marino mf->son[i] -= subvalue; 1392940b44dSPeter Avalos } 1402940b44dSPeter Avalos 1412940b44dSPeter Avalos // Update offset to match the new locations. 1422940b44dSPeter Avalos mf->offset -= subvalue; 1432940b44dSPeter Avalos 1442940b44dSPeter Avalos return; 1452940b44dSPeter Avalos } 1462940b44dSPeter Avalos 1472940b44dSPeter Avalos 1482940b44dSPeter Avalos /// Mark the current byte as processed from point of view of the match finder. 1492940b44dSPeter Avalos static void 1502940b44dSPeter Avalos move_pos(lzma_mf *mf) 1512940b44dSPeter Avalos { 1522940b44dSPeter Avalos if (++mf->cyclic_pos == mf->cyclic_size) 1532940b44dSPeter Avalos mf->cyclic_pos = 0; 1542940b44dSPeter Avalos 1552940b44dSPeter Avalos ++mf->read_pos; 1562940b44dSPeter Avalos assert(mf->read_pos <= mf->write_pos); 1572940b44dSPeter Avalos 1582940b44dSPeter Avalos if (unlikely(mf->read_pos + mf->offset == UINT32_MAX)) 1592940b44dSPeter Avalos normalize(mf); 1602940b44dSPeter Avalos } 1612940b44dSPeter Avalos 1622940b44dSPeter Avalos 1632940b44dSPeter Avalos /// When flushing, we cannot run the match finder unless there is nice_len 1642940b44dSPeter Avalos /// bytes available in the dictionary. Instead, we skip running the match 1652940b44dSPeter Avalos /// finder (indicating that no match was found), and count how many bytes we 1662940b44dSPeter Avalos /// have ignored this way. 1672940b44dSPeter Avalos /// 1682940b44dSPeter Avalos /// When new data is given after the flushing was completed, the match finder 1692940b44dSPeter Avalos /// is restarted by rewinding mf->read_pos backwards by mf->pending. Then 1702940b44dSPeter Avalos /// the missed bytes are added to the hash using the match finder's skip 1712940b44dSPeter Avalos /// function (with small amount of input, it may start using mf->pending 1722940b44dSPeter Avalos /// again if flushing). 1732940b44dSPeter Avalos /// 1742940b44dSPeter Avalos /// Due to this rewinding, we don't touch cyclic_pos or test for 1752940b44dSPeter Avalos /// normalization. It will be done when the match finder's skip function 1762940b44dSPeter Avalos /// catches up after a flush. 1772940b44dSPeter Avalos static void 1782940b44dSPeter Avalos move_pending(lzma_mf *mf) 1792940b44dSPeter Avalos { 1802940b44dSPeter Avalos ++mf->read_pos; 1812940b44dSPeter Avalos assert(mf->read_pos <= mf->write_pos); 1822940b44dSPeter Avalos ++mf->pending; 1832940b44dSPeter Avalos } 1842940b44dSPeter Avalos 1852940b44dSPeter Avalos 1862940b44dSPeter Avalos /// Calculate len_limit and determine if there is enough input to run 1872940b44dSPeter Avalos /// the actual match finder code. Sets up "cur" and "pos". This macro 1882940b44dSPeter Avalos /// is used by all find functions and binary tree skip functions. Hash 1892940b44dSPeter Avalos /// chain skip function doesn't need len_limit so a simpler code is used 1902940b44dSPeter Avalos /// in them. 1912940b44dSPeter Avalos #define header(is_bt, len_min, ret_op) \ 1922940b44dSPeter Avalos uint32_t len_limit = mf_avail(mf); \ 1932940b44dSPeter Avalos if (mf->nice_len <= len_limit) { \ 1942940b44dSPeter Avalos len_limit = mf->nice_len; \ 1952940b44dSPeter Avalos } else if (len_limit < (len_min) \ 1962940b44dSPeter Avalos || (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \ 1972940b44dSPeter Avalos assert(mf->action != LZMA_RUN); \ 1982940b44dSPeter Avalos move_pending(mf); \ 1992940b44dSPeter Avalos ret_op; \ 2002940b44dSPeter Avalos } \ 2012940b44dSPeter Avalos const uint8_t *cur = mf_ptr(mf); \ 2022940b44dSPeter Avalos const uint32_t pos = mf->read_pos + mf->offset 2032940b44dSPeter Avalos 2042940b44dSPeter Avalos 2052940b44dSPeter Avalos /// Header for find functions. "return 0" indicates that zero matches 2062940b44dSPeter Avalos /// were found. 2072940b44dSPeter Avalos #define header_find(is_bt, len_min) \ 2082940b44dSPeter Avalos header(is_bt, len_min, return 0); \ 2092940b44dSPeter Avalos uint32_t matches_count = 0 2102940b44dSPeter Avalos 2112940b44dSPeter Avalos 2122940b44dSPeter Avalos /// Header for a loop in a skip function. "continue" tells to skip the rest 2132940b44dSPeter Avalos /// of the code in the loop. 2142940b44dSPeter Avalos #define header_skip(is_bt, len_min) \ 2152940b44dSPeter Avalos header(is_bt, len_min, continue) 2162940b44dSPeter Avalos 2172940b44dSPeter Avalos 2182940b44dSPeter Avalos /// Calls hc_find_func() or bt_find_func() and calculates the total number 2192940b44dSPeter Avalos /// of matches found. Updates the dictionary position and returns the number 2202940b44dSPeter Avalos /// of matches found. 2212940b44dSPeter Avalos #define call_find(func, len_best) \ 2222940b44dSPeter Avalos do { \ 2232940b44dSPeter Avalos matches_count = func(len_limit, pos, cur, cur_match, mf->depth, \ 2242940b44dSPeter Avalos mf->son, mf->cyclic_pos, mf->cyclic_size, \ 2252940b44dSPeter Avalos matches + matches_count, len_best) \ 2262940b44dSPeter Avalos - matches; \ 2272940b44dSPeter Avalos move_pos(mf); \ 2282940b44dSPeter Avalos return matches_count; \ 2292940b44dSPeter Avalos } while (0) 2302940b44dSPeter Avalos 2312940b44dSPeter Avalos 2322940b44dSPeter Avalos //////////////// 2332940b44dSPeter Avalos // Hash Chain // 2342940b44dSPeter Avalos //////////////// 2352940b44dSPeter Avalos 2362940b44dSPeter Avalos #if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4) 2372940b44dSPeter Avalos /// 2382940b44dSPeter Avalos /// 2392940b44dSPeter Avalos /// \param len_limit Don't look for matches longer than len_limit. 2402940b44dSPeter Avalos /// \param pos lzma_mf.read_pos + lzma_mf.offset 2412940b44dSPeter Avalos /// \param cur Pointer to current byte (mf_ptr(mf)) 2422940b44dSPeter Avalos /// \param cur_match Start position of the current match candidate 2432940b44dSPeter Avalos /// \param depth Maximum length of the hash chain 2442940b44dSPeter Avalos /// \param son lzma_mf.son (contains the hash chain) 2452940b44dSPeter Avalos /// \param cyclic_pos 2462940b44dSPeter Avalos /// \param cyclic_size 2472940b44dSPeter Avalos /// \param matches Array to hold the matches. 2482940b44dSPeter Avalos /// \param len_best The length of the longest match found so far. 2492940b44dSPeter Avalos static lzma_match * 2502940b44dSPeter Avalos hc_find_func( 2512940b44dSPeter Avalos const uint32_t len_limit, 2522940b44dSPeter Avalos const uint32_t pos, 2532940b44dSPeter Avalos const uint8_t *const cur, 2542940b44dSPeter Avalos uint32_t cur_match, 2552940b44dSPeter Avalos uint32_t depth, 2562940b44dSPeter Avalos uint32_t *const son, 2572940b44dSPeter Avalos const uint32_t cyclic_pos, 2582940b44dSPeter Avalos const uint32_t cyclic_size, 2592940b44dSPeter Avalos lzma_match *matches, 2602940b44dSPeter Avalos uint32_t len_best) 2612940b44dSPeter Avalos { 2622940b44dSPeter Avalos son[cyclic_pos] = cur_match; 2632940b44dSPeter Avalos 2642940b44dSPeter Avalos while (true) { 2652940b44dSPeter Avalos const uint32_t delta = pos - cur_match; 2662940b44dSPeter Avalos if (depth-- == 0 || delta >= cyclic_size) 2672940b44dSPeter Avalos return matches; 2682940b44dSPeter Avalos 2692940b44dSPeter Avalos const uint8_t *const pb = cur - delta; 2702940b44dSPeter Avalos cur_match = son[cyclic_pos - delta 2712940b44dSPeter Avalos + (delta > cyclic_pos ? cyclic_size : 0)]; 2722940b44dSPeter Avalos 2732940b44dSPeter Avalos if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { 274*15ab8c86SJohn Marino uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit); 2752940b44dSPeter Avalos 2762940b44dSPeter Avalos if (len_best < len) { 2772940b44dSPeter Avalos len_best = len; 2782940b44dSPeter Avalos matches->len = len; 2792940b44dSPeter Avalos matches->dist = delta - 1; 2802940b44dSPeter Avalos ++matches; 2812940b44dSPeter Avalos 2822940b44dSPeter Avalos if (len == len_limit) 2832940b44dSPeter Avalos return matches; 2842940b44dSPeter Avalos } 2852940b44dSPeter Avalos } 2862940b44dSPeter Avalos } 2872940b44dSPeter Avalos } 2882940b44dSPeter Avalos 2892940b44dSPeter Avalos 2902940b44dSPeter Avalos #define hc_find(len_best) \ 2912940b44dSPeter Avalos call_find(hc_find_func, len_best) 2922940b44dSPeter Avalos 2932940b44dSPeter Avalos 2942940b44dSPeter Avalos #define hc_skip() \ 2952940b44dSPeter Avalos do { \ 2962940b44dSPeter Avalos mf->son[mf->cyclic_pos] = cur_match; \ 2972940b44dSPeter Avalos move_pos(mf); \ 2982940b44dSPeter Avalos } while (0) 2992940b44dSPeter Avalos 3002940b44dSPeter Avalos #endif 3012940b44dSPeter Avalos 3022940b44dSPeter Avalos 3032940b44dSPeter Avalos #ifdef HAVE_MF_HC3 3042940b44dSPeter Avalos extern uint32_t 3052940b44dSPeter Avalos lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) 3062940b44dSPeter Avalos { 3072940b44dSPeter Avalos header_find(false, 3); 3082940b44dSPeter Avalos 3092940b44dSPeter Avalos hash_3_calc(); 3102940b44dSPeter Avalos 3112940b44dSPeter Avalos const uint32_t delta2 = pos - mf->hash[hash_2_value]; 3122940b44dSPeter Avalos const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; 3132940b44dSPeter Avalos 3142940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 3152940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; 3162940b44dSPeter Avalos 3172940b44dSPeter Avalos uint32_t len_best = 2; 3182940b44dSPeter Avalos 3192940b44dSPeter Avalos if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { 320*15ab8c86SJohn Marino len_best = lzma_memcmplen(cur - delta2, cur, 321*15ab8c86SJohn Marino len_best, len_limit); 3222940b44dSPeter Avalos 3232940b44dSPeter Avalos matches[0].len = len_best; 3242940b44dSPeter Avalos matches[0].dist = delta2 - 1; 3252940b44dSPeter Avalos matches_count = 1; 3262940b44dSPeter Avalos 3272940b44dSPeter Avalos if (len_best == len_limit) { 3282940b44dSPeter Avalos hc_skip(); 3292940b44dSPeter Avalos return 1; // matches_count 3302940b44dSPeter Avalos } 3312940b44dSPeter Avalos } 3322940b44dSPeter Avalos 3332940b44dSPeter Avalos hc_find(len_best); 3342940b44dSPeter Avalos } 3352940b44dSPeter Avalos 3362940b44dSPeter Avalos 3372940b44dSPeter Avalos extern void 3382940b44dSPeter Avalos lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) 3392940b44dSPeter Avalos { 3402940b44dSPeter Avalos do { 3412940b44dSPeter Avalos if (mf_avail(mf) < 3) { 3422940b44dSPeter Avalos move_pending(mf); 3432940b44dSPeter Avalos continue; 3442940b44dSPeter Avalos } 3452940b44dSPeter Avalos 3462940b44dSPeter Avalos const uint8_t *cur = mf_ptr(mf); 3472940b44dSPeter Avalos const uint32_t pos = mf->read_pos + mf->offset; 3482940b44dSPeter Avalos 3492940b44dSPeter Avalos hash_3_calc(); 3502940b44dSPeter Avalos 3512940b44dSPeter Avalos const uint32_t cur_match 3522940b44dSPeter Avalos = mf->hash[FIX_3_HASH_SIZE + hash_value]; 3532940b44dSPeter Avalos 3542940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 3552940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; 3562940b44dSPeter Avalos 3572940b44dSPeter Avalos hc_skip(); 3582940b44dSPeter Avalos 3592940b44dSPeter Avalos } while (--amount != 0); 3602940b44dSPeter Avalos } 3612940b44dSPeter Avalos #endif 3622940b44dSPeter Avalos 3632940b44dSPeter Avalos 3642940b44dSPeter Avalos #ifdef HAVE_MF_HC4 3652940b44dSPeter Avalos extern uint32_t 3662940b44dSPeter Avalos lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) 3672940b44dSPeter Avalos { 3682940b44dSPeter Avalos header_find(false, 4); 3692940b44dSPeter Avalos 3702940b44dSPeter Avalos hash_4_calc(); 3712940b44dSPeter Avalos 3722940b44dSPeter Avalos uint32_t delta2 = pos - mf->hash[hash_2_value]; 3732940b44dSPeter Avalos const uint32_t delta3 3742940b44dSPeter Avalos = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; 3752940b44dSPeter Avalos const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; 3762940b44dSPeter Avalos 3772940b44dSPeter Avalos mf->hash[hash_2_value ] = pos; 3782940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; 3792940b44dSPeter Avalos mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; 3802940b44dSPeter Avalos 3812940b44dSPeter Avalos uint32_t len_best = 1; 3822940b44dSPeter Avalos 3832940b44dSPeter Avalos if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { 3842940b44dSPeter Avalos len_best = 2; 3852940b44dSPeter Avalos matches[0].len = 2; 3862940b44dSPeter Avalos matches[0].dist = delta2 - 1; 3872940b44dSPeter Avalos matches_count = 1; 3882940b44dSPeter Avalos } 3892940b44dSPeter Avalos 3902940b44dSPeter Avalos if (delta2 != delta3 && delta3 < mf->cyclic_size 3912940b44dSPeter Avalos && *(cur - delta3) == *cur) { 3922940b44dSPeter Avalos len_best = 3; 3932940b44dSPeter Avalos matches[matches_count++].dist = delta3 - 1; 3942940b44dSPeter Avalos delta2 = delta3; 3952940b44dSPeter Avalos } 3962940b44dSPeter Avalos 3972940b44dSPeter Avalos if (matches_count != 0) { 398*15ab8c86SJohn Marino len_best = lzma_memcmplen(cur - delta2, cur, 399*15ab8c86SJohn Marino len_best, len_limit); 4002940b44dSPeter Avalos 4012940b44dSPeter Avalos matches[matches_count - 1].len = len_best; 4022940b44dSPeter Avalos 4032940b44dSPeter Avalos if (len_best == len_limit) { 4042940b44dSPeter Avalos hc_skip(); 4052940b44dSPeter Avalos return matches_count; 4062940b44dSPeter Avalos } 4072940b44dSPeter Avalos } 4082940b44dSPeter Avalos 4092940b44dSPeter Avalos if (len_best < 3) 4102940b44dSPeter Avalos len_best = 3; 4112940b44dSPeter Avalos 4122940b44dSPeter Avalos hc_find(len_best); 4132940b44dSPeter Avalos } 4142940b44dSPeter Avalos 4152940b44dSPeter Avalos 4162940b44dSPeter Avalos extern void 4172940b44dSPeter Avalos lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) 4182940b44dSPeter Avalos { 4192940b44dSPeter Avalos do { 4202940b44dSPeter Avalos if (mf_avail(mf) < 4) { 4212940b44dSPeter Avalos move_pending(mf); 4222940b44dSPeter Avalos continue; 4232940b44dSPeter Avalos } 4242940b44dSPeter Avalos 4252940b44dSPeter Avalos const uint8_t *cur = mf_ptr(mf); 4262940b44dSPeter Avalos const uint32_t pos = mf->read_pos + mf->offset; 4272940b44dSPeter Avalos 4282940b44dSPeter Avalos hash_4_calc(); 4292940b44dSPeter Avalos 4302940b44dSPeter Avalos const uint32_t cur_match 4312940b44dSPeter Avalos = mf->hash[FIX_4_HASH_SIZE + hash_value]; 4322940b44dSPeter Avalos 4332940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 4342940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; 4352940b44dSPeter Avalos mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; 4362940b44dSPeter Avalos 4372940b44dSPeter Avalos hc_skip(); 4382940b44dSPeter Avalos 4392940b44dSPeter Avalos } while (--amount != 0); 4402940b44dSPeter Avalos } 4412940b44dSPeter Avalos #endif 4422940b44dSPeter Avalos 4432940b44dSPeter Avalos 4442940b44dSPeter Avalos ///////////////// 4452940b44dSPeter Avalos // Binary Tree // 4462940b44dSPeter Avalos ///////////////// 4472940b44dSPeter Avalos 4482940b44dSPeter Avalos #if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4) 4492940b44dSPeter Avalos static lzma_match * 4502940b44dSPeter Avalos bt_find_func( 4512940b44dSPeter Avalos const uint32_t len_limit, 4522940b44dSPeter Avalos const uint32_t pos, 4532940b44dSPeter Avalos const uint8_t *const cur, 4542940b44dSPeter Avalos uint32_t cur_match, 4552940b44dSPeter Avalos uint32_t depth, 4562940b44dSPeter Avalos uint32_t *const son, 4572940b44dSPeter Avalos const uint32_t cyclic_pos, 4582940b44dSPeter Avalos const uint32_t cyclic_size, 4592940b44dSPeter Avalos lzma_match *matches, 4602940b44dSPeter Avalos uint32_t len_best) 4612940b44dSPeter Avalos { 4622940b44dSPeter Avalos uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; 4632940b44dSPeter Avalos uint32_t *ptr1 = son + (cyclic_pos << 1); 4642940b44dSPeter Avalos 4652940b44dSPeter Avalos uint32_t len0 = 0; 4662940b44dSPeter Avalos uint32_t len1 = 0; 4672940b44dSPeter Avalos 4682940b44dSPeter Avalos while (true) { 4692940b44dSPeter Avalos const uint32_t delta = pos - cur_match; 4702940b44dSPeter Avalos if (depth-- == 0 || delta >= cyclic_size) { 4712940b44dSPeter Avalos *ptr0 = EMPTY_HASH_VALUE; 4722940b44dSPeter Avalos *ptr1 = EMPTY_HASH_VALUE; 4732940b44dSPeter Avalos return matches; 4742940b44dSPeter Avalos } 4752940b44dSPeter Avalos 4762940b44dSPeter Avalos uint32_t *const pair = son + ((cyclic_pos - delta 4772940b44dSPeter Avalos + (delta > cyclic_pos ? cyclic_size : 0)) 4782940b44dSPeter Avalos << 1); 4792940b44dSPeter Avalos 4802940b44dSPeter Avalos const uint8_t *const pb = cur - delta; 4812940b44dSPeter Avalos uint32_t len = my_min(len0, len1); 4822940b44dSPeter Avalos 4832940b44dSPeter Avalos if (pb[len] == cur[len]) { 484*15ab8c86SJohn Marino len = lzma_memcmplen(pb, cur, len + 1, len_limit); 4852940b44dSPeter Avalos 4862940b44dSPeter Avalos if (len_best < len) { 4872940b44dSPeter Avalos len_best = len; 4882940b44dSPeter Avalos matches->len = len; 4892940b44dSPeter Avalos matches->dist = delta - 1; 4902940b44dSPeter Avalos ++matches; 4912940b44dSPeter Avalos 4922940b44dSPeter Avalos if (len == len_limit) { 4932940b44dSPeter Avalos *ptr1 = pair[0]; 4942940b44dSPeter Avalos *ptr0 = pair[1]; 4952940b44dSPeter Avalos return matches; 4962940b44dSPeter Avalos } 4972940b44dSPeter Avalos } 4982940b44dSPeter Avalos } 4992940b44dSPeter Avalos 5002940b44dSPeter Avalos if (pb[len] < cur[len]) { 5012940b44dSPeter Avalos *ptr1 = cur_match; 5022940b44dSPeter Avalos ptr1 = pair + 1; 5032940b44dSPeter Avalos cur_match = *ptr1; 5042940b44dSPeter Avalos len1 = len; 5052940b44dSPeter Avalos } else { 5062940b44dSPeter Avalos *ptr0 = cur_match; 5072940b44dSPeter Avalos ptr0 = pair; 5082940b44dSPeter Avalos cur_match = *ptr0; 5092940b44dSPeter Avalos len0 = len; 5102940b44dSPeter Avalos } 5112940b44dSPeter Avalos } 5122940b44dSPeter Avalos } 5132940b44dSPeter Avalos 5142940b44dSPeter Avalos 5152940b44dSPeter Avalos static void 5162940b44dSPeter Avalos bt_skip_func( 5172940b44dSPeter Avalos const uint32_t len_limit, 5182940b44dSPeter Avalos const uint32_t pos, 5192940b44dSPeter Avalos const uint8_t *const cur, 5202940b44dSPeter Avalos uint32_t cur_match, 5212940b44dSPeter Avalos uint32_t depth, 5222940b44dSPeter Avalos uint32_t *const son, 5232940b44dSPeter Avalos const uint32_t cyclic_pos, 5242940b44dSPeter Avalos const uint32_t cyclic_size) 5252940b44dSPeter Avalos { 5262940b44dSPeter Avalos uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; 5272940b44dSPeter Avalos uint32_t *ptr1 = son + (cyclic_pos << 1); 5282940b44dSPeter Avalos 5292940b44dSPeter Avalos uint32_t len0 = 0; 5302940b44dSPeter Avalos uint32_t len1 = 0; 5312940b44dSPeter Avalos 5322940b44dSPeter Avalos while (true) { 5332940b44dSPeter Avalos const uint32_t delta = pos - cur_match; 5342940b44dSPeter Avalos if (depth-- == 0 || delta >= cyclic_size) { 5352940b44dSPeter Avalos *ptr0 = EMPTY_HASH_VALUE; 5362940b44dSPeter Avalos *ptr1 = EMPTY_HASH_VALUE; 5372940b44dSPeter Avalos return; 5382940b44dSPeter Avalos } 5392940b44dSPeter Avalos 5402940b44dSPeter Avalos uint32_t *pair = son + ((cyclic_pos - delta 5412940b44dSPeter Avalos + (delta > cyclic_pos ? cyclic_size : 0)) 5422940b44dSPeter Avalos << 1); 5432940b44dSPeter Avalos const uint8_t *pb = cur - delta; 5442940b44dSPeter Avalos uint32_t len = my_min(len0, len1); 5452940b44dSPeter Avalos 5462940b44dSPeter Avalos if (pb[len] == cur[len]) { 547*15ab8c86SJohn Marino len = lzma_memcmplen(pb, cur, len + 1, len_limit); 5482940b44dSPeter Avalos 5492940b44dSPeter Avalos if (len == len_limit) { 5502940b44dSPeter Avalos *ptr1 = pair[0]; 5512940b44dSPeter Avalos *ptr0 = pair[1]; 5522940b44dSPeter Avalos return; 5532940b44dSPeter Avalos } 5542940b44dSPeter Avalos } 5552940b44dSPeter Avalos 5562940b44dSPeter Avalos if (pb[len] < cur[len]) { 5572940b44dSPeter Avalos *ptr1 = cur_match; 5582940b44dSPeter Avalos ptr1 = pair + 1; 5592940b44dSPeter Avalos cur_match = *ptr1; 5602940b44dSPeter Avalos len1 = len; 5612940b44dSPeter Avalos } else { 5622940b44dSPeter Avalos *ptr0 = cur_match; 5632940b44dSPeter Avalos ptr0 = pair; 5642940b44dSPeter Avalos cur_match = *ptr0; 5652940b44dSPeter Avalos len0 = len; 5662940b44dSPeter Avalos } 5672940b44dSPeter Avalos } 5682940b44dSPeter Avalos } 5692940b44dSPeter Avalos 5702940b44dSPeter Avalos 5712940b44dSPeter Avalos #define bt_find(len_best) \ 5722940b44dSPeter Avalos call_find(bt_find_func, len_best) 5732940b44dSPeter Avalos 5742940b44dSPeter Avalos #define bt_skip() \ 5752940b44dSPeter Avalos do { \ 5762940b44dSPeter Avalos bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \ 5772940b44dSPeter Avalos mf->son, mf->cyclic_pos, \ 5782940b44dSPeter Avalos mf->cyclic_size); \ 5792940b44dSPeter Avalos move_pos(mf); \ 5802940b44dSPeter Avalos } while (0) 5812940b44dSPeter Avalos 5822940b44dSPeter Avalos #endif 5832940b44dSPeter Avalos 5842940b44dSPeter Avalos 5852940b44dSPeter Avalos #ifdef HAVE_MF_BT2 5862940b44dSPeter Avalos extern uint32_t 5872940b44dSPeter Avalos lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) 5882940b44dSPeter Avalos { 5892940b44dSPeter Avalos header_find(true, 2); 5902940b44dSPeter Avalos 5912940b44dSPeter Avalos hash_2_calc(); 5922940b44dSPeter Avalos 5932940b44dSPeter Avalos const uint32_t cur_match = mf->hash[hash_value]; 5942940b44dSPeter Avalos mf->hash[hash_value] = pos; 5952940b44dSPeter Avalos 5962940b44dSPeter Avalos bt_find(1); 5972940b44dSPeter Avalos } 5982940b44dSPeter Avalos 5992940b44dSPeter Avalos 6002940b44dSPeter Avalos extern void 6012940b44dSPeter Avalos lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) 6022940b44dSPeter Avalos { 6032940b44dSPeter Avalos do { 6042940b44dSPeter Avalos header_skip(true, 2); 6052940b44dSPeter Avalos 6062940b44dSPeter Avalos hash_2_calc(); 6072940b44dSPeter Avalos 6082940b44dSPeter Avalos const uint32_t cur_match = mf->hash[hash_value]; 6092940b44dSPeter Avalos mf->hash[hash_value] = pos; 6102940b44dSPeter Avalos 6112940b44dSPeter Avalos bt_skip(); 6122940b44dSPeter Avalos 6132940b44dSPeter Avalos } while (--amount != 0); 6142940b44dSPeter Avalos } 6152940b44dSPeter Avalos #endif 6162940b44dSPeter Avalos 6172940b44dSPeter Avalos 6182940b44dSPeter Avalos #ifdef HAVE_MF_BT3 6192940b44dSPeter Avalos extern uint32_t 6202940b44dSPeter Avalos lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) 6212940b44dSPeter Avalos { 6222940b44dSPeter Avalos header_find(true, 3); 6232940b44dSPeter Avalos 6242940b44dSPeter Avalos hash_3_calc(); 6252940b44dSPeter Avalos 6262940b44dSPeter Avalos const uint32_t delta2 = pos - mf->hash[hash_2_value]; 6272940b44dSPeter Avalos const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; 6282940b44dSPeter Avalos 6292940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 6302940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; 6312940b44dSPeter Avalos 6322940b44dSPeter Avalos uint32_t len_best = 2; 6332940b44dSPeter Avalos 6342940b44dSPeter Avalos if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { 635*15ab8c86SJohn Marino len_best = lzma_memcmplen( 636*15ab8c86SJohn Marino cur, cur - delta2, len_best, len_limit); 6372940b44dSPeter Avalos 6382940b44dSPeter Avalos matches[0].len = len_best; 6392940b44dSPeter Avalos matches[0].dist = delta2 - 1; 6402940b44dSPeter Avalos matches_count = 1; 6412940b44dSPeter Avalos 6422940b44dSPeter Avalos if (len_best == len_limit) { 6432940b44dSPeter Avalos bt_skip(); 6442940b44dSPeter Avalos return 1; // matches_count 6452940b44dSPeter Avalos } 6462940b44dSPeter Avalos } 6472940b44dSPeter Avalos 6482940b44dSPeter Avalos bt_find(len_best); 6492940b44dSPeter Avalos } 6502940b44dSPeter Avalos 6512940b44dSPeter Avalos 6522940b44dSPeter Avalos extern void 6532940b44dSPeter Avalos lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) 6542940b44dSPeter Avalos { 6552940b44dSPeter Avalos do { 6562940b44dSPeter Avalos header_skip(true, 3); 6572940b44dSPeter Avalos 6582940b44dSPeter Avalos hash_3_calc(); 6592940b44dSPeter Avalos 6602940b44dSPeter Avalos const uint32_t cur_match 6612940b44dSPeter Avalos = mf->hash[FIX_3_HASH_SIZE + hash_value]; 6622940b44dSPeter Avalos 6632940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 6642940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; 6652940b44dSPeter Avalos 6662940b44dSPeter Avalos bt_skip(); 6672940b44dSPeter Avalos 6682940b44dSPeter Avalos } while (--amount != 0); 6692940b44dSPeter Avalos } 6702940b44dSPeter Avalos #endif 6712940b44dSPeter Avalos 6722940b44dSPeter Avalos 6732940b44dSPeter Avalos #ifdef HAVE_MF_BT4 6742940b44dSPeter Avalos extern uint32_t 6752940b44dSPeter Avalos lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) 6762940b44dSPeter Avalos { 6772940b44dSPeter Avalos header_find(true, 4); 6782940b44dSPeter Avalos 6792940b44dSPeter Avalos hash_4_calc(); 6802940b44dSPeter Avalos 6812940b44dSPeter Avalos uint32_t delta2 = pos - mf->hash[hash_2_value]; 6822940b44dSPeter Avalos const uint32_t delta3 6832940b44dSPeter Avalos = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; 6842940b44dSPeter Avalos const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; 6852940b44dSPeter Avalos 6862940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 6872940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; 6882940b44dSPeter Avalos mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; 6892940b44dSPeter Avalos 6902940b44dSPeter Avalos uint32_t len_best = 1; 6912940b44dSPeter Avalos 6922940b44dSPeter Avalos if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { 6932940b44dSPeter Avalos len_best = 2; 6942940b44dSPeter Avalos matches[0].len = 2; 6952940b44dSPeter Avalos matches[0].dist = delta2 - 1; 6962940b44dSPeter Avalos matches_count = 1; 6972940b44dSPeter Avalos } 6982940b44dSPeter Avalos 6992940b44dSPeter Avalos if (delta2 != delta3 && delta3 < mf->cyclic_size 7002940b44dSPeter Avalos && *(cur - delta3) == *cur) { 7012940b44dSPeter Avalos len_best = 3; 7022940b44dSPeter Avalos matches[matches_count++].dist = delta3 - 1; 7032940b44dSPeter Avalos delta2 = delta3; 7042940b44dSPeter Avalos } 7052940b44dSPeter Avalos 7062940b44dSPeter Avalos if (matches_count != 0) { 707*15ab8c86SJohn Marino len_best = lzma_memcmplen( 708*15ab8c86SJohn Marino cur, cur - delta2, len_best, len_limit); 7092940b44dSPeter Avalos 7102940b44dSPeter Avalos matches[matches_count - 1].len = len_best; 7112940b44dSPeter Avalos 7122940b44dSPeter Avalos if (len_best == len_limit) { 7132940b44dSPeter Avalos bt_skip(); 7142940b44dSPeter Avalos return matches_count; 7152940b44dSPeter Avalos } 7162940b44dSPeter Avalos } 7172940b44dSPeter Avalos 7182940b44dSPeter Avalos if (len_best < 3) 7192940b44dSPeter Avalos len_best = 3; 7202940b44dSPeter Avalos 7212940b44dSPeter Avalos bt_find(len_best); 7222940b44dSPeter Avalos } 7232940b44dSPeter Avalos 7242940b44dSPeter Avalos 7252940b44dSPeter Avalos extern void 7262940b44dSPeter Avalos lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) 7272940b44dSPeter Avalos { 7282940b44dSPeter Avalos do { 7292940b44dSPeter Avalos header_skip(true, 4); 7302940b44dSPeter Avalos 7312940b44dSPeter Avalos hash_4_calc(); 7322940b44dSPeter Avalos 7332940b44dSPeter Avalos const uint32_t cur_match 7342940b44dSPeter Avalos = mf->hash[FIX_4_HASH_SIZE + hash_value]; 7352940b44dSPeter Avalos 7362940b44dSPeter Avalos mf->hash[hash_2_value] = pos; 7372940b44dSPeter Avalos mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; 7382940b44dSPeter Avalos mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; 7392940b44dSPeter Avalos 7402940b44dSPeter Avalos bt_skip(); 7412940b44dSPeter Avalos 7422940b44dSPeter Avalos } while (--amount != 0); 7432940b44dSPeter Avalos } 7442940b44dSPeter Avalos #endif 745