12940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
22940b44dSPeter Avalos //
32940b44dSPeter Avalos /// \file lzma_decoder.c
42940b44dSPeter Avalos /// \brief LZMA decoder
52940b44dSPeter Avalos ///
62940b44dSPeter Avalos // Authors: Igor Pavlov
72940b44dSPeter Avalos // Lasse Collin
82940b44dSPeter Avalos //
92940b44dSPeter Avalos // This file has been put into the public domain.
102940b44dSPeter Avalos // You can do whatever you want with this file.
112940b44dSPeter Avalos //
122940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
132940b44dSPeter Avalos
142940b44dSPeter Avalos #include "lz_decoder.h"
152940b44dSPeter Avalos #include "lzma_common.h"
162940b44dSPeter Avalos #include "lzma_decoder.h"
172940b44dSPeter Avalos #include "range_decoder.h"
182940b44dSPeter Avalos
1946a2189dSzrj // The macros unroll loops with switch statements.
2046a2189dSzrj // Silence warnings about missing fall-through comments.
2146a2189dSzrj #if TUKLIB_GNUC_REQ(7, 0)
2246a2189dSzrj # pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2346a2189dSzrj #endif
2446a2189dSzrj
252940b44dSPeter Avalos
262940b44dSPeter Avalos #ifdef HAVE_SMALL
272940b44dSPeter Avalos
282940b44dSPeter Avalos // Macros for (somewhat) size-optimized code.
292940b44dSPeter Avalos #define seq_4(seq) seq
302940b44dSPeter Avalos
312940b44dSPeter Avalos #define seq_6(seq) seq
322940b44dSPeter Avalos
332940b44dSPeter Avalos #define seq_8(seq) seq
342940b44dSPeter Avalos
352940b44dSPeter Avalos #define seq_len(seq) \
362940b44dSPeter Avalos seq ## _CHOICE, \
372940b44dSPeter Avalos seq ## _CHOICE2, \
382940b44dSPeter Avalos seq ## _BITTREE
392940b44dSPeter Avalos
402940b44dSPeter Avalos #define len_decode(target, ld, pos_state, seq) \
412940b44dSPeter Avalos do { \
422940b44dSPeter Avalos case seq ## _CHOICE: \
432940b44dSPeter Avalos rc_if_0(ld.choice, seq ## _CHOICE) { \
442940b44dSPeter Avalos rc_update_0(ld.choice); \
452940b44dSPeter Avalos probs = ld.low[pos_state];\
462940b44dSPeter Avalos limit = LEN_LOW_SYMBOLS; \
472940b44dSPeter Avalos target = MATCH_LEN_MIN; \
482940b44dSPeter Avalos } else { \
492940b44dSPeter Avalos rc_update_1(ld.choice); \
502940b44dSPeter Avalos case seq ## _CHOICE2: \
512940b44dSPeter Avalos rc_if_0(ld.choice2, seq ## _CHOICE2) { \
522940b44dSPeter Avalos rc_update_0(ld.choice2); \
532940b44dSPeter Avalos probs = ld.mid[pos_state]; \
542940b44dSPeter Avalos limit = LEN_MID_SYMBOLS; \
552940b44dSPeter Avalos target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
562940b44dSPeter Avalos } else { \
572940b44dSPeter Avalos rc_update_1(ld.choice2); \
582940b44dSPeter Avalos probs = ld.high; \
592940b44dSPeter Avalos limit = LEN_HIGH_SYMBOLS; \
602940b44dSPeter Avalos target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
612940b44dSPeter Avalos + LEN_MID_SYMBOLS; \
622940b44dSPeter Avalos } \
632940b44dSPeter Avalos } \
642940b44dSPeter Avalos symbol = 1; \
652940b44dSPeter Avalos case seq ## _BITTREE: \
662940b44dSPeter Avalos do { \
672940b44dSPeter Avalos rc_bit(probs[symbol], , , seq ## _BITTREE); \
682940b44dSPeter Avalos } while (symbol < limit); \
692940b44dSPeter Avalos target += symbol - limit; \
702940b44dSPeter Avalos } while (0)
712940b44dSPeter Avalos
722940b44dSPeter Avalos #else // HAVE_SMALL
732940b44dSPeter Avalos
742940b44dSPeter Avalos // Unrolled versions
752940b44dSPeter Avalos #define seq_4(seq) \
762940b44dSPeter Avalos seq ## 0, \
772940b44dSPeter Avalos seq ## 1, \
782940b44dSPeter Avalos seq ## 2, \
792940b44dSPeter Avalos seq ## 3
802940b44dSPeter Avalos
812940b44dSPeter Avalos #define seq_6(seq) \
822940b44dSPeter Avalos seq ## 0, \
832940b44dSPeter Avalos seq ## 1, \
842940b44dSPeter Avalos seq ## 2, \
852940b44dSPeter Avalos seq ## 3, \
862940b44dSPeter Avalos seq ## 4, \
872940b44dSPeter Avalos seq ## 5
882940b44dSPeter Avalos
892940b44dSPeter Avalos #define seq_8(seq) \
902940b44dSPeter Avalos seq ## 0, \
912940b44dSPeter Avalos seq ## 1, \
922940b44dSPeter Avalos seq ## 2, \
932940b44dSPeter Avalos seq ## 3, \
942940b44dSPeter Avalos seq ## 4, \
952940b44dSPeter Avalos seq ## 5, \
962940b44dSPeter Avalos seq ## 6, \
972940b44dSPeter Avalos seq ## 7
982940b44dSPeter Avalos
992940b44dSPeter Avalos #define seq_len(seq) \
1002940b44dSPeter Avalos seq ## _CHOICE, \
1012940b44dSPeter Avalos seq ## _LOW0, \
1022940b44dSPeter Avalos seq ## _LOW1, \
1032940b44dSPeter Avalos seq ## _LOW2, \
1042940b44dSPeter Avalos seq ## _CHOICE2, \
1052940b44dSPeter Avalos seq ## _MID0, \
1062940b44dSPeter Avalos seq ## _MID1, \
1072940b44dSPeter Avalos seq ## _MID2, \
1082940b44dSPeter Avalos seq ## _HIGH0, \
1092940b44dSPeter Avalos seq ## _HIGH1, \
1102940b44dSPeter Avalos seq ## _HIGH2, \
1112940b44dSPeter Avalos seq ## _HIGH3, \
1122940b44dSPeter Avalos seq ## _HIGH4, \
1132940b44dSPeter Avalos seq ## _HIGH5, \
1142940b44dSPeter Avalos seq ## _HIGH6, \
1152940b44dSPeter Avalos seq ## _HIGH7
1162940b44dSPeter Avalos
1172940b44dSPeter Avalos #define len_decode(target, ld, pos_state, seq) \
1182940b44dSPeter Avalos do { \
1192940b44dSPeter Avalos symbol = 1; \
1202940b44dSPeter Avalos case seq ## _CHOICE: \
1212940b44dSPeter Avalos rc_if_0(ld.choice, seq ## _CHOICE) { \
1222940b44dSPeter Avalos rc_update_0(ld.choice); \
1232940b44dSPeter Avalos rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
1242940b44dSPeter Avalos rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
1252940b44dSPeter Avalos rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
1262940b44dSPeter Avalos target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
1272940b44dSPeter Avalos } else { \
1282940b44dSPeter Avalos rc_update_1(ld.choice); \
1292940b44dSPeter Avalos case seq ## _CHOICE2: \
1302940b44dSPeter Avalos rc_if_0(ld.choice2, seq ## _CHOICE2) { \
1312940b44dSPeter Avalos rc_update_0(ld.choice2); \
1322940b44dSPeter Avalos rc_bit_case(ld.mid[pos_state][symbol], , , \
1332940b44dSPeter Avalos seq ## _MID0); \
1342940b44dSPeter Avalos rc_bit_case(ld.mid[pos_state][symbol], , , \
1352940b44dSPeter Avalos seq ## _MID1); \
1362940b44dSPeter Avalos rc_bit_case(ld.mid[pos_state][symbol], , , \
1372940b44dSPeter Avalos seq ## _MID2); \
1382940b44dSPeter Avalos target = symbol - LEN_MID_SYMBOLS \
1392940b44dSPeter Avalos + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
1402940b44dSPeter Avalos } else { \
1412940b44dSPeter Avalos rc_update_1(ld.choice2); \
1422940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
1432940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
1442940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
1452940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
1462940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
1472940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
1482940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
1492940b44dSPeter Avalos rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
1502940b44dSPeter Avalos target = symbol - LEN_HIGH_SYMBOLS \
1512940b44dSPeter Avalos + MATCH_LEN_MIN \
1522940b44dSPeter Avalos + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
1532940b44dSPeter Avalos } \
1542940b44dSPeter Avalos } \
1552940b44dSPeter Avalos } while (0)
1562940b44dSPeter Avalos
1572940b44dSPeter Avalos #endif // HAVE_SMALL
1582940b44dSPeter Avalos
1592940b44dSPeter Avalos
1602940b44dSPeter Avalos /// Length decoder probabilities; see comments in lzma_common.h.
1612940b44dSPeter Avalos typedef struct {
1622940b44dSPeter Avalos probability choice;
1632940b44dSPeter Avalos probability choice2;
1642940b44dSPeter Avalos probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1652940b44dSPeter Avalos probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1662940b44dSPeter Avalos probability high[LEN_HIGH_SYMBOLS];
1672940b44dSPeter Avalos } lzma_length_decoder;
1682940b44dSPeter Avalos
1692940b44dSPeter Avalos
17046a2189dSzrj typedef struct {
1712940b44dSPeter Avalos ///////////////////
1722940b44dSPeter Avalos // Probabilities //
1732940b44dSPeter Avalos ///////////////////
1742940b44dSPeter Avalos
1752940b44dSPeter Avalos /// Literals; see comments in lzma_common.h.
1762940b44dSPeter Avalos probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1772940b44dSPeter Avalos
1782940b44dSPeter Avalos /// If 1, it's a match. Otherwise it's a single 8-bit literal.
1792940b44dSPeter Avalos probability is_match[STATES][POS_STATES_MAX];
1802940b44dSPeter Avalos
1812940b44dSPeter Avalos /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
1822940b44dSPeter Avalos probability is_rep[STATES];
1832940b44dSPeter Avalos
1842940b44dSPeter Avalos /// If 0, distance of a repeated match is rep0.
1852940b44dSPeter Avalos /// Otherwise check is_rep1.
1862940b44dSPeter Avalos probability is_rep0[STATES];
1872940b44dSPeter Avalos
1882940b44dSPeter Avalos /// If 0, distance of a repeated match is rep1.
1892940b44dSPeter Avalos /// Otherwise check is_rep2.
1902940b44dSPeter Avalos probability is_rep1[STATES];
1912940b44dSPeter Avalos
1922940b44dSPeter Avalos /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
1932940b44dSPeter Avalos probability is_rep2[STATES];
1942940b44dSPeter Avalos
1952940b44dSPeter Avalos /// If 1, the repeated match has length of one byte. Otherwise
1962940b44dSPeter Avalos /// the length is decoded from rep_len_decoder.
1972940b44dSPeter Avalos probability is_rep0_long[STATES][POS_STATES_MAX];
1982940b44dSPeter Avalos
1992940b44dSPeter Avalos /// Probability tree for the highest two bits of the match distance.
2002940b44dSPeter Avalos /// There is a separate probability tree for match lengths of
2012940b44dSPeter Avalos /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
20215ab8c86SJohn Marino probability dist_slot[DIST_STATES][DIST_SLOTS];
2032940b44dSPeter Avalos
2042940b44dSPeter Avalos /// Probability trees for additional bits for match distance when the
2052940b44dSPeter Avalos /// distance is in the range [4, 127].
20615ab8c86SJohn Marino probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
2072940b44dSPeter Avalos
2082940b44dSPeter Avalos /// Probability tree for the lowest four bits of a match distance
2092940b44dSPeter Avalos /// that is equal to or greater than 128.
21015ab8c86SJohn Marino probability pos_align[ALIGN_SIZE];
2112940b44dSPeter Avalos
2122940b44dSPeter Avalos /// Length of a normal match
2132940b44dSPeter Avalos lzma_length_decoder match_len_decoder;
2142940b44dSPeter Avalos
2152940b44dSPeter Avalos /// Length of a repeated match
2162940b44dSPeter Avalos lzma_length_decoder rep_len_decoder;
2172940b44dSPeter Avalos
2182940b44dSPeter Avalos ///////////////////
2192940b44dSPeter Avalos // Decoder state //
2202940b44dSPeter Avalos ///////////////////
2212940b44dSPeter Avalos
2222940b44dSPeter Avalos // Range coder
2232940b44dSPeter Avalos lzma_range_decoder rc;
2242940b44dSPeter Avalos
2252940b44dSPeter Avalos // Types of the most recently seen LZMA symbols
2262940b44dSPeter Avalos lzma_lzma_state state;
2272940b44dSPeter Avalos
2282940b44dSPeter Avalos uint32_t rep0; ///< Distance of the latest match
2292940b44dSPeter Avalos uint32_t rep1; ///< Distance of second latest match
2302940b44dSPeter Avalos uint32_t rep2; ///< Distance of third latest match
2312940b44dSPeter Avalos uint32_t rep3; ///< Distance of fourth latest match
2322940b44dSPeter Avalos
2332940b44dSPeter Avalos uint32_t pos_mask; // (1U << pb) - 1
2342940b44dSPeter Avalos uint32_t literal_context_bits;
2352940b44dSPeter Avalos uint32_t literal_pos_mask;
2362940b44dSPeter Avalos
2372940b44dSPeter Avalos /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
2382940b44dSPeter Avalos /// payload marker is expected.
2392940b44dSPeter Avalos lzma_vli uncompressed_size;
2402940b44dSPeter Avalos
2412940b44dSPeter Avalos ////////////////////////////////
2422940b44dSPeter Avalos // State of incomplete symbol //
2432940b44dSPeter Avalos ////////////////////////////////
2442940b44dSPeter Avalos
2452940b44dSPeter Avalos /// Position where to continue the decoder loop
2462940b44dSPeter Avalos enum {
2472940b44dSPeter Avalos SEQ_NORMALIZE,
2482940b44dSPeter Avalos SEQ_IS_MATCH,
2492940b44dSPeter Avalos seq_8(SEQ_LITERAL),
2502940b44dSPeter Avalos seq_8(SEQ_LITERAL_MATCHED),
2512940b44dSPeter Avalos SEQ_LITERAL_WRITE,
2522940b44dSPeter Avalos SEQ_IS_REP,
2532940b44dSPeter Avalos seq_len(SEQ_MATCH_LEN),
25415ab8c86SJohn Marino seq_6(SEQ_DIST_SLOT),
25515ab8c86SJohn Marino SEQ_DIST_MODEL,
2562940b44dSPeter Avalos SEQ_DIRECT,
2572940b44dSPeter Avalos seq_4(SEQ_ALIGN),
2582940b44dSPeter Avalos SEQ_EOPM,
2592940b44dSPeter Avalos SEQ_IS_REP0,
2602940b44dSPeter Avalos SEQ_SHORTREP,
2612940b44dSPeter Avalos SEQ_IS_REP0_LONG,
2622940b44dSPeter Avalos SEQ_IS_REP1,
2632940b44dSPeter Avalos SEQ_IS_REP2,
2642940b44dSPeter Avalos seq_len(SEQ_REP_LEN),
2652940b44dSPeter Avalos SEQ_COPY,
2662940b44dSPeter Avalos } sequence;
2672940b44dSPeter Avalos
2682940b44dSPeter Avalos /// Base of the current probability tree
2692940b44dSPeter Avalos probability *probs;
2702940b44dSPeter Avalos
2712940b44dSPeter Avalos /// Symbol being decoded. This is also used as an index variable in
2722940b44dSPeter Avalos /// bittree decoders: probs[symbol]
2732940b44dSPeter Avalos uint32_t symbol;
2742940b44dSPeter Avalos
2752940b44dSPeter Avalos /// Used as a loop termination condition on bittree decoders and
2762940b44dSPeter Avalos /// direct bits decoder.
2772940b44dSPeter Avalos uint32_t limit;
2782940b44dSPeter Avalos
2792940b44dSPeter Avalos /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
2802940b44dSPeter Avalos /// Bittree reverse decoders: Offset of the next bit: 1 << offset
2812940b44dSPeter Avalos uint32_t offset;
2822940b44dSPeter Avalos
2832940b44dSPeter Avalos /// If decoding a literal: match byte.
2842940b44dSPeter Avalos /// If decoding a match: length of the match.
2852940b44dSPeter Avalos uint32_t len;
28646a2189dSzrj } lzma_lzma1_decoder;
2872940b44dSPeter Avalos
2882940b44dSPeter Avalos
2892940b44dSPeter Avalos static lzma_ret
lzma_decode(void * coder_ptr,lzma_dict * restrict dictptr,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size)29046a2189dSzrj lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
2912940b44dSPeter Avalos const uint8_t *restrict in,
2922940b44dSPeter Avalos size_t *restrict in_pos, size_t in_size)
2932940b44dSPeter Avalos {
29446a2189dSzrj lzma_lzma1_decoder *restrict coder = coder_ptr;
29546a2189dSzrj
2962940b44dSPeter Avalos ////////////////////
2972940b44dSPeter Avalos // Initialization //
2982940b44dSPeter Avalos ////////////////////
2992940b44dSPeter Avalos
30015ab8c86SJohn Marino {
30115ab8c86SJohn Marino const lzma_ret ret = rc_read_init(
30215ab8c86SJohn Marino &coder->rc, in, in_pos, in_size);
30315ab8c86SJohn Marino if (ret != LZMA_STREAM_END)
30415ab8c86SJohn Marino return ret;
30515ab8c86SJohn Marino }
3062940b44dSPeter Avalos
3072940b44dSPeter Avalos ///////////////
3082940b44dSPeter Avalos // Variables //
3092940b44dSPeter Avalos ///////////////
3102940b44dSPeter Avalos
3112940b44dSPeter Avalos // Making local copies of often-used variables improves both
3122940b44dSPeter Avalos // speed and readability.
3132940b44dSPeter Avalos
3142940b44dSPeter Avalos lzma_dict dict = *dictptr;
3152940b44dSPeter Avalos
3162940b44dSPeter Avalos const size_t dict_start = dict.pos;
3172940b44dSPeter Avalos
3182940b44dSPeter Avalos // Range decoder
3192940b44dSPeter Avalos rc_to_local(coder->rc, *in_pos);
3202940b44dSPeter Avalos
3212940b44dSPeter Avalos // State
3222940b44dSPeter Avalos uint32_t state = coder->state;
3232940b44dSPeter Avalos uint32_t rep0 = coder->rep0;
3242940b44dSPeter Avalos uint32_t rep1 = coder->rep1;
3252940b44dSPeter Avalos uint32_t rep2 = coder->rep2;
3262940b44dSPeter Avalos uint32_t rep3 = coder->rep3;
3272940b44dSPeter Avalos
3282940b44dSPeter Avalos const uint32_t pos_mask = coder->pos_mask;
3292940b44dSPeter Avalos
3302940b44dSPeter Avalos // These variables are actually needed only if we last time ran
3312940b44dSPeter Avalos // out of input in the middle of the decoder loop.
3322940b44dSPeter Avalos probability *probs = coder->probs;
3332940b44dSPeter Avalos uint32_t symbol = coder->symbol;
3342940b44dSPeter Avalos uint32_t limit = coder->limit;
3352940b44dSPeter Avalos uint32_t offset = coder->offset;
3362940b44dSPeter Avalos uint32_t len = coder->len;
3372940b44dSPeter Avalos
3382940b44dSPeter Avalos const uint32_t literal_pos_mask = coder->literal_pos_mask;
3392940b44dSPeter Avalos const uint32_t literal_context_bits = coder->literal_context_bits;
3402940b44dSPeter Avalos
3412940b44dSPeter Avalos // Temporary variables
3422940b44dSPeter Avalos uint32_t pos_state = dict.pos & pos_mask;
3432940b44dSPeter Avalos
3442940b44dSPeter Avalos lzma_ret ret = LZMA_OK;
3452940b44dSPeter Avalos
3462940b44dSPeter Avalos // If uncompressed size is known, there must be no end of payload
3472940b44dSPeter Avalos // marker.
3482940b44dSPeter Avalos const bool no_eopm = coder->uncompressed_size
3492940b44dSPeter Avalos != LZMA_VLI_UNKNOWN;
3502940b44dSPeter Avalos if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
3512940b44dSPeter Avalos dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
3522940b44dSPeter Avalos
3532940b44dSPeter Avalos // The main decoder loop. The "switch" is used to restart the decoder at
3542940b44dSPeter Avalos // correct location. Once restarted, the "switch" is no longer used.
3552940b44dSPeter Avalos switch (coder->sequence)
3562940b44dSPeter Avalos while (true) {
3572940b44dSPeter Avalos // Calculate new pos_state. This is skipped on the first loop
3582940b44dSPeter Avalos // since we already calculated it when setting up the local
3592940b44dSPeter Avalos // variables.
3602940b44dSPeter Avalos pos_state = dict.pos & pos_mask;
3612940b44dSPeter Avalos
3622940b44dSPeter Avalos case SEQ_NORMALIZE:
3632940b44dSPeter Avalos case SEQ_IS_MATCH:
3642940b44dSPeter Avalos if (unlikely(no_eopm && dict.pos == dict.limit))
3652940b44dSPeter Avalos break;
3662940b44dSPeter Avalos
3672940b44dSPeter Avalos rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
3682940b44dSPeter Avalos rc_update_0(coder->is_match[state][pos_state]);
3692940b44dSPeter Avalos
3702940b44dSPeter Avalos // It's a literal i.e. a single 8-bit byte.
3712940b44dSPeter Avalos
3722940b44dSPeter Avalos probs = literal_subcoder(coder->literal,
3732940b44dSPeter Avalos literal_context_bits, literal_pos_mask,
3742940b44dSPeter Avalos dict.pos, dict_get(&dict, 0));
3752940b44dSPeter Avalos symbol = 1;
3762940b44dSPeter Avalos
3772940b44dSPeter Avalos if (is_literal_state(state)) {
3782940b44dSPeter Avalos // Decode literal without match byte.
3792940b44dSPeter Avalos #ifdef HAVE_SMALL
3802940b44dSPeter Avalos case SEQ_LITERAL:
3812940b44dSPeter Avalos do {
3822940b44dSPeter Avalos rc_bit(probs[symbol], , , SEQ_LITERAL);
3832940b44dSPeter Avalos } while (symbol < (1 << 8));
3842940b44dSPeter Avalos #else
3852940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
3862940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
3872940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
3882940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
3892940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
3902940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
3912940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
3922940b44dSPeter Avalos rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
3932940b44dSPeter Avalos #endif
3942940b44dSPeter Avalos } else {
3952940b44dSPeter Avalos // Decode literal with match byte.
3962940b44dSPeter Avalos //
3972940b44dSPeter Avalos // We store the byte we compare against
3982940b44dSPeter Avalos // ("match byte") to "len" to minimize the
3992940b44dSPeter Avalos // number of variables we need to store
4002940b44dSPeter Avalos // between decoder calls.
401*e151908bSDaniel Fojt len = (uint32_t)(dict_get(&dict, rep0)) << 1;
4022940b44dSPeter Avalos
4032940b44dSPeter Avalos // The usage of "offset" allows omitting some
4042940b44dSPeter Avalos // branches, which should give tiny speed
4052940b44dSPeter Avalos // improvement on some CPUs. "offset" gets
4062940b44dSPeter Avalos // set to zero if match_bit didn't match.
4072940b44dSPeter Avalos offset = 0x100;
4082940b44dSPeter Avalos
4092940b44dSPeter Avalos #ifdef HAVE_SMALL
4102940b44dSPeter Avalos case SEQ_LITERAL_MATCHED:
4112940b44dSPeter Avalos do {
4122940b44dSPeter Avalos const uint32_t match_bit
4132940b44dSPeter Avalos = len & offset;
4142940b44dSPeter Avalos const uint32_t subcoder_index
4152940b44dSPeter Avalos = offset + match_bit
4162940b44dSPeter Avalos + symbol;
4172940b44dSPeter Avalos
4182940b44dSPeter Avalos rc_bit(probs[subcoder_index],
4192940b44dSPeter Avalos offset &= ~match_bit,
4202940b44dSPeter Avalos offset &= match_bit,
4212940b44dSPeter Avalos SEQ_LITERAL_MATCHED);
4222940b44dSPeter Avalos
4232940b44dSPeter Avalos // It seems to be faster to do this
4242940b44dSPeter Avalos // here instead of putting it to the
4252940b44dSPeter Avalos // beginning of the loop and then
4262940b44dSPeter Avalos // putting the "case" in the middle
4272940b44dSPeter Avalos // of the loop.
4282940b44dSPeter Avalos len <<= 1;
4292940b44dSPeter Avalos
4302940b44dSPeter Avalos } while (symbol < (1 << 8));
4312940b44dSPeter Avalos #else
4322940b44dSPeter Avalos // Unroll the loop.
4332940b44dSPeter Avalos uint32_t match_bit;
4342940b44dSPeter Avalos uint32_t subcoder_index;
4352940b44dSPeter Avalos
4362940b44dSPeter Avalos # define d(seq) \
4372940b44dSPeter Avalos case seq: \
4382940b44dSPeter Avalos match_bit = len & offset; \
4392940b44dSPeter Avalos subcoder_index = offset + match_bit + symbol; \
4402940b44dSPeter Avalos rc_bit(probs[subcoder_index], \
4412940b44dSPeter Avalos offset &= ~match_bit, \
4422940b44dSPeter Avalos offset &= match_bit, \
4432940b44dSPeter Avalos seq)
4442940b44dSPeter Avalos
4452940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED0);
4462940b44dSPeter Avalos len <<= 1;
4472940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED1);
4482940b44dSPeter Avalos len <<= 1;
4492940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED2);
4502940b44dSPeter Avalos len <<= 1;
4512940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED3);
4522940b44dSPeter Avalos len <<= 1;
4532940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED4);
4542940b44dSPeter Avalos len <<= 1;
4552940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED5);
4562940b44dSPeter Avalos len <<= 1;
4572940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED6);
4582940b44dSPeter Avalos len <<= 1;
4592940b44dSPeter Avalos d(SEQ_LITERAL_MATCHED7);
4602940b44dSPeter Avalos # undef d
4612940b44dSPeter Avalos #endif
4622940b44dSPeter Avalos }
4632940b44dSPeter Avalos
4642940b44dSPeter Avalos //update_literal(state);
4652940b44dSPeter Avalos // Use a lookup table to update to literal state,
4662940b44dSPeter Avalos // since compared to other state updates, this would
4672940b44dSPeter Avalos // need two branches.
4682940b44dSPeter Avalos static const lzma_lzma_state next_state[] = {
4692940b44dSPeter Avalos STATE_LIT_LIT,
4702940b44dSPeter Avalos STATE_LIT_LIT,
4712940b44dSPeter Avalos STATE_LIT_LIT,
4722940b44dSPeter Avalos STATE_LIT_LIT,
4732940b44dSPeter Avalos STATE_MATCH_LIT_LIT,
4742940b44dSPeter Avalos STATE_REP_LIT_LIT,
4752940b44dSPeter Avalos STATE_SHORTREP_LIT_LIT,
4762940b44dSPeter Avalos STATE_MATCH_LIT,
4772940b44dSPeter Avalos STATE_REP_LIT,
4782940b44dSPeter Avalos STATE_SHORTREP_LIT,
4792940b44dSPeter Avalos STATE_MATCH_LIT,
4802940b44dSPeter Avalos STATE_REP_LIT
4812940b44dSPeter Avalos };
4822940b44dSPeter Avalos state = next_state[state];
4832940b44dSPeter Avalos
4842940b44dSPeter Avalos case SEQ_LITERAL_WRITE:
4852940b44dSPeter Avalos if (unlikely(dict_put(&dict, symbol))) {
4862940b44dSPeter Avalos coder->sequence = SEQ_LITERAL_WRITE;
4872940b44dSPeter Avalos goto out;
4882940b44dSPeter Avalos }
4892940b44dSPeter Avalos
4902940b44dSPeter Avalos continue;
4912940b44dSPeter Avalos }
4922940b44dSPeter Avalos
4932940b44dSPeter Avalos // Instead of a new byte we are going to get a byte range
4942940b44dSPeter Avalos // (distance and length) which will be repeated from our
4952940b44dSPeter Avalos // output history.
4962940b44dSPeter Avalos
4972940b44dSPeter Avalos rc_update_1(coder->is_match[state][pos_state]);
4982940b44dSPeter Avalos
4992940b44dSPeter Avalos case SEQ_IS_REP:
5002940b44dSPeter Avalos rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
5012940b44dSPeter Avalos // Not a repeated match
5022940b44dSPeter Avalos rc_update_0(coder->is_rep[state]);
5032940b44dSPeter Avalos update_match(state);
5042940b44dSPeter Avalos
5052940b44dSPeter Avalos // The latest three match distances are kept in
5062940b44dSPeter Avalos // memory in case there are repeated matches.
5072940b44dSPeter Avalos rep3 = rep2;
5082940b44dSPeter Avalos rep2 = rep1;
5092940b44dSPeter Avalos rep1 = rep0;
5102940b44dSPeter Avalos
5112940b44dSPeter Avalos // Decode the length of the match.
5122940b44dSPeter Avalos len_decode(len, coder->match_len_decoder,
5132940b44dSPeter Avalos pos_state, SEQ_MATCH_LEN);
5142940b44dSPeter Avalos
5152940b44dSPeter Avalos // Prepare to decode the highest two bits of the
5162940b44dSPeter Avalos // match distance.
51715ab8c86SJohn Marino probs = coder->dist_slot[get_dist_state(len)];
5182940b44dSPeter Avalos symbol = 1;
5192940b44dSPeter Avalos
5202940b44dSPeter Avalos #ifdef HAVE_SMALL
52115ab8c86SJohn Marino case SEQ_DIST_SLOT:
5222940b44dSPeter Avalos do {
52315ab8c86SJohn Marino rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
52415ab8c86SJohn Marino } while (symbol < DIST_SLOTS);
5252940b44dSPeter Avalos #else
52615ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
52715ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
52815ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
52915ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
53015ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
53115ab8c86SJohn Marino rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
5322940b44dSPeter Avalos #endif
5332940b44dSPeter Avalos // Get rid of the highest bit that was needed for
5342940b44dSPeter Avalos // indexing of the probability array.
53515ab8c86SJohn Marino symbol -= DIST_SLOTS;
5362940b44dSPeter Avalos assert(symbol <= 63);
5372940b44dSPeter Avalos
53815ab8c86SJohn Marino if (symbol < DIST_MODEL_START) {
5392940b44dSPeter Avalos // Match distances [0, 3] have only two bits.
5402940b44dSPeter Avalos rep0 = symbol;
5412940b44dSPeter Avalos } else {
5422940b44dSPeter Avalos // Decode the lowest [1, 29] bits of
5432940b44dSPeter Avalos // the match distance.
5442940b44dSPeter Avalos limit = (symbol >> 1) - 1;
5452940b44dSPeter Avalos assert(limit >= 1 && limit <= 30);
5462940b44dSPeter Avalos rep0 = 2 + (symbol & 1);
5472940b44dSPeter Avalos
54815ab8c86SJohn Marino if (symbol < DIST_MODEL_END) {
5492940b44dSPeter Avalos // Prepare to decode the low bits for
5502940b44dSPeter Avalos // a distance of [4, 127].
5512940b44dSPeter Avalos assert(limit <= 5);
5522940b44dSPeter Avalos rep0 <<= limit;
5532940b44dSPeter Avalos assert(rep0 <= 96);
5542940b44dSPeter Avalos // -1 is fine, because we start
5552940b44dSPeter Avalos // decoding at probs[1], not probs[0].
5562940b44dSPeter Avalos // NOTE: This violates the C standard,
5572940b44dSPeter Avalos // since we are doing pointer
5582940b44dSPeter Avalos // arithmetic past the beginning of
5592940b44dSPeter Avalos // the array.
5602940b44dSPeter Avalos assert((int32_t)(rep0 - symbol - 1)
5612940b44dSPeter Avalos >= -1);
5622940b44dSPeter Avalos assert((int32_t)(rep0 - symbol - 1)
5632940b44dSPeter Avalos <= 82);
5642940b44dSPeter Avalos probs = coder->pos_special + rep0
5652940b44dSPeter Avalos - symbol - 1;
5662940b44dSPeter Avalos symbol = 1;
5672940b44dSPeter Avalos offset = 0;
56815ab8c86SJohn Marino case SEQ_DIST_MODEL:
5692940b44dSPeter Avalos #ifdef HAVE_SMALL
5702940b44dSPeter Avalos do {
5712940b44dSPeter Avalos rc_bit(probs[symbol], ,
572*e151908bSDaniel Fojt rep0 += 1U << offset,
57315ab8c86SJohn Marino SEQ_DIST_MODEL);
5742940b44dSPeter Avalos } while (++offset < limit);
5752940b44dSPeter Avalos #else
5762940b44dSPeter Avalos switch (limit) {
5772940b44dSPeter Avalos case 5:
5782940b44dSPeter Avalos assert(offset == 0);
5792940b44dSPeter Avalos rc_bit(probs[symbol], ,
580*e151908bSDaniel Fojt rep0 += 1U,
58115ab8c86SJohn Marino SEQ_DIST_MODEL);
5822940b44dSPeter Avalos ++offset;
5832940b44dSPeter Avalos --limit;
5842940b44dSPeter Avalos case 4:
5852940b44dSPeter Avalos rc_bit(probs[symbol], ,
586*e151908bSDaniel Fojt rep0 += 1U << offset,
58715ab8c86SJohn Marino SEQ_DIST_MODEL);
5882940b44dSPeter Avalos ++offset;
5892940b44dSPeter Avalos --limit;
5902940b44dSPeter Avalos case 3:
5912940b44dSPeter Avalos rc_bit(probs[symbol], ,
592*e151908bSDaniel Fojt rep0 += 1U << offset,
59315ab8c86SJohn Marino SEQ_DIST_MODEL);
5942940b44dSPeter Avalos ++offset;
5952940b44dSPeter Avalos --limit;
5962940b44dSPeter Avalos case 2:
5972940b44dSPeter Avalos rc_bit(probs[symbol], ,
598*e151908bSDaniel Fojt rep0 += 1U << offset,
59915ab8c86SJohn Marino SEQ_DIST_MODEL);
6002940b44dSPeter Avalos ++offset;
6012940b44dSPeter Avalos --limit;
6022940b44dSPeter Avalos case 1:
6032940b44dSPeter Avalos // We need "symbol" only for
6042940b44dSPeter Avalos // indexing the probability
6052940b44dSPeter Avalos // array, thus we can use
6062940b44dSPeter Avalos // rc_bit_last() here to omit
6072940b44dSPeter Avalos // the unneeded updating of
6082940b44dSPeter Avalos // "symbol".
6092940b44dSPeter Avalos rc_bit_last(probs[symbol], ,
610*e151908bSDaniel Fojt rep0 += 1U << offset,
61115ab8c86SJohn Marino SEQ_DIST_MODEL);
6122940b44dSPeter Avalos }
6132940b44dSPeter Avalos #endif
6142940b44dSPeter Avalos } else {
6152940b44dSPeter Avalos // The distance is >= 128. Decode the
6162940b44dSPeter Avalos // lower bits without probabilities
6172940b44dSPeter Avalos // except the lowest four bits.
6182940b44dSPeter Avalos assert(symbol >= 14);
6192940b44dSPeter Avalos assert(limit >= 6);
6202940b44dSPeter Avalos limit -= ALIGN_BITS;
6212940b44dSPeter Avalos assert(limit >= 2);
6222940b44dSPeter Avalos case SEQ_DIRECT:
6232940b44dSPeter Avalos // Not worth manual unrolling
6242940b44dSPeter Avalos do {
6252940b44dSPeter Avalos rc_direct(rep0, SEQ_DIRECT);
6262940b44dSPeter Avalos } while (--limit > 0);
6272940b44dSPeter Avalos
6282940b44dSPeter Avalos // Decode the lowest four bits using
6292940b44dSPeter Avalos // probabilities.
6302940b44dSPeter Avalos rep0 <<= ALIGN_BITS;
6312940b44dSPeter Avalos symbol = 1;
6322940b44dSPeter Avalos #ifdef HAVE_SMALL
6332940b44dSPeter Avalos offset = 0;
6342940b44dSPeter Avalos case SEQ_ALIGN:
6352940b44dSPeter Avalos do {
6362940b44dSPeter Avalos rc_bit(coder->pos_align[
6372940b44dSPeter Avalos symbol], ,
638*e151908bSDaniel Fojt rep0 += 1U << offset,
6392940b44dSPeter Avalos SEQ_ALIGN);
6402940b44dSPeter Avalos } while (++offset < ALIGN_BITS);
6412940b44dSPeter Avalos #else
6422940b44dSPeter Avalos case SEQ_ALIGN0:
6432940b44dSPeter Avalos rc_bit(coder->pos_align[symbol], ,
6442940b44dSPeter Avalos rep0 += 1, SEQ_ALIGN0);
6452940b44dSPeter Avalos case SEQ_ALIGN1:
6462940b44dSPeter Avalos rc_bit(coder->pos_align[symbol], ,
6472940b44dSPeter Avalos rep0 += 2, SEQ_ALIGN1);
6482940b44dSPeter Avalos case SEQ_ALIGN2:
6492940b44dSPeter Avalos rc_bit(coder->pos_align[symbol], ,
6502940b44dSPeter Avalos rep0 += 4, SEQ_ALIGN2);
6512940b44dSPeter Avalos case SEQ_ALIGN3:
65215ab8c86SJohn Marino // Like in SEQ_DIST_MODEL, we don't
6532940b44dSPeter Avalos // need "symbol" for anything else
6542940b44dSPeter Avalos // than indexing the probability array.
6552940b44dSPeter Avalos rc_bit_last(coder->pos_align[symbol], ,
6562940b44dSPeter Avalos rep0 += 8, SEQ_ALIGN3);
6572940b44dSPeter Avalos #endif
6582940b44dSPeter Avalos
6592940b44dSPeter Avalos if (rep0 == UINT32_MAX) {
6602940b44dSPeter Avalos // End of payload marker was
6612940b44dSPeter Avalos // found. It must not be
6622940b44dSPeter Avalos // present if uncompressed
6632940b44dSPeter Avalos // size is known.
6642940b44dSPeter Avalos if (coder->uncompressed_size
6652940b44dSPeter Avalos != LZMA_VLI_UNKNOWN) {
6662940b44dSPeter Avalos ret = LZMA_DATA_ERROR;
6672940b44dSPeter Avalos goto out;
6682940b44dSPeter Avalos }
6692940b44dSPeter Avalos
6702940b44dSPeter Avalos case SEQ_EOPM:
6712940b44dSPeter Avalos // LZMA1 stream with
6722940b44dSPeter Avalos // end-of-payload marker.
6732940b44dSPeter Avalos rc_normalize(SEQ_EOPM);
6742940b44dSPeter Avalos ret = LZMA_STREAM_END;
6752940b44dSPeter Avalos goto out;
6762940b44dSPeter Avalos }
6772940b44dSPeter Avalos }
6782940b44dSPeter Avalos }
6792940b44dSPeter Avalos
6802940b44dSPeter Avalos // Validate the distance we just decoded.
6812940b44dSPeter Avalos if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
6822940b44dSPeter Avalos ret = LZMA_DATA_ERROR;
6832940b44dSPeter Avalos goto out;
6842940b44dSPeter Avalos }
6852940b44dSPeter Avalos
6862940b44dSPeter Avalos } else {
6872940b44dSPeter Avalos rc_update_1(coder->is_rep[state]);
6882940b44dSPeter Avalos
6892940b44dSPeter Avalos // Repeated match
6902940b44dSPeter Avalos //
6912940b44dSPeter Avalos // The match distance is a value that we have had
6922940b44dSPeter Avalos // earlier. The latest four match distances are
6932940b44dSPeter Avalos // available as rep0, rep1, rep2 and rep3. We will
6942940b44dSPeter Avalos // now decode which of them is the new distance.
6952940b44dSPeter Avalos //
6962940b44dSPeter Avalos // There cannot be a match if we haven't produced
6972940b44dSPeter Avalos // any output, so check that first.
6982940b44dSPeter Avalos if (unlikely(!dict_is_distance_valid(&dict, 0))) {
6992940b44dSPeter Avalos ret = LZMA_DATA_ERROR;
7002940b44dSPeter Avalos goto out;
7012940b44dSPeter Avalos }
7022940b44dSPeter Avalos
7032940b44dSPeter Avalos case SEQ_IS_REP0:
7042940b44dSPeter Avalos rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
7052940b44dSPeter Avalos rc_update_0(coder->is_rep0[state]);
7062940b44dSPeter Avalos // The distance is rep0.
7072940b44dSPeter Avalos
7082940b44dSPeter Avalos case SEQ_IS_REP0_LONG:
7092940b44dSPeter Avalos rc_if_0(coder->is_rep0_long[state][pos_state],
7102940b44dSPeter Avalos SEQ_IS_REP0_LONG) {
7112940b44dSPeter Avalos rc_update_0(coder->is_rep0_long[
7122940b44dSPeter Avalos state][pos_state]);
7132940b44dSPeter Avalos
7142940b44dSPeter Avalos update_short_rep(state);
7152940b44dSPeter Avalos
7162940b44dSPeter Avalos case SEQ_SHORTREP:
7172940b44dSPeter Avalos if (unlikely(dict_put(&dict, dict_get(
7182940b44dSPeter Avalos &dict, rep0)))) {
7192940b44dSPeter Avalos coder->sequence = SEQ_SHORTREP;
7202940b44dSPeter Avalos goto out;
7212940b44dSPeter Avalos }
7222940b44dSPeter Avalos
7232940b44dSPeter Avalos continue;
7242940b44dSPeter Avalos }
7252940b44dSPeter Avalos
7262940b44dSPeter Avalos // Repeating more than one byte at
7272940b44dSPeter Avalos // distance of rep0.
7282940b44dSPeter Avalos rc_update_1(coder->is_rep0_long[
7292940b44dSPeter Avalos state][pos_state]);
7302940b44dSPeter Avalos
7312940b44dSPeter Avalos } else {
7322940b44dSPeter Avalos rc_update_1(coder->is_rep0[state]);
7332940b44dSPeter Avalos
7342940b44dSPeter Avalos case SEQ_IS_REP1:
7352940b44dSPeter Avalos // The distance is rep1, rep2 or rep3. Once
7362940b44dSPeter Avalos // we find out which one of these three, it
7372940b44dSPeter Avalos // is stored to rep0 and rep1, rep2 and rep3
7382940b44dSPeter Avalos // are updated accordingly.
7392940b44dSPeter Avalos rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
7402940b44dSPeter Avalos rc_update_0(coder->is_rep1[state]);
7412940b44dSPeter Avalos
7422940b44dSPeter Avalos const uint32_t distance = rep1;
7432940b44dSPeter Avalos rep1 = rep0;
7442940b44dSPeter Avalos rep0 = distance;
7452940b44dSPeter Avalos
7462940b44dSPeter Avalos } else {
7472940b44dSPeter Avalos rc_update_1(coder->is_rep1[state]);
7482940b44dSPeter Avalos case SEQ_IS_REP2:
7492940b44dSPeter Avalos rc_if_0(coder->is_rep2[state],
7502940b44dSPeter Avalos SEQ_IS_REP2) {
7512940b44dSPeter Avalos rc_update_0(coder->is_rep2[
7522940b44dSPeter Avalos state]);
7532940b44dSPeter Avalos
7542940b44dSPeter Avalos const uint32_t distance = rep2;
7552940b44dSPeter Avalos rep2 = rep1;
7562940b44dSPeter Avalos rep1 = rep0;
7572940b44dSPeter Avalos rep0 = distance;
7582940b44dSPeter Avalos
7592940b44dSPeter Avalos } else {
7602940b44dSPeter Avalos rc_update_1(coder->is_rep2[
7612940b44dSPeter Avalos state]);
7622940b44dSPeter Avalos
7632940b44dSPeter Avalos const uint32_t distance = rep3;
7642940b44dSPeter Avalos rep3 = rep2;
7652940b44dSPeter Avalos rep2 = rep1;
7662940b44dSPeter Avalos rep1 = rep0;
7672940b44dSPeter Avalos rep0 = distance;
7682940b44dSPeter Avalos }
7692940b44dSPeter Avalos }
7702940b44dSPeter Avalos }
7712940b44dSPeter Avalos
7722940b44dSPeter Avalos update_long_rep(state);
7732940b44dSPeter Avalos
7742940b44dSPeter Avalos // Decode the length of the repeated match.
7752940b44dSPeter Avalos len_decode(len, coder->rep_len_decoder,
7762940b44dSPeter Avalos pos_state, SEQ_REP_LEN);
7772940b44dSPeter Avalos }
7782940b44dSPeter Avalos
7792940b44dSPeter Avalos /////////////////////////////////
7802940b44dSPeter Avalos // Repeat from history buffer. //
7812940b44dSPeter Avalos /////////////////////////////////
7822940b44dSPeter Avalos
7832940b44dSPeter Avalos // The length is always between these limits. There is no way
7842940b44dSPeter Avalos // to trigger the algorithm to set len outside this range.
7852940b44dSPeter Avalos assert(len >= MATCH_LEN_MIN);
7862940b44dSPeter Avalos assert(len <= MATCH_LEN_MAX);
7872940b44dSPeter Avalos
7882940b44dSPeter Avalos case SEQ_COPY:
7892940b44dSPeter Avalos // Repeat len bytes from distance of rep0.
7902940b44dSPeter Avalos if (unlikely(dict_repeat(&dict, rep0, &len))) {
7912940b44dSPeter Avalos coder->sequence = SEQ_COPY;
7922940b44dSPeter Avalos goto out;
7932940b44dSPeter Avalos }
7942940b44dSPeter Avalos }
7952940b44dSPeter Avalos
7962940b44dSPeter Avalos rc_normalize(SEQ_NORMALIZE);
7972940b44dSPeter Avalos coder->sequence = SEQ_IS_MATCH;
7982940b44dSPeter Avalos
7992940b44dSPeter Avalos out:
8002940b44dSPeter Avalos // Save state
8012940b44dSPeter Avalos
8022940b44dSPeter Avalos // NOTE: Must not copy dict.limit.
8032940b44dSPeter Avalos dictptr->pos = dict.pos;
8042940b44dSPeter Avalos dictptr->full = dict.full;
8052940b44dSPeter Avalos
8062940b44dSPeter Avalos rc_from_local(coder->rc, *in_pos);
8072940b44dSPeter Avalos
8082940b44dSPeter Avalos coder->state = state;
8092940b44dSPeter Avalos coder->rep0 = rep0;
8102940b44dSPeter Avalos coder->rep1 = rep1;
8112940b44dSPeter Avalos coder->rep2 = rep2;
8122940b44dSPeter Avalos coder->rep3 = rep3;
8132940b44dSPeter Avalos
8142940b44dSPeter Avalos coder->probs = probs;
8152940b44dSPeter Avalos coder->symbol = symbol;
8162940b44dSPeter Avalos coder->limit = limit;
8172940b44dSPeter Avalos coder->offset = offset;
8182940b44dSPeter Avalos coder->len = len;
8192940b44dSPeter Avalos
8202940b44dSPeter Avalos // Update the remaining amount of uncompressed data if uncompressed
8212940b44dSPeter Avalos // size was known.
8222940b44dSPeter Avalos if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
8232940b44dSPeter Avalos coder->uncompressed_size -= dict.pos - dict_start;
8242940b44dSPeter Avalos
8252940b44dSPeter Avalos // Since there cannot be end of payload marker if the
8262940b44dSPeter Avalos // uncompressed size was known, we check here if we
8272940b44dSPeter Avalos // finished decoding.
8282940b44dSPeter Avalos if (coder->uncompressed_size == 0 && ret == LZMA_OK
8292940b44dSPeter Avalos && coder->sequence != SEQ_NORMALIZE)
8302940b44dSPeter Avalos ret = coder->sequence == SEQ_IS_MATCH
8312940b44dSPeter Avalos ? LZMA_STREAM_END : LZMA_DATA_ERROR;
8322940b44dSPeter Avalos }
8332940b44dSPeter Avalos
8342940b44dSPeter Avalos // We can do an additional check in the range decoder to catch some
8352940b44dSPeter Avalos // corrupted files.
8362940b44dSPeter Avalos if (ret == LZMA_STREAM_END) {
8372940b44dSPeter Avalos if (!rc_is_finished(coder->rc))
8382940b44dSPeter Avalos ret = LZMA_DATA_ERROR;
8392940b44dSPeter Avalos
8402940b44dSPeter Avalos // Reset the range decoder so that it is ready to reinitialize
8412940b44dSPeter Avalos // for a new LZMA2 chunk.
8422940b44dSPeter Avalos rc_reset(coder->rc);
8432940b44dSPeter Avalos }
8442940b44dSPeter Avalos
8452940b44dSPeter Avalos return ret;
8462940b44dSPeter Avalos }
8472940b44dSPeter Avalos
8482940b44dSPeter Avalos
8492940b44dSPeter Avalos
8502940b44dSPeter Avalos static void
lzma_decoder_uncompressed(void * coder_ptr,lzma_vli uncompressed_size)85146a2189dSzrj lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
8522940b44dSPeter Avalos {
85346a2189dSzrj lzma_lzma1_decoder *coder = coder_ptr;
8542940b44dSPeter Avalos coder->uncompressed_size = uncompressed_size;
8552940b44dSPeter Avalos }
8562940b44dSPeter Avalos
8572940b44dSPeter Avalos
8582940b44dSPeter Avalos static void
lzma_decoder_reset(void * coder_ptr,const void * opt)85946a2189dSzrj lzma_decoder_reset(void *coder_ptr, const void *opt)
8602940b44dSPeter Avalos {
86146a2189dSzrj lzma_lzma1_decoder *coder = coder_ptr;
8622940b44dSPeter Avalos const lzma_options_lzma *options = opt;
8632940b44dSPeter Avalos
8642940b44dSPeter Avalos // NOTE: We assume that lc/lp/pb are valid since they were
8652940b44dSPeter Avalos // successfully decoded with lzma_lzma_decode_properties().
8662940b44dSPeter Avalos
8672940b44dSPeter Avalos // Calculate pos_mask. We don't need pos_bits as is for anything.
8682940b44dSPeter Avalos coder->pos_mask = (1U << options->pb) - 1;
8692940b44dSPeter Avalos
8702940b44dSPeter Avalos // Initialize the literal decoder.
8712940b44dSPeter Avalos literal_init(coder->literal, options->lc, options->lp);
8722940b44dSPeter Avalos
8732940b44dSPeter Avalos coder->literal_context_bits = options->lc;
8742940b44dSPeter Avalos coder->literal_pos_mask = (1U << options->lp) - 1;
8752940b44dSPeter Avalos
8762940b44dSPeter Avalos // State
8772940b44dSPeter Avalos coder->state = STATE_LIT_LIT;
8782940b44dSPeter Avalos coder->rep0 = 0;
8792940b44dSPeter Avalos coder->rep1 = 0;
8802940b44dSPeter Avalos coder->rep2 = 0;
8812940b44dSPeter Avalos coder->rep3 = 0;
8822940b44dSPeter Avalos coder->pos_mask = (1U << options->pb) - 1;
8832940b44dSPeter Avalos
8842940b44dSPeter Avalos // Range decoder
8852940b44dSPeter Avalos rc_reset(coder->rc);
8862940b44dSPeter Avalos
8872940b44dSPeter Avalos // Bit and bittree decoders
8882940b44dSPeter Avalos for (uint32_t i = 0; i < STATES; ++i) {
8892940b44dSPeter Avalos for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
8902940b44dSPeter Avalos bit_reset(coder->is_match[i][j]);
8912940b44dSPeter Avalos bit_reset(coder->is_rep0_long[i][j]);
8922940b44dSPeter Avalos }
8932940b44dSPeter Avalos
8942940b44dSPeter Avalos bit_reset(coder->is_rep[i]);
8952940b44dSPeter Avalos bit_reset(coder->is_rep0[i]);
8962940b44dSPeter Avalos bit_reset(coder->is_rep1[i]);
8972940b44dSPeter Avalos bit_reset(coder->is_rep2[i]);
8982940b44dSPeter Avalos }
8992940b44dSPeter Avalos
90015ab8c86SJohn Marino for (uint32_t i = 0; i < DIST_STATES; ++i)
90115ab8c86SJohn Marino bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
9022940b44dSPeter Avalos
90315ab8c86SJohn Marino for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
9042940b44dSPeter Avalos bit_reset(coder->pos_special[i]);
9052940b44dSPeter Avalos
9062940b44dSPeter Avalos bittree_reset(coder->pos_align, ALIGN_BITS);
9072940b44dSPeter Avalos
9082940b44dSPeter Avalos // Len decoders (also bit/bittree)
9092940b44dSPeter Avalos const uint32_t num_pos_states = 1U << options->pb;
9102940b44dSPeter Avalos bit_reset(coder->match_len_decoder.choice);
9112940b44dSPeter Avalos bit_reset(coder->match_len_decoder.choice2);
9122940b44dSPeter Avalos bit_reset(coder->rep_len_decoder.choice);
9132940b44dSPeter Avalos bit_reset(coder->rep_len_decoder.choice2);
9142940b44dSPeter Avalos
9152940b44dSPeter Avalos for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
9162940b44dSPeter Avalos bittree_reset(coder->match_len_decoder.low[pos_state],
9172940b44dSPeter Avalos LEN_LOW_BITS);
9182940b44dSPeter Avalos bittree_reset(coder->match_len_decoder.mid[pos_state],
9192940b44dSPeter Avalos LEN_MID_BITS);
9202940b44dSPeter Avalos
9212940b44dSPeter Avalos bittree_reset(coder->rep_len_decoder.low[pos_state],
9222940b44dSPeter Avalos LEN_LOW_BITS);
9232940b44dSPeter Avalos bittree_reset(coder->rep_len_decoder.mid[pos_state],
9242940b44dSPeter Avalos LEN_MID_BITS);
9252940b44dSPeter Avalos }
9262940b44dSPeter Avalos
9272940b44dSPeter Avalos bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
9282940b44dSPeter Avalos bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
9292940b44dSPeter Avalos
9302940b44dSPeter Avalos coder->sequence = SEQ_IS_MATCH;
9312940b44dSPeter Avalos coder->probs = NULL;
9322940b44dSPeter Avalos coder->symbol = 0;
9332940b44dSPeter Avalos coder->limit = 0;
9342940b44dSPeter Avalos coder->offset = 0;
9352940b44dSPeter Avalos coder->len = 0;
9362940b44dSPeter Avalos
9372940b44dSPeter Avalos return;
9382940b44dSPeter Avalos }
9392940b44dSPeter Avalos
9402940b44dSPeter Avalos
9412940b44dSPeter Avalos extern lzma_ret
lzma_lzma_decoder_create(lzma_lz_decoder * lz,const lzma_allocator * allocator,const void * opt,lzma_lz_options * lz_options)94215ab8c86SJohn Marino lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
9432940b44dSPeter Avalos const void *opt, lzma_lz_options *lz_options)
9442940b44dSPeter Avalos {
9452940b44dSPeter Avalos if (lz->coder == NULL) {
94646a2189dSzrj lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
9472940b44dSPeter Avalos if (lz->coder == NULL)
9482940b44dSPeter Avalos return LZMA_MEM_ERROR;
9492940b44dSPeter Avalos
9502940b44dSPeter Avalos lz->code = &lzma_decode;
9512940b44dSPeter Avalos lz->reset = &lzma_decoder_reset;
9522940b44dSPeter Avalos lz->set_uncompressed = &lzma_decoder_uncompressed;
9532940b44dSPeter Avalos }
9542940b44dSPeter Avalos
9552940b44dSPeter Avalos // All dictionary sizes are OK here. LZ decoder will take care of
9562940b44dSPeter Avalos // the special cases.
9572940b44dSPeter Avalos const lzma_options_lzma *options = opt;
9582940b44dSPeter Avalos lz_options->dict_size = options->dict_size;
9592940b44dSPeter Avalos lz_options->preset_dict = options->preset_dict;
9602940b44dSPeter Avalos lz_options->preset_dict_size = options->preset_dict_size;
9612940b44dSPeter Avalos
9622940b44dSPeter Avalos return LZMA_OK;
9632940b44dSPeter Avalos }
9642940b44dSPeter Avalos
9652940b44dSPeter Avalos
9662940b44dSPeter Avalos /// Allocate and initialize LZMA decoder. This is used only via LZ
9672940b44dSPeter Avalos /// initialization (lzma_lzma_decoder_init() passes function pointer to
9682940b44dSPeter Avalos /// the LZ initialization).
9692940b44dSPeter Avalos static lzma_ret
lzma_decoder_init(lzma_lz_decoder * lz,const lzma_allocator * allocator,const void * options,lzma_lz_options * lz_options)97015ab8c86SJohn Marino lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
9712940b44dSPeter Avalos const void *options, lzma_lz_options *lz_options)
9722940b44dSPeter Avalos {
9732940b44dSPeter Avalos if (!is_lclppb_valid(options))
9742940b44dSPeter Avalos return LZMA_PROG_ERROR;
9752940b44dSPeter Avalos
9762940b44dSPeter Avalos return_if_error(lzma_lzma_decoder_create(
9772940b44dSPeter Avalos lz, allocator, options, lz_options));
9782940b44dSPeter Avalos
9792940b44dSPeter Avalos lzma_decoder_reset(lz->coder, options);
9802940b44dSPeter Avalos lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
9812940b44dSPeter Avalos
9822940b44dSPeter Avalos return LZMA_OK;
9832940b44dSPeter Avalos }
9842940b44dSPeter Avalos
9852940b44dSPeter Avalos
9862940b44dSPeter Avalos extern lzma_ret
lzma_lzma_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,const lzma_filter_info * filters)98715ab8c86SJohn Marino lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
9882940b44dSPeter Avalos const lzma_filter_info *filters)
9892940b44dSPeter Avalos {
9902940b44dSPeter Avalos // LZMA can only be the last filter in the chain. This is enforced
9912940b44dSPeter Avalos // by the raw_decoder initialization.
9922940b44dSPeter Avalos assert(filters[1].init == NULL);
9932940b44dSPeter Avalos
9942940b44dSPeter Avalos return lzma_lz_decoder_init(next, allocator, filters,
9952940b44dSPeter Avalos &lzma_decoder_init);
9962940b44dSPeter Avalos }
9972940b44dSPeter Avalos
9982940b44dSPeter Avalos
9992940b44dSPeter Avalos extern bool
lzma_lzma_lclppb_decode(lzma_options_lzma * options,uint8_t byte)10002940b44dSPeter Avalos lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
10012940b44dSPeter Avalos {
10022940b44dSPeter Avalos if (byte > (4 * 5 + 4) * 9 + 8)
10032940b44dSPeter Avalos return true;
10042940b44dSPeter Avalos
10052940b44dSPeter Avalos // See the file format specification to understand this.
10062940b44dSPeter Avalos options->pb = byte / (9 * 5);
10072940b44dSPeter Avalos byte -= options->pb * 9 * 5;
10082940b44dSPeter Avalos options->lp = byte / 9;
10092940b44dSPeter Avalos options->lc = byte - options->lp * 9;
10102940b44dSPeter Avalos
10112940b44dSPeter Avalos return options->lc + options->lp > LZMA_LCLP_MAX;
10122940b44dSPeter Avalos }
10132940b44dSPeter Avalos
10142940b44dSPeter Avalos
10152940b44dSPeter Avalos extern uint64_t
lzma_lzma_decoder_memusage_nocheck(const void * options)10162940b44dSPeter Avalos lzma_lzma_decoder_memusage_nocheck(const void *options)
10172940b44dSPeter Avalos {
10182940b44dSPeter Avalos const lzma_options_lzma *const opt = options;
101946a2189dSzrj return sizeof(lzma_lzma1_decoder)
102046a2189dSzrj + lzma_lz_decoder_memusage(opt->dict_size);
10212940b44dSPeter Avalos }
10222940b44dSPeter Avalos
10232940b44dSPeter Avalos
10242940b44dSPeter Avalos extern uint64_t
lzma_lzma_decoder_memusage(const void * options)10252940b44dSPeter Avalos lzma_lzma_decoder_memusage(const void *options)
10262940b44dSPeter Avalos {
10272940b44dSPeter Avalos if (!is_lclppb_valid(options))
10282940b44dSPeter Avalos return UINT64_MAX;
10292940b44dSPeter Avalos
10302940b44dSPeter Avalos return lzma_lzma_decoder_memusage_nocheck(options);
10312940b44dSPeter Avalos }
10322940b44dSPeter Avalos
10332940b44dSPeter Avalos
10342940b44dSPeter Avalos extern lzma_ret
lzma_lzma_props_decode(void ** options,const lzma_allocator * allocator,const uint8_t * props,size_t props_size)103515ab8c86SJohn Marino lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
10362940b44dSPeter Avalos const uint8_t *props, size_t props_size)
10372940b44dSPeter Avalos {
10382940b44dSPeter Avalos if (props_size != 5)
10392940b44dSPeter Avalos return LZMA_OPTIONS_ERROR;
10402940b44dSPeter Avalos
10412940b44dSPeter Avalos lzma_options_lzma *opt
10422940b44dSPeter Avalos = lzma_alloc(sizeof(lzma_options_lzma), allocator);
10432940b44dSPeter Avalos if (opt == NULL)
10442940b44dSPeter Avalos return LZMA_MEM_ERROR;
10452940b44dSPeter Avalos
10462940b44dSPeter Avalos if (lzma_lzma_lclppb_decode(opt, props[0]))
10472940b44dSPeter Avalos goto error;
10482940b44dSPeter Avalos
10492940b44dSPeter Avalos // All dictionary sizes are accepted, including zero. LZ decoder
10502940b44dSPeter Avalos // will automatically use a dictionary at least a few KiB even if
10512940b44dSPeter Avalos // a smaller dictionary is requested.
1052*e151908bSDaniel Fojt opt->dict_size = read32le(props + 1);
10532940b44dSPeter Avalos
10542940b44dSPeter Avalos opt->preset_dict = NULL;
10552940b44dSPeter Avalos opt->preset_dict_size = 0;
10562940b44dSPeter Avalos
10572940b44dSPeter Avalos *options = opt;
10582940b44dSPeter Avalos
10592940b44dSPeter Avalos return LZMA_OK;
10602940b44dSPeter Avalos
10612940b44dSPeter Avalos error:
10622940b44dSPeter Avalos lzma_free(opt, allocator);
10632940b44dSPeter Avalos return LZMA_OPTIONS_ERROR;
10642940b44dSPeter Avalos }
1065