1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Yann Collet, Meta Platforms, Inc. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include "external_matchfinder.h" 12*3117ece4Schristos #include <string.h> 13*3117ece4Schristos #include "zstd_compress_internal.h" 14*3117ece4Schristos 15*3117ece4Schristos #define HSIZE 1024 16*3117ece4Schristos static U32 const HLOG = 10; 17*3117ece4Schristos static U32 const MLS = 4; 18*3117ece4Schristos static U32 const BADIDX = 0xffffffff; 19*3117ece4Schristos 20*3117ece4Schristos static size_t simpleSequenceProducer( 21*3117ece4Schristos void* sequenceProducerState, 22*3117ece4Schristos ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 23*3117ece4Schristos const void* src, size_t srcSize, 24*3117ece4Schristos const void* dict, size_t dictSize, 25*3117ece4Schristos int compressionLevel, 26*3117ece4Schristos size_t windowSize 27*3117ece4Schristos ) { 28*3117ece4Schristos const BYTE* const istart = (const BYTE*)src; 29*3117ece4Schristos const BYTE* const iend = istart + srcSize; 30*3117ece4Schristos const BYTE* ip = istart; 31*3117ece4Schristos const BYTE* anchor = istart; 32*3117ece4Schristos size_t seqCount = 0; 33*3117ece4Schristos U32 hashTable[HSIZE]; 34*3117ece4Schristos 35*3117ece4Schristos (void)sequenceProducerState; 36*3117ece4Schristos (void)dict; 37*3117ece4Schristos (void)dictSize; 38*3117ece4Schristos (void)outSeqsCapacity; 39*3117ece4Schristos (void)compressionLevel; 40*3117ece4Schristos 41*3117ece4Schristos { int i; 42*3117ece4Schristos for (i=0; i < HSIZE; i++) { 43*3117ece4Schristos hashTable[i] = BADIDX; 44*3117ece4Schristos } } 45*3117ece4Schristos 46*3117ece4Schristos while (ip + MLS < iend) { 47*3117ece4Schristos size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); 48*3117ece4Schristos U32 const matchIndex = hashTable[hash]; 49*3117ece4Schristos hashTable[hash] = (U32)(ip - istart); 50*3117ece4Schristos 51*3117ece4Schristos if (matchIndex != BADIDX) { 52*3117ece4Schristos const BYTE* const match = istart + matchIndex; 53*3117ece4Schristos U32 const matchLen = (U32)ZSTD_count(ip, match, iend); 54*3117ece4Schristos if (matchLen >= ZSTD_MINMATCH_MIN) { 55*3117ece4Schristos U32 const litLen = (U32)(ip - anchor); 56*3117ece4Schristos U32 const offset = (U32)(ip - match); 57*3117ece4Schristos ZSTD_Sequence const seq = { 58*3117ece4Schristos offset, litLen, matchLen, 0 59*3117ece4Schristos }; 60*3117ece4Schristos 61*3117ece4Schristos /* Note: it's crucial to stay within the window size! */ 62*3117ece4Schristos if (offset <= windowSize) { 63*3117ece4Schristos outSeqs[seqCount++] = seq; 64*3117ece4Schristos ip += matchLen; 65*3117ece4Schristos anchor = ip; 66*3117ece4Schristos continue; 67*3117ece4Schristos } 68*3117ece4Schristos } 69*3117ece4Schristos } 70*3117ece4Schristos 71*3117ece4Schristos ip++; 72*3117ece4Schristos } 73*3117ece4Schristos 74*3117ece4Schristos { ZSTD_Sequence const finalSeq = { 75*3117ece4Schristos 0, (U32)(iend - anchor), 0, 0 76*3117ece4Schristos }; 77*3117ece4Schristos outSeqs[seqCount++] = finalSeq; 78*3117ece4Schristos } 79*3117ece4Schristos 80*3117ece4Schristos return seqCount; 81*3117ece4Schristos } 82*3117ece4Schristos 83*3117ece4Schristos size_t zstreamSequenceProducer( 84*3117ece4Schristos void* sequenceProducerState, 85*3117ece4Schristos ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 86*3117ece4Schristos const void* src, size_t srcSize, 87*3117ece4Schristos const void* dict, size_t dictSize, 88*3117ece4Schristos int compressionLevel, 89*3117ece4Schristos size_t windowSize 90*3117ece4Schristos ) { 91*3117ece4Schristos EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState); 92*3117ece4Schristos memset(outSeqs, 0, outSeqsCapacity); 93*3117ece4Schristos 94*3117ece4Schristos switch (testCase) { 95*3117ece4Schristos case EMF_ZERO_SEQS: 96*3117ece4Schristos return 0; 97*3117ece4Schristos case EMF_ONE_BIG_SEQ: 98*3117ece4Schristos outSeqs[0].offset = 0; 99*3117ece4Schristos outSeqs[0].matchLength = 0; 100*3117ece4Schristos outSeqs[0].litLength = (U32)(srcSize); 101*3117ece4Schristos return 1; 102*3117ece4Schristos case EMF_LOTS_OF_SEQS: 103*3117ece4Schristos return simpleSequenceProducer( 104*3117ece4Schristos sequenceProducerState, 105*3117ece4Schristos outSeqs, outSeqsCapacity, 106*3117ece4Schristos src, srcSize, 107*3117ece4Schristos dict, dictSize, 108*3117ece4Schristos compressionLevel, 109*3117ece4Schristos windowSize 110*3117ece4Schristos ); 111*3117ece4Schristos case EMF_INVALID_OFFSET: 112*3117ece4Schristos outSeqs[0].offset = 1 << 20; 113*3117ece4Schristos outSeqs[0].matchLength = 4; 114*3117ece4Schristos outSeqs[0].litLength = (U32)(srcSize - 4); 115*3117ece4Schristos return 1; 116*3117ece4Schristos case EMF_INVALID_MATCHLEN: 117*3117ece4Schristos outSeqs[0].offset = 1; 118*3117ece4Schristos outSeqs[0].matchLength = (U32)(srcSize); 119*3117ece4Schristos outSeqs[0].litLength = 1; 120*3117ece4Schristos return 1; 121*3117ece4Schristos case EMF_INVALID_LITLEN: 122*3117ece4Schristos outSeqs[0].offset = 0; 123*3117ece4Schristos outSeqs[0].matchLength = 0; 124*3117ece4Schristos outSeqs[0].litLength = (U32)(srcSize + 1); 125*3117ece4Schristos return 1; 126*3117ece4Schristos case EMF_INVALID_LAST_LITS: 127*3117ece4Schristos outSeqs[0].offset = 1; 128*3117ece4Schristos outSeqs[0].matchLength = 1; 129*3117ece4Schristos outSeqs[0].litLength = 1; 130*3117ece4Schristos outSeqs[1].offset = 0; 131*3117ece4Schristos outSeqs[1].matchLength = 0; 132*3117ece4Schristos outSeqs[1].litLength = (U32)(srcSize - 1); 133*3117ece4Schristos return 2; 134*3117ece4Schristos case EMF_SMALL_ERROR: 135*3117ece4Schristos return outSeqsCapacity + 1; 136*3117ece4Schristos case EMF_BIG_ERROR: 137*3117ece4Schristos default: 138*3117ece4Schristos return ZSTD_SEQUENCE_PRODUCER_ERROR; 139*3117ece4Schristos } 140*3117ece4Schristos } 141