1 /* 2 * Copyright (c) Yann Collet, Meta Platforms, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #include "external_matchfinder.h" 12 #include <string.h> 13 #include "zstd_compress_internal.h" 14 15 #define HSIZE 1024 16 static U32 const HLOG = 10; 17 static U32 const MLS = 4; 18 static U32 const BADIDX = 0xffffffff; 19 20 static size_t simpleSequenceProducer( 21 void* sequenceProducerState, 22 ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 23 const void* src, size_t srcSize, 24 const void* dict, size_t dictSize, 25 int compressionLevel, 26 size_t windowSize 27 ) { 28 const BYTE* const istart = (const BYTE*)src; 29 const BYTE* const iend = istart + srcSize; 30 const BYTE* ip = istart; 31 const BYTE* anchor = istart; 32 size_t seqCount = 0; 33 U32 hashTable[HSIZE]; 34 35 (void)sequenceProducerState; 36 (void)dict; 37 (void)dictSize; 38 (void)outSeqsCapacity; 39 (void)compressionLevel; 40 41 { int i; 42 for (i=0; i < HSIZE; i++) { 43 hashTable[i] = BADIDX; 44 } } 45 46 while (ip + MLS < iend) { 47 size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); 48 U32 const matchIndex = hashTable[hash]; 49 hashTable[hash] = (U32)(ip - istart); 50 51 if (matchIndex != BADIDX) { 52 const BYTE* const match = istart + matchIndex; 53 U32 const matchLen = (U32)ZSTD_count(ip, match, iend); 54 if (matchLen >= ZSTD_MINMATCH_MIN) { 55 U32 const litLen = (U32)(ip - anchor); 56 U32 const offset = (U32)(ip - match); 57 ZSTD_Sequence const seq = { 58 offset, litLen, matchLen, 0 59 }; 60 61 /* Note: it's crucial to stay within the window size! */ 62 if (offset <= windowSize) { 63 outSeqs[seqCount++] = seq; 64 ip += matchLen; 65 anchor = ip; 66 continue; 67 } 68 } 69 } 70 71 ip++; 72 } 73 74 { ZSTD_Sequence const finalSeq = { 75 0, (U32)(iend - anchor), 0, 0 76 }; 77 outSeqs[seqCount++] = finalSeq; 78 } 79 80 return seqCount; 81 } 82 83 size_t zstreamSequenceProducer( 84 void* sequenceProducerState, 85 ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 86 const void* src, size_t srcSize, 87 const void* dict, size_t dictSize, 88 int compressionLevel, 89 size_t windowSize 90 ) { 91 EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState); 92 memset(outSeqs, 0, outSeqsCapacity); 93 94 switch (testCase) { 95 case EMF_ZERO_SEQS: 96 return 0; 97 case EMF_ONE_BIG_SEQ: 98 outSeqs[0].offset = 0; 99 outSeqs[0].matchLength = 0; 100 outSeqs[0].litLength = (U32)(srcSize); 101 return 1; 102 case EMF_LOTS_OF_SEQS: 103 return simpleSequenceProducer( 104 sequenceProducerState, 105 outSeqs, outSeqsCapacity, 106 src, srcSize, 107 dict, dictSize, 108 compressionLevel, 109 windowSize 110 ); 111 case EMF_INVALID_OFFSET: 112 outSeqs[0].offset = 1 << 20; 113 outSeqs[0].matchLength = 4; 114 outSeqs[0].litLength = (U32)(srcSize - 4); 115 return 1; 116 case EMF_INVALID_MATCHLEN: 117 outSeqs[0].offset = 1; 118 outSeqs[0].matchLength = (U32)(srcSize); 119 outSeqs[0].litLength = 1; 120 return 1; 121 case EMF_INVALID_LITLEN: 122 outSeqs[0].offset = 0; 123 outSeqs[0].matchLength = 0; 124 outSeqs[0].litLength = (U32)(srcSize + 1); 125 return 1; 126 case EMF_INVALID_LAST_LITS: 127 outSeqs[0].offset = 1; 128 outSeqs[0].matchLength = 1; 129 outSeqs[0].litLength = 1; 130 outSeqs[1].offset = 0; 131 outSeqs[1].matchLength = 0; 132 outSeqs[1].litLength = (U32)(srcSize - 1); 133 return 2; 134 case EMF_SMALL_ERROR: 135 return outSeqsCapacity + 1; 136 case EMF_BIG_ERROR: 137 default: 138 return ZSTD_SEQUENCE_PRODUCER_ERROR; 139 } 140 } 141