xref: /netbsd-src/external/bsd/zstd/dist/tests/external_matchfinder.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Yann Collet, Meta Platforms, Inc.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #include "external_matchfinder.h"
12*3117ece4Schristos #include <string.h>
13*3117ece4Schristos #include "zstd_compress_internal.h"
14*3117ece4Schristos 
15*3117ece4Schristos #define HSIZE 1024
16*3117ece4Schristos static U32 const HLOG = 10;
17*3117ece4Schristos static U32 const MLS = 4;
18*3117ece4Schristos static U32 const BADIDX = 0xffffffff;
19*3117ece4Schristos 
20*3117ece4Schristos static size_t simpleSequenceProducer(
21*3117ece4Schristos   void* sequenceProducerState,
22*3117ece4Schristos   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
23*3117ece4Schristos   const void* src, size_t srcSize,
24*3117ece4Schristos   const void* dict, size_t dictSize,
25*3117ece4Schristos   int compressionLevel,
26*3117ece4Schristos   size_t windowSize
27*3117ece4Schristos ) {
28*3117ece4Schristos     const BYTE* const istart = (const BYTE*)src;
29*3117ece4Schristos     const BYTE* const iend = istart + srcSize;
30*3117ece4Schristos     const BYTE* ip = istart;
31*3117ece4Schristos     const BYTE* anchor = istart;
32*3117ece4Schristos     size_t seqCount = 0;
33*3117ece4Schristos     U32 hashTable[HSIZE];
34*3117ece4Schristos 
35*3117ece4Schristos     (void)sequenceProducerState;
36*3117ece4Schristos     (void)dict;
37*3117ece4Schristos     (void)dictSize;
38*3117ece4Schristos     (void)outSeqsCapacity;
39*3117ece4Schristos     (void)compressionLevel;
40*3117ece4Schristos 
41*3117ece4Schristos     {   int i;
42*3117ece4Schristos         for (i=0; i < HSIZE; i++) {
43*3117ece4Schristos             hashTable[i] = BADIDX;
44*3117ece4Schristos     }   }
45*3117ece4Schristos 
46*3117ece4Schristos     while (ip + MLS < iend) {
47*3117ece4Schristos         size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
48*3117ece4Schristos         U32 const matchIndex = hashTable[hash];
49*3117ece4Schristos         hashTable[hash] = (U32)(ip - istart);
50*3117ece4Schristos 
51*3117ece4Schristos         if (matchIndex != BADIDX) {
52*3117ece4Schristos             const BYTE* const match = istart + matchIndex;
53*3117ece4Schristos             U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
54*3117ece4Schristos             if (matchLen >= ZSTD_MINMATCH_MIN) {
55*3117ece4Schristos                 U32 const litLen = (U32)(ip - anchor);
56*3117ece4Schristos                 U32 const offset = (U32)(ip - match);
57*3117ece4Schristos                 ZSTD_Sequence const seq = {
58*3117ece4Schristos                     offset, litLen, matchLen, 0
59*3117ece4Schristos                 };
60*3117ece4Schristos 
61*3117ece4Schristos                 /* Note: it's crucial to stay within the window size! */
62*3117ece4Schristos                 if (offset <= windowSize) {
63*3117ece4Schristos                     outSeqs[seqCount++] = seq;
64*3117ece4Schristos                     ip += matchLen;
65*3117ece4Schristos                     anchor = ip;
66*3117ece4Schristos                     continue;
67*3117ece4Schristos                 }
68*3117ece4Schristos             }
69*3117ece4Schristos         }
70*3117ece4Schristos 
71*3117ece4Schristos         ip++;
72*3117ece4Schristos     }
73*3117ece4Schristos 
74*3117ece4Schristos     {   ZSTD_Sequence const finalSeq = {
75*3117ece4Schristos             0, (U32)(iend - anchor), 0, 0
76*3117ece4Schristos         };
77*3117ece4Schristos         outSeqs[seqCount++] = finalSeq;
78*3117ece4Schristos     }
79*3117ece4Schristos 
80*3117ece4Schristos     return seqCount;
81*3117ece4Schristos }
82*3117ece4Schristos 
83*3117ece4Schristos size_t zstreamSequenceProducer(
84*3117ece4Schristos   void* sequenceProducerState,
85*3117ece4Schristos   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
86*3117ece4Schristos   const void* src, size_t srcSize,
87*3117ece4Schristos   const void* dict, size_t dictSize,
88*3117ece4Schristos   int compressionLevel,
89*3117ece4Schristos   size_t windowSize
90*3117ece4Schristos ) {
91*3117ece4Schristos     EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
92*3117ece4Schristos     memset(outSeqs, 0, outSeqsCapacity);
93*3117ece4Schristos 
94*3117ece4Schristos     switch (testCase) {
95*3117ece4Schristos         case EMF_ZERO_SEQS:
96*3117ece4Schristos             return 0;
97*3117ece4Schristos         case EMF_ONE_BIG_SEQ:
98*3117ece4Schristos             outSeqs[0].offset = 0;
99*3117ece4Schristos             outSeqs[0].matchLength = 0;
100*3117ece4Schristos             outSeqs[0].litLength = (U32)(srcSize);
101*3117ece4Schristos             return 1;
102*3117ece4Schristos          case EMF_LOTS_OF_SEQS:
103*3117ece4Schristos             return simpleSequenceProducer(
104*3117ece4Schristos                 sequenceProducerState,
105*3117ece4Schristos                 outSeqs, outSeqsCapacity,
106*3117ece4Schristos                 src, srcSize,
107*3117ece4Schristos                 dict, dictSize,
108*3117ece4Schristos                 compressionLevel,
109*3117ece4Schristos                 windowSize
110*3117ece4Schristos             );
111*3117ece4Schristos         case EMF_INVALID_OFFSET:
112*3117ece4Schristos             outSeqs[0].offset = 1 << 20;
113*3117ece4Schristos             outSeqs[0].matchLength = 4;
114*3117ece4Schristos             outSeqs[0].litLength = (U32)(srcSize - 4);
115*3117ece4Schristos             return 1;
116*3117ece4Schristos         case EMF_INVALID_MATCHLEN:
117*3117ece4Schristos             outSeqs[0].offset = 1;
118*3117ece4Schristos             outSeqs[0].matchLength = (U32)(srcSize);
119*3117ece4Schristos             outSeqs[0].litLength = 1;
120*3117ece4Schristos             return 1;
121*3117ece4Schristos         case EMF_INVALID_LITLEN:
122*3117ece4Schristos             outSeqs[0].offset = 0;
123*3117ece4Schristos             outSeqs[0].matchLength = 0;
124*3117ece4Schristos             outSeqs[0].litLength = (U32)(srcSize + 1);
125*3117ece4Schristos             return 1;
126*3117ece4Schristos         case EMF_INVALID_LAST_LITS:
127*3117ece4Schristos             outSeqs[0].offset = 1;
128*3117ece4Schristos             outSeqs[0].matchLength = 1;
129*3117ece4Schristos             outSeqs[0].litLength = 1;
130*3117ece4Schristos             outSeqs[1].offset = 0;
131*3117ece4Schristos             outSeqs[1].matchLength = 0;
132*3117ece4Schristos             outSeqs[1].litLength = (U32)(srcSize - 1);
133*3117ece4Schristos             return 2;
134*3117ece4Schristos         case EMF_SMALL_ERROR:
135*3117ece4Schristos             return outSeqsCapacity + 1;
136*3117ece4Schristos         case EMF_BIG_ERROR:
137*3117ece4Schristos         default:
138*3117ece4Schristos             return ZSTD_SEQUENCE_PRODUCER_ERROR;
139*3117ece4Schristos     }
140*3117ece4Schristos }
141