xref: /netbsd-src/external/bsd/zstd/dist/tests/seqgen.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #include "seqgen.h"
12*3117ece4Schristos #include "mem.h"
13*3117ece4Schristos #include <string.h>
14*3117ece4Schristos 
15*3117ece4Schristos #define MIN(a, b)  ((a) < (b) ? (a) : (b))
16*3117ece4Schristos 
17*3117ece4Schristos static const size_t kMatchBytes = 128;
18*3117ece4Schristos 
19*3117ece4Schristos #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
20*3117ece4Schristos static BYTE SEQ_randByte(unsigned* src)
21*3117ece4Schristos {
22*3117ece4Schristos     static const U32 prime1 = 2654435761U;
23*3117ece4Schristos     static const U32 prime2 = 2246822519U;
24*3117ece4Schristos     U32 rand32 = *src;
25*3117ece4Schristos     rand32 *= prime1;
26*3117ece4Schristos     rand32 ^= prime2;
27*3117ece4Schristos     rand32  = SEQ_rotl32(rand32, 13);
28*3117ece4Schristos     *src = rand32;
29*3117ece4Schristos     return (BYTE)(rand32 >> 5);
30*3117ece4Schristos }
31*3117ece4Schristos 
32*3117ece4Schristos SEQ_stream SEQ_initStream(unsigned seed)
33*3117ece4Schristos {
34*3117ece4Schristos     SEQ_stream stream;
35*3117ece4Schristos     stream.state = 0;
36*3117ece4Schristos     XXH64_reset(&stream.xxh, 0);
37*3117ece4Schristos     stream.seed = seed;
38*3117ece4Schristos     return stream;
39*3117ece4Schristos }
40*3117ece4Schristos 
41*3117ece4Schristos /* Generates a single guard byte, then match length + 1 of a different byte,
42*3117ece4Schristos  * then another guard byte.
43*3117ece4Schristos  */
44*3117ece4Schristos static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
45*3117ece4Schristos                                   SEQ_outBuffer* out)
46*3117ece4Schristos {
47*3117ece4Schristos     typedef enum {
48*3117ece4Schristos         ml_first_byte = 0,
49*3117ece4Schristos         ml_match_bytes,
50*3117ece4Schristos         ml_last_byte,
51*3117ece4Schristos     } ml_state;
52*3117ece4Schristos     BYTE* const ostart = (BYTE*)out->dst;
53*3117ece4Schristos     BYTE* const oend = ostart + out->size;
54*3117ece4Schristos     BYTE* op = ostart + out->pos;
55*3117ece4Schristos 
56*3117ece4Schristos     switch ((ml_state)stream->state) {
57*3117ece4Schristos     case ml_first_byte:
58*3117ece4Schristos         /* Generate a single byte and pick a different byte for the match */
59*3117ece4Schristos         if (op >= oend) {
60*3117ece4Schristos             stream->bytesLeft = 1;
61*3117ece4Schristos             break;
62*3117ece4Schristos         }
63*3117ece4Schristos         *op = SEQ_randByte(&stream->seed) & 0xFF;
64*3117ece4Schristos         do {
65*3117ece4Schristos             stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66*3117ece4Schristos         } while (*op == stream->saved);
67*3117ece4Schristos         ++op;
68*3117ece4Schristos         /* State transition */
69*3117ece4Schristos         stream->state = ml_match_bytes;
70*3117ece4Schristos         stream->bytesLeft = value + 1;
71*3117ece4Schristos     /* fall-through */
72*3117ece4Schristos     case ml_match_bytes: {
73*3117ece4Schristos         /* Copy matchLength + 1 bytes to the output buffer */
74*3117ece4Schristos         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
75*3117ece4Schristos         if (setLength > 0) {
76*3117ece4Schristos             memset(op, stream->saved, setLength);
77*3117ece4Schristos             op += setLength;
78*3117ece4Schristos             stream->bytesLeft -= setLength;
79*3117ece4Schristos         }
80*3117ece4Schristos         if (stream->bytesLeft > 0)
81*3117ece4Schristos             break;
82*3117ece4Schristos         /* State transition */
83*3117ece4Schristos         stream->state = ml_last_byte;
84*3117ece4Schristos     }
85*3117ece4Schristos     /* fall-through */
86*3117ece4Schristos     case ml_last_byte:
87*3117ece4Schristos         /* Generate a single byte and pick a different byte for the match */
88*3117ece4Schristos         if (op >= oend) {
89*3117ece4Schristos             stream->bytesLeft = 1;
90*3117ece4Schristos             break;
91*3117ece4Schristos         }
92*3117ece4Schristos         do {
93*3117ece4Schristos             *op = SEQ_randByte(&stream->seed) & 0xFF;
94*3117ece4Schristos         } while (*op == stream->saved);
95*3117ece4Schristos         ++op;
96*3117ece4Schristos         /* State transition */
97*3117ece4Schristos     /* fall-through */
98*3117ece4Schristos     default:
99*3117ece4Schristos         stream->state = 0;
100*3117ece4Schristos         stream->bytesLeft = 0;
101*3117ece4Schristos         break;
102*3117ece4Schristos     }
103*3117ece4Schristos     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104*3117ece4Schristos     out->pos = op - ostart;
105*3117ece4Schristos     return stream->bytesLeft;
106*3117ece4Schristos }
107*3117ece4Schristos 
108*3117ece4Schristos /* Saves the current seed then generates kMatchBytes random bytes >= 128.
109*3117ece4Schristos  * Generates literal length - kMatchBytes random bytes < 128.
110*3117ece4Schristos  * Generates another kMatchBytes using the saved seed to generate a match.
111*3117ece4Schristos  * This way the match is easy to find for the compressors.
112*3117ece4Schristos  */
113*3117ece4Schristos static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
114*3117ece4Schristos {
115*3117ece4Schristos     typedef enum {
116*3117ece4Schristos         ll_start = 0,
117*3117ece4Schristos         ll_run_bytes,
118*3117ece4Schristos         ll_literals,
119*3117ece4Schristos         ll_run_match,
120*3117ece4Schristos     } ll_state;
121*3117ece4Schristos     BYTE* const ostart = (BYTE*)out->dst;
122*3117ece4Schristos     BYTE* const oend = ostart + out->size;
123*3117ece4Schristos     BYTE* op = ostart + out->pos;
124*3117ece4Schristos 
125*3117ece4Schristos     switch ((ll_state)stream->state) {
126*3117ece4Schristos     case ll_start:
127*3117ece4Schristos         stream->state = ll_run_bytes;
128*3117ece4Schristos         stream->saved = stream->seed;
129*3117ece4Schristos         stream->bytesLeft = MIN(kMatchBytes, value);
130*3117ece4Schristos     /* fall-through */
131*3117ece4Schristos     case ll_run_bytes:
132*3117ece4Schristos         while (stream->bytesLeft > 0 && op < oend) {
133*3117ece4Schristos             *op++ = SEQ_randByte(&stream->seed) | 0x80;
134*3117ece4Schristos             --stream->bytesLeft;
135*3117ece4Schristos         }
136*3117ece4Schristos         if (stream->bytesLeft > 0)
137*3117ece4Schristos             break;
138*3117ece4Schristos         /* State transition */
139*3117ece4Schristos         stream->state = ll_literals;
140*3117ece4Schristos         stream->bytesLeft = value - MIN(kMatchBytes, value);
141*3117ece4Schristos     /* fall-through */
142*3117ece4Schristos     case ll_literals:
143*3117ece4Schristos         while (stream->bytesLeft > 0 && op < oend) {
144*3117ece4Schristos             *op++ = SEQ_randByte(&stream->seed) & 0x7F;
145*3117ece4Schristos             --stream->bytesLeft;
146*3117ece4Schristos         }
147*3117ece4Schristos         if (stream->bytesLeft > 0)
148*3117ece4Schristos             break;
149*3117ece4Schristos         /* State transition */
150*3117ece4Schristos         stream->state = ll_run_match;
151*3117ece4Schristos         stream->bytesLeft = MIN(kMatchBytes, value);
152*3117ece4Schristos     /* fall-through */
153*3117ece4Schristos     case ll_run_match: {
154*3117ece4Schristos         while (stream->bytesLeft > 0 && op < oend) {
155*3117ece4Schristos             *op++ = SEQ_randByte(&stream->saved) | 0x80;
156*3117ece4Schristos             --stream->bytesLeft;
157*3117ece4Schristos         }
158*3117ece4Schristos         if (stream->bytesLeft > 0)
159*3117ece4Schristos             break;
160*3117ece4Schristos     }
161*3117ece4Schristos     /* fall-through */
162*3117ece4Schristos     default:
163*3117ece4Schristos         stream->state = 0;
164*3117ece4Schristos         stream->bytesLeft = 0;
165*3117ece4Schristos         break;
166*3117ece4Schristos     }
167*3117ece4Schristos     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168*3117ece4Schristos     out->pos = op - ostart;
169*3117ece4Schristos     return stream->bytesLeft;
170*3117ece4Schristos }
171*3117ece4Schristos 
172*3117ece4Schristos /* Saves the current seed then generates kMatchBytes random bytes >= 128.
173*3117ece4Schristos  * Generates offset - kMatchBytes of zeros to get a large offset without
174*3117ece4Schristos  * polluting the hash tables.
175*3117ece4Schristos  * Generates another kMatchBytes using the saved seed to generate a with the
176*3117ece4Schristos  * required offset.
177*3117ece4Schristos  */
178*3117ece4Schristos static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
179*3117ece4Schristos {
180*3117ece4Schristos     typedef enum {
181*3117ece4Schristos         of_start = 0,
182*3117ece4Schristos         of_run_bytes,
183*3117ece4Schristos         of_offset,
184*3117ece4Schristos         of_run_match,
185*3117ece4Schristos     } of_state;
186*3117ece4Schristos     BYTE* const ostart = (BYTE*)out->dst;
187*3117ece4Schristos     BYTE* const oend = ostart + out->size;
188*3117ece4Schristos     BYTE* op = ostart + out->pos;
189*3117ece4Schristos 
190*3117ece4Schristos     switch ((of_state)stream->state) {
191*3117ece4Schristos     case of_start:
192*3117ece4Schristos         stream->state = of_run_bytes;
193*3117ece4Schristos         stream->saved = stream->seed;
194*3117ece4Schristos         stream->bytesLeft = MIN(value, kMatchBytes);
195*3117ece4Schristos     /* fall-through */
196*3117ece4Schristos     case of_run_bytes: {
197*3117ece4Schristos         while (stream->bytesLeft > 0 && op < oend) {
198*3117ece4Schristos             *op++ = SEQ_randByte(&stream->seed) | 0x80;
199*3117ece4Schristos             --stream->bytesLeft;
200*3117ece4Schristos         }
201*3117ece4Schristos         if (stream->bytesLeft > 0)
202*3117ece4Schristos             break;
203*3117ece4Schristos         /* State transition */
204*3117ece4Schristos         stream->state = of_offset;
205*3117ece4Schristos         stream->bytesLeft = value - MIN(value, kMatchBytes);
206*3117ece4Schristos     }
207*3117ece4Schristos     /* fall-through */
208*3117ece4Schristos     case of_offset: {
209*3117ece4Schristos         /* Copy matchLength + 1 bytes to the output buffer */
210*3117ece4Schristos         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
211*3117ece4Schristos         if (setLength > 0) {
212*3117ece4Schristos             memset(op, 0, setLength);
213*3117ece4Schristos             op += setLength;
214*3117ece4Schristos             stream->bytesLeft -= setLength;
215*3117ece4Schristos         }
216*3117ece4Schristos         if (stream->bytesLeft > 0)
217*3117ece4Schristos             break;
218*3117ece4Schristos         /* State transition */
219*3117ece4Schristos         stream->state = of_run_match;
220*3117ece4Schristos         stream->bytesLeft = MIN(value, kMatchBytes);
221*3117ece4Schristos     }
222*3117ece4Schristos     /* fall-through */
223*3117ece4Schristos     case of_run_match: {
224*3117ece4Schristos         while (stream->bytesLeft > 0 && op < oend) {
225*3117ece4Schristos             *op++ = SEQ_randByte(&stream->saved) | 0x80;
226*3117ece4Schristos             --stream->bytesLeft;
227*3117ece4Schristos         }
228*3117ece4Schristos         if (stream->bytesLeft > 0)
229*3117ece4Schristos             break;
230*3117ece4Schristos     }
231*3117ece4Schristos     /* fall-through */
232*3117ece4Schristos     default:
233*3117ece4Schristos         stream->state = 0;
234*3117ece4Schristos         stream->bytesLeft = 0;
235*3117ece4Schristos         break;
236*3117ece4Schristos     }
237*3117ece4Schristos     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238*3117ece4Schristos     out->pos = op - ostart;
239*3117ece4Schristos     return stream->bytesLeft;
240*3117ece4Schristos }
241*3117ece4Schristos 
242*3117ece4Schristos /* Returns the number of bytes left to generate.
243*3117ece4Schristos  * Must pass the same type/value until it returns 0.
244*3117ece4Schristos  */
245*3117ece4Schristos size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
246*3117ece4Schristos {
247*3117ece4Schristos     switch (type) {
248*3117ece4Schristos         case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249*3117ece4Schristos         case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250*3117ece4Schristos         case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251*3117ece4Schristos         case SEQ_gen_max: /* fall-through */
252*3117ece4Schristos         default: return 0;
253*3117ece4Schristos     }
254*3117ece4Schristos }
255*3117ece4Schristos 
256*3117ece4Schristos /* Returns the xxhash of the data produced so far */
257*3117ece4Schristos XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
258*3117ece4Schristos {
259*3117ece4Schristos     return XXH64_digest(&stream->xxh);
260*3117ece4Schristos }
261