1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include <limits.h> 12*3117ece4Schristos #include <math.h> 13*3117ece4Schristos #include <stddef.h> 14*3117ece4Schristos #include <stdio.h> 15*3117ece4Schristos #include <stdlib.h> 16*3117ece4Schristos #include <string.h> 17*3117ece4Schristos #include <time.h> /* time(), for seed random initialization */ 18*3117ece4Schristos 19*3117ece4Schristos #include "util.h" 20*3117ece4Schristos #include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */ 21*3117ece4Schristos #include "zstd.h" 22*3117ece4Schristos #include "zstd_internal.h" 23*3117ece4Schristos #include "mem.h" 24*3117ece4Schristos #define ZDICT_STATIC_LINKING_ONLY 25*3117ece4Schristos #include "zdict.h" 26*3117ece4Schristos 27*3117ece4Schristos /* Direct access to internal compression functions is required */ 28*3117ece4Schristos #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */ 29*3117ece4Schristos #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */ 30*3117ece4Schristos 31*3117ece4Schristos #define XXH_STATIC_LINKING_ONLY 32*3117ece4Schristos #include "xxhash.h" /* XXH64 */ 33*3117ece4Schristos 34*3117ece4Schristos #if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) 35*3117ece4Schristos # define inline /* disable */ 36*3117ece4Schristos #endif 37*3117ece4Schristos 38*3117ece4Schristos /*-************************************ 39*3117ece4Schristos * DISPLAY Macros 40*3117ece4Schristos **************************************/ 41*3117ece4Schristos #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 42*3117ece4Schristos #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } 43*3117ece4Schristos static U32 g_displayLevel = 2; 44*3117ece4Schristos 45*3117ece4Schristos #define DISPLAYUPDATE(...) \ 46*3117ece4Schristos do { \ 47*3117ece4Schristos if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \ 48*3117ece4Schristos (g_displayLevel >= 4)) { \ 49*3117ece4Schristos g_displayClock = UTIL_getTime(); \ 50*3117ece4Schristos DISPLAY(__VA_ARGS__); \ 51*3117ece4Schristos if (g_displayLevel >= 4) fflush(stderr); \ 52*3117ece4Schristos } \ 53*3117ece4Schristos } while (0) 54*3117ece4Schristos 55*3117ece4Schristos static const U64 g_refreshRate = SEC_TO_MICRO / 6; 56*3117ece4Schristos static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 57*3117ece4Schristos 58*3117ece4Schristos #define CHECKERR(code) \ 59*3117ece4Schristos do { \ 60*3117ece4Schristos if (ZSTD_isError(code)) { \ 61*3117ece4Schristos DISPLAY("Error occurred while generating data: %s\n", \ 62*3117ece4Schristos ZSTD_getErrorName(code)); \ 63*3117ece4Schristos exit(1); \ 64*3117ece4Schristos } \ 65*3117ece4Schristos } while (0) 66*3117ece4Schristos 67*3117ece4Schristos 68*3117ece4Schristos /*-******************************************************* 69*3117ece4Schristos * Random function 70*3117ece4Schristos *********************************************************/ 71*3117ece4Schristos static U32 RAND(U32* src) 72*3117ece4Schristos { 73*3117ece4Schristos #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) 74*3117ece4Schristos static const U32 prime1 = 2654435761U; 75*3117ece4Schristos static const U32 prime2 = 2246822519U; 76*3117ece4Schristos U32 rand32 = *src; 77*3117ece4Schristos rand32 *= prime1; 78*3117ece4Schristos rand32 += prime2; 79*3117ece4Schristos rand32 = RAND_rotl32(rand32, 13); 80*3117ece4Schristos *src = rand32; 81*3117ece4Schristos return RAND_rotl32(rand32, 27); 82*3117ece4Schristos #undef RAND_rotl32 83*3117ece4Schristos } 84*3117ece4Schristos 85*3117ece4Schristos #define DISTSIZE (8192) 86*3117ece4Schristos 87*3117ece4Schristos /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ 88*3117ece4Schristos static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) 89*3117ece4Schristos { 90*3117ece4Schristos size_t i; 91*3117ece4Schristos BYTE* op = ptr; 92*3117ece4Schristos 93*3117ece4Schristos for (i = 0; i < size; i++) { 94*3117ece4Schristos op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); 95*3117ece4Schristos } 96*3117ece4Schristos } 97*3117ece4Schristos 98*3117ece4Schristos /* Write `size` random bytes into `ptr` */ 99*3117ece4Schristos static void RAND_buffer(U32* seed, void* ptr, size_t size) 100*3117ece4Schristos { 101*3117ece4Schristos size_t i; 102*3117ece4Schristos BYTE* op = ptr; 103*3117ece4Schristos 104*3117ece4Schristos for (i = 0; i + 4 <= size; i += 4) { 105*3117ece4Schristos MEM_writeLE32(op + i, RAND(seed)); 106*3117ece4Schristos } 107*3117ece4Schristos for (; i < size; i++) { 108*3117ece4Schristos op[i] = RAND(seed) & 0xff; 109*3117ece4Schristos } 110*3117ece4Schristos } 111*3117ece4Schristos 112*3117ece4Schristos /* Write `size` bytes into `ptr` following the distribution `dist` */ 113*3117ece4Schristos static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) 114*3117ece4Schristos { 115*3117ece4Schristos size_t i; 116*3117ece4Schristos BYTE* op = ptr; 117*3117ece4Schristos 118*3117ece4Schristos for (i = 0; i < size; i++) { 119*3117ece4Schristos op[i] = dist[RAND(seed) % DISTSIZE]; 120*3117ece4Schristos } 121*3117ece4Schristos } 122*3117ece4Schristos 123*3117ece4Schristos /* Generate a random distribution where the frequency of each symbol follows a 124*3117ece4Schristos * geometric distribution defined by `weight` 125*3117ece4Schristos * `dist` should have size at least `DISTSIZE` */ 126*3117ece4Schristos static void RAND_genDist(U32* seed, BYTE* dist, double weight) 127*3117ece4Schristos { 128*3117ece4Schristos size_t i = 0; 129*3117ece4Schristos size_t statesLeft = DISTSIZE; 130*3117ece4Schristos BYTE symb = (BYTE) (RAND(seed) % 256); 131*3117ece4Schristos BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ 132*3117ece4Schristos 133*3117ece4Schristos while (i < DISTSIZE) { 134*3117ece4Schristos size_t states = ((size_t)(weight * (double)statesLeft)) + 1; 135*3117ece4Schristos size_t j; 136*3117ece4Schristos for (j = 0; j < states && i < DISTSIZE; j++, i++) { 137*3117ece4Schristos dist[i] = symb; 138*3117ece4Schristos } 139*3117ece4Schristos 140*3117ece4Schristos symb += step; 141*3117ece4Schristos statesLeft -= states; 142*3117ece4Schristos } 143*3117ece4Schristos } 144*3117ece4Schristos 145*3117ece4Schristos /* Generates a random number in the range [min, max) */ 146*3117ece4Schristos static inline U32 RAND_range(U32* seed, U32 min, U32 max) 147*3117ece4Schristos { 148*3117ece4Schristos return (RAND(seed) % (max-min)) + min; 149*3117ece4Schristos } 150*3117ece4Schristos 151*3117ece4Schristos #define ROUND(x) ((U32)(x + 0.5)) 152*3117ece4Schristos 153*3117ece4Schristos /* Generates a random number in an exponential distribution with mean `mean` */ 154*3117ece4Schristos static double RAND_exp(U32* seed, double mean) 155*3117ece4Schristos { 156*3117ece4Schristos double const u = RAND(seed) / (double) UINT_MAX; 157*3117ece4Schristos return log(1-u) * (-mean); 158*3117ece4Schristos } 159*3117ece4Schristos 160*3117ece4Schristos /*-******************************************************* 161*3117ece4Schristos * Constants and Structs 162*3117ece4Schristos *********************************************************/ 163*3117ece4Schristos const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"}; 164*3117ece4Schristos 165*3117ece4Schristos #define MAX_DECOMPRESSED_SIZE_LOG 20 166*3117ece4Schristos #define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) 167*3117ece4Schristos 168*3117ece4Schristos #define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ 169*3117ece4Schristos 170*3117ece4Schristos #define MIN_SEQ_LEN (3) 171*3117ece4Schristos #define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) 172*3117ece4Schristos 173*3117ece4Schristos #ifndef MAX_PATH 174*3117ece4Schristos #ifdef PATH_MAX 175*3117ece4Schristos #define MAX_PATH PATH_MAX 176*3117ece4Schristos #else 177*3117ece4Schristos #define MAX_PATH 256 178*3117ece4Schristos #endif 179*3117ece4Schristos #endif 180*3117ece4Schristos 181*3117ece4Schristos BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; 182*3117ece4Schristos BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; 183*3117ece4Schristos BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; 184*3117ece4Schristos 185*3117ece4Schristos seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; 186*3117ece4Schristos BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ 187*3117ece4Schristos BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; 188*3117ece4Schristos BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; 189*3117ece4Schristos BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; 190*3117ece4Schristos 191*3117ece4Schristos U64 WKSP[HUF_WORKSPACE_SIZE_U64]; 192*3117ece4Schristos 193*3117ece4Schristos typedef struct { 194*3117ece4Schristos size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */ 195*3117ece4Schristos unsigned windowSize; /* contentSize >= windowSize means single segment */ 196*3117ece4Schristos } frameHeader_t; 197*3117ece4Schristos 198*3117ece4Schristos /* For repeat modes */ 199*3117ece4Schristos typedef struct { 200*3117ece4Schristos U32 rep[ZSTD_REP_NUM]; 201*3117ece4Schristos 202*3117ece4Schristos int hufInit; 203*3117ece4Schristos /* the distribution used in the previous block for repeat mode */ 204*3117ece4Schristos BYTE hufDist[DISTSIZE]; 205*3117ece4Schristos HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)]; 206*3117ece4Schristos 207*3117ece4Schristos int fseInit; 208*3117ece4Schristos FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; 209*3117ece4Schristos FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; 210*3117ece4Schristos FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; 211*3117ece4Schristos 212*3117ece4Schristos /* Symbols that were present in the previous distribution, for use with 213*3117ece4Schristos * set_repeat */ 214*3117ece4Schristos BYTE litlengthSymbolSet[36]; 215*3117ece4Schristos BYTE offsetSymbolSet[29]; 216*3117ece4Schristos BYTE matchlengthSymbolSet[53]; 217*3117ece4Schristos } cblockStats_t; 218*3117ece4Schristos 219*3117ece4Schristos typedef struct { 220*3117ece4Schristos void* data; 221*3117ece4Schristos void* dataStart; 222*3117ece4Schristos void* dataEnd; 223*3117ece4Schristos 224*3117ece4Schristos void* src; 225*3117ece4Schristos void* srcStart; 226*3117ece4Schristos void* srcEnd; 227*3117ece4Schristos 228*3117ece4Schristos frameHeader_t header; 229*3117ece4Schristos 230*3117ece4Schristos cblockStats_t stats; 231*3117ece4Schristos cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ 232*3117ece4Schristos } frame_t; 233*3117ece4Schristos 234*3117ece4Schristos typedef struct { 235*3117ece4Schristos int useDict; 236*3117ece4Schristos U32 dictID; 237*3117ece4Schristos size_t dictContentSize; 238*3117ece4Schristos BYTE* dictContent; 239*3117ece4Schristos } dictInfo; 240*3117ece4Schristos 241*3117ece4Schristos typedef enum { 242*3117ece4Schristos gt_frame = 0, /* generate frames */ 243*3117ece4Schristos gt_block, /* generate compressed blocks without block/frame headers */ 244*3117ece4Schristos } genType_e; 245*3117ece4Schristos 246*3117ece4Schristos #ifndef MIN 247*3117ece4Schristos #define MIN(a, b) ((a) < (b) ? (a) : (b)) 248*3117ece4Schristos #endif 249*3117ece4Schristos 250*3117ece4Schristos /*-******************************************************* 251*3117ece4Schristos * Global variables (set from command line) 252*3117ece4Schristos *********************************************************/ 253*3117ece4Schristos U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ 254*3117ece4Schristos U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ 255*3117ece4Schristos 256*3117ece4Schristos /*-******************************************************* 257*3117ece4Schristos * Generator Functions 258*3117ece4Schristos *********************************************************/ 259*3117ece4Schristos 260*3117ece4Schristos struct { 261*3117ece4Schristos int contentSize; /* force the content size to be present */ 262*3117ece4Schristos } opts; /* advanced options on generation */ 263*3117ece4Schristos 264*3117ece4Schristos /* Generate and write a random frame header */ 265*3117ece4Schristos static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) 266*3117ece4Schristos { 267*3117ece4Schristos BYTE* const op = frame->data; 268*3117ece4Schristos size_t pos = 0; 269*3117ece4Schristos frameHeader_t fh; 270*3117ece4Schristos 271*3117ece4Schristos BYTE windowByte = 0; 272*3117ece4Schristos 273*3117ece4Schristos int singleSegment = 0; 274*3117ece4Schristos int contentSizeFlag = 0; 275*3117ece4Schristos int fcsCode = 0; 276*3117ece4Schristos 277*3117ece4Schristos memset(&fh, 0, sizeof(fh)); 278*3117ece4Schristos 279*3117ece4Schristos /* generate window size */ 280*3117ece4Schristos { 281*3117ece4Schristos /* Follow window algorithm from specification */ 282*3117ece4Schristos int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); 283*3117ece4Schristos int const mantissa = RAND(seed) % 8; 284*3117ece4Schristos windowByte = (BYTE) ((exponent << 3) | mantissa); 285*3117ece4Schristos fh.windowSize = (1U << (exponent + 10)); 286*3117ece4Schristos fh.windowSize += fh.windowSize / 8 * mantissa; 287*3117ece4Schristos } 288*3117ece4Schristos 289*3117ece4Schristos { 290*3117ece4Schristos /* Generate random content size */ 291*3117ece4Schristos size_t highBit; 292*3117ece4Schristos if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { 293*3117ece4Schristos /* do content of at least 128 bytes */ 294*3117ece4Schristos highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); 295*3117ece4Schristos } else if (RAND(seed) & 3) { 296*3117ece4Schristos /* do small content */ 297*3117ece4Schristos highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); 298*3117ece4Schristos } else { 299*3117ece4Schristos /* 0 size frame */ 300*3117ece4Schristos highBit = 0; 301*3117ece4Schristos } 302*3117ece4Schristos fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; 303*3117ece4Schristos 304*3117ece4Schristos /* provide size sometimes */ 305*3117ece4Schristos contentSizeFlag = opts.contentSize | (RAND(seed) & 1); 306*3117ece4Schristos 307*3117ece4Schristos if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { 308*3117ece4Schristos /* do single segment sometimes */ 309*3117ece4Schristos fh.windowSize = (U32) fh.contentSize; 310*3117ece4Schristos singleSegment = 1; 311*3117ece4Schristos } 312*3117ece4Schristos } 313*3117ece4Schristos 314*3117ece4Schristos if (contentSizeFlag) { 315*3117ece4Schristos /* Determine how large fcs field has to be */ 316*3117ece4Schristos int minFcsCode = (fh.contentSize >= 256) + 317*3117ece4Schristos (fh.contentSize >= 65536 + 256) + 318*3117ece4Schristos (fh.contentSize > 0xFFFFFFFFU); 319*3117ece4Schristos if (!singleSegment && !minFcsCode) { 320*3117ece4Schristos minFcsCode = 1; 321*3117ece4Schristos } 322*3117ece4Schristos fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); 323*3117ece4Schristos if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; 324*3117ece4Schristos } 325*3117ece4Schristos 326*3117ece4Schristos /* write out the header */ 327*3117ece4Schristos MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); 328*3117ece4Schristos pos += 4; 329*3117ece4Schristos 330*3117ece4Schristos { 331*3117ece4Schristos /* 332*3117ece4Schristos * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) 333*3117ece4Schristos * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5) 334*3117ece4Schristos * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) 335*3117ece4Schristos * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) 336*3117ece4Schristos * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header 337*3117ece4Schristos */ 338*3117ece4Schristos int const dictBits = info.useDict ? 3 : 0; 339*3117ece4Schristos BYTE const frameHeaderDescriptor = 340*3117ece4Schristos (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); 341*3117ece4Schristos op[pos++] = frameHeaderDescriptor; 342*3117ece4Schristos } 343*3117ece4Schristos 344*3117ece4Schristos if (!singleSegment) { 345*3117ece4Schristos op[pos++] = windowByte; 346*3117ece4Schristos } 347*3117ece4Schristos if (info.useDict) { 348*3117ece4Schristos MEM_writeLE32(op + pos, (U32) info.dictID); 349*3117ece4Schristos pos += 4; 350*3117ece4Schristos } 351*3117ece4Schristos if (contentSizeFlag) { 352*3117ece4Schristos switch (fcsCode) { 353*3117ece4Schristos default: /* Impossible */ 354*3117ece4Schristos case 0: op[pos++] = (BYTE) fh.contentSize; break; 355*3117ece4Schristos case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; 356*3117ece4Schristos case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; 357*3117ece4Schristos case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; 358*3117ece4Schristos } 359*3117ece4Schristos } 360*3117ece4Schristos 361*3117ece4Schristos DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize); 362*3117ece4Schristos DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); 363*3117ece4Schristos DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); 364*3117ece4Schristos DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); 365*3117ece4Schristos 366*3117ece4Schristos frame->data = op + pos; 367*3117ece4Schristos frame->header = fh; 368*3117ece4Schristos } 369*3117ece4Schristos 370*3117ece4Schristos /* Write a literal block in either raw or RLE form, return the literals size */ 371*3117ece4Schristos static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) 372*3117ece4Schristos { 373*3117ece4Schristos BYTE* op = (BYTE*)frame->data; 374*3117ece4Schristos int const type = RAND(seed) % 2; 375*3117ece4Schristos int const sizeFormatDesc = RAND(seed) % 8; 376*3117ece4Schristos size_t litSize; 377*3117ece4Schristos size_t maxLitSize = MIN(contentSize, g_maxBlockSize); 378*3117ece4Schristos 379*3117ece4Schristos if (sizeFormatDesc == 0) { 380*3117ece4Schristos /* Size_FormatDesc = ?0 */ 381*3117ece4Schristos maxLitSize = MIN(maxLitSize, 31); 382*3117ece4Schristos } else if (sizeFormatDesc <= 4) { 383*3117ece4Schristos /* Size_FormatDesc = 01 */ 384*3117ece4Schristos maxLitSize = MIN(maxLitSize, 4095); 385*3117ece4Schristos } else { 386*3117ece4Schristos /* Size_Format = 11 */ 387*3117ece4Schristos maxLitSize = MIN(maxLitSize, 1048575); 388*3117ece4Schristos } 389*3117ece4Schristos 390*3117ece4Schristos litSize = RAND(seed) % (maxLitSize + 1); 391*3117ece4Schristos if (frame->src == frame->srcStart && litSize == 0) { 392*3117ece4Schristos litSize = 1; /* no empty literals if there's nothing preceding this block */ 393*3117ece4Schristos } 394*3117ece4Schristos if (litSize + 3 > contentSize) { 395*3117ece4Schristos litSize = contentSize; /* no matches shorter than 3 are allowed */ 396*3117ece4Schristos } 397*3117ece4Schristos /* use smallest size format that fits */ 398*3117ece4Schristos if (litSize < 32) { 399*3117ece4Schristos op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; 400*3117ece4Schristos op += 1; 401*3117ece4Schristos } else if (litSize < 4096) { 402*3117ece4Schristos op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; 403*3117ece4Schristos op[1] = (litSize >> 4) & 0xff; 404*3117ece4Schristos op += 2; 405*3117ece4Schristos } else { 406*3117ece4Schristos op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; 407*3117ece4Schristos op[1] = (litSize >> 4) & 0xff; 408*3117ece4Schristos op[2] = (litSize >> 12) & 0xff; 409*3117ece4Schristos op += 3; 410*3117ece4Schristos } 411*3117ece4Schristos 412*3117ece4Schristos if (type == 0) { 413*3117ece4Schristos /* Raw literals */ 414*3117ece4Schristos DISPLAYLEVEL(4, " raw literals\n"); 415*3117ece4Schristos 416*3117ece4Schristos RAND_buffer(seed, LITERAL_BUFFER, litSize); 417*3117ece4Schristos memcpy(op, LITERAL_BUFFER, litSize); 418*3117ece4Schristos op += litSize; 419*3117ece4Schristos } else { 420*3117ece4Schristos /* RLE literals */ 421*3117ece4Schristos BYTE const symb = (BYTE) (RAND(seed) % 256); 422*3117ece4Schristos 423*3117ece4Schristos DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb); 424*3117ece4Schristos 425*3117ece4Schristos memset(LITERAL_BUFFER, symb, litSize); 426*3117ece4Schristos op[0] = symb; 427*3117ece4Schristos op++; 428*3117ece4Schristos } 429*3117ece4Schristos 430*3117ece4Schristos frame->data = op; 431*3117ece4Schristos 432*3117ece4Schristos return litSize; 433*3117ece4Schristos } 434*3117ece4Schristos 435*3117ece4Schristos /* Generate a Huffman header for the given source */ 436*3117ece4Schristos static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, 437*3117ece4Schristos const void* src, size_t srcSize) 438*3117ece4Schristos { 439*3117ece4Schristos BYTE* const ostart = (BYTE*)dst; 440*3117ece4Schristos BYTE* op = ostart; 441*3117ece4Schristos 442*3117ece4Schristos unsigned huffLog = 11; 443*3117ece4Schristos unsigned maxSymbolValue = 255; 444*3117ece4Schristos 445*3117ece4Schristos unsigned count[HUF_SYMBOLVALUE_MAX+1]; 446*3117ece4Schristos 447*3117ece4Schristos /* Scan input and build symbol stats */ 448*3117ece4Schristos { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP)); 449*3117ece4Schristos assert(!HIST_isError(largest)); 450*3117ece4Schristos if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ 451*3117ece4Schristos if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ 452*3117ece4Schristos } 453*3117ece4Schristos 454*3117ece4Schristos /* Build Huffman Tree */ 455*3117ece4Schristos /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ 456*3117ece4Schristos huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); 457*3117ece4Schristos DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); 458*3117ece4Schristos { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); 459*3117ece4Schristos CHECKERR(maxBits); 460*3117ece4Schristos huffLog = (U32)maxBits; 461*3117ece4Schristos } 462*3117ece4Schristos 463*3117ece4Schristos /* Write table description header */ 464*3117ece4Schristos { size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); 465*3117ece4Schristos if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ 466*3117ece4Schristos op += hSize; 467*3117ece4Schristos } 468*3117ece4Schristos 469*3117ece4Schristos return op - ostart; 470*3117ece4Schristos } 471*3117ece4Schristos 472*3117ece4Schristos /* Write a Huffman coded literals block and return the literals size */ 473*3117ece4Schristos static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) 474*3117ece4Schristos { 475*3117ece4Schristos BYTE* origop = (BYTE*)frame->data; 476*3117ece4Schristos BYTE* opend = (BYTE*)frame->dataEnd; 477*3117ece4Schristos BYTE* op; 478*3117ece4Schristos BYTE* const ostart = origop; 479*3117ece4Schristos int const sizeFormat = RAND(seed) % 4; 480*3117ece4Schristos size_t litSize; 481*3117ece4Schristos size_t hufHeaderSize = 0; 482*3117ece4Schristos size_t compressedSize = 0; 483*3117ece4Schristos size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); 484*3117ece4Schristos 485*3117ece4Schristos symbolEncodingType_e hType; 486*3117ece4Schristos 487*3117ece4Schristos if (contentSize < 64) { 488*3117ece4Schristos /* make sure we get reasonably-sized literals for compression */ 489*3117ece4Schristos return ERROR(GENERIC); 490*3117ece4Schristos } 491*3117ece4Schristos 492*3117ece4Schristos DISPLAYLEVEL(4, " compressed literals\n"); 493*3117ece4Schristos 494*3117ece4Schristos switch (sizeFormat) { 495*3117ece4Schristos case 0: /* fall through, size is the same as case 1 */ 496*3117ece4Schristos case 1: 497*3117ece4Schristos maxLitSize = MIN(maxLitSize, 1023); 498*3117ece4Schristos origop += 3; 499*3117ece4Schristos break; 500*3117ece4Schristos case 2: 501*3117ece4Schristos maxLitSize = MIN(maxLitSize, 16383); 502*3117ece4Schristos origop += 4; 503*3117ece4Schristos break; 504*3117ece4Schristos case 3: 505*3117ece4Schristos maxLitSize = MIN(maxLitSize, 262143); 506*3117ece4Schristos origop += 5; 507*3117ece4Schristos break; 508*3117ece4Schristos default:; /* impossible */ 509*3117ece4Schristos } 510*3117ece4Schristos 511*3117ece4Schristos do { 512*3117ece4Schristos op = origop; 513*3117ece4Schristos do { 514*3117ece4Schristos litSize = RAND(seed) % (maxLitSize + 1); 515*3117ece4Schristos } while (litSize < 32); /* avoid small literal sizes */ 516*3117ece4Schristos if (litSize + 3 > contentSize) { 517*3117ece4Schristos litSize = contentSize; /* no matches shorter than 3 are allowed */ 518*3117ece4Schristos } 519*3117ece4Schristos 520*3117ece4Schristos /* most of the time generate a new distribution */ 521*3117ece4Schristos if ((RAND(seed) & 3) || !frame->stats.hufInit) { 522*3117ece4Schristos do { 523*3117ece4Schristos if (RAND(seed) & 3) { 524*3117ece4Schristos /* add 10 to ensure some compressibility */ 525*3117ece4Schristos double const weight = ((RAND(seed) % 90) + 10) / 100.0; 526*3117ece4Schristos 527*3117ece4Schristos DISPLAYLEVEL(5, " distribution weight: %d%%\n", 528*3117ece4Schristos (int)(weight * 100)); 529*3117ece4Schristos 530*3117ece4Schristos RAND_genDist(seed, frame->stats.hufDist, weight); 531*3117ece4Schristos } else { 532*3117ece4Schristos /* sometimes do restricted range literals to force 533*3117ece4Schristos * non-huffman headers */ 534*3117ece4Schristos DISPLAYLEVEL(5, " small range literals\n"); 535*3117ece4Schristos RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, 536*3117ece4Schristos 15); 537*3117ece4Schristos } 538*3117ece4Schristos RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 539*3117ece4Schristos litSize); 540*3117ece4Schristos 541*3117ece4Schristos /* generate the header from the distribution instead of the 542*3117ece4Schristos * actual data to avoid bugs with symbols that were in the 543*3117ece4Schristos * distribution but never showed up in the output */ 544*3117ece4Schristos hufHeaderSize = writeHufHeader( 545*3117ece4Schristos seed, frame->stats.hufTable, op, opend - op, 546*3117ece4Schristos frame->stats.hufDist, DISTSIZE); 547*3117ece4Schristos CHECKERR(hufHeaderSize); 548*3117ece4Schristos /* repeat until a valid header is written */ 549*3117ece4Schristos } while (hufHeaderSize == 0); 550*3117ece4Schristos op += hufHeaderSize; 551*3117ece4Schristos hType = set_compressed; 552*3117ece4Schristos 553*3117ece4Schristos frame->stats.hufInit = 1; 554*3117ece4Schristos } else { 555*3117ece4Schristos /* repeat the distribution/table from last time */ 556*3117ece4Schristos DISPLAYLEVEL(5, " huffman repeat stats\n"); 557*3117ece4Schristos RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 558*3117ece4Schristos litSize); 559*3117ece4Schristos hufHeaderSize = 0; 560*3117ece4Schristos hType = set_repeat; 561*3117ece4Schristos } 562*3117ece4Schristos 563*3117ece4Schristos do { 564*3117ece4Schristos compressedSize = 565*3117ece4Schristos sizeFormat == 0 566*3117ece4Schristos ? HUF_compress1X_usingCTable( 567*3117ece4Schristos op, opend - op, LITERAL_BUFFER, litSize, 568*3117ece4Schristos frame->stats.hufTable, /* flags */ 0) 569*3117ece4Schristos : HUF_compress4X_usingCTable( 570*3117ece4Schristos op, opend - op, LITERAL_BUFFER, litSize, 571*3117ece4Schristos frame->stats.hufTable, /* flags */ 0); 572*3117ece4Schristos CHECKERR(compressedSize); 573*3117ece4Schristos /* this only occurs when it could not compress or similar */ 574*3117ece4Schristos } while (compressedSize <= 0); 575*3117ece4Schristos 576*3117ece4Schristos op += compressedSize; 577*3117ece4Schristos 578*3117ece4Schristos compressedSize += hufHeaderSize; 579*3117ece4Schristos DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize); 580*3117ece4Schristos DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize); 581*3117ece4Schristos if (compressedSize >= litSize) { 582*3117ece4Schristos DISPLAYLEVEL(5, " trying again\n"); 583*3117ece4Schristos /* if we have to try again, reset the stats so we don't accidentally 584*3117ece4Schristos * try to repeat a distribution we just made */ 585*3117ece4Schristos frame->stats = frame->oldStats; 586*3117ece4Schristos } else { 587*3117ece4Schristos break; 588*3117ece4Schristos } 589*3117ece4Schristos } while (1); 590*3117ece4Schristos 591*3117ece4Schristos /* write header */ 592*3117ece4Schristos switch (sizeFormat) { 593*3117ece4Schristos case 0: /* fall through, size is the same as case 1 */ 594*3117ece4Schristos case 1: { 595*3117ece4Schristos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 596*3117ece4Schristos ((U32)compressedSize << 14); 597*3117ece4Schristos MEM_writeLE24(ostart, header); 598*3117ece4Schristos break; 599*3117ece4Schristos } 600*3117ece4Schristos case 2: { 601*3117ece4Schristos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 602*3117ece4Schristos ((U32)compressedSize << 18); 603*3117ece4Schristos MEM_writeLE32(ostart, header); 604*3117ece4Schristos break; 605*3117ece4Schristos } 606*3117ece4Schristos case 3: { 607*3117ece4Schristos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 608*3117ece4Schristos ((U32)compressedSize << 22); 609*3117ece4Schristos MEM_writeLE32(ostart, header); 610*3117ece4Schristos ostart[4] = (BYTE)(compressedSize >> 10); 611*3117ece4Schristos break; 612*3117ece4Schristos } 613*3117ece4Schristos default:; /* impossible */ 614*3117ece4Schristos } 615*3117ece4Schristos 616*3117ece4Schristos frame->data = op; 617*3117ece4Schristos return litSize; 618*3117ece4Schristos } 619*3117ece4Schristos 620*3117ece4Schristos static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) 621*3117ece4Schristos { 622*3117ece4Schristos /* only do compressed for larger segments to avoid compressibility issues */ 623*3117ece4Schristos if (RAND(seed) & 7 && contentSize >= 64) { 624*3117ece4Schristos return writeLiteralsBlockCompressed(seed, frame, contentSize); 625*3117ece4Schristos } else { 626*3117ece4Schristos return writeLiteralsBlockSimple(seed, frame, contentSize); 627*3117ece4Schristos } 628*3117ece4Schristos } 629*3117ece4Schristos 630*3117ece4Schristos static inline void initSeqStore(seqStore_t *seqStore) { 631*3117ece4Schristos seqStore->maxNbSeq = MAX_NB_SEQ; 632*3117ece4Schristos seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX; 633*3117ece4Schristos seqStore->sequencesStart = SEQUENCE_BUFFER; 634*3117ece4Schristos seqStore->litStart = SEQUENCE_LITERAL_BUFFER; 635*3117ece4Schristos seqStore->llCode = SEQUENCE_LLCODE; 636*3117ece4Schristos seqStore->mlCode = SEQUENCE_MLCODE; 637*3117ece4Schristos seqStore->ofCode = SEQUENCE_OFCODE; 638*3117ece4Schristos 639*3117ece4Schristos ZSTD_resetSeqStore(seqStore); 640*3117ece4Schristos } 641*3117ece4Schristos 642*3117ece4Schristos /* Randomly generate sequence commands */ 643*3117ece4Schristos static U32 644*3117ece4Schristos generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, 645*3117ece4Schristos size_t contentSize, size_t literalsSize, dictInfo info) 646*3117ece4Schristos { 647*3117ece4Schristos /* The total length of all the matches */ 648*3117ece4Schristos size_t const remainingMatch = contentSize - literalsSize; 649*3117ece4Schristos size_t excessMatch = 0; 650*3117ece4Schristos U32 numSequences = 0; 651*3117ece4Schristos U32 i; 652*3117ece4Schristos 653*3117ece4Schristos const BYTE* literals = LITERAL_BUFFER; 654*3117ece4Schristos BYTE* srcPtr = frame->src; 655*3117ece4Schristos 656*3117ece4Schristos if (literalsSize != contentSize) { 657*3117ece4Schristos /* each match must be at least MIN_SEQ_LEN, so this is the maximum 658*3117ece4Schristos * number of sequences we can have */ 659*3117ece4Schristos U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; 660*3117ece4Schristos numSequences = (RAND(seed) % maxSequences) + 1; 661*3117ece4Schristos 662*3117ece4Schristos /* the extra match lengths we have to allocate to each sequence */ 663*3117ece4Schristos excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; 664*3117ece4Schristos } 665*3117ece4Schristos 666*3117ece4Schristos DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch); 667*3117ece4Schristos for (i = 0; i < numSequences; i++) { 668*3117ece4Schristos /* Generate match and literal lengths by exponential distribution to 669*3117ece4Schristos * ensure nice numbers */ 670*3117ece4Schristos U32 matchLen = 671*3117ece4Schristos MIN_SEQ_LEN + 672*3117ece4Schristos ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i))); 673*3117ece4Schristos U32 literalLen = 674*3117ece4Schristos (RAND(seed) & 7) 675*3117ece4Schristos ? ROUND(RAND_exp(seed, 676*3117ece4Schristos (double)literalsSize / 677*3117ece4Schristos (double)(numSequences - i))) 678*3117ece4Schristos : 0; 679*3117ece4Schristos /* actual offset, code to send, and point to copy up to when shifting 680*3117ece4Schristos * codes in the repeat offsets history */ 681*3117ece4Schristos U32 offset, offBase, repIndex; 682*3117ece4Schristos 683*3117ece4Schristos /* bounds checks */ 684*3117ece4Schristos matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); 685*3117ece4Schristos literalLen = MIN(literalLen, (U32) literalsSize); 686*3117ece4Schristos if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; 687*3117ece4Schristos if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; 688*3117ece4Schristos 689*3117ece4Schristos memcpy(srcPtr, literals, literalLen); 690*3117ece4Schristos srcPtr += literalLen; 691*3117ece4Schristos do { 692*3117ece4Schristos if (RAND(seed) & 7) { 693*3117ece4Schristos /* do a normal offset */ 694*3117ece4Schristos U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); 695*3117ece4Schristos offset = (RAND(seed) % 696*3117ece4Schristos MIN(frame->header.windowSize, 697*3117ece4Schristos (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + 698*3117ece4Schristos 1; 699*3117ece4Schristos if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { 700*3117ece4Schristos /* need to occasionally generate offsets that go past the start */ 701*3117ece4Schristos /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ 702*3117ece4Schristos U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; 703*3117ece4Schristos offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; 704*3117ece4Schristos if (offset > frame->header.windowSize) { 705*3117ece4Schristos if (lenPastStart < MIN_SEQ_LEN) { 706*3117ece4Schristos /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ 707*3117ece4Schristos /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ 708*3117ece4Schristos /* make sure lenPastStart does not go past dictionary start though */ 709*3117ece4Schristos lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); 710*3117ece4Schristos offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; 711*3117ece4Schristos } 712*3117ece4Schristos { U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); 713*3117ece4Schristos matchLen = MIN(matchLen, matchLenBound); 714*3117ece4Schristos } 715*3117ece4Schristos } 716*3117ece4Schristos } 717*3117ece4Schristos offBase = OFFSET_TO_OFFBASE(offset); 718*3117ece4Schristos repIndex = 2; 719*3117ece4Schristos } else { 720*3117ece4Schristos /* do a repeat offset */ 721*3117ece4Schristos U32 const randomRepIndex = RAND(seed) % 3; 722*3117ece4Schristos offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1); /* expects values between 1 & 3 */ 723*3117ece4Schristos if (literalLen > 0) { 724*3117ece4Schristos offset = frame->stats.rep[randomRepIndex]; 725*3117ece4Schristos repIndex = randomRepIndex; 726*3117ece4Schristos } else { 727*3117ece4Schristos /* special case : literalLen == 0 */ 728*3117ece4Schristos offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1 729*3117ece4Schristos : frame->stats.rep[randomRepIndex + 1]; 730*3117ece4Schristos repIndex = MIN(2, randomRepIndex + 1); 731*3117ece4Schristos } 732*3117ece4Schristos } 733*3117ece4Schristos } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); 734*3117ece4Schristos 735*3117ece4Schristos { BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize); 736*3117ece4Schristos size_t j; 737*3117ece4Schristos for (j = 0; j < matchLen; j++) { 738*3117ece4Schristos if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { 739*3117ece4Schristos /* copy from dictionary instead of literals */ 740*3117ece4Schristos size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); 741*3117ece4Schristos *srcPtr = *(dictEnd - dictOffset); 742*3117ece4Schristos } 743*3117ece4Schristos else { 744*3117ece4Schristos *srcPtr = *(srcPtr-offset); 745*3117ece4Schristos } 746*3117ece4Schristos srcPtr++; 747*3117ece4Schristos } } 748*3117ece4Schristos 749*3117ece4Schristos { int r; 750*3117ece4Schristos for (r = repIndex; r > 0; r--) { 751*3117ece4Schristos frame->stats.rep[r] = frame->stats.rep[r - 1]; 752*3117ece4Schristos } 753*3117ece4Schristos frame->stats.rep[0] = offset; 754*3117ece4Schristos } 755*3117ece4Schristos 756*3117ece4Schristos DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", 757*3117ece4Schristos (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen); 758*3117ece4Schristos DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", 759*3117ece4Schristos (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i); 760*3117ece4Schristos DISPLAYLEVEL(6, "\n"); 761*3117ece4Schristos if (OFFBASE_IS_REPCODE(offBase)) { /* expects sumtype numeric representation of ZSTD_storeSeq() */ 762*3117ece4Schristos DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex); 763*3117ece4Schristos } 764*3117ece4Schristos /* use libzstd sequence handling */ 765*3117ece4Schristos ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen, 766*3117ece4Schristos offBase, matchLen); 767*3117ece4Schristos 768*3117ece4Schristos literalsSize -= literalLen; 769*3117ece4Schristos excessMatch -= (matchLen - MIN_SEQ_LEN); 770*3117ece4Schristos literals += literalLen; 771*3117ece4Schristos } 772*3117ece4Schristos 773*3117ece4Schristos memcpy(srcPtr, literals, literalsSize); 774*3117ece4Schristos srcPtr += literalsSize; 775*3117ece4Schristos DISPLAYLEVEL(6, " excess literals: %5u ", (unsigned)literalsSize); 776*3117ece4Schristos DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); 777*3117ece4Schristos DISPLAYLEVEL(6, "\n"); 778*3117ece4Schristos 779*3117ece4Schristos return numSequences; 780*3117ece4Schristos } 781*3117ece4Schristos 782*3117ece4Schristos static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) 783*3117ece4Schristos { 784*3117ece4Schristos size_t i; 785*3117ece4Schristos 786*3117ece4Schristos memset(set, 0, (size_t)maxSymbolValue+1); 787*3117ece4Schristos 788*3117ece4Schristos for (i = 0; i < len; i++) { 789*3117ece4Schristos set[symbols[i]] = 1; 790*3117ece4Schristos } 791*3117ece4Schristos } 792*3117ece4Schristos 793*3117ece4Schristos static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) 794*3117ece4Schristos { 795*3117ece4Schristos size_t i; 796*3117ece4Schristos 797*3117ece4Schristos for (i = 0; i < len; i++) { 798*3117ece4Schristos if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { 799*3117ece4Schristos return 0; 800*3117ece4Schristos } 801*3117ece4Schristos } 802*3117ece4Schristos return 1; 803*3117ece4Schristos } 804*3117ece4Schristos 805*3117ece4Schristos static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, 806*3117ece4Schristos size_t nbSeq) 807*3117ece4Schristos { 808*3117ece4Schristos /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ 809*3117ece4Schristos unsigned count[MaxSeq+1]; 810*3117ece4Schristos S16 norm[MaxSeq+1]; 811*3117ece4Schristos FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; 812*3117ece4Schristos FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; 813*3117ece4Schristos FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; 814*3117ece4Schristos U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 815*3117ece4Schristos const seqDef* const sequences = seqStorePtr->sequencesStart; 816*3117ece4Schristos const BYTE* const ofCodeTable = seqStorePtr->ofCode; 817*3117ece4Schristos const BYTE* const llCodeTable = seqStorePtr->llCode; 818*3117ece4Schristos const BYTE* const mlCodeTable = seqStorePtr->mlCode; 819*3117ece4Schristos BYTE* const oend = (BYTE*)frame->dataEnd; 820*3117ece4Schristos BYTE* op = (BYTE*)frame->data; 821*3117ece4Schristos BYTE* seqHead; 822*3117ece4Schristos BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)]; 823*3117ece4Schristos 824*3117ece4Schristos /* literals compressing block removed so that can be done separately */ 825*3117ece4Schristos 826*3117ece4Schristos /* Sequences Header */ 827*3117ece4Schristos if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); 828*3117ece4Schristos if (nbSeq < 128) *op++ = (BYTE)nbSeq; 829*3117ece4Schristos else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 830*3117ece4Schristos else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; 831*3117ece4Schristos 832*3117ece4Schristos if (nbSeq==0) { 833*3117ece4Schristos frame->data = op; 834*3117ece4Schristos return 0; 835*3117ece4Schristos } 836*3117ece4Schristos 837*3117ece4Schristos /* seqHead : flags for FSE encoding type */ 838*3117ece4Schristos seqHead = op++; 839*3117ece4Schristos 840*3117ece4Schristos /* convert length/distances into codes */ 841*3117ece4Schristos ZSTD_seqToCodes(seqStorePtr); 842*3117ece4Schristos 843*3117ece4Schristos /* CTable for Literal Lengths */ 844*3117ece4Schristos { unsigned max = MaxLL; 845*3117ece4Schristos size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 846*3117ece4Schristos assert(!HIST_isError(mostFrequent)); 847*3117ece4Schristos if (frame->stats.fseInit && !(RAND(seed) & 3) && 848*3117ece4Schristos isSymbolSubset(llCodeTable, nbSeq, 849*3117ece4Schristos frame->stats.litlengthSymbolSet, 35)) { 850*3117ece4Schristos /* maybe do repeat mode if we're allowed to */ 851*3117ece4Schristos LLtype = set_repeat; 852*3117ece4Schristos } else if (mostFrequent == nbSeq) { 853*3117ece4Schristos /* do RLE if we have the chance */ 854*3117ece4Schristos *op++ = llCodeTable[0]; 855*3117ece4Schristos FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); 856*3117ece4Schristos LLtype = set_rle; 857*3117ece4Schristos } else if (!(RAND(seed) & 3)) { 858*3117ece4Schristos /* maybe use the default distribution */ 859*3117ece4Schristos CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer))); 860*3117ece4Schristos LLtype = set_basic; 861*3117ece4Schristos } else { 862*3117ece4Schristos /* fall back on a full table */ 863*3117ece4Schristos size_t nbSeq_1 = nbSeq; 864*3117ece4Schristos const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); 865*3117ece4Schristos if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } 866*3117ece4Schristos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 867*3117ece4Schristos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 868*3117ece4Schristos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 869*3117ece4Schristos op += NCountSize; } 870*3117ece4Schristos CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer))); 871*3117ece4Schristos LLtype = set_compressed; 872*3117ece4Schristos } } 873*3117ece4Schristos 874*3117ece4Schristos /* CTable for Offsets */ 875*3117ece4Schristos /* see Literal Lengths for descriptions of mode choices */ 876*3117ece4Schristos { unsigned max = MaxOff; 877*3117ece4Schristos size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 878*3117ece4Schristos assert(!HIST_isError(mostFrequent)); 879*3117ece4Schristos if (frame->stats.fseInit && !(RAND(seed) & 3) && 880*3117ece4Schristos isSymbolSubset(ofCodeTable, nbSeq, 881*3117ece4Schristos frame->stats.offsetSymbolSet, 28)) { 882*3117ece4Schristos Offtype = set_repeat; 883*3117ece4Schristos } else if (mostFrequent == nbSeq) { 884*3117ece4Schristos *op++ = ofCodeTable[0]; 885*3117ece4Schristos FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); 886*3117ece4Schristos Offtype = set_rle; 887*3117ece4Schristos } else if (!(RAND(seed) & 3)) { 888*3117ece4Schristos FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 889*3117ece4Schristos Offtype = set_basic; 890*3117ece4Schristos } else { 891*3117ece4Schristos size_t nbSeq_1 = nbSeq; 892*3117ece4Schristos const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); 893*3117ece4Schristos if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } 894*3117ece4Schristos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 895*3117ece4Schristos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 896*3117ece4Schristos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 897*3117ece4Schristos op += NCountSize; } 898*3117ece4Schristos FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 899*3117ece4Schristos Offtype = set_compressed; 900*3117ece4Schristos } } 901*3117ece4Schristos 902*3117ece4Schristos /* CTable for MatchLengths */ 903*3117ece4Schristos /* see Literal Lengths for descriptions of mode choices */ 904*3117ece4Schristos { unsigned max = MaxML; 905*3117ece4Schristos size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 906*3117ece4Schristos assert(!HIST_isError(mostFrequent)); 907*3117ece4Schristos if (frame->stats.fseInit && !(RAND(seed) & 3) && 908*3117ece4Schristos isSymbolSubset(mlCodeTable, nbSeq, 909*3117ece4Schristos frame->stats.matchlengthSymbolSet, 52)) { 910*3117ece4Schristos MLtype = set_repeat; 911*3117ece4Schristos } else if (mostFrequent == nbSeq) { 912*3117ece4Schristos *op++ = *mlCodeTable; 913*3117ece4Schristos FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); 914*3117ece4Schristos MLtype = set_rle; 915*3117ece4Schristos } else if (!(RAND(seed) & 3)) { 916*3117ece4Schristos /* sometimes do default distribution */ 917*3117ece4Schristos FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 918*3117ece4Schristos MLtype = set_basic; 919*3117ece4Schristos } else { 920*3117ece4Schristos /* fall back on table */ 921*3117ece4Schristos size_t nbSeq_1 = nbSeq; 922*3117ece4Schristos const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); 923*3117ece4Schristos if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } 924*3117ece4Schristos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 925*3117ece4Schristos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 926*3117ece4Schristos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 927*3117ece4Schristos op += NCountSize; } 928*3117ece4Schristos FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 929*3117ece4Schristos MLtype = set_compressed; 930*3117ece4Schristos } } 931*3117ece4Schristos frame->stats.fseInit = 1; 932*3117ece4Schristos initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); 933*3117ece4Schristos initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); 934*3117ece4Schristos initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); 935*3117ece4Schristos 936*3117ece4Schristos DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype); 937*3117ece4Schristos 938*3117ece4Schristos *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 939*3117ece4Schristos 940*3117ece4Schristos /* Encoding Sequences */ 941*3117ece4Schristos { BIT_CStream_t blockStream; 942*3117ece4Schristos FSE_CState_t stateMatchLength; 943*3117ece4Schristos FSE_CState_t stateOffsetBits; 944*3117ece4Schristos FSE_CState_t stateLitLength; 945*3117ece4Schristos 946*3117ece4Schristos RETURN_ERROR_IF( 947*3117ece4Schristos ERR_isError(BIT_initCStream(&blockStream, op, oend-op)), 948*3117ece4Schristos dstSize_tooSmall, "not enough space remaining"); 949*3117ece4Schristos 950*3117ece4Schristos /* first symbols */ 951*3117ece4Schristos FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); 952*3117ece4Schristos FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); 953*3117ece4Schristos FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); 954*3117ece4Schristos BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); 955*3117ece4Schristos if (MEM_32bits()) BIT_flushBits(&blockStream); 956*3117ece4Schristos BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); 957*3117ece4Schristos if (MEM_32bits()) BIT_flushBits(&blockStream); 958*3117ece4Schristos BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); 959*3117ece4Schristos BIT_flushBits(&blockStream); 960*3117ece4Schristos 961*3117ece4Schristos { size_t n; 962*3117ece4Schristos for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ 963*3117ece4Schristos BYTE const llCode = llCodeTable[n]; 964*3117ece4Schristos BYTE const ofCode = ofCodeTable[n]; 965*3117ece4Schristos BYTE const mlCode = mlCodeTable[n]; 966*3117ece4Schristos U32 const llBits = LL_bits[llCode]; 967*3117ece4Schristos U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ 968*3117ece4Schristos U32 const mlBits = ML_bits[mlCode]; 969*3117ece4Schristos /* (7)*/ /* (7)*/ 970*3117ece4Schristos FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ 971*3117ece4Schristos FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ 972*3117ece4Schristos if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 973*3117ece4Schristos FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ 974*3117ece4Schristos if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) 975*3117ece4Schristos BIT_flushBits(&blockStream); /* (7)*/ 976*3117ece4Schristos BIT_addBits(&blockStream, sequences[n].litLength, llBits); 977*3117ece4Schristos if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 978*3117ece4Schristos BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); 979*3117ece4Schristos if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 980*3117ece4Schristos BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ 981*3117ece4Schristos BIT_flushBits(&blockStream); /* (7)*/ 982*3117ece4Schristos } } 983*3117ece4Schristos 984*3117ece4Schristos FSE_flushCState(&blockStream, &stateMatchLength); 985*3117ece4Schristos FSE_flushCState(&blockStream, &stateOffsetBits); 986*3117ece4Schristos FSE_flushCState(&blockStream, &stateLitLength); 987*3117ece4Schristos 988*3117ece4Schristos { size_t const streamSize = BIT_closeCStream(&blockStream); 989*3117ece4Schristos if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ 990*3117ece4Schristos op += streamSize; 991*3117ece4Schristos } } 992*3117ece4Schristos 993*3117ece4Schristos frame->data = op; 994*3117ece4Schristos 995*3117ece4Schristos return 0; 996*3117ece4Schristos } 997*3117ece4Schristos 998*3117ece4Schristos static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, 999*3117ece4Schristos size_t literalsSize, dictInfo info) 1000*3117ece4Schristos { 1001*3117ece4Schristos seqStore_t seqStore; 1002*3117ece4Schristos size_t numSequences; 1003*3117ece4Schristos 1004*3117ece4Schristos 1005*3117ece4Schristos initSeqStore(&seqStore); 1006*3117ece4Schristos 1007*3117ece4Schristos /* randomly generate sequences */ 1008*3117ece4Schristos numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); 1009*3117ece4Schristos /* write them out to the frame data */ 1010*3117ece4Schristos CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); 1011*3117ece4Schristos 1012*3117ece4Schristos return numSequences; 1013*3117ece4Schristos } 1014*3117ece4Schristos 1015*3117ece4Schristos static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) 1016*3117ece4Schristos { 1017*3117ece4Schristos BYTE* const blockStart = (BYTE*)frame->data; 1018*3117ece4Schristos size_t literalsSize; 1019*3117ece4Schristos size_t nbSeq; 1020*3117ece4Schristos 1021*3117ece4Schristos DISPLAYLEVEL(4, " compressed block:\n"); 1022*3117ece4Schristos 1023*3117ece4Schristos literalsSize = writeLiteralsBlock(seed, frame, contentSize); 1024*3117ece4Schristos 1025*3117ece4Schristos DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize); 1026*3117ece4Schristos 1027*3117ece4Schristos nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); 1028*3117ece4Schristos 1029*3117ece4Schristos DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq); 1030*3117ece4Schristos 1031*3117ece4Schristos return (BYTE*)frame->data - blockStart; 1032*3117ece4Schristos } 1033*3117ece4Schristos 1034*3117ece4Schristos static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, 1035*3117ece4Schristos int lastBlock, dictInfo info) 1036*3117ece4Schristos { 1037*3117ece4Schristos int const blockTypeDesc = RAND(seed) % 8; 1038*3117ece4Schristos size_t blockSize; 1039*3117ece4Schristos int blockType; 1040*3117ece4Schristos 1041*3117ece4Schristos BYTE *const header = (BYTE*)frame->data; 1042*3117ece4Schristos BYTE *op = header + 3; 1043*3117ece4Schristos 1044*3117ece4Schristos DISPLAYLEVEL(4, " block:\n"); 1045*3117ece4Schristos DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize); 1046*3117ece4Schristos DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); 1047*3117ece4Schristos 1048*3117ece4Schristos if (blockTypeDesc == 0) { 1049*3117ece4Schristos /* Raw data frame */ 1050*3117ece4Schristos 1051*3117ece4Schristos RAND_buffer(seed, frame->src, contentSize); 1052*3117ece4Schristos memcpy(op, frame->src, contentSize); 1053*3117ece4Schristos 1054*3117ece4Schristos op += contentSize; 1055*3117ece4Schristos blockType = 0; 1056*3117ece4Schristos blockSize = contentSize; 1057*3117ece4Schristos } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) { 1058*3117ece4Schristos /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/ 1059*3117ece4Schristos BYTE const symbol = RAND(seed) & 0xff; 1060*3117ece4Schristos 1061*3117ece4Schristos op[0] = symbol; 1062*3117ece4Schristos memset(frame->src, symbol, contentSize); 1063*3117ece4Schristos 1064*3117ece4Schristos op++; 1065*3117ece4Schristos blockType = 1; 1066*3117ece4Schristos blockSize = contentSize; 1067*3117ece4Schristos } else { 1068*3117ece4Schristos /* compressed, most common */ 1069*3117ece4Schristos size_t compressedSize; 1070*3117ece4Schristos blockType = 2; 1071*3117ece4Schristos 1072*3117ece4Schristos frame->oldStats = frame->stats; 1073*3117ece4Schristos 1074*3117ece4Schristos frame->data = op; 1075*3117ece4Schristos compressedSize = writeCompressedBlock(seed, frame, contentSize, info); 1076*3117ece4Schristos if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ 1077*3117ece4Schristos blockType = 0; 1078*3117ece4Schristos memcpy(op, frame->src, contentSize); 1079*3117ece4Schristos 1080*3117ece4Schristos op += contentSize; 1081*3117ece4Schristos blockSize = contentSize; /* fall back on raw block if data doesn't 1082*3117ece4Schristos compress */ 1083*3117ece4Schristos 1084*3117ece4Schristos frame->stats = frame->oldStats; /* don't update the stats */ 1085*3117ece4Schristos } else { 1086*3117ece4Schristos op += compressedSize; 1087*3117ece4Schristos blockSize = compressedSize; 1088*3117ece4Schristos } 1089*3117ece4Schristos } 1090*3117ece4Schristos frame->src = (BYTE*)frame->src + contentSize; 1091*3117ece4Schristos 1092*3117ece4Schristos DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); 1093*3117ece4Schristos DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize); 1094*3117ece4Schristos 1095*3117ece4Schristos header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); 1096*3117ece4Schristos MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); 1097*3117ece4Schristos 1098*3117ece4Schristos frame->data = op; 1099*3117ece4Schristos } 1100*3117ece4Schristos 1101*3117ece4Schristos static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) 1102*3117ece4Schristos { 1103*3117ece4Schristos size_t contentLeft = frame->header.contentSize; 1104*3117ece4Schristos size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1105*3117ece4Schristos while (1) { 1106*3117ece4Schristos /* 1 in 4 chance of ending frame */ 1107*3117ece4Schristos int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3); 1108*3117ece4Schristos size_t blockContentSize; 1109*3117ece4Schristos if (lastBlock) { 1110*3117ece4Schristos blockContentSize = contentLeft; 1111*3117ece4Schristos } else { 1112*3117ece4Schristos if (contentLeft > 0 && (RAND(seed) & 7)) { 1113*3117ece4Schristos /* some variable size block */ 1114*3117ece4Schristos blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); 1115*3117ece4Schristos } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { 1116*3117ece4Schristos /* some full size block */ 1117*3117ece4Schristos blockContentSize = maxBlockSize; 1118*3117ece4Schristos } else { 1119*3117ece4Schristos /* some empty block */ 1120*3117ece4Schristos blockContentSize = 0; 1121*3117ece4Schristos } 1122*3117ece4Schristos } 1123*3117ece4Schristos 1124*3117ece4Schristos writeBlock(seed, frame, blockContentSize, lastBlock, info); 1125*3117ece4Schristos 1126*3117ece4Schristos contentLeft -= blockContentSize; 1127*3117ece4Schristos if (lastBlock) break; 1128*3117ece4Schristos } 1129*3117ece4Schristos } 1130*3117ece4Schristos 1131*3117ece4Schristos static void writeChecksum(frame_t* frame) 1132*3117ece4Schristos { 1133*3117ece4Schristos /* write checksum so implementations can verify their output */ 1134*3117ece4Schristos U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); 1135*3117ece4Schristos DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest); 1136*3117ece4Schristos MEM_writeLE32(frame->data, (U32)digest); 1137*3117ece4Schristos frame->data = (BYTE*)frame->data + 4; 1138*3117ece4Schristos } 1139*3117ece4Schristos 1140*3117ece4Schristos static void outputBuffer(const void* buf, size_t size, const char* const path) 1141*3117ece4Schristos { 1142*3117ece4Schristos /* write data out to file */ 1143*3117ece4Schristos const BYTE* ip = (const BYTE*)buf; 1144*3117ece4Schristos FILE* out; 1145*3117ece4Schristos if (path) { 1146*3117ece4Schristos out = fopen(path, "wb"); 1147*3117ece4Schristos } else { 1148*3117ece4Schristos out = stdout; 1149*3117ece4Schristos } 1150*3117ece4Schristos if (!out) { 1151*3117ece4Schristos fprintf(stderr, "Failed to open file at %s: ", path); 1152*3117ece4Schristos perror(NULL); 1153*3117ece4Schristos exit(1); 1154*3117ece4Schristos } 1155*3117ece4Schristos 1156*3117ece4Schristos { size_t fsize = size; 1157*3117ece4Schristos size_t written = 0; 1158*3117ece4Schristos while (written < fsize) { 1159*3117ece4Schristos written += fwrite(ip + written, 1, fsize - written, out); 1160*3117ece4Schristos if (ferror(out)) { 1161*3117ece4Schristos fprintf(stderr, "Failed to write to file at %s: ", path); 1162*3117ece4Schristos perror(NULL); 1163*3117ece4Schristos exit(1); 1164*3117ece4Schristos } 1165*3117ece4Schristos } 1166*3117ece4Schristos } 1167*3117ece4Schristos 1168*3117ece4Schristos if (path) { 1169*3117ece4Schristos fclose(out); 1170*3117ece4Schristos } 1171*3117ece4Schristos } 1172*3117ece4Schristos 1173*3117ece4Schristos static void initFrame(frame_t* fr) 1174*3117ece4Schristos { 1175*3117ece4Schristos memset(fr, 0, sizeof(*fr)); 1176*3117ece4Schristos fr->data = fr->dataStart = FRAME_BUFFER; 1177*3117ece4Schristos fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); 1178*3117ece4Schristos fr->src = fr->srcStart = CONTENT_BUFFER; 1179*3117ece4Schristos fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); 1180*3117ece4Schristos 1181*3117ece4Schristos /* init repeat codes */ 1182*3117ece4Schristos fr->stats.rep[0] = 1; 1183*3117ece4Schristos fr->stats.rep[1] = 4; 1184*3117ece4Schristos fr->stats.rep[2] = 8; 1185*3117ece4Schristos } 1186*3117ece4Schristos 1187*3117ece4Schristos /** 1188*3117ece4Schristos * Generated a single zstd compressed block with no block/frame header. 1189*3117ece4Schristos * Returns the final seed. 1190*3117ece4Schristos */ 1191*3117ece4Schristos static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info) 1192*3117ece4Schristos { 1193*3117ece4Schristos size_t blockContentSize; 1194*3117ece4Schristos int blockWritten = 0; 1195*3117ece4Schristos BYTE* op; 1196*3117ece4Schristos DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed); 1197*3117ece4Schristos initFrame(frame); 1198*3117ece4Schristos op = (BYTE*)frame->data; 1199*3117ece4Schristos 1200*3117ece4Schristos while (!blockWritten) { 1201*3117ece4Schristos size_t cSize; 1202*3117ece4Schristos /* generate window size */ 1203*3117ece4Schristos { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10); 1204*3117ece4Schristos int const mantissa = RAND(&seed) % 8; 1205*3117ece4Schristos frame->header.windowSize = (1U << (exponent + 10)); 1206*3117ece4Schristos frame->header.windowSize += (frame->header.windowSize / 8) * mantissa; 1207*3117ece4Schristos } 1208*3117ece4Schristos 1209*3117ece4Schristos /* generate content size */ 1210*3117ece4Schristos { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1211*3117ece4Schristos if (RAND(&seed) & 15) { 1212*3117ece4Schristos /* some full size blocks */ 1213*3117ece4Schristos blockContentSize = maxBlockSize; 1214*3117ece4Schristos } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) { 1215*3117ece4Schristos /* some small blocks <= 128 bytes*/ 1216*3117ece4Schristos blockContentSize = RAND(&seed) % (1U << 7); 1217*3117ece4Schristos } else { 1218*3117ece4Schristos /* some variable size blocks */ 1219*3117ece4Schristos blockContentSize = RAND(&seed) % maxBlockSize; 1220*3117ece4Schristos } 1221*3117ece4Schristos } 1222*3117ece4Schristos 1223*3117ece4Schristos /* try generating a compressed block */ 1224*3117ece4Schristos frame->oldStats = frame->stats; 1225*3117ece4Schristos frame->data = op; 1226*3117ece4Schristos cSize = writeCompressedBlock(&seed, frame, blockContentSize, info); 1227*3117ece4Schristos if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */ 1228*3117ece4Schristos /* data doesn't compress -- try again */ 1229*3117ece4Schristos frame->stats = frame->oldStats; /* don't update the stats */ 1230*3117ece4Schristos DISPLAYLEVEL(5, " can't compress block : try again \n"); 1231*3117ece4Schristos } else { 1232*3117ece4Schristos blockWritten = 1; 1233*3117ece4Schristos DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize); 1234*3117ece4Schristos frame->src = (BYTE*)frame->src + blockContentSize; 1235*3117ece4Schristos } 1236*3117ece4Schristos } 1237*3117ece4Schristos return seed; 1238*3117ece4Schristos } 1239*3117ece4Schristos 1240*3117ece4Schristos /* Return the final seed */ 1241*3117ece4Schristos static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info) 1242*3117ece4Schristos { 1243*3117ece4Schristos /* generate a complete frame */ 1244*3117ece4Schristos DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed); 1245*3117ece4Schristos initFrame(fr); 1246*3117ece4Schristos 1247*3117ece4Schristos writeFrameHeader(&seed, fr, info); 1248*3117ece4Schristos writeBlocks(&seed, fr, info); 1249*3117ece4Schristos writeChecksum(fr); 1250*3117ece4Schristos 1251*3117ece4Schristos return seed; 1252*3117ece4Schristos } 1253*3117ece4Schristos 1254*3117ece4Schristos /*_******************************************************* 1255*3117ece4Schristos * Dictionary Helper Functions 1256*3117ece4Schristos *********************************************************/ 1257*3117ece4Schristos /* returns 0 if successful, otherwise returns 1 upon error */ 1258*3117ece4Schristos static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) 1259*3117ece4Schristos { 1260*3117ece4Schristos /* allocate space for samples */ 1261*3117ece4Schristos int ret = 0; 1262*3117ece4Schristos unsigned const numSamples = 4; 1263*3117ece4Schristos size_t sampleSizes[4]; 1264*3117ece4Schristos BYTE* const samples = malloc(5000*sizeof(BYTE)); 1265*3117ece4Schristos if (samples == NULL) { 1266*3117ece4Schristos DISPLAY("Error: could not allocate space for samples\n"); 1267*3117ece4Schristos return 1; 1268*3117ece4Schristos } 1269*3117ece4Schristos 1270*3117ece4Schristos /* generate samples */ 1271*3117ece4Schristos { unsigned literalValue = 1; 1272*3117ece4Schristos unsigned samplesPos = 0; 1273*3117ece4Schristos size_t currSize = 1; 1274*3117ece4Schristos while (literalValue <= 4) { 1275*3117ece4Schristos sampleSizes[literalValue - 1] = currSize; 1276*3117ece4Schristos { size_t k; 1277*3117ece4Schristos for (k = 0; k < currSize; k++) { 1278*3117ece4Schristos *(samples + (samplesPos++)) = (BYTE)literalValue; 1279*3117ece4Schristos } } 1280*3117ece4Schristos literalValue++; 1281*3117ece4Schristos currSize *= 16; 1282*3117ece4Schristos } } 1283*3117ece4Schristos 1284*3117ece4Schristos { size_t dictWriteSize = 0; 1285*3117ece4Schristos ZDICT_params_t zdictParams; 1286*3117ece4Schristos size_t const headerSize = MAX(dictSize/4, 256); 1287*3117ece4Schristos size_t const dictContentSize = dictSize - headerSize; 1288*3117ece4Schristos BYTE* const dictContent = fullDict + headerSize; 1289*3117ece4Schristos if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { 1290*3117ece4Schristos DISPLAY("Error: dictionary size is too small\n"); 1291*3117ece4Schristos ret = 1; 1292*3117ece4Schristos goto exitGenRandomDict; 1293*3117ece4Schristos } 1294*3117ece4Schristos 1295*3117ece4Schristos /* init dictionary params */ 1296*3117ece4Schristos memset(&zdictParams, 0, sizeof(zdictParams)); 1297*3117ece4Schristos zdictParams.dictID = dictID; 1298*3117ece4Schristos zdictParams.notificationLevel = 1; 1299*3117ece4Schristos 1300*3117ece4Schristos /* fill in dictionary content */ 1301*3117ece4Schristos RAND_buffer(&seed, (void*)dictContent, dictContentSize); 1302*3117ece4Schristos 1303*3117ece4Schristos /* finalize dictionary with random samples */ 1304*3117ece4Schristos dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, 1305*3117ece4Schristos dictContent, dictContentSize, 1306*3117ece4Schristos samples, sampleSizes, numSamples, 1307*3117ece4Schristos zdictParams); 1308*3117ece4Schristos 1309*3117ece4Schristos if (ZDICT_isError(dictWriteSize)) { 1310*3117ece4Schristos DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); 1311*3117ece4Schristos ret = 1; 1312*3117ece4Schristos } 1313*3117ece4Schristos } 1314*3117ece4Schristos 1315*3117ece4Schristos exitGenRandomDict: 1316*3117ece4Schristos free(samples); 1317*3117ece4Schristos return ret; 1318*3117ece4Schristos } 1319*3117ece4Schristos 1320*3117ece4Schristos static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ 1321*3117ece4Schristos /* allocate space statically */ 1322*3117ece4Schristos dictInfo dictOp; 1323*3117ece4Schristos memset(&dictOp, 0, sizeof(dictOp)); 1324*3117ece4Schristos dictOp.useDict = useDict; 1325*3117ece4Schristos dictOp.dictContentSize = dictContentSize; 1326*3117ece4Schristos dictOp.dictContent = dictContent; 1327*3117ece4Schristos dictOp.dictID = dictID; 1328*3117ece4Schristos return dictOp; 1329*3117ece4Schristos } 1330*3117ece4Schristos 1331*3117ece4Schristos /*-******************************************************* 1332*3117ece4Schristos * Test Mode 1333*3117ece4Schristos *********************************************************/ 1334*3117ece4Schristos 1335*3117ece4Schristos BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; 1336*3117ece4Schristos 1337*3117ece4Schristos static size_t testDecodeSimple(frame_t* fr) 1338*3117ece4Schristos { 1339*3117ece4Schristos /* test decoding the generated data with the simple API */ 1340*3117ece4Schristos size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1341*3117ece4Schristos fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1342*3117ece4Schristos 1343*3117ece4Schristos if (ZSTD_isError(ret)) return ret; 1344*3117ece4Schristos 1345*3117ece4Schristos if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1346*3117ece4Schristos (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1347*3117ece4Schristos return ERROR(corruption_detected); 1348*3117ece4Schristos } 1349*3117ece4Schristos 1350*3117ece4Schristos return ret; 1351*3117ece4Schristos } 1352*3117ece4Schristos 1353*3117ece4Schristos static size_t testDecodeStreaming(frame_t* fr) 1354*3117ece4Schristos { 1355*3117ece4Schristos /* test decoding the generated data with the streaming API */ 1356*3117ece4Schristos ZSTD_DStream* zd = ZSTD_createDStream(); 1357*3117ece4Schristos ZSTD_inBuffer in; 1358*3117ece4Schristos ZSTD_outBuffer out; 1359*3117ece4Schristos size_t ret; 1360*3117ece4Schristos 1361*3117ece4Schristos if (!zd) return ERROR(memory_allocation); 1362*3117ece4Schristos 1363*3117ece4Schristos in.src = fr->dataStart; 1364*3117ece4Schristos in.pos = 0; 1365*3117ece4Schristos in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; 1366*3117ece4Schristos 1367*3117ece4Schristos out.dst = DECOMPRESSED_BUFFER; 1368*3117ece4Schristos out.pos = 0; 1369*3117ece4Schristos out.size = ZSTD_DStreamOutSize(); 1370*3117ece4Schristos 1371*3117ece4Schristos ZSTD_initDStream(zd); 1372*3117ece4Schristos while (1) { 1373*3117ece4Schristos ret = ZSTD_decompressStream(zd, &out, &in); 1374*3117ece4Schristos if (ZSTD_isError(ret)) goto cleanup; /* error */ 1375*3117ece4Schristos if (ret == 0) break; /* frame is done */ 1376*3117ece4Schristos 1377*3117ece4Schristos /* force decoding to be done in chunks */ 1378*3117ece4Schristos out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); 1379*3117ece4Schristos } 1380*3117ece4Schristos 1381*3117ece4Schristos ret = out.pos; 1382*3117ece4Schristos 1383*3117ece4Schristos if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { 1384*3117ece4Schristos return ERROR(corruption_detected); 1385*3117ece4Schristos } 1386*3117ece4Schristos 1387*3117ece4Schristos cleanup: 1388*3117ece4Schristos ZSTD_freeDStream(zd); 1389*3117ece4Schristos return ret; 1390*3117ece4Schristos } 1391*3117ece4Schristos 1392*3117ece4Schristos static size_t testDecodeWithDict(U32 seed, genType_e genType) 1393*3117ece4Schristos { 1394*3117ece4Schristos /* create variables */ 1395*3117ece4Schristos size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; 1396*3117ece4Schristos U32 const dictID = RAND(&seed); 1397*3117ece4Schristos size_t errorDetected = 0; 1398*3117ece4Schristos BYTE* const fullDict = malloc(dictSize); 1399*3117ece4Schristos if (fullDict == NULL) { 1400*3117ece4Schristos return ERROR(GENERIC); 1401*3117ece4Schristos } 1402*3117ece4Schristos 1403*3117ece4Schristos /* generate random dictionary */ 1404*3117ece4Schristos if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ 1405*3117ece4Schristos errorDetected = ERROR(GENERIC); 1406*3117ece4Schristos goto dictTestCleanup; 1407*3117ece4Schristos } 1408*3117ece4Schristos 1409*3117ece4Schristos 1410*3117ece4Schristos { frame_t fr; 1411*3117ece4Schristos dictInfo info; 1412*3117ece4Schristos ZSTD_DCtx* const dctx = ZSTD_createDCtx(); 1413*3117ece4Schristos size_t ret; 1414*3117ece4Schristos 1415*3117ece4Schristos /* get dict info */ 1416*3117ece4Schristos { size_t const headerSize = MAX(dictSize/4, 256); 1417*3117ece4Schristos size_t const dictContentSize = dictSize-headerSize; 1418*3117ece4Schristos BYTE* const dictContent = fullDict+headerSize; 1419*3117ece4Schristos info = initDictInfo(1, dictContentSize, dictContent, dictID); 1420*3117ece4Schristos } 1421*3117ece4Schristos 1422*3117ece4Schristos /* manually decompress and check difference */ 1423*3117ece4Schristos if (genType == gt_frame) { 1424*3117ece4Schristos /* Test frame */ 1425*3117ece4Schristos generateFrame(seed, &fr, info); 1426*3117ece4Schristos ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1427*3117ece4Schristos fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, 1428*3117ece4Schristos fullDict, dictSize); 1429*3117ece4Schristos } else { 1430*3117ece4Schristos /* Test block */ 1431*3117ece4Schristos generateCompressedBlock(seed, &fr, info); 1432*3117ece4Schristos ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); 1433*3117ece4Schristos if (ZSTD_isError(ret)) { 1434*3117ece4Schristos errorDetected = ret; 1435*3117ece4Schristos ZSTD_freeDCtx(dctx); 1436*3117ece4Schristos goto dictTestCleanup; 1437*3117ece4Schristos } 1438*3117ece4Schristos ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1439*3117ece4Schristos fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); 1440*3117ece4Schristos } 1441*3117ece4Schristos ZSTD_freeDCtx(dctx); 1442*3117ece4Schristos 1443*3117ece4Schristos if (ZSTD_isError(ret)) { 1444*3117ece4Schristos errorDetected = ret; 1445*3117ece4Schristos goto dictTestCleanup; 1446*3117ece4Schristos } 1447*3117ece4Schristos 1448*3117ece4Schristos if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { 1449*3117ece4Schristos errorDetected = ERROR(corruption_detected); 1450*3117ece4Schristos goto dictTestCleanup; 1451*3117ece4Schristos } 1452*3117ece4Schristos } 1453*3117ece4Schristos 1454*3117ece4Schristos dictTestCleanup: 1455*3117ece4Schristos free(fullDict); 1456*3117ece4Schristos return errorDetected; 1457*3117ece4Schristos } 1458*3117ece4Schristos 1459*3117ece4Schristos static size_t testDecodeRawBlock(frame_t* fr) 1460*3117ece4Schristos { 1461*3117ece4Schristos ZSTD_DCtx* dctx = ZSTD_createDCtx(); 1462*3117ece4Schristos size_t ret = ZSTD_decompressBegin(dctx); 1463*3117ece4Schristos if (ZSTD_isError(ret)) return ret; 1464*3117ece4Schristos 1465*3117ece4Schristos ret = ZSTD_decompressBlock_deprecated( 1466*3117ece4Schristos dctx, 1467*3117ece4Schristos DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1468*3117ece4Schristos fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1469*3117ece4Schristos ZSTD_freeDCtx(dctx); 1470*3117ece4Schristos if (ZSTD_isError(ret)) return ret; 1471*3117ece4Schristos 1472*3117ece4Schristos if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1473*3117ece4Schristos (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1474*3117ece4Schristos return ERROR(corruption_detected); 1475*3117ece4Schristos } 1476*3117ece4Schristos 1477*3117ece4Schristos return ret; 1478*3117ece4Schristos } 1479*3117ece4Schristos 1480*3117ece4Schristos static int runBlockTest(U32* seed) 1481*3117ece4Schristos { 1482*3117ece4Schristos frame_t fr; 1483*3117ece4Schristos U32 const seedCopy = *seed; 1484*3117ece4Schristos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1485*3117ece4Schristos *seed = generateCompressedBlock(*seed, &fr, info); 1486*3117ece4Schristos } 1487*3117ece4Schristos 1488*3117ece4Schristos { size_t const r = testDecodeRawBlock(&fr); 1489*3117ece4Schristos if (ZSTD_isError(r)) { 1490*3117ece4Schristos DISPLAY("Error in block mode on test seed %u: %s\n", 1491*3117ece4Schristos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1492*3117ece4Schristos return 1; 1493*3117ece4Schristos } 1494*3117ece4Schristos } 1495*3117ece4Schristos 1496*3117ece4Schristos { size_t const r = testDecodeWithDict(*seed, gt_block); 1497*3117ece4Schristos if (ZSTD_isError(r)) { 1498*3117ece4Schristos DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", 1499*3117ece4Schristos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1500*3117ece4Schristos return 1; 1501*3117ece4Schristos } 1502*3117ece4Schristos } 1503*3117ece4Schristos return 0; 1504*3117ece4Schristos } 1505*3117ece4Schristos 1506*3117ece4Schristos static int runFrameTest(U32* seed) 1507*3117ece4Schristos { 1508*3117ece4Schristos frame_t fr; 1509*3117ece4Schristos U32 const seedCopy = *seed; 1510*3117ece4Schristos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1511*3117ece4Schristos *seed = generateFrame(*seed, &fr, info); 1512*3117ece4Schristos } 1513*3117ece4Schristos 1514*3117ece4Schristos { size_t const r = testDecodeSimple(&fr); 1515*3117ece4Schristos if (ZSTD_isError(r)) { 1516*3117ece4Schristos DISPLAY("Error in simple mode on test seed %u: %s\n", 1517*3117ece4Schristos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1518*3117ece4Schristos return 1; 1519*3117ece4Schristos } 1520*3117ece4Schristos } 1521*3117ece4Schristos { size_t const r = testDecodeStreaming(&fr); 1522*3117ece4Schristos if (ZSTD_isError(r)) { 1523*3117ece4Schristos DISPLAY("Error in streaming mode on test seed %u: %s\n", 1524*3117ece4Schristos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1525*3117ece4Schristos return 1; 1526*3117ece4Schristos } 1527*3117ece4Schristos } 1528*3117ece4Schristos { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ 1529*3117ece4Schristos if (ZSTD_isError(r)) { 1530*3117ece4Schristos DISPLAY("Error in dictionary mode on test seed %u: %s\n", 1531*3117ece4Schristos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1532*3117ece4Schristos return 1; 1533*3117ece4Schristos } 1534*3117ece4Schristos } 1535*3117ece4Schristos return 0; 1536*3117ece4Schristos } 1537*3117ece4Schristos 1538*3117ece4Schristos static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, 1539*3117ece4Schristos genType_e genType) 1540*3117ece4Schristos { 1541*3117ece4Schristos unsigned fnum; 1542*3117ece4Schristos 1543*3117ece4Schristos UTIL_time_t const startClock = UTIL_getTime(); 1544*3117ece4Schristos U64 const maxClockSpan = testDurationS * SEC_TO_MICRO; 1545*3117ece4Schristos 1546*3117ece4Schristos if (numFiles == 0 && !testDurationS) numFiles = 1; 1547*3117ece4Schristos 1548*3117ece4Schristos DISPLAY("seed: %u\n", (unsigned)seed); 1549*3117ece4Schristos 1550*3117ece4Schristos for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) { 1551*3117ece4Schristos if (fnum < numFiles) 1552*3117ece4Schristos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1553*3117ece4Schristos else 1554*3117ece4Schristos DISPLAYUPDATE("\r%u ", fnum); 1555*3117ece4Schristos 1556*3117ece4Schristos { int const ret = (genType == gt_frame) ? 1557*3117ece4Schristos runFrameTest(&seed) : 1558*3117ece4Schristos runBlockTest(&seed); 1559*3117ece4Schristos if (ret) return ret; 1560*3117ece4Schristos } 1561*3117ece4Schristos } 1562*3117ece4Schristos 1563*3117ece4Schristos DISPLAY("\r%u tests completed: ", fnum); 1564*3117ece4Schristos DISPLAY("OK\n"); 1565*3117ece4Schristos 1566*3117ece4Schristos return 0; 1567*3117ece4Schristos } 1568*3117ece4Schristos 1569*3117ece4Schristos /*-******************************************************* 1570*3117ece4Schristos * File I/O 1571*3117ece4Schristos *********************************************************/ 1572*3117ece4Schristos 1573*3117ece4Schristos static int generateFile(U32 seed, const char* const path, 1574*3117ece4Schristos const char* const origPath, genType_e genType) 1575*3117ece4Schristos { 1576*3117ece4Schristos frame_t fr; 1577*3117ece4Schristos 1578*3117ece4Schristos DISPLAY("seed: %u\n", (unsigned)seed); 1579*3117ece4Schristos 1580*3117ece4Schristos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1581*3117ece4Schristos if (genType == gt_frame) { 1582*3117ece4Schristos generateFrame(seed, &fr, info); 1583*3117ece4Schristos } else { 1584*3117ece4Schristos generateCompressedBlock(seed, &fr, info); 1585*3117ece4Schristos } 1586*3117ece4Schristos } 1587*3117ece4Schristos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1588*3117ece4Schristos if (origPath) { 1589*3117ece4Schristos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1590*3117ece4Schristos } 1591*3117ece4Schristos return 0; 1592*3117ece4Schristos } 1593*3117ece4Schristos 1594*3117ece4Schristos static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, 1595*3117ece4Schristos const char* const origPath, genType_e genType) 1596*3117ece4Schristos { 1597*3117ece4Schristos char outPath[MAX_PATH]; 1598*3117ece4Schristos unsigned fnum; 1599*3117ece4Schristos 1600*3117ece4Schristos DISPLAY("seed: %u\n", (unsigned)seed); 1601*3117ece4Schristos 1602*3117ece4Schristos for (fnum = 0; fnum < numFiles; fnum++) { 1603*3117ece4Schristos frame_t fr; 1604*3117ece4Schristos 1605*3117ece4Schristos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1606*3117ece4Schristos 1607*3117ece4Schristos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1608*3117ece4Schristos if (genType == gt_frame) { 1609*3117ece4Schristos seed = generateFrame(seed, &fr, info); 1610*3117ece4Schristos } else { 1611*3117ece4Schristos seed = generateCompressedBlock(seed, &fr, info); 1612*3117ece4Schristos } 1613*3117ece4Schristos } 1614*3117ece4Schristos 1615*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1616*3117ece4Schristos DISPLAY("Error: path too long\n"); 1617*3117ece4Schristos return 1; 1618*3117ece4Schristos } 1619*3117ece4Schristos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1620*3117ece4Schristos 1621*3117ece4Schristos if (origPath) { 1622*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1623*3117ece4Schristos DISPLAY("Error: path too long\n"); 1624*3117ece4Schristos return 1; 1625*3117ece4Schristos } 1626*3117ece4Schristos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1627*3117ece4Schristos } 1628*3117ece4Schristos } 1629*3117ece4Schristos 1630*3117ece4Schristos DISPLAY("\r%u/%u \n", fnum, numFiles); 1631*3117ece4Schristos 1632*3117ece4Schristos return 0; 1633*3117ece4Schristos } 1634*3117ece4Schristos 1635*3117ece4Schristos static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, 1636*3117ece4Schristos const char* const origPath, const size_t dictSize, 1637*3117ece4Schristos genType_e genType) 1638*3117ece4Schristos { 1639*3117ece4Schristos char outPath[MAX_PATH]; 1640*3117ece4Schristos BYTE* fullDict; 1641*3117ece4Schristos U32 const dictID = RAND(&seed); 1642*3117ece4Schristos int errorDetected = 0; 1643*3117ece4Schristos 1644*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1645*3117ece4Schristos DISPLAY("Error: path too long\n"); 1646*3117ece4Schristos return 1; 1647*3117ece4Schristos } 1648*3117ece4Schristos 1649*3117ece4Schristos /* allocate space for the dictionary */ 1650*3117ece4Schristos fullDict = malloc(dictSize); 1651*3117ece4Schristos if (fullDict == NULL) { 1652*3117ece4Schristos DISPLAY("Error: could not allocate space for full dictionary.\n"); 1653*3117ece4Schristos return 1; 1654*3117ece4Schristos } 1655*3117ece4Schristos 1656*3117ece4Schristos /* randomly generate the dictionary */ 1657*3117ece4Schristos { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); 1658*3117ece4Schristos if (ret != 0) { 1659*3117ece4Schristos errorDetected = ret; 1660*3117ece4Schristos goto dictCleanup; 1661*3117ece4Schristos } 1662*3117ece4Schristos } 1663*3117ece4Schristos 1664*3117ece4Schristos /* write out dictionary */ 1665*3117ece4Schristos if (numFiles != 0) { 1666*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1667*3117ece4Schristos DISPLAY("Error: dictionary path too long\n"); 1668*3117ece4Schristos errorDetected = 1; 1669*3117ece4Schristos goto dictCleanup; 1670*3117ece4Schristos } 1671*3117ece4Schristos outputBuffer(fullDict, dictSize, outPath); 1672*3117ece4Schristos } 1673*3117ece4Schristos else { 1674*3117ece4Schristos outputBuffer(fullDict, dictSize, "dictionary"); 1675*3117ece4Schristos } 1676*3117ece4Schristos 1677*3117ece4Schristos /* generate random compressed/decompressed files */ 1678*3117ece4Schristos { unsigned fnum; 1679*3117ece4Schristos for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { 1680*3117ece4Schristos frame_t fr; 1681*3117ece4Schristos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1682*3117ece4Schristos { 1683*3117ece4Schristos size_t const headerSize = MAX(dictSize/4, 256); 1684*3117ece4Schristos size_t const dictContentSize = dictSize-headerSize; 1685*3117ece4Schristos BYTE* const dictContent = fullDict+headerSize; 1686*3117ece4Schristos dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); 1687*3117ece4Schristos if (genType == gt_frame) { 1688*3117ece4Schristos seed = generateFrame(seed, &fr, info); 1689*3117ece4Schristos } else { 1690*3117ece4Schristos seed = generateCompressedBlock(seed, &fr, info); 1691*3117ece4Schristos } 1692*3117ece4Schristos } 1693*3117ece4Schristos 1694*3117ece4Schristos if (numFiles != 0) { 1695*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1696*3117ece4Schristos DISPLAY("Error: path too long\n"); 1697*3117ece4Schristos errorDetected = 1; 1698*3117ece4Schristos goto dictCleanup; 1699*3117ece4Schristos } 1700*3117ece4Schristos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1701*3117ece4Schristos 1702*3117ece4Schristos if (origPath) { 1703*3117ece4Schristos if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1704*3117ece4Schristos DISPLAY("Error: path too long\n"); 1705*3117ece4Schristos errorDetected = 1; 1706*3117ece4Schristos goto dictCleanup; 1707*3117ece4Schristos } 1708*3117ece4Schristos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1709*3117ece4Schristos } 1710*3117ece4Schristos } 1711*3117ece4Schristos else { 1712*3117ece4Schristos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1713*3117ece4Schristos if (origPath) { 1714*3117ece4Schristos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1715*3117ece4Schristos } 1716*3117ece4Schristos } 1717*3117ece4Schristos } 1718*3117ece4Schristos } 1719*3117ece4Schristos 1720*3117ece4Schristos dictCleanup: 1721*3117ece4Schristos free(fullDict); 1722*3117ece4Schristos return errorDetected; 1723*3117ece4Schristos } 1724*3117ece4Schristos 1725*3117ece4Schristos 1726*3117ece4Schristos /*_******************************************************* 1727*3117ece4Schristos * Command line 1728*3117ece4Schristos *********************************************************/ 1729*3117ece4Schristos static U32 makeSeed(void) 1730*3117ece4Schristos { 1731*3117ece4Schristos U32 t = (U32) time(NULL); 1732*3117ece4Schristos return XXH32(&t, sizeof(t), 0) % 65536; 1733*3117ece4Schristos } 1734*3117ece4Schristos 1735*3117ece4Schristos static unsigned readInt(const char** argument) 1736*3117ece4Schristos { 1737*3117ece4Schristos unsigned val = 0; 1738*3117ece4Schristos while ((**argument>='0') && (**argument<='9')) { 1739*3117ece4Schristos val *= 10; 1740*3117ece4Schristos val += **argument - '0'; 1741*3117ece4Schristos (*argument)++; 1742*3117ece4Schristos } 1743*3117ece4Schristos return val; 1744*3117ece4Schristos } 1745*3117ece4Schristos 1746*3117ece4Schristos static void usage(const char* programName) 1747*3117ece4Schristos { 1748*3117ece4Schristos DISPLAY( "Usage :\n"); 1749*3117ece4Schristos DISPLAY( " %s [args]\n", programName); 1750*3117ece4Schristos DISPLAY( "\n"); 1751*3117ece4Schristos DISPLAY( "Arguments :\n"); 1752*3117ece4Schristos DISPLAY( " -p<path> : select output path (default:stdout)\n"); 1753*3117ece4Schristos DISPLAY( " in multiple files mode this should be a directory\n"); 1754*3117ece4Schristos DISPLAY( " -o<path> : select path to output original file (default:no output)\n"); 1755*3117ece4Schristos DISPLAY( " in multiple files mode this should be a directory\n"); 1756*3117ece4Schristos DISPLAY( " -s# : select seed (default:random based on time)\n"); 1757*3117ece4Schristos DISPLAY( " -n# : number of files to generate (default:1)\n"); 1758*3117ece4Schristos DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"); 1759*3117ece4Schristos DISPLAY( " -T# : length of time to run tests for\n"); 1760*3117ece4Schristos DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"); 1761*3117ece4Schristos DISPLAY( " -h/H : display help/long help and exit\n"); 1762*3117ece4Schristos } 1763*3117ece4Schristos 1764*3117ece4Schristos static void advancedUsage(const char* programName) 1765*3117ece4Schristos { 1766*3117ece4Schristos usage(programName); 1767*3117ece4Schristos DISPLAY( "\n"); 1768*3117ece4Schristos DISPLAY( "Advanced arguments :\n"); 1769*3117ece4Schristos DISPLAY( " --content-size : always include the content size in the frame header\n"); 1770*3117ece4Schristos DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n"); 1771*3117ece4Schristos DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n"); 1772*3117ece4Schristos DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n"); 1773*3117ece4Schristos DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n"); 1774*3117ece4Schristos DISPLAY( " (this is ignored with gen-blocks)\n"); 1775*3117ece4Schristos } 1776*3117ece4Schristos 1777*3117ece4Schristos /*! readU32FromChar() : 1778*3117ece4Schristos @return : unsigned integer value read from input in `char` format 1779*3117ece4Schristos allows and interprets K, KB, KiB, M, MB and MiB suffix. 1780*3117ece4Schristos Will also modify `*stringPtr`, advancing it to position where it stopped reading. 1781*3117ece4Schristos Note : function result can overflow if digit string > MAX_UINT */ 1782*3117ece4Schristos static unsigned readU32FromChar(const char** stringPtr) 1783*3117ece4Schristos { 1784*3117ece4Schristos unsigned result = 0; 1785*3117ece4Schristos while ((**stringPtr >='0') && (**stringPtr <='9')) 1786*3117ece4Schristos result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; 1787*3117ece4Schristos if ((**stringPtr=='K') || (**stringPtr=='M')) { 1788*3117ece4Schristos result <<= 10; 1789*3117ece4Schristos if (**stringPtr=='M') result <<= 10; 1790*3117ece4Schristos (*stringPtr)++ ; 1791*3117ece4Schristos if (**stringPtr=='i') (*stringPtr)++; 1792*3117ece4Schristos if (**stringPtr=='B') (*stringPtr)++; 1793*3117ece4Schristos } 1794*3117ece4Schristos return result; 1795*3117ece4Schristos } 1796*3117ece4Schristos 1797*3117ece4Schristos /** longCommandWArg() : 1798*3117ece4Schristos * check if *stringPtr is the same as longCommand. 1799*3117ece4Schristos * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. 1800*3117ece4Schristos * @return 0 and doesn't modify *stringPtr otherwise. 1801*3117ece4Schristos */ 1802*3117ece4Schristos static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) 1803*3117ece4Schristos { 1804*3117ece4Schristos size_t const comSize = strlen(longCommand); 1805*3117ece4Schristos int const result = !strncmp(*stringPtr, longCommand, comSize); 1806*3117ece4Schristos if (result) *stringPtr += comSize; 1807*3117ece4Schristos return result; 1808*3117ece4Schristos } 1809*3117ece4Schristos 1810*3117ece4Schristos int main(int argc, char** argv) 1811*3117ece4Schristos { 1812*3117ece4Schristos U32 seed = 0; 1813*3117ece4Schristos int seedset = 0; 1814*3117ece4Schristos unsigned numFiles = 0; 1815*3117ece4Schristos unsigned testDuration = 0; 1816*3117ece4Schristos int testMode = 0; 1817*3117ece4Schristos const char* path = NULL; 1818*3117ece4Schristos const char* origPath = NULL; 1819*3117ece4Schristos int useDict = 0; 1820*3117ece4Schristos unsigned dictSize = (10 << 10); /* 10 kB default */ 1821*3117ece4Schristos genType_e genType = gt_frame; 1822*3117ece4Schristos 1823*3117ece4Schristos int argNb; 1824*3117ece4Schristos 1825*3117ece4Schristos /* Check command line */ 1826*3117ece4Schristos for (argNb=1; argNb<argc; argNb++) { 1827*3117ece4Schristos const char* argument = argv[argNb]; 1828*3117ece4Schristos if(!argument) continue; /* Protection if argument empty */ 1829*3117ece4Schristos 1830*3117ece4Schristos /* Handle commands. Aggregated commands are allowed */ 1831*3117ece4Schristos if (argument[0]=='-') { 1832*3117ece4Schristos argument++; 1833*3117ece4Schristos while (*argument!=0) { 1834*3117ece4Schristos switch(*argument) 1835*3117ece4Schristos { 1836*3117ece4Schristos case 'h': 1837*3117ece4Schristos usage(argv[0]); 1838*3117ece4Schristos return 0; 1839*3117ece4Schristos case 'H': 1840*3117ece4Schristos advancedUsage(argv[0]); 1841*3117ece4Schristos return 0; 1842*3117ece4Schristos case 'v': 1843*3117ece4Schristos argument++; 1844*3117ece4Schristos g_displayLevel++; 1845*3117ece4Schristos break; 1846*3117ece4Schristos case 's': 1847*3117ece4Schristos argument++; 1848*3117ece4Schristos seedset=1; 1849*3117ece4Schristos seed = readInt(&argument); 1850*3117ece4Schristos break; 1851*3117ece4Schristos case 'n': 1852*3117ece4Schristos argument++; 1853*3117ece4Schristos numFiles = readInt(&argument); 1854*3117ece4Schristos break; 1855*3117ece4Schristos case 'T': 1856*3117ece4Schristos argument++; 1857*3117ece4Schristos testDuration = readInt(&argument); 1858*3117ece4Schristos if (*argument == 'm') { 1859*3117ece4Schristos testDuration *= 60; 1860*3117ece4Schristos argument++; 1861*3117ece4Schristos if (*argument == 'n') argument++; 1862*3117ece4Schristos } 1863*3117ece4Schristos break; 1864*3117ece4Schristos case 'o': 1865*3117ece4Schristos argument++; 1866*3117ece4Schristos origPath = argument; 1867*3117ece4Schristos argument += strlen(argument); 1868*3117ece4Schristos break; 1869*3117ece4Schristos case 'p': 1870*3117ece4Schristos argument++; 1871*3117ece4Schristos path = argument; 1872*3117ece4Schristos argument += strlen(argument); 1873*3117ece4Schristos break; 1874*3117ece4Schristos case 't': 1875*3117ece4Schristos argument++; 1876*3117ece4Schristos testMode = 1; 1877*3117ece4Schristos break; 1878*3117ece4Schristos case '-': 1879*3117ece4Schristos argument++; 1880*3117ece4Schristos if (strcmp(argument, "content-size") == 0) { 1881*3117ece4Schristos opts.contentSize = 1; 1882*3117ece4Schristos } else if (longCommandWArg(&argument, "use-dict=")) { 1883*3117ece4Schristos dictSize = readU32FromChar(&argument); 1884*3117ece4Schristos useDict = 1; 1885*3117ece4Schristos } else if (strcmp(argument, "gen-blocks") == 0) { 1886*3117ece4Schristos genType = gt_block; 1887*3117ece4Schristos } else if (longCommandWArg(&argument, "max-block-size-log=")) { 1888*3117ece4Schristos U32 value = readU32FromChar(&argument); 1889*3117ece4Schristos if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) { 1890*3117ece4Schristos g_maxBlockSize = 1U << value; 1891*3117ece4Schristos } 1892*3117ece4Schristos } else if (longCommandWArg(&argument, "max-content-size-log=")) { 1893*3117ece4Schristos U32 value = readU32FromChar(&argument); 1894*3117ece4Schristos g_maxDecompressedSizeLog = 1895*3117ece4Schristos MIN(MAX_DECOMPRESSED_SIZE_LOG, value); 1896*3117ece4Schristos } else { 1897*3117ece4Schristos advancedUsage(argv[0]); 1898*3117ece4Schristos return 1; 1899*3117ece4Schristos } 1900*3117ece4Schristos argument += strlen(argument); 1901*3117ece4Schristos break; 1902*3117ece4Schristos default: 1903*3117ece4Schristos usage(argv[0]); 1904*3117ece4Schristos return 1; 1905*3117ece4Schristos } } } } /* for (argNb=1; argNb<argc; argNb++) */ 1906*3117ece4Schristos 1907*3117ece4Schristos if (!seedset) { 1908*3117ece4Schristos seed = makeSeed(); 1909*3117ece4Schristos } 1910*3117ece4Schristos 1911*3117ece4Schristos if (testMode) { 1912*3117ece4Schristos return runTestMode(seed, numFiles, testDuration, genType); 1913*3117ece4Schristos } else { 1914*3117ece4Schristos if (testDuration) { 1915*3117ece4Schristos DISPLAY("Error: -T requires test mode (-t)\n\n"); 1916*3117ece4Schristos usage(argv[0]); 1917*3117ece4Schristos return 1; 1918*3117ece4Schristos } 1919*3117ece4Schristos } 1920*3117ece4Schristos 1921*3117ece4Schristos if (!path) { 1922*3117ece4Schristos DISPLAY("Error: path is required in file generation mode\n"); 1923*3117ece4Schristos usage(argv[0]); 1924*3117ece4Schristos return 1; 1925*3117ece4Schristos } 1926*3117ece4Schristos 1927*3117ece4Schristos if (numFiles == 0 && useDict == 0) { 1928*3117ece4Schristos return generateFile(seed, path, origPath, genType); 1929*3117ece4Schristos } else if (useDict == 0){ 1930*3117ece4Schristos return generateCorpus(seed, numFiles, path, origPath, genType); 1931*3117ece4Schristos } else { 1932*3117ece4Schristos /* should generate files with a dictionary */ 1933*3117ece4Schristos return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); 1934*3117ece4Schristos } 1935*3117ece4Schristos 1936*3117ece4Schristos } 1937