1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include "seqgen.h" 12*3117ece4Schristos #include "mem.h" 13*3117ece4Schristos #include <string.h> 14*3117ece4Schristos 15*3117ece4Schristos #define MIN(a, b) ((a) < (b) ? (a) : (b)) 16*3117ece4Schristos 17*3117ece4Schristos static const size_t kMatchBytes = 128; 18*3117ece4Schristos 19*3117ece4Schristos #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r))) 20*3117ece4Schristos static BYTE SEQ_randByte(unsigned* src) 21*3117ece4Schristos { 22*3117ece4Schristos static const U32 prime1 = 2654435761U; 23*3117ece4Schristos static const U32 prime2 = 2246822519U; 24*3117ece4Schristos U32 rand32 = *src; 25*3117ece4Schristos rand32 *= prime1; 26*3117ece4Schristos rand32 ^= prime2; 27*3117ece4Schristos rand32 = SEQ_rotl32(rand32, 13); 28*3117ece4Schristos *src = rand32; 29*3117ece4Schristos return (BYTE)(rand32 >> 5); 30*3117ece4Schristos } 31*3117ece4Schristos 32*3117ece4Schristos SEQ_stream SEQ_initStream(unsigned seed) 33*3117ece4Schristos { 34*3117ece4Schristos SEQ_stream stream; 35*3117ece4Schristos stream.state = 0; 36*3117ece4Schristos XXH64_reset(&stream.xxh, 0); 37*3117ece4Schristos stream.seed = seed; 38*3117ece4Schristos return stream; 39*3117ece4Schristos } 40*3117ece4Schristos 41*3117ece4Schristos /* Generates a single guard byte, then match length + 1 of a different byte, 42*3117ece4Schristos * then another guard byte. 43*3117ece4Schristos */ 44*3117ece4Schristos static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value, 45*3117ece4Schristos SEQ_outBuffer* out) 46*3117ece4Schristos { 47*3117ece4Schristos typedef enum { 48*3117ece4Schristos ml_first_byte = 0, 49*3117ece4Schristos ml_match_bytes, 50*3117ece4Schristos ml_last_byte, 51*3117ece4Schristos } ml_state; 52*3117ece4Schristos BYTE* const ostart = (BYTE*)out->dst; 53*3117ece4Schristos BYTE* const oend = ostart + out->size; 54*3117ece4Schristos BYTE* op = ostart + out->pos; 55*3117ece4Schristos 56*3117ece4Schristos switch ((ml_state)stream->state) { 57*3117ece4Schristos case ml_first_byte: 58*3117ece4Schristos /* Generate a single byte and pick a different byte for the match */ 59*3117ece4Schristos if (op >= oend) { 60*3117ece4Schristos stream->bytesLeft = 1; 61*3117ece4Schristos break; 62*3117ece4Schristos } 63*3117ece4Schristos *op = SEQ_randByte(&stream->seed) & 0xFF; 64*3117ece4Schristos do { 65*3117ece4Schristos stream->saved = SEQ_randByte(&stream->seed) & 0xFF; 66*3117ece4Schristos } while (*op == stream->saved); 67*3117ece4Schristos ++op; 68*3117ece4Schristos /* State transition */ 69*3117ece4Schristos stream->state = ml_match_bytes; 70*3117ece4Schristos stream->bytesLeft = value + 1; 71*3117ece4Schristos /* fall-through */ 72*3117ece4Schristos case ml_match_bytes: { 73*3117ece4Schristos /* Copy matchLength + 1 bytes to the output buffer */ 74*3117ece4Schristos size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); 75*3117ece4Schristos if (setLength > 0) { 76*3117ece4Schristos memset(op, stream->saved, setLength); 77*3117ece4Schristos op += setLength; 78*3117ece4Schristos stream->bytesLeft -= setLength; 79*3117ece4Schristos } 80*3117ece4Schristos if (stream->bytesLeft > 0) 81*3117ece4Schristos break; 82*3117ece4Schristos /* State transition */ 83*3117ece4Schristos stream->state = ml_last_byte; 84*3117ece4Schristos } 85*3117ece4Schristos /* fall-through */ 86*3117ece4Schristos case ml_last_byte: 87*3117ece4Schristos /* Generate a single byte and pick a different byte for the match */ 88*3117ece4Schristos if (op >= oend) { 89*3117ece4Schristos stream->bytesLeft = 1; 90*3117ece4Schristos break; 91*3117ece4Schristos } 92*3117ece4Schristos do { 93*3117ece4Schristos *op = SEQ_randByte(&stream->seed) & 0xFF; 94*3117ece4Schristos } while (*op == stream->saved); 95*3117ece4Schristos ++op; 96*3117ece4Schristos /* State transition */ 97*3117ece4Schristos /* fall-through */ 98*3117ece4Schristos default: 99*3117ece4Schristos stream->state = 0; 100*3117ece4Schristos stream->bytesLeft = 0; 101*3117ece4Schristos break; 102*3117ece4Schristos } 103*3117ece4Schristos XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 104*3117ece4Schristos out->pos = op - ostart; 105*3117ece4Schristos return stream->bytesLeft; 106*3117ece4Schristos } 107*3117ece4Schristos 108*3117ece4Schristos /* Saves the current seed then generates kMatchBytes random bytes >= 128. 109*3117ece4Schristos * Generates literal length - kMatchBytes random bytes < 128. 110*3117ece4Schristos * Generates another kMatchBytes using the saved seed to generate a match. 111*3117ece4Schristos * This way the match is easy to find for the compressors. 112*3117ece4Schristos */ 113*3117ece4Schristos static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) 114*3117ece4Schristos { 115*3117ece4Schristos typedef enum { 116*3117ece4Schristos ll_start = 0, 117*3117ece4Schristos ll_run_bytes, 118*3117ece4Schristos ll_literals, 119*3117ece4Schristos ll_run_match, 120*3117ece4Schristos } ll_state; 121*3117ece4Schristos BYTE* const ostart = (BYTE*)out->dst; 122*3117ece4Schristos BYTE* const oend = ostart + out->size; 123*3117ece4Schristos BYTE* op = ostart + out->pos; 124*3117ece4Schristos 125*3117ece4Schristos switch ((ll_state)stream->state) { 126*3117ece4Schristos case ll_start: 127*3117ece4Schristos stream->state = ll_run_bytes; 128*3117ece4Schristos stream->saved = stream->seed; 129*3117ece4Schristos stream->bytesLeft = MIN(kMatchBytes, value); 130*3117ece4Schristos /* fall-through */ 131*3117ece4Schristos case ll_run_bytes: 132*3117ece4Schristos while (stream->bytesLeft > 0 && op < oend) { 133*3117ece4Schristos *op++ = SEQ_randByte(&stream->seed) | 0x80; 134*3117ece4Schristos --stream->bytesLeft; 135*3117ece4Schristos } 136*3117ece4Schristos if (stream->bytesLeft > 0) 137*3117ece4Schristos break; 138*3117ece4Schristos /* State transition */ 139*3117ece4Schristos stream->state = ll_literals; 140*3117ece4Schristos stream->bytesLeft = value - MIN(kMatchBytes, value); 141*3117ece4Schristos /* fall-through */ 142*3117ece4Schristos case ll_literals: 143*3117ece4Schristos while (stream->bytesLeft > 0 && op < oend) { 144*3117ece4Schristos *op++ = SEQ_randByte(&stream->seed) & 0x7F; 145*3117ece4Schristos --stream->bytesLeft; 146*3117ece4Schristos } 147*3117ece4Schristos if (stream->bytesLeft > 0) 148*3117ece4Schristos break; 149*3117ece4Schristos /* State transition */ 150*3117ece4Schristos stream->state = ll_run_match; 151*3117ece4Schristos stream->bytesLeft = MIN(kMatchBytes, value); 152*3117ece4Schristos /* fall-through */ 153*3117ece4Schristos case ll_run_match: { 154*3117ece4Schristos while (stream->bytesLeft > 0 && op < oend) { 155*3117ece4Schristos *op++ = SEQ_randByte(&stream->saved) | 0x80; 156*3117ece4Schristos --stream->bytesLeft; 157*3117ece4Schristos } 158*3117ece4Schristos if (stream->bytesLeft > 0) 159*3117ece4Schristos break; 160*3117ece4Schristos } 161*3117ece4Schristos /* fall-through */ 162*3117ece4Schristos default: 163*3117ece4Schristos stream->state = 0; 164*3117ece4Schristos stream->bytesLeft = 0; 165*3117ece4Schristos break; 166*3117ece4Schristos } 167*3117ece4Schristos XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 168*3117ece4Schristos out->pos = op - ostart; 169*3117ece4Schristos return stream->bytesLeft; 170*3117ece4Schristos } 171*3117ece4Schristos 172*3117ece4Schristos /* Saves the current seed then generates kMatchBytes random bytes >= 128. 173*3117ece4Schristos * Generates offset - kMatchBytes of zeros to get a large offset without 174*3117ece4Schristos * polluting the hash tables. 175*3117ece4Schristos * Generates another kMatchBytes using the saved seed to generate a with the 176*3117ece4Schristos * required offset. 177*3117ece4Schristos */ 178*3117ece4Schristos static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) 179*3117ece4Schristos { 180*3117ece4Schristos typedef enum { 181*3117ece4Schristos of_start = 0, 182*3117ece4Schristos of_run_bytes, 183*3117ece4Schristos of_offset, 184*3117ece4Schristos of_run_match, 185*3117ece4Schristos } of_state; 186*3117ece4Schristos BYTE* const ostart = (BYTE*)out->dst; 187*3117ece4Schristos BYTE* const oend = ostart + out->size; 188*3117ece4Schristos BYTE* op = ostart + out->pos; 189*3117ece4Schristos 190*3117ece4Schristos switch ((of_state)stream->state) { 191*3117ece4Schristos case of_start: 192*3117ece4Schristos stream->state = of_run_bytes; 193*3117ece4Schristos stream->saved = stream->seed; 194*3117ece4Schristos stream->bytesLeft = MIN(value, kMatchBytes); 195*3117ece4Schristos /* fall-through */ 196*3117ece4Schristos case of_run_bytes: { 197*3117ece4Schristos while (stream->bytesLeft > 0 && op < oend) { 198*3117ece4Schristos *op++ = SEQ_randByte(&stream->seed) | 0x80; 199*3117ece4Schristos --stream->bytesLeft; 200*3117ece4Schristos } 201*3117ece4Schristos if (stream->bytesLeft > 0) 202*3117ece4Schristos break; 203*3117ece4Schristos /* State transition */ 204*3117ece4Schristos stream->state = of_offset; 205*3117ece4Schristos stream->bytesLeft = value - MIN(value, kMatchBytes); 206*3117ece4Schristos } 207*3117ece4Schristos /* fall-through */ 208*3117ece4Schristos case of_offset: { 209*3117ece4Schristos /* Copy matchLength + 1 bytes to the output buffer */ 210*3117ece4Schristos size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); 211*3117ece4Schristos if (setLength > 0) { 212*3117ece4Schristos memset(op, 0, setLength); 213*3117ece4Schristos op += setLength; 214*3117ece4Schristos stream->bytesLeft -= setLength; 215*3117ece4Schristos } 216*3117ece4Schristos if (stream->bytesLeft > 0) 217*3117ece4Schristos break; 218*3117ece4Schristos /* State transition */ 219*3117ece4Schristos stream->state = of_run_match; 220*3117ece4Schristos stream->bytesLeft = MIN(value, kMatchBytes); 221*3117ece4Schristos } 222*3117ece4Schristos /* fall-through */ 223*3117ece4Schristos case of_run_match: { 224*3117ece4Schristos while (stream->bytesLeft > 0 && op < oend) { 225*3117ece4Schristos *op++ = SEQ_randByte(&stream->saved) | 0x80; 226*3117ece4Schristos --stream->bytesLeft; 227*3117ece4Schristos } 228*3117ece4Schristos if (stream->bytesLeft > 0) 229*3117ece4Schristos break; 230*3117ece4Schristos } 231*3117ece4Schristos /* fall-through */ 232*3117ece4Schristos default: 233*3117ece4Schristos stream->state = 0; 234*3117ece4Schristos stream->bytesLeft = 0; 235*3117ece4Schristos break; 236*3117ece4Schristos } 237*3117ece4Schristos XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 238*3117ece4Schristos out->pos = op - ostart; 239*3117ece4Schristos return stream->bytesLeft; 240*3117ece4Schristos } 241*3117ece4Schristos 242*3117ece4Schristos /* Returns the number of bytes left to generate. 243*3117ece4Schristos * Must pass the same type/value until it returns 0. 244*3117ece4Schristos */ 245*3117ece4Schristos size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out) 246*3117ece4Schristos { 247*3117ece4Schristos switch (type) { 248*3117ece4Schristos case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out); 249*3117ece4Schristos case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out); 250*3117ece4Schristos case SEQ_gen_of: return SEQ_gen_offset(stream, value, out); 251*3117ece4Schristos case SEQ_gen_max: /* fall-through */ 252*3117ece4Schristos default: return 0; 253*3117ece4Schristos } 254*3117ece4Schristos } 255*3117ece4Schristos 256*3117ece4Schristos /* Returns the xxhash of the data produced so far */ 257*3117ece4Schristos XXH64_hash_t SEQ_digest(SEQ_stream const* stream) 258*3117ece4Schristos { 259*3117ece4Schristos return XXH64_digest(&stream->xxh); 260*3117ece4Schristos } 261