1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 */ 9 10 #include <stdlib.h> /* malloc, free */ 11 #include <limits.h> /* UINT_MAX */ 12 #include <assert.h> 13 14 #define XXH_STATIC_LINKING_ONLY 15 #include "xxhash.h" 16 17 #define ZSTD_STATIC_LINKING_ONLY 18 #include "zstd.h" 19 #include "zstd_errors.h" 20 #include "mem.h" 21 22 #include "zstd_seekable.h" 23 24 #define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; } 25 26 #undef ERROR 27 #define ERROR(name) ((size_t)-ZSTD_error_##name) 28 29 #undef MIN 30 #undef MAX 31 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 32 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 33 34 typedef struct { 35 U32 cSize; 36 U32 dSize; 37 U32 checksum; 38 } framelogEntry_t; 39 40 struct ZSTD_frameLog_s { 41 framelogEntry_t* entries; 42 U32 size; 43 U32 capacity; 44 45 int checksumFlag; 46 47 /* for use when streaming out the seek table */ 48 U32 seekTablePos; 49 U32 seekTableIndex; 50 } framelog_t; 51 52 struct ZSTD_seekable_CStream_s { 53 ZSTD_CStream* cstream; 54 ZSTD_frameLog framelog; 55 56 U32 frameCSize; 57 U32 frameDSize; 58 59 XXH64_state_t xxhState; 60 61 U32 maxFrameSize; 62 63 int writingSeekTable; 64 }; 65 66 static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) 67 { 68 /* allocate some initial space */ 69 size_t const FRAMELOG_STARTING_CAPACITY = 16; 70 fl->entries = (framelogEntry_t*)malloc( 71 sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY); 72 if (fl->entries == NULL) return ERROR(memory_allocation); 73 fl->capacity = (U32)FRAMELOG_STARTING_CAPACITY; 74 return 0; 75 } 76 77 static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl) 78 { 79 if (fl != NULL) free(fl->entries); 80 return 0; 81 } 82 83 ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag) 84 { 85 ZSTD_frameLog* const fl = (ZSTD_frameLog*)malloc(sizeof(ZSTD_frameLog)); 86 if (fl == NULL) return NULL; 87 88 if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) { 89 free(fl); 90 return NULL; 91 } 92 93 fl->checksumFlag = checksumFlag; 94 fl->seekTablePos = 0; 95 fl->seekTableIndex = 0; 96 fl->size = 0; 97 98 return fl; 99 } 100 101 size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl) 102 { 103 ZSTD_seekable_frameLog_freeVec(fl); 104 free(fl); 105 return 0; 106 } 107 108 ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void) 109 { 110 ZSTD_seekable_CStream* const zcs = (ZSTD_seekable_CStream*)malloc(sizeof(ZSTD_seekable_CStream)); 111 if (zcs == NULL) return NULL; 112 113 memset(zcs, 0, sizeof(*zcs)); 114 115 zcs->cstream = ZSTD_createCStream(); 116 if (zcs->cstream == NULL) goto failed1; 117 118 if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2; 119 120 return zcs; 121 122 failed2: 123 ZSTD_freeCStream(zcs->cstream); 124 failed1: 125 free(zcs); 126 return NULL; 127 } 128 129 size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs) 130 { 131 if (zcs == NULL) return 0; /* support free on null */ 132 ZSTD_freeCStream(zcs->cstream); 133 ZSTD_seekable_frameLog_freeVec(&zcs->framelog); 134 free(zcs); 135 return 0; 136 } 137 138 size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, 139 int compressionLevel, 140 int checksumFlag, 141 unsigned maxFrameSize) 142 { 143 zcs->framelog.size = 0; 144 zcs->frameCSize = 0; 145 zcs->frameDSize = 0; 146 147 /* make sure maxFrameSize has a reasonable value */ 148 if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) { 149 return ERROR(frameParameter_unsupported); 150 } 151 152 zcs->maxFrameSize = maxFrameSize ? 153 maxFrameSize : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE; 154 155 zcs->framelog.checksumFlag = checksumFlag; 156 if (zcs->framelog.checksumFlag) { 157 XXH64_reset(&zcs->xxhState, 0); 158 } 159 160 zcs->framelog.seekTablePos = 0; 161 zcs->framelog.seekTableIndex = 0; 162 zcs->writingSeekTable = 0; 163 164 return ZSTD_initCStream(zcs->cstream, compressionLevel); 165 } 166 167 size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, 168 unsigned compressedSize, 169 unsigned decompressedSize, 170 unsigned checksum) 171 { 172 if (fl->size == ZSTD_SEEKABLE_MAXFRAMES) 173 return ERROR(frameIndex_tooLarge); 174 175 /* grow the buffer if required */ 176 if (fl->size == fl->capacity) { 177 /* exponential size increase for constant amortized runtime */ 178 size_t const newCapacity = fl->capacity * 2; 179 framelogEntry_t* const newEntries = (framelogEntry_t*)realloc(fl->entries, 180 sizeof(framelogEntry_t) * newCapacity); 181 182 if (newEntries == NULL) return ERROR(memory_allocation); 183 184 fl->entries = newEntries; 185 assert(newCapacity <= UINT_MAX); 186 fl->capacity = (U32)newCapacity; 187 } 188 189 fl->entries[fl->size] = (framelogEntry_t){ 190 compressedSize, decompressedSize, checksum 191 }; 192 fl->size++; 193 194 return 0; 195 } 196 197 size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) 198 { 199 size_t const prevOutPos = output->pos; 200 /* end the frame */ 201 size_t ret = ZSTD_endStream(zcs->cstream, output); 202 203 zcs->frameCSize += (U32)(output->pos - prevOutPos); 204 205 /* need to flush before doing the rest */ 206 if (ret) return ret; 207 208 /* frame done */ 209 210 /* store the frame data for later */ 211 ret = ZSTD_seekable_logFrame( 212 &zcs->framelog, zcs->frameCSize, zcs->frameDSize, 213 zcs->framelog.checksumFlag 214 ? XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU 215 : 0); 216 if (ret) return ret; 217 218 /* reset for the next frame */ 219 zcs->frameCSize = 0; 220 zcs->frameDSize = 0; 221 222 ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only); 223 if (zcs->framelog.checksumFlag) XXH64_reset(&zcs->xxhState, 0); 224 225 return 0; 226 } 227 228 size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) 229 { 230 const BYTE* const inBase = (const BYTE*) input->src + input->pos; 231 size_t inLen = input->size - input->pos; 232 233 assert(zcs->maxFrameSize < INT_MAX); 234 ZSTD_CCtx_setParameter(zcs->cstream, ZSTD_c_srcSizeHint, (int)zcs->maxFrameSize); 235 inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize)); 236 237 /* if we haven't finished flushing the last frame, don't start writing a new one */ 238 if (inLen > 0) { 239 ZSTD_inBuffer inTmp = { inBase, inLen, 0 }; 240 size_t const prevOutPos = output->pos; 241 242 size_t const ret = ZSTD_compressStream(zcs->cstream, output, &inTmp); 243 244 if (zcs->framelog.checksumFlag) { 245 XXH64_update(&zcs->xxhState, inBase, inTmp.pos); 246 } 247 248 zcs->frameCSize += (U32)(output->pos - prevOutPos); 249 zcs->frameDSize += (U32)inTmp.pos; 250 251 input->pos += inTmp.pos; 252 253 if (ZSTD_isError(ret)) return ret; 254 } 255 256 if (zcs->maxFrameSize == zcs->frameDSize) { 257 /* log the frame and start over */ 258 size_t const ret = ZSTD_seekable_endFrame(zcs, output); 259 if (ZSTD_isError(ret)) return ret; 260 261 /* get the client ready for the next frame */ 262 return (size_t)zcs->maxFrameSize; 263 } 264 265 return (size_t)(zcs->maxFrameSize - zcs->frameDSize); 266 } 267 268 static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl) 269 { 270 size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0); 271 size_t const seekTableLen = ZSTD_SKIPPABLEHEADERSIZE + 272 sizePerFrame * fl->size + 273 ZSTD_seekTableFooterSize; 274 275 return seekTableLen; 276 } 277 278 static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl, 279 ZSTD_outBuffer* output, U32 const value, 280 U32 const offset) 281 { 282 if (fl->seekTablePos < offset + 4) { 283 BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */ 284 size_t const lenWrite = 285 MIN(output->size - output->pos, offset + 4 - fl->seekTablePos); 286 MEM_writeLE32(tmp, value); 287 memcpy((BYTE*)output->dst + output->pos, 288 tmp + (fl->seekTablePos - offset), lenWrite); 289 output->pos += lenWrite; 290 fl->seekTablePos += (U32)lenWrite; 291 292 if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos; 293 } 294 return 0; 295 } 296 297 size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output) 298 { 299 /* seekTableIndex: the current index in the table and 300 * seekTableSize: the amount of the table written so far 301 * 302 * This function is written this way so that if it has to return early 303 * because of a small buffer, it can keep going where it left off. 304 */ 305 306 size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0); 307 size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl); 308 309 CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0)); 310 assert(seekTableLen <= (size_t)UINT_MAX); 311 CHECK_Z(ZSTD_stwrite32(fl, output, (U32)seekTableLen - ZSTD_SKIPPABLEHEADERSIZE, 4)); 312 313 while (fl->seekTableIndex < fl->size) { 314 unsigned long long const start = ZSTD_SKIPPABLEHEADERSIZE + sizePerFrame * fl->seekTableIndex; 315 assert(start + 8 <= UINT_MAX); 316 CHECK_Z(ZSTD_stwrite32(fl, output, 317 fl->entries[fl->seekTableIndex].cSize, 318 (U32)start + 0)); 319 320 CHECK_Z(ZSTD_stwrite32(fl, output, 321 fl->entries[fl->seekTableIndex].dSize, 322 (U32)start + 4)); 323 324 if (fl->checksumFlag) { 325 CHECK_Z(ZSTD_stwrite32( 326 fl, output, fl->entries[fl->seekTableIndex].checksum, 327 (U32)start + 8)); 328 } 329 330 fl->seekTableIndex++; 331 } 332 333 assert(seekTableLen <= UINT_MAX); 334 CHECK_Z(ZSTD_stwrite32(fl, output, fl->size, 335 (U32)seekTableLen - ZSTD_seekTableFooterSize)); 336 337 if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos; 338 if (fl->seekTablePos < seekTableLen - 4) { 339 BYTE const sfd = (BYTE)((fl->checksumFlag) << 7); 340 341 ((BYTE*)output->dst)[output->pos] = sfd; 342 output->pos++; 343 fl->seekTablePos++; 344 } 345 346 CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER, 347 (U32)seekTableLen - 4)); 348 349 if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC); 350 return 0; 351 } 352 353 size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) 354 { 355 if (!zcs->writingSeekTable) { 356 const size_t endFrame = ZSTD_seekable_endFrame(zcs, output); 357 if (ZSTD_isError(endFrame)) return endFrame; 358 /* return an accurate size hint */ 359 if (endFrame) return endFrame + ZSTD_seekable_seekTableSize(&zcs->framelog); 360 } 361 362 zcs->writingSeekTable = 1; 363 364 return ZSTD_seekable_writeSeekTable(&zcs->framelog, output); 365 } 366