1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /* ************************************** 12 * Tuning parameters 13 ****************************************/ 14 #ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ 15 # define BMK_TIMETEST_DEFAULT_S 3 16 #endif 17 18 /* ************************************* 19 * Includes 20 ***************************************/ 21 /* this must be included first */ 22 #include "platform.h" /* Large Files support, compiler specifics */ 23 24 /* then following system includes */ 25 #include <assert.h> /* assert */ 26 #include <errno.h> 27 #include <stdio.h> /* fprintf, fopen */ 28 #include <stdlib.h> /* malloc, free */ 29 #include <string.h> /* memset, strerror */ 30 #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ 31 #include "../lib/common/mem.h" 32 #include "benchfn.h" 33 #include "timefn.h" /* UTIL_time_t */ 34 #ifndef ZSTD_STATIC_LINKING_ONLY 35 # define ZSTD_STATIC_LINKING_ONLY 36 #endif 37 #include "../lib/zstd.h" 38 #include "datagen.h" /* RDG_genBuffer */ 39 #include "lorem.h" /* LOREM_genBuffer */ 40 #ifndef XXH_INLINE_ALL 41 # define XXH_INLINE_ALL 42 #endif 43 #include "../lib/common/xxhash.h" 44 #include "../lib/zstd_errors.h" 45 #include "benchzstd.h" 46 47 /* ************************************* 48 * Constants 49 ***************************************/ 50 #ifndef ZSTD_GIT_COMMIT 51 # define ZSTD_GIT_COMMIT_STRING "" 52 #else 53 # define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) 54 #endif 55 56 #define TIMELOOP_MICROSEC (1 * 1000000ULL) /* 1 second */ 57 #define TIMELOOP_NANOSEC (1 * 1000000000ULL) /* 1 second */ 58 #define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */ 59 #define COOLPERIOD_SEC 10 60 61 #define KB *(1 << 10) 62 #define MB *(1 << 20) 63 #define GB *(1U << 30) 64 65 #define BMK_RUNTEST_DEFAULT_MS 1000 66 67 static const size_t maxMemory = (sizeof(size_t) == 4) 68 ? 69 /* 32-bit */ (2 GB - 64 MB) 70 : 71 /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31)); 72 73 /* ************************************* 74 * console display 75 ***************************************/ 76 #define DISPLAY(...) \ 77 { \ 78 fprintf(stderr, __VA_ARGS__); \ 79 fflush(NULL); \ 80 } 81 #define DISPLAYLEVEL(l, ...) \ 82 if (displayLevel >= l) { \ 83 DISPLAY(__VA_ARGS__); \ 84 } 85 /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + 86 * progression; 4 : + information */ 87 #define OUTPUT(...) \ 88 { \ 89 fprintf(stdout, __VA_ARGS__); \ 90 fflush(NULL); \ 91 } 92 #define OUTPUTLEVEL(l, ...) \ 93 if (displayLevel >= l) { \ 94 OUTPUT(__VA_ARGS__); \ 95 } 96 97 /* ************************************* 98 * Exceptions 99 ***************************************/ 100 #ifndef DEBUG 101 # define DEBUG 0 102 #endif 103 #define DEBUGOUTPUT(...) \ 104 { \ 105 if (DEBUG) \ 106 DISPLAY(__VA_ARGS__); \ 107 } 108 109 #define RETURN_ERROR_INT(errorNum, ...) \ 110 { \ 111 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 112 DISPLAYLEVEL(1, "Error %i : ", errorNum); \ 113 DISPLAYLEVEL(1, __VA_ARGS__); \ 114 DISPLAYLEVEL(1, " \n"); \ 115 return errorNum; \ 116 } 117 118 #define CHECK_Z(zf) \ 119 { \ 120 size_t const zerr = zf; \ 121 if (ZSTD_isError(zerr)) { \ 122 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 123 DISPLAY("Error : "); \ 124 DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \ 125 DISPLAY(" \n"); \ 126 exit(1); \ 127 } \ 128 } 129 130 #define RETURN_ERROR(errorNum, retType, ...) \ 131 { \ 132 retType r; \ 133 memset(&r, 0, sizeof(retType)); \ 134 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 135 DISPLAYLEVEL(1, "Error %i : ", errorNum); \ 136 DISPLAYLEVEL(1, __VA_ARGS__); \ 137 DISPLAYLEVEL(1, " \n"); \ 138 r.tag = errorNum; \ 139 return r; \ 140 } 141 142 /* replacement for snprintf(), which is not supported by C89 143 * sprintf() would be the supported one, but it's labelled unsafe, 144 * so some modern static analyzer will flag it as such, making it unusable. 145 * formatString_u() replaces snprintf() for the specific case where there are only %u arguments */ 146 static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value) 147 { 148 size_t written = 0; 149 int i; 150 assert(value <= 100); 151 152 for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) { 153 if (formatString[i] != '%') { 154 buffer[written++] = formatString[i]; 155 continue; 156 } 157 158 if (formatString[++i] == 'u') { 159 /* Handle single digit */ 160 if (value < 10) { 161 buffer[written++] = '0' + (char)value; 162 } else if (value < 100) { 163 /* Handle two digits */ 164 if (written >= buffer_size - 2) { 165 return -1; /* buffer overflow */ 166 } 167 buffer[written++] = '0' + (char)(value / 10); 168 buffer[written++] = '0' + (char)(value % 10); 169 } else { /* 100 */ 170 if (written >= buffer_size - 3) { 171 return -1; /* buffer overflow */ 172 } 173 buffer[written++] = '1'; 174 buffer[written++] = '0'; 175 buffer[written++] = '0'; 176 } 177 } else if (formatString[i] == '%') { /* Check for escaped percent sign */ 178 buffer[written++] = '%'; 179 } else { 180 return -1; /* unsupported format */ 181 } 182 } 183 184 if (written < buffer_size) { 185 buffer[written] = '\0'; 186 } else { 187 buffer[0] = '\0'; /* Handle truncation */ 188 } 189 190 return (int)written; 191 } 192 193 /* ************************************* 194 * Benchmark Parameters 195 ***************************************/ 196 197 BMK_advancedParams_t BMK_initAdvancedParams(void) 198 { 199 BMK_advancedParams_t const res = { 200 BMK_both, /* mode */ 201 BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ 202 0, /* blockSize */ 203 0, /* targetCBlockSize */ 204 0, /* nbWorkers */ 205 0, /* realTime */ 206 0, /* additionalParam */ 207 0, /* ldmFlag */ 208 0, /* ldmMinMatch */ 209 0, /* ldmHashLog */ 210 0, /* ldmBuckSizeLog */ 211 0, /* ldmHashRateLog */ 212 ZSTD_ps_auto, /* literalCompressionMode */ 213 0 /* useRowMatchFinder */ 214 }; 215 return res; 216 } 217 218 /* ******************************************************** 219 * Bench functions 220 **********************************************************/ 221 typedef struct { 222 const void* srcPtr; 223 size_t srcSize; 224 void* cPtr; 225 size_t cRoom; 226 size_t cSize; 227 void* resPtr; 228 size_t resSize; 229 } blockParam_t; 230 231 #undef MIN 232 #undef MAX 233 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 234 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 235 236 static void BMK_initCCtx( 237 ZSTD_CCtx* ctx, 238 const void* dictBuffer, 239 size_t dictBufferSize, 240 int cLevel, 241 const ZSTD_compressionParameters* comprParams, 242 const BMK_advancedParams_t* adv) 243 { 244 ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters); 245 if (adv->nbWorkers == 1) { 246 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0)); 247 } else { 248 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers)); 249 } 250 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel)); 251 CHECK_Z(ZSTD_CCtx_setParameter( 252 ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder)); 253 CHECK_Z(ZSTD_CCtx_setParameter( 254 ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag)); 255 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch)); 256 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog)); 257 CHECK_Z(ZSTD_CCtx_setParameter( 258 ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog)); 259 CHECK_Z(ZSTD_CCtx_setParameter( 260 ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog)); 261 CHECK_Z(ZSTD_CCtx_setParameter( 262 ctx, ZSTD_c_windowLog, (int)comprParams->windowLog)); 263 CHECK_Z(ZSTD_CCtx_setParameter( 264 ctx, ZSTD_c_hashLog, (int)comprParams->hashLog)); 265 CHECK_Z(ZSTD_CCtx_setParameter( 266 ctx, ZSTD_c_chainLog, (int)comprParams->chainLog)); 267 CHECK_Z(ZSTD_CCtx_setParameter( 268 ctx, ZSTD_c_searchLog, (int)comprParams->searchLog)); 269 CHECK_Z(ZSTD_CCtx_setParameter( 270 ctx, ZSTD_c_minMatch, (int)comprParams->minMatch)); 271 CHECK_Z(ZSTD_CCtx_setParameter( 272 ctx, ZSTD_c_targetLength, (int)comprParams->targetLength)); 273 CHECK_Z(ZSTD_CCtx_setParameter( 274 ctx, 275 ZSTD_c_literalCompressionMode, 276 (int)adv->literalCompressionMode)); 277 CHECK_Z(ZSTD_CCtx_setParameter( 278 ctx, ZSTD_c_strategy, (int)comprParams->strategy)); 279 CHECK_Z(ZSTD_CCtx_setParameter( 280 ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize)); 281 CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize)); 282 } 283 284 static void 285 BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) 286 { 287 CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); 288 CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize)); 289 } 290 291 typedef struct { 292 ZSTD_CCtx* cctx; 293 const void* dictBuffer; 294 size_t dictBufferSize; 295 int cLevel; 296 const ZSTD_compressionParameters* comprParams; 297 const BMK_advancedParams_t* adv; 298 } BMK_initCCtxArgs; 299 300 static size_t local_initCCtx(void* payload) 301 { 302 BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; 303 BMK_initCCtx( 304 ag->cctx, 305 ag->dictBuffer, 306 ag->dictBufferSize, 307 ag->cLevel, 308 ag->comprParams, 309 ag->adv); 310 return 0; 311 } 312 313 typedef struct { 314 ZSTD_DCtx* dctx; 315 const void* dictBuffer; 316 size_t dictBufferSize; 317 } BMK_initDCtxArgs; 318 319 static size_t local_initDCtx(void* payload) 320 { 321 BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; 322 BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); 323 return 0; 324 } 325 326 /* `addArgs` is the context */ 327 static size_t local_defaultCompress( 328 const void* srcBuffer, 329 size_t srcSize, 330 void* dstBuffer, 331 size_t dstSize, 332 void* addArgs) 333 { 334 ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; 335 return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); 336 } 337 338 /* `addArgs` is the context */ 339 static size_t local_defaultDecompress( 340 const void* srcBuffer, 341 size_t srcSize, 342 void* dstBuffer, 343 size_t dstCapacity, 344 void* addArgs) 345 { 346 size_t moreToFlush = 1; 347 ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; 348 ZSTD_inBuffer in; 349 ZSTD_outBuffer out; 350 in.src = srcBuffer; 351 in.size = srcSize; 352 in.pos = 0; 353 out.dst = dstBuffer; 354 out.size = dstCapacity; 355 out.pos = 0; 356 while (moreToFlush) { 357 if (out.pos == out.size) { 358 return (size_t)-ZSTD_error_dstSize_tooSmall; 359 } 360 moreToFlush = ZSTD_decompressStream(dctx, &out, &in); 361 if (ZSTD_isError(moreToFlush)) { 362 return moreToFlush; 363 } 364 } 365 return out.pos; 366 } 367 368 /* ================================================================= */ 369 /* Benchmark Zstandard, mem-to-mem scenarios */ 370 /* ================================================================= */ 371 372 int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) 373 { 374 return outcome.tag == 0; 375 } 376 377 BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) 378 { 379 assert(outcome.tag == 0); 380 return outcome.internal_never_use_directly; 381 } 382 383 static BMK_benchOutcome_t BMK_benchOutcome_error(void) 384 { 385 BMK_benchOutcome_t b; 386 memset(&b, 0, sizeof(b)); 387 b.tag = 1; 388 return b; 389 } 390 391 static BMK_benchOutcome_t BMK_benchOutcome_setValidResult( 392 BMK_benchResult_t result) 393 { 394 BMK_benchOutcome_t b; 395 b.tag = 0; 396 b.internal_never_use_directly = result; 397 return b; 398 } 399 400 /* benchMem with no allocation */ 401 static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( 402 const void** srcPtrs, 403 size_t* srcSizes, 404 void** cPtrs, 405 size_t* cCapacities, 406 size_t* cSizes, 407 void** resPtrs, 408 size_t* resSizes, 409 void** resultBufferPtr, 410 void* compressedBuffer, 411 size_t maxCompressedSize, 412 BMK_timedFnState_t* timeStateCompress, 413 BMK_timedFnState_t* timeStateDecompress, 414 415 const void* srcBuffer, 416 size_t srcSize, 417 const size_t* fileSizes, 418 unsigned nbFiles, 419 const int cLevel, 420 const ZSTD_compressionParameters* comprParams, 421 const void* dictBuffer, 422 size_t dictBufferSize, 423 ZSTD_CCtx* cctx, 424 ZSTD_DCtx* dctx, 425 int displayLevel, 426 const char* displayName, 427 const BMK_advancedParams_t* adv) 428 { 429 size_t const blockSize = 430 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly)) 431 ? adv->blockSize 432 : srcSize) 433 + (!srcSize); /* avoid div by 0 */ 434 BMK_benchResult_t benchResult; 435 size_t const loadedCompressedSize = srcSize; 436 size_t cSize = 0; 437 double ratio = 0.; 438 U32 nbBlocks; 439 440 assert(cctx != NULL); 441 assert(dctx != NULL); 442 443 /* init */ 444 memset(&benchResult, 0, sizeof(benchResult)); 445 if (strlen(displayName) > 17) 446 displayName += 447 strlen(displayName) - 17; /* display last 17 characters */ 448 if (adv->mode == BMK_decodeOnly) { 449 /* benchmark only decompression : source must be already compressed */ 450 const char* srcPtr = (const char*)srcBuffer; 451 U64 totalDSize64 = 0; 452 U32 fileNb; 453 for (fileNb = 0; fileNb < nbFiles; fileNb++) { 454 U64 const fSize64 = 455 ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); 456 if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) { 457 RETURN_ERROR( 458 32, 459 BMK_benchOutcome_t, 460 "Decompressed size cannot be determined: cannot benchmark"); 461 } 462 if (fSize64 == ZSTD_CONTENTSIZE_ERROR) { 463 RETURN_ERROR( 464 32, 465 BMK_benchOutcome_t, 466 "Error while trying to assess decompressed size: data may be invalid"); 467 } 468 totalDSize64 += fSize64; 469 srcPtr += fileSizes[fileNb]; 470 } 471 { 472 size_t const decodedSize = (size_t)totalDSize64; 473 assert((U64)decodedSize == totalDSize64); /* check overflow */ 474 free(*resultBufferPtr); 475 if (totalDSize64 > decodedSize) { /* size_t overflow */ 476 RETURN_ERROR( 477 32, 478 BMK_benchOutcome_t, 479 "decompressed size is too large for local system"); 480 } 481 *resultBufferPtr = malloc(decodedSize); 482 if (!(*resultBufferPtr)) { 483 RETURN_ERROR( 484 33, 485 BMK_benchOutcome_t, 486 "allocation error: not enough memory"); 487 } 488 cSize = srcSize; 489 srcSize = decodedSize; 490 ratio = (double)srcSize / (double)cSize; 491 } 492 } 493 494 /* Init data blocks */ 495 { 496 const char* srcPtr = (const char*)srcBuffer; 497 char* cPtr = (char*)compressedBuffer; 498 char* resPtr = (char*)(*resultBufferPtr); 499 U32 fileNb; 500 for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) { 501 size_t remaining = fileSizes[fileNb]; 502 U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) 503 ? 1 504 : (U32)((remaining + (blockSize - 1)) / blockSize); 505 U32 const blockEnd = nbBlocks + nbBlocksforThisFile; 506 for (; nbBlocks < blockEnd; nbBlocks++) { 507 size_t const thisBlockSize = MIN(remaining, blockSize); 508 srcPtrs[nbBlocks] = srcPtr; 509 srcSizes[nbBlocks] = thisBlockSize; 510 cPtrs[nbBlocks] = cPtr; 511 cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) 512 ? thisBlockSize 513 : ZSTD_compressBound(thisBlockSize); 514 resPtrs[nbBlocks] = resPtr; 515 resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) 516 ? (size_t)ZSTD_findDecompressedSize( 517 srcPtr, thisBlockSize) 518 : thisBlockSize; 519 srcPtr += thisBlockSize; 520 cPtr += cCapacities[nbBlocks]; 521 resPtr += thisBlockSize; 522 remaining -= thisBlockSize; 523 if (adv->mode == BMK_decodeOnly) { 524 cSizes[nbBlocks] = thisBlockSize; 525 benchResult.cSize = thisBlockSize; 526 } 527 } 528 } 529 } 530 531 /* warming up `compressedBuffer` */ 532 if (adv->mode == BMK_decodeOnly) { 533 memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); 534 } else { 535 RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); 536 } 537 538 if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) { 539 OUTPUTLEVEL( 540 2, 541 "Warning : time measurements may be incorrect in multithreading mode... \n") 542 } 543 544 /* Bench */ 545 { 546 U64 const crcOrig = (adv->mode == BMK_decodeOnly) 547 ? 0 548 : XXH64(srcBuffer, srcSize, 0); 549 #define NB_MARKS 4 550 const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; 551 U32 markNb = 0; 552 int compressionCompleted = (adv->mode == BMK_decodeOnly); 553 int decompressionCompleted = (adv->mode == BMK_compressOnly); 554 BMK_benchParams_t cbp, dbp; 555 BMK_initCCtxArgs cctxprep; 556 BMK_initDCtxArgs dctxprep; 557 558 cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */ 559 cbp.benchPayload = cctx; 560 cbp.initFn = local_initCCtx; /* BMK_initCCtx */ 561 cbp.initPayload = &cctxprep; 562 cbp.errorFn = ZSTD_isError; 563 cbp.blockCount = nbBlocks; 564 cbp.srcBuffers = srcPtrs; 565 cbp.srcSizes = srcSizes; 566 cbp.dstBuffers = cPtrs; 567 cbp.dstCapacities = cCapacities; 568 cbp.blockResults = cSizes; 569 570 cctxprep.cctx = cctx; 571 cctxprep.dictBuffer = dictBuffer; 572 cctxprep.dictBufferSize = dictBufferSize; 573 cctxprep.cLevel = cLevel; 574 cctxprep.comprParams = comprParams; 575 cctxprep.adv = adv; 576 577 dbp.benchFn = local_defaultDecompress; 578 dbp.benchPayload = dctx; 579 dbp.initFn = local_initDCtx; 580 dbp.initPayload = &dctxprep; 581 dbp.errorFn = ZSTD_isError; 582 dbp.blockCount = nbBlocks; 583 dbp.srcBuffers = (const void* const*)cPtrs; 584 dbp.srcSizes = cSizes; 585 dbp.dstBuffers = resPtrs; 586 dbp.dstCapacities = resSizes; 587 dbp.blockResults = NULL; 588 589 dctxprep.dctx = dctx; 590 dctxprep.dictBuffer = dictBuffer; 591 dctxprep.dictBufferSize = dictBufferSize; 592 593 OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */ 594 assert(srcSize < UINT_MAX); 595 OUTPUTLEVEL( 596 2, 597 "%2s-%-17.17s :%10u -> \r", 598 marks[markNb], 599 displayName, 600 (unsigned)srcSize); 601 602 while (!(compressionCompleted && decompressionCompleted)) { 603 if (!compressionCompleted) { 604 BMK_runOutcome_t const cOutcome = 605 BMK_benchTimedFn(timeStateCompress, cbp); 606 607 if (!BMK_isSuccessful_runOutcome(cOutcome)) { 608 RETURN_ERROR(30, BMK_benchOutcome_t, "compression error"); 609 } 610 611 { 612 BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); 613 cSize = cResult.sumOfReturn; 614 ratio = (double)srcSize / (double)cSize; 615 { 616 BMK_benchResult_t newResult; 617 newResult.cSpeed = 618 (U64)((double)srcSize * TIMELOOP_NANOSEC 619 / cResult.nanoSecPerRun); 620 benchResult.cSize = cSize; 621 if (newResult.cSpeed > benchResult.cSpeed) 622 benchResult.cSpeed = newResult.cSpeed; 623 } 624 } 625 626 { 627 int const ratioAccuracy = (ratio < 10.) ? 3 : 2; 628 assert(cSize < UINT_MAX); 629 OUTPUTLEVEL( 630 2, 631 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r", 632 marks[markNb], 633 displayName, 634 (unsigned)srcSize, 635 (unsigned)cSize, 636 ratioAccuracy, 637 ratio, 638 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, 639 (double)benchResult.cSpeed / MB_UNIT); 640 } 641 compressionCompleted = 642 BMK_isCompleted_TimedFn(timeStateCompress); 643 } 644 645 if (!decompressionCompleted) { 646 BMK_runOutcome_t const dOutcome = 647 BMK_benchTimedFn(timeStateDecompress, dbp); 648 649 if (!BMK_isSuccessful_runOutcome(dOutcome)) { 650 RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error"); 651 } 652 653 { 654 BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); 655 U64 const newDSpeed = 656 (U64)((double)srcSize * TIMELOOP_NANOSEC 657 / dResult.nanoSecPerRun); 658 if (newDSpeed > benchResult.dSpeed) 659 benchResult.dSpeed = newDSpeed; 660 } 661 662 { 663 int const ratioAccuracy = (ratio < 10.) ? 3 : 2; 664 OUTPUTLEVEL( 665 2, 666 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r", 667 marks[markNb], 668 displayName, 669 (unsigned)srcSize, 670 (unsigned)cSize, 671 ratioAccuracy, 672 ratio, 673 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, 674 (double)benchResult.cSpeed / MB_UNIT, 675 (double)benchResult.dSpeed / MB_UNIT); 676 } 677 decompressionCompleted = 678 BMK_isCompleted_TimedFn(timeStateDecompress); 679 } 680 markNb = (markNb + 1) % NB_MARKS; 681 } /* while (!(compressionCompleted && decompressionCompleted)) */ 682 683 /* CRC Checking */ 684 { 685 const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); 686 U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); 687 if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) { 688 size_t u; 689 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", 690 displayName, 691 (unsigned)crcOrig, 692 (unsigned)crcCheck); 693 for (u = 0; u < srcSize; u++) { 694 if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) { 695 unsigned segNb, bNb, pos; 696 size_t bacc = 0; 697 DISPLAY("Decoding error at pos %u ", (unsigned)u); 698 for (segNb = 0; segNb < nbBlocks; segNb++) { 699 if (bacc + srcSizes[segNb] > u) 700 break; 701 bacc += srcSizes[segNb]; 702 } 703 pos = (U32)(u - bacc); 704 bNb = pos / (128 KB); 705 DISPLAY("(sample %u, block %u, pos %u) \n", 706 segNb, 707 bNb, 708 pos); 709 { 710 size_t const lowest = (u > 5) ? 5 : u; 711 size_t n; 712 DISPLAY("origin: "); 713 for (n = lowest; n > 0; n--) 714 DISPLAY("%02X ", 715 ((const BYTE*)srcBuffer)[u - n]); 716 DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); 717 for (n = 1; n < 3; n++) 718 DISPLAY("%02X ", 719 ((const BYTE*)srcBuffer)[u + n]); 720 DISPLAY(" \n"); 721 DISPLAY("decode: "); 722 for (n = lowest; n > 0; n--) 723 DISPLAY("%02X ", resultBuffer[u - n]); 724 DISPLAY(" :%02X: ", resultBuffer[u]); 725 for (n = 1; n < 3; n++) 726 DISPLAY("%02X ", resultBuffer[u + n]); 727 DISPLAY(" \n"); 728 } 729 break; 730 } 731 if (u == srcSize - 1) { /* should never happen */ 732 DISPLAY("no difference detected\n"); 733 } 734 } /* for (u=0; u<srcSize; u++) */ 735 } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */ 736 } /* CRC Checking */ 737 738 if (displayLevel 739 == 1) { /* hidden display mode -q, used by python speed benchmark */ 740 double const cSpeed = (double)benchResult.cSpeed / MB_UNIT; 741 double const dSpeed = (double)benchResult.dSpeed / MB_UNIT; 742 if (adv->additionalParam) { 743 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", 744 cLevel, 745 (int)cSize, 746 ratio, 747 cSpeed, 748 dSpeed, 749 displayName, 750 adv->additionalParam); 751 } else { 752 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", 753 cLevel, 754 (int)cSize, 755 ratio, 756 cSpeed, 757 dSpeed, 758 displayName); 759 } 760 } 761 762 OUTPUTLEVEL(2, "%2i#\n", cLevel); 763 } /* Bench */ 764 765 benchResult.cMem = 766 (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); 767 return BMK_benchOutcome_setValidResult(benchResult); 768 } 769 770 BMK_benchOutcome_t BMK_benchMemAdvanced( 771 const void* srcBuffer, 772 size_t srcSize, 773 void* dstBuffer, 774 size_t dstCapacity, 775 const size_t* fileSizes, 776 unsigned nbFiles, 777 int cLevel, 778 const ZSTD_compressionParameters* comprParams, 779 const void* dictBuffer, 780 size_t dictBufferSize, 781 int displayLevel, 782 const char* displayName, 783 const BMK_advancedParams_t* adv) 784 785 { 786 int const dstParamsError = 787 !dstBuffer ^ !dstCapacity; /* must be both NULL or none */ 788 789 size_t const blockSize = 790 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly)) 791 ? adv->blockSize 792 : srcSize) 793 + (!srcSize) /* avoid div by 0 */; 794 U32 const maxNbBlocks = 795 (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles; 796 797 /* these are the blockTable parameters, just split up */ 798 const void** const srcPtrs = 799 (const void**)malloc(maxNbBlocks * sizeof(void*)); 800 size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 801 802 void** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); 803 size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 804 size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 805 806 void** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); 807 size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 808 809 BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState( 810 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); 811 BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState( 812 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); 813 814 ZSTD_CCtx* const cctx = ZSTD_createCCtx(); 815 ZSTD_DCtx* const dctx = ZSTD_createDCtx(); 816 817 const size_t maxCompressedSize = dstCapacity 818 ? dstCapacity 819 : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); 820 821 void* const internalDstBuffer = 822 dstBuffer ? NULL : malloc(maxCompressedSize); 823 void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer; 824 825 BMK_benchOutcome_t outcome = 826 BMK_benchOutcome_error(); /* error by default */ 827 828 void* resultBuffer = srcSize ? malloc(srcSize) : NULL; 829 830 int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes 831 || !cCapacities || !resPtrs || !resSizes || !timeStateCompress 832 || !timeStateDecompress || !cctx || !dctx || !compressedBuffer 833 || !resultBuffer; 834 835 if (!allocationincomplete && !dstParamsError) { 836 outcome = BMK_benchMemAdvancedNoAlloc( 837 srcPtrs, 838 srcSizes, 839 cPtrs, 840 cCapacities, 841 cSizes, 842 resPtrs, 843 resSizes, 844 &resultBuffer, 845 compressedBuffer, 846 maxCompressedSize, 847 timeStateCompress, 848 timeStateDecompress, 849 srcBuffer, 850 srcSize, 851 fileSizes, 852 nbFiles, 853 cLevel, 854 comprParams, 855 dictBuffer, 856 dictBufferSize, 857 cctx, 858 dctx, 859 displayLevel, 860 displayName, 861 adv); 862 } 863 864 /* clean up */ 865 BMK_freeTimedFnState(timeStateCompress); 866 BMK_freeTimedFnState(timeStateDecompress); 867 868 ZSTD_freeCCtx(cctx); 869 ZSTD_freeDCtx(dctx); 870 871 free(internalDstBuffer); 872 free(resultBuffer); 873 874 free((void*)srcPtrs); 875 free(srcSizes); 876 free(cPtrs); 877 free(cSizes); 878 free(cCapacities); 879 free(resPtrs); 880 free(resSizes); 881 882 if (allocationincomplete) { 883 RETURN_ERROR( 884 31, BMK_benchOutcome_t, "allocation error : not enough memory"); 885 } 886 887 if (dstParamsError) { 888 RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent"); 889 } 890 return outcome; 891 } 892 893 BMK_benchOutcome_t BMK_benchMem( 894 const void* srcBuffer, 895 size_t srcSize, 896 const size_t* fileSizes, 897 unsigned nbFiles, 898 int cLevel, 899 const ZSTD_compressionParameters* comprParams, 900 const void* dictBuffer, 901 size_t dictBufferSize, 902 int displayLevel, 903 const char* displayName) 904 { 905 BMK_advancedParams_t const adv = BMK_initAdvancedParams(); 906 return BMK_benchMemAdvanced( 907 srcBuffer, 908 srcSize, 909 NULL, 910 0, 911 fileSizes, 912 nbFiles, 913 cLevel, 914 comprParams, 915 dictBuffer, 916 dictBufferSize, 917 displayLevel, 918 displayName, 919 &adv); 920 } 921 922 static BMK_benchOutcome_t BMK_benchCLevel( 923 const void* srcBuffer, 924 size_t benchedSize, 925 const size_t* fileSizes, 926 unsigned nbFiles, 927 int cLevel, 928 const ZSTD_compressionParameters* comprParams, 929 const void* dictBuffer, 930 size_t dictBufferSize, 931 int displayLevel, 932 const char* displayName, 933 BMK_advancedParams_t const* const adv) 934 { 935 const char* pch = strrchr(displayName, '\\'); /* Windows */ 936 if (!pch) 937 pch = strrchr(displayName, '/'); /* Linux */ 938 if (pch) 939 displayName = pch + 1; 940 941 if (adv->realTime) { 942 DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); 943 SET_REALTIME_PRIORITY; 944 } 945 946 if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */ 947 OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", 948 ZSTD_VERSION_STRING, 949 ZSTD_GIT_COMMIT_STRING, 950 (unsigned)benchedSize, 951 adv->nbSeconds, 952 (unsigned)(adv->blockSize >> 10)); 953 954 return BMK_benchMemAdvanced( 955 srcBuffer, 956 benchedSize, 957 NULL, 958 0, 959 fileSizes, 960 nbFiles, 961 cLevel, 962 comprParams, 963 dictBuffer, 964 dictBufferSize, 965 displayLevel, 966 displayName, 967 adv); 968 } 969 970 int BMK_syntheticTest( 971 int cLevel, 972 double compressibility, 973 const ZSTD_compressionParameters* compressionParams, 974 int displayLevel, 975 const BMK_advancedParams_t* adv) 976 { 977 char nameBuff[20] = { 0 }; 978 const char* name = nameBuff; 979 size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000; 980 void* srcBuffer; 981 BMK_benchOutcome_t res; 982 983 if (cLevel > ZSTD_maxCLevel()) { 984 DISPLAYLEVEL(1, "Invalid Compression Level"); 985 return 15; 986 } 987 988 /* Memory allocation */ 989 srcBuffer = malloc(benchedSize); 990 if (!srcBuffer) { 991 DISPLAYLEVEL(1, "allocation error : not enough memory"); 992 return 16; 993 } 994 995 /* Fill input buffer */ 996 if (compressibility < 0.0) { 997 LOREM_genBuffer(srcBuffer, benchedSize, 0); 998 name = "Lorem ipsum"; 999 } else { 1000 RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); 1001 formatString_u( 1002 nameBuff, 1003 sizeof(nameBuff), 1004 "Synthetic %u%%", 1005 (unsigned)(compressibility * 100)); 1006 } 1007 1008 /* Bench */ 1009 res = BMK_benchCLevel( 1010 srcBuffer, 1011 benchedSize, 1012 &benchedSize /* ? */, 1013 1 /* ? */, 1014 cLevel, 1015 compressionParams, 1016 NULL, 1017 0, /* dictionary */ 1018 displayLevel, 1019 name, 1020 adv); 1021 1022 /* clean up */ 1023 free(srcBuffer); 1024 1025 return !BMK_isSuccessful_benchOutcome(res); 1026 } 1027 1028 static size_t BMK_findMaxMem(U64 requiredMem) 1029 { 1030 size_t const step = 64 MB; 1031 BYTE* testmem = NULL; 1032 1033 requiredMem = (((requiredMem >> 26) + 1) << 26); 1034 requiredMem += step; 1035 if (requiredMem > maxMemory) 1036 requiredMem = maxMemory; 1037 1038 do { 1039 testmem = (BYTE*)malloc((size_t)requiredMem); 1040 requiredMem -= step; 1041 } while (!testmem && requiredMem > 0); 1042 1043 free(testmem); 1044 return (size_t)(requiredMem); 1045 } 1046 1047 /*! BMK_loadFiles() : 1048 * Loads `buffer` with content of files listed within `fileNamesTable`. 1049 * At most, fills `buffer` entirely. */ 1050 static int BMK_loadFiles( 1051 void* buffer, 1052 size_t bufferSize, 1053 size_t* fileSizes, 1054 const char* const* fileNamesTable, 1055 unsigned nbFiles, 1056 int displayLevel) 1057 { 1058 size_t pos = 0, totalSize = 0; 1059 unsigned n; 1060 for (n = 0; n < nbFiles; n++) { 1061 U64 fileSize = UTIL_getFileSize( 1062 fileNamesTable[n]); /* last file may be shortened */ 1063 if (UTIL_isDirectory(fileNamesTable[n])) { 1064 DISPLAYLEVEL( 1065 2, "Ignoring %s directory... \n", fileNamesTable[n]); 1066 fileSizes[n] = 0; 1067 continue; 1068 } 1069 if (fileSize == UTIL_FILESIZE_UNKNOWN) { 1070 DISPLAYLEVEL( 1071 2, 1072 "Cannot evaluate size of %s, ignoring ... \n", 1073 fileNamesTable[n]); 1074 fileSizes[n] = 0; 1075 continue; 1076 } 1077 { 1078 FILE* const f = fopen(fileNamesTable[n], "rb"); 1079 if (f == NULL) 1080 RETURN_ERROR_INT( 1081 10, "impossible to open file %s", fileNamesTable[n]); 1082 OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]); 1083 if (fileSize > bufferSize - pos) 1084 fileSize = bufferSize - pos, 1085 nbFiles = n; /* buffer too small - stop after this file */ 1086 { 1087 size_t const readSize = 1088 fread(((char*)buffer) + pos, 1, (size_t)fileSize, f); 1089 if (readSize != (size_t)fileSize) 1090 RETURN_ERROR_INT( 1091 11, "could not read %s", fileNamesTable[n]); 1092 pos += readSize; 1093 } 1094 fileSizes[n] = (size_t)fileSize; 1095 totalSize += (size_t)fileSize; 1096 fclose(f); 1097 } 1098 } 1099 1100 if (totalSize == 0) 1101 RETURN_ERROR_INT(12, "no data to bench"); 1102 return 0; 1103 } 1104 1105 int BMK_benchFilesAdvanced( 1106 const char* const* fileNamesTable, 1107 unsigned nbFiles, 1108 const char* dictFileName, 1109 int cLevel, 1110 const ZSTD_compressionParameters* compressionParams, 1111 int displayLevel, 1112 const BMK_advancedParams_t* adv) 1113 { 1114 void* srcBuffer = NULL; 1115 size_t benchedSize; 1116 void* dictBuffer = NULL; 1117 size_t dictBufferSize = 0; 1118 size_t* fileSizes = NULL; 1119 BMK_benchOutcome_t res; 1120 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); 1121 1122 if (!nbFiles) { 1123 DISPLAYLEVEL(1, "No Files to Benchmark"); 1124 return 13; 1125 } 1126 1127 if (cLevel > ZSTD_maxCLevel()) { 1128 DISPLAYLEVEL(1, "Invalid Compression Level"); 1129 return 14; 1130 } 1131 1132 if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) { 1133 DISPLAYLEVEL(1, "Error loading files"); 1134 return 15; 1135 } 1136 1137 fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); 1138 if (!fileSizes) { 1139 DISPLAYLEVEL(1, "not enough memory for fileSizes"); 1140 return 16; 1141 } 1142 1143 /* Load dictionary */ 1144 if (dictFileName != NULL) { 1145 U64 const dictFileSize = UTIL_getFileSize(dictFileName); 1146 if (dictFileSize == UTIL_FILESIZE_UNKNOWN) { 1147 DISPLAYLEVEL( 1148 1, 1149 "error loading %s : %s \n", 1150 dictFileName, 1151 strerror(errno)); 1152 free(fileSizes); 1153 DISPLAYLEVEL(1, "benchmark aborted"); 1154 return 17; 1155 } 1156 if (dictFileSize > 64 MB) { 1157 free(fileSizes); 1158 DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName); 1159 return 18; 1160 } 1161 dictBufferSize = (size_t)dictFileSize; 1162 dictBuffer = malloc(dictBufferSize); 1163 if (dictBuffer == NULL) { 1164 free(fileSizes); 1165 DISPLAYLEVEL( 1166 1, 1167 "not enough memory for dictionary (%u bytes)", 1168 (unsigned)dictBufferSize); 1169 return 19; 1170 } 1171 1172 { 1173 int const errorCode = BMK_loadFiles( 1174 dictBuffer, 1175 dictBufferSize, 1176 fileSizes, 1177 &dictFileName /*?*/, 1178 1 /*?*/, 1179 displayLevel); 1180 if (errorCode) { 1181 res = BMK_benchOutcome_error(); 1182 goto _cleanUp; 1183 } 1184 } 1185 } 1186 1187 /* Memory allocation & restrictions */ 1188 benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; 1189 if ((U64)benchedSize > totalSizeToLoad) 1190 benchedSize = (size_t)totalSizeToLoad; 1191 if (benchedSize < totalSizeToLoad) 1192 DISPLAY("Not enough memory; testing %u MB only...\n", 1193 (unsigned)(benchedSize >> 20)); 1194 1195 srcBuffer = benchedSize ? malloc(benchedSize) : NULL; 1196 if (!srcBuffer) { 1197 free(dictBuffer); 1198 free(fileSizes); 1199 DISPLAYLEVEL(1, "not enough memory for srcBuffer"); 1200 return 20; 1201 } 1202 1203 /* Load input buffer */ 1204 { 1205 int const errorCode = BMK_loadFiles( 1206 srcBuffer, 1207 benchedSize, 1208 fileSizes, 1209 fileNamesTable, 1210 nbFiles, 1211 displayLevel); 1212 if (errorCode) { 1213 res = BMK_benchOutcome_error(); 1214 goto _cleanUp; 1215 } 1216 } 1217 1218 /* Bench */ 1219 { 1220 char mfName[20] = { 0 }; 1221 formatString_u(mfName, sizeof(mfName), " %u files", nbFiles); 1222 { 1223 const char* const displayName = 1224 (nbFiles > 1) ? mfName : fileNamesTable[0]; 1225 res = BMK_benchCLevel( 1226 srcBuffer, 1227 benchedSize, 1228 fileSizes, 1229 nbFiles, 1230 cLevel, 1231 compressionParams, 1232 dictBuffer, 1233 dictBufferSize, 1234 displayLevel, 1235 displayName, 1236 adv); 1237 } 1238 } 1239 1240 _cleanUp: 1241 free(srcBuffer); 1242 free(dictBuffer); 1243 free(fileSizes); 1244 return !BMK_isSuccessful_benchOutcome(res); 1245 } 1246 1247 int BMK_benchFiles( 1248 const char* const* fileNamesTable, 1249 unsigned nbFiles, 1250 const char* dictFileName, 1251 int cLevel, 1252 const ZSTD_compressionParameters* compressionParams, 1253 int displayLevel) 1254 { 1255 BMK_advancedParams_t const adv = BMK_initAdvancedParams(); 1256 return BMK_benchFilesAdvanced( 1257 fileNamesTable, 1258 nbFiles, 1259 dictFileName, 1260 cLevel, 1261 compressionParams, 1262 displayLevel, 1263 &adv); 1264 } 1265