/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


/*-************************************
*  Dependencies
**************************************/
#include "util.h"      /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */
#include <stdlib.h>    /* malloc */
#include <stdio.h>     /* fprintf, fopen, ftello64 */
#include <string.h>    /* strcmp */
#include <math.h>      /* log */
#include <assert.h>

#include "timefn.h"    /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_estimateCCtxSize */
#include "zstd.h"
#include "datagen.h"
#include "xxhash.h"
#include "benchfn.h"
#include "benchzstd.h"
#include "zstd_errors.h"
#include "zstd_internal.h"   /* should not be needed */


/*-************************************
*  Constants
**************************************/
#define PROGRAM_DESCRIPTION "ZSTD parameters tester"
#define AUTHOR "Yann Collet"
/* Expands to a printf-style format string followed by its arguments,
 * so it must be used as the complete argument list of a display call. */
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR

#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
#define NB_LEVELS_TRACKED 22   /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */

/* Upper bound applied by BMK_findMaxMem() :
 * 2 GB - 64 MB when size_t is 4 bytes wide,
 * otherwise 1 << (bit width of size_t - 31), i.e. 2^33 bytes for a 64-bit size_t. */
static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));

#define COMPRESSIBILITY_DEFAULT 0.50

static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
static const int g_maxNbVariations = 64;


/*-************************************
*  Macros
**************************************/
#define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
/* note : expands to a bare `if (...) { ... }` statement (no do/while wrapper),
 * so it must not be placed directly in front of an `else`. */
#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
/* prints to stderr only when DEBUG is non-zero; expands to a braced block */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

#define TIMED 0
#ifndef DEBUG
#  define DEBUG 0
#endif

/* local min/max : each argument may be evaluated twice, avoid side effects */
#undef MIN
#undef MAX
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
#define CUSTOM_LEVEL 99
#define BASE_CLEVEL 1

/* forceAttachDict bounds : FADT_MAX is the unsigned representation of -1,
 * so the [FADT_MIN, FADT_MAX] interval covers every U32 value */
#define FADT_MIN 0
#define FADT_MAX ((U32)-1)

/* number of distinct values each parameter can take (see rangetable[]) */
#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
#define MML_RANGE  (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
#define TLEN_RANGE  17   /* number of entries of tlen_table[] */
#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
#define FADT_RANGE   3   /* rangeMap() maps indices 0, 1, 2 to -1, 0, 1 */

/* CHECKTIME() : makes the enclosing function `return r`
 * once more than g_timeLimit_s seconds have elapsed since g_time */
#define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } }
/* CHECKTIMEGT() : same time test, but assigns `val` to `ret`
 * and jumps to label `_gototag` instead of returning */
#define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } }

#define PARAM_UNSET ((U32)-2)         /* can't be -1 b/c fadt uses -1 */

/* display names, indexed by strategy value (index 0 is a placeholder) */
static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
                "(none) ", "ZSTD_fast ", "ZSTD_dfast ",
                "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
                "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra ",
                "ZSTD_btultra2"};

/* targetLength values explored, in ascending order (searched by invRangeMap()) */
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };


/*-************************************
*  Setup for Adding new params
**************************************/

101*3117ece4Schristos /* indices for each of the variables */ 102*3117ece4Schristos typedef enum { 103*3117ece4Schristos wlog_ind = 0, 104*3117ece4Schristos clog_ind = 1, 105*3117ece4Schristos hlog_ind = 2, 106*3117ece4Schristos slog_ind = 3, 107*3117ece4Schristos mml_ind = 4, 108*3117ece4Schristos tlen_ind = 5, 109*3117ece4Schristos strt_ind = 6, 110*3117ece4Schristos fadt_ind = 7, /* forceAttachDict */ 111*3117ece4Schristos NUM_PARAMS = 8 112*3117ece4Schristos } varInds_t; 113*3117ece4Schristos 114*3117ece4Schristos typedef struct { 115*3117ece4Schristos U32 vals[NUM_PARAMS]; 116*3117ece4Schristos } paramValues_t; 117*3117ece4Schristos 118*3117ece4Schristos /* minimum value of parameters */ 119*3117ece4Schristos static const U32 mintable[NUM_PARAMS] = 120*3117ece4Schristos { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN }; 121*3117ece4Schristos 122*3117ece4Schristos /* maximum value of parameters */ 123*3117ece4Schristos static const U32 maxtable[NUM_PARAMS] = 124*3117ece4Schristos { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX }; 125*3117ece4Schristos 126*3117ece4Schristos /* # of values parameters can take on */ 127*3117ece4Schristos static const U32 rangetable[NUM_PARAMS] = 128*3117ece4Schristos { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE }; 129*3117ece4Schristos 130*3117ece4Schristos /* ZSTD_cctxSetParameter() index to set */ 131*3117ece4Schristos static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] = 132*3117ece4Schristos { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict }; 133*3117ece4Schristos 134*3117ece4Schristos /* names of parameters */ 135*3117ece4Schristos static const char* 
g_paramNames[NUM_PARAMS] = 136*3117ece4Schristos { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" }; 137*3117ece4Schristos 138*3117ece4Schristos /* shortened names of parameters */ 139*3117ece4Schristos static const char* g_shortParamNames[NUM_PARAMS] = 140*3117ece4Schristos { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" }; 141*3117ece4Schristos 142*3117ece4Schristos /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */ 143*3117ece4Schristos static U32 rangeMap(varInds_t param, int ind) 144*3117ece4Schristos { 145*3117ece4Schristos U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0); 146*3117ece4Schristos switch(param) { 147*3117ece4Schristos case wlog_ind: /* using default: triggers -Wswitch-enum */ 148*3117ece4Schristos case clog_ind: 149*3117ece4Schristos case hlog_ind: 150*3117ece4Schristos case slog_ind: 151*3117ece4Schristos case mml_ind: 152*3117ece4Schristos case strt_ind: 153*3117ece4Schristos return mintable[param] + uind; 154*3117ece4Schristos case tlen_ind: 155*3117ece4Schristos return tlen_table[uind]; 156*3117ece4Schristos case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */ 157*3117ece4Schristos return uind - 1; 158*3117ece4Schristos case NUM_PARAMS: 159*3117ece4Schristos default:; 160*3117ece4Schristos } 161*3117ece4Schristos DISPLAY("Error, not a valid param\n "); 162*3117ece4Schristos assert(0); 163*3117ece4Schristos return (U32)-1; 164*3117ece4Schristos } 165*3117ece4Schristos 166*3117ece4Schristos /* inverse of rangeMap */ 167*3117ece4Schristos static int invRangeMap(varInds_t param, U32 value) 168*3117ece4Schristos { 169*3117ece4Schristos value = MIN(MAX(mintable[param], value), maxtable[param]); 170*3117ece4Schristos switch(param) { 171*3117ece4Schristos case wlog_ind: 172*3117ece4Schristos case clog_ind: 173*3117ece4Schristos case hlog_ind: 174*3117ece4Schristos case slog_ind: 175*3117ece4Schristos case mml_ind: 176*3117ece4Schristos case 
strt_ind: 177*3117ece4Schristos return (int)(value - mintable[param]); 178*3117ece4Schristos case tlen_ind: /* bin search */ 179*3117ece4Schristos { 180*3117ece4Schristos int lo = 0; 181*3117ece4Schristos int hi = TLEN_RANGE; 182*3117ece4Schristos while(lo < hi) { 183*3117ece4Schristos int mid = (lo + hi) / 2; 184*3117ece4Schristos if(tlen_table[mid] < value) { 185*3117ece4Schristos lo = mid + 1; 186*3117ece4Schristos } if(tlen_table[mid] == value) { 187*3117ece4Schristos return mid; 188*3117ece4Schristos } else { 189*3117ece4Schristos hi = mid; 190*3117ece4Schristos } 191*3117ece4Schristos } 192*3117ece4Schristos return lo; 193*3117ece4Schristos } 194*3117ece4Schristos case fadt_ind: 195*3117ece4Schristos return (int)value + 1; 196*3117ece4Schristos case NUM_PARAMS: 197*3117ece4Schristos default:; 198*3117ece4Schristos } 199*3117ece4Schristos DISPLAY("Error, not a valid param\n "); 200*3117ece4Schristos assert(0); 201*3117ece4Schristos return -2; 202*3117ece4Schristos } 203*3117ece4Schristos 204*3117ece4Schristos /* display of params */ 205*3117ece4Schristos static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width) 206*3117ece4Schristos { 207*3117ece4Schristos switch(param) { 208*3117ece4Schristos case wlog_ind: 209*3117ece4Schristos case clog_ind: 210*3117ece4Schristos case hlog_ind: 211*3117ece4Schristos case slog_ind: 212*3117ece4Schristos case mml_ind: 213*3117ece4Schristos case tlen_ind: 214*3117ece4Schristos if(width) { 215*3117ece4Schristos fprintf(f, "%*u", width, value); 216*3117ece4Schristos } else { 217*3117ece4Schristos fprintf(f, "%u", value); 218*3117ece4Schristos } 219*3117ece4Schristos break; 220*3117ece4Schristos case strt_ind: 221*3117ece4Schristos if(width) { 222*3117ece4Schristos fprintf(f, "%*s", width, g_stratName[value]); 223*3117ece4Schristos } else { 224*3117ece4Schristos fprintf(f, "%s", g_stratName[value]); 225*3117ece4Schristos } 226*3117ece4Schristos break; 227*3117ece4Schristos case fadt_ind: /* force attach 
dict */ 228*3117ece4Schristos if(width) { 229*3117ece4Schristos fprintf(f, "%*d", width, (int)value); 230*3117ece4Schristos } else { 231*3117ece4Schristos fprintf(f, "%d", (int)value); 232*3117ece4Schristos } 233*3117ece4Schristos break; 234*3117ece4Schristos case NUM_PARAMS: 235*3117ece4Schristos default: 236*3117ece4Schristos DISPLAY("Error, not a valid param\n "); 237*3117ece4Schristos assert(0); 238*3117ece4Schristos break; 239*3117ece4Schristos } 240*3117ece4Schristos } 241*3117ece4Schristos 242*3117ece4Schristos 243*3117ece4Schristos /*-************************************ 244*3117ece4Schristos * Benchmark Parameters/Global Variables 245*3117ece4Schristos **************************************/ 246*3117ece4Schristos 247*3117ece4Schristos /* General Utility */ 248*3117ece4Schristos static U32 g_timeLimit_s = 99999; /* about 27 hours */ 249*3117ece4Schristos static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */ 250*3117ece4Schristos static U32 g_blockSize = 0; 251*3117ece4Schristos static U32 g_rand = 1; 252*3117ece4Schristos 253*3117ece4Schristos /* Display */ 254*3117ece4Schristos static int g_displayLevel = 3; 255*3117ece4Schristos static BYTE g_silenceParams[NUM_PARAMS]; /* can selectively silence some params when displaying them */ 256*3117ece4Schristos 257*3117ece4Schristos /* Mode Selection */ 258*3117ece4Schristos static U32 g_singleRun = 0; 259*3117ece4Schristos static U32 g_optimizer = 0; 260*3117ece4Schristos static int g_optmode = 0; 261*3117ece4Schristos 262*3117ece4Schristos /* For cLevel Table generation */ 263*3117ece4Schristos static U32 g_target = 0; 264*3117ece4Schristos static U32 g_noSeed = 0; 265*3117ece4Schristos 266*3117ece4Schristos /* For optimizer */ 267*3117ece4Schristos static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */ 268*3117ece4Schristos static double g_ratioMultiplier = 5.; 269*3117ece4Schristos static U32 g_strictness = 
PARAM_UNSET; /* range 1 - 100, measure of how strict */ 270*3117ece4Schristos static BMK_benchResult_t g_lvltarget; 271*3117ece4Schristos 272*3117ece4Schristos typedef enum { 273*3117ece4Schristos directMap, 274*3117ece4Schristos xxhashMap, 275*3117ece4Schristos noMemo 276*3117ece4Schristos } memoTableType_t; 277*3117ece4Schristos 278*3117ece4Schristos typedef struct { 279*3117ece4Schristos memoTableType_t tableType; 280*3117ece4Schristos BYTE* table; 281*3117ece4Schristos size_t tableLen; 282*3117ece4Schristos varInds_t varArray[NUM_PARAMS]; 283*3117ece4Schristos size_t varLen; 284*3117ece4Schristos } memoTable_t; 285*3117ece4Schristos 286*3117ece4Schristos typedef struct { 287*3117ece4Schristos BMK_benchResult_t result; 288*3117ece4Schristos paramValues_t params; 289*3117ece4Schristos } winnerInfo_t; 290*3117ece4Schristos 291*3117ece4Schristos typedef struct { 292*3117ece4Schristos U32 cSpeed; /* bytes / sec */ 293*3117ece4Schristos U32 dSpeed; 294*3117ece4Schristos U32 cMem; /* bytes */ 295*3117ece4Schristos } constraint_t; 296*3117ece4Schristos 297*3117ece4Schristos typedef struct winner_ll_node winner_ll_node; 298*3117ece4Schristos struct winner_ll_node { 299*3117ece4Schristos winnerInfo_t res; 300*3117ece4Schristos winner_ll_node* next; 301*3117ece4Schristos }; 302*3117ece4Schristos 303*3117ece4Schristos static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */ 304*3117ece4Schristos 305*3117ece4Schristos /* 306*3117ece4Schristos * Additional Global Variables (Defined Above Use) 307*3117ece4Schristos * g_level_constraint 308*3117ece4Schristos * g_alreadyTested 309*3117ece4Schristos * g_maxTries 310*3117ece4Schristos * g_clockGranularity 311*3117ece4Schristos */ 312*3117ece4Schristos 313*3117ece4Schristos 314*3117ece4Schristos /*-******************************************************* 315*3117ece4Schristos * General Util Functions 316*3117ece4Schristos *********************************************************/ 317*3117ece4Schristos 
318*3117ece4Schristos /* nullified useless params, to ensure count stats */ 319*3117ece4Schristos /* cleans up params for memoizing / display */ 320*3117ece4Schristos static paramValues_t sanitizeParams(paramValues_t params) 321*3117ece4Schristos { 322*3117ece4Schristos if (params.vals[strt_ind] == ZSTD_fast) 323*3117ece4Schristos params.vals[clog_ind] = 0, params.vals[slog_ind] = 0; 324*3117ece4Schristos if (params.vals[strt_ind] == ZSTD_dfast) 325*3117ece4Schristos params.vals[slog_ind] = 0; 326*3117ece4Schristos if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) ) 327*3117ece4Schristos params.vals[tlen_ind] = 0; 328*3117ece4Schristos 329*3117ece4Schristos return params; 330*3117ece4Schristos } 331*3117ece4Schristos 332*3117ece4Schristos static ZSTD_compressionParameters pvalsToCParams(paramValues_t p) 333*3117ece4Schristos { 334*3117ece4Schristos ZSTD_compressionParameters c; 335*3117ece4Schristos memset(&c, 0, sizeof(ZSTD_compressionParameters)); 336*3117ece4Schristos c.windowLog = p.vals[wlog_ind]; 337*3117ece4Schristos c.chainLog = p.vals[clog_ind]; 338*3117ece4Schristos c.hashLog = p.vals[hlog_ind]; 339*3117ece4Schristos c.searchLog = p.vals[slog_ind]; 340*3117ece4Schristos c.minMatch = p.vals[mml_ind]; 341*3117ece4Schristos c.targetLength = p.vals[tlen_ind]; 342*3117ece4Schristos c.strategy = p.vals[strt_ind]; 343*3117ece4Schristos /* no forceAttachDict */ 344*3117ece4Schristos return c; 345*3117ece4Schristos } 346*3117ece4Schristos 347*3117ece4Schristos static paramValues_t cParamsToPVals(ZSTD_compressionParameters c) 348*3117ece4Schristos { 349*3117ece4Schristos paramValues_t p; 350*3117ece4Schristos varInds_t i; 351*3117ece4Schristos p.vals[wlog_ind] = c.windowLog; 352*3117ece4Schristos p.vals[clog_ind] = c.chainLog; 353*3117ece4Schristos p.vals[hlog_ind] = c.hashLog; 354*3117ece4Schristos p.vals[slog_ind] = c.searchLog; 355*3117ece4Schristos p.vals[mml_ind] = c.minMatch; 356*3117ece4Schristos p.vals[tlen_ind] = 
c.targetLength; 357*3117ece4Schristos p.vals[strt_ind] = c.strategy; 358*3117ece4Schristos 359*3117ece4Schristos /* set all other params to their minimum value */ 360*3117ece4Schristos for (i = strt_ind + 1; i < NUM_PARAMS; i++) { 361*3117ece4Schristos p.vals[i] = mintable[i]; 362*3117ece4Schristos } 363*3117ece4Schristos return p; 364*3117ece4Schristos } 365*3117ece4Schristos 366*3117ece4Schristos /* equivalent of ZSTD_adjustCParams for paramValues_t */ 367*3117ece4Schristos static paramValues_t 368*3117ece4Schristos adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize) 369*3117ece4Schristos { 370*3117ece4Schristos paramValues_t ot = p; 371*3117ece4Schristos varInds_t i; 372*3117ece4Schristos p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize)); 373*3117ece4Schristos if (!dictSize) { p.vals[fadt_ind] = 0; } 374*3117ece4Schristos /* retain value of all other parameters */ 375*3117ece4Schristos for(i = strt_ind + 1; i < NUM_PARAMS; i++) { 376*3117ece4Schristos p.vals[i] = ot.vals[i]; 377*3117ece4Schristos } 378*3117ece4Schristos return p; 379*3117ece4Schristos } 380*3117ece4Schristos 381*3117ece4Schristos static size_t BMK_findMaxMem(U64 requiredMem) 382*3117ece4Schristos { 383*3117ece4Schristos size_t const step = 64 MB; 384*3117ece4Schristos void* testmem = NULL; 385*3117ece4Schristos 386*3117ece4Schristos requiredMem = (((requiredMem >> 26) + 1) << 26); 387*3117ece4Schristos if (requiredMem > maxMemory) requiredMem = maxMemory; 388*3117ece4Schristos 389*3117ece4Schristos requiredMem += 2 * step; 390*3117ece4Schristos while (!testmem && requiredMem > 0) { 391*3117ece4Schristos testmem = malloc ((size_t)requiredMem); 392*3117ece4Schristos requiredMem -= step; 393*3117ece4Schristos } 394*3117ece4Schristos 395*3117ece4Schristos free (testmem); 396*3117ece4Schristos return (size_t) requiredMem; 397*3117ece4Schristos } 398*3117ece4Schristos 399*3117ece4Schristos /* accuracy in seconds only, span can be multiple 
years */ 400*3117ece4Schristos static U32 BMK_timeSpan_s(const UTIL_time_t tStart) 401*3117ece4Schristos { 402*3117ece4Schristos return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL); 403*3117ece4Schristos } 404*3117ece4Schristos 405*3117ece4Schristos static U32 FUZ_rotl32(U32 x, U32 r) 406*3117ece4Schristos { 407*3117ece4Schristos return ((x << r) | (x >> (32 - r))); 408*3117ece4Schristos } 409*3117ece4Schristos 410*3117ece4Schristos static U32 FUZ_rand(U32* src) 411*3117ece4Schristos { 412*3117ece4Schristos const U32 prime1 = 2654435761U; 413*3117ece4Schristos const U32 prime2 = 2246822519U; 414*3117ece4Schristos U32 rand32 = *src; 415*3117ece4Schristos rand32 *= prime1; 416*3117ece4Schristos rand32 += prime2; 417*3117ece4Schristos rand32 = FUZ_rotl32(rand32, 13); 418*3117ece4Schristos *src = rand32; 419*3117ece4Schristos return rand32 >> 5; 420*3117ece4Schristos } 421*3117ece4Schristos 422*3117ece4Schristos #define BOUNDCHECK(val,min,max) { \ 423*3117ece4Schristos if (((val)<(min)) | ((val)>(max))) { \ 424*3117ece4Schristos DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \ 425*3117ece4Schristos return 0; \ 426*3117ece4Schristos } } 427*3117ece4Schristos 428*3117ece4Schristos static int paramValid(const paramValues_t paramTarget) 429*3117ece4Schristos { 430*3117ece4Schristos U32 i; 431*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 432*3117ece4Schristos BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]); 433*3117ece4Schristos } 434*3117ece4Schristos return 1; 435*3117ece4Schristos } 436*3117ece4Schristos 437*3117ece4Schristos /* cParamUnsetMin() : 438*3117ece4Schristos * if any parameter in paramTarget is not yet set, 439*3117ece4Schristos * it will receive its corresponding minimal value. 
440*3117ece4Schristos * This function never fails */ 441*3117ece4Schristos static paramValues_t cParamUnsetMin(paramValues_t paramTarget) 442*3117ece4Schristos { 443*3117ece4Schristos varInds_t vi; 444*3117ece4Schristos for (vi = 0; vi < NUM_PARAMS; vi++) { 445*3117ece4Schristos if (paramTarget.vals[vi] == PARAM_UNSET) { 446*3117ece4Schristos paramTarget.vals[vi] = mintable[vi]; 447*3117ece4Schristos } 448*3117ece4Schristos } 449*3117ece4Schristos return paramTarget; 450*3117ece4Schristos } 451*3117ece4Schristos 452*3117ece4Schristos static paramValues_t emptyParams(void) 453*3117ece4Schristos { 454*3117ece4Schristos U32 i; 455*3117ece4Schristos paramValues_t p; 456*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 457*3117ece4Schristos p.vals[i] = PARAM_UNSET; 458*3117ece4Schristos } 459*3117ece4Schristos return p; 460*3117ece4Schristos } 461*3117ece4Schristos 462*3117ece4Schristos static winnerInfo_t initWinnerInfo(const paramValues_t p) 463*3117ece4Schristos { 464*3117ece4Schristos winnerInfo_t w1; 465*3117ece4Schristos w1.result.cSpeed = 0; 466*3117ece4Schristos w1.result.dSpeed = 0; 467*3117ece4Schristos w1.result.cMem = (size_t)-1; 468*3117ece4Schristos w1.result.cSize = (size_t)-1; 469*3117ece4Schristos w1.params = p; 470*3117ece4Schristos return w1; 471*3117ece4Schristos } 472*3117ece4Schristos 473*3117ece4Schristos static paramValues_t 474*3117ece4Schristos overwriteParams(paramValues_t base, const paramValues_t mask) 475*3117ece4Schristos { 476*3117ece4Schristos U32 i; 477*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 478*3117ece4Schristos if(mask.vals[i] != PARAM_UNSET) { 479*3117ece4Schristos base.vals[i] = mask.vals[i]; 480*3117ece4Schristos } 481*3117ece4Schristos } 482*3117ece4Schristos return base; 483*3117ece4Schristos } 484*3117ece4Schristos 485*3117ece4Schristos static void 486*3117ece4Schristos paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr) 487*3117ece4Schristos { 488*3117ece4Schristos ptr->vals[paramIndex] 
= rangeMap(paramIndex, 489*3117ece4Schristos invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt); 490*3117ece4Schristos } 491*3117ece4Schristos 492*3117ece4Schristos /* varies ptr by nbChanges respecting varyParams*/ 493*3117ece4Schristos static void 494*3117ece4Schristos paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges) 495*3117ece4Schristos { 496*3117ece4Schristos paramValues_t p; 497*3117ece4Schristos int validated = 0; 498*3117ece4Schristos while (!validated) { 499*3117ece4Schristos U32 i; 500*3117ece4Schristos p = *ptr; 501*3117ece4Schristos for (i = 0 ; i < nbChanges ; i++) { 502*3117ece4Schristos const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1); 503*3117ece4Schristos paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], 504*3117ece4Schristos (int)((changeID & 1) << 1) - 1, 505*3117ece4Schristos &p); 506*3117ece4Schristos } 507*3117ece4Schristos validated = paramValid(p); 508*3117ece4Schristos } 509*3117ece4Schristos *ptr = p; 510*3117ece4Schristos } 511*3117ece4Schristos 512*3117ece4Schristos /* Completely random parameter selection */ 513*3117ece4Schristos static paramValues_t randomParams(void) 514*3117ece4Schristos { 515*3117ece4Schristos varInds_t v; paramValues_t p; 516*3117ece4Schristos for(v = 0; v < NUM_PARAMS; v++) { 517*3117ece4Schristos p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v])); 518*3117ece4Schristos } 519*3117ece4Schristos return p; 520*3117ece4Schristos } 521*3117ece4Schristos 522*3117ece4Schristos static U64 g_clockGranularity = 100000000ULL; 523*3117ece4Schristos 524*3117ece4Schristos static void init_clockGranularity(void) 525*3117ece4Schristos { 526*3117ece4Schristos UTIL_time_t const clockStart = UTIL_getTime(); 527*3117ece4Schristos U64 el1 = 0, el2 = 0; 528*3117ece4Schristos int i = 0; 529*3117ece4Schristos do { 530*3117ece4Schristos el1 = el2; 531*3117ece4Schristos el2 = UTIL_clockSpanNano(clockStart); 532*3117ece4Schristos if(el1 < el2) { 
533*3117ece4Schristos U64 iv = el2 - el1; 534*3117ece4Schristos if(g_clockGranularity > iv) { 535*3117ece4Schristos g_clockGranularity = iv; 536*3117ece4Schristos i = 0; 537*3117ece4Schristos } else { 538*3117ece4Schristos i++; 539*3117ece4Schristos } 540*3117ece4Schristos } 541*3117ece4Schristos } while(i < 10); 542*3117ece4Schristos DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity); 543*3117ece4Schristos } 544*3117ece4Schristos 545*3117ece4Schristos /*-************************************ 546*3117ece4Schristos * Optimizer Util Functions 547*3117ece4Schristos **************************************/ 548*3117ece4Schristos 549*3117ece4Schristos /* checks results are feasible */ 550*3117ece4Schristos static int feasible(const BMK_benchResult_t results, const constraint_t target) { 551*3117ece4Schristos return (results.cSpeed >= target.cSpeed) 552*3117ece4Schristos && (results.dSpeed >= target.dSpeed) 553*3117ece4Schristos && (results.cMem <= target.cMem) 554*3117ece4Schristos && (!g_optmode || results.cSize <= g_lvltarget.cSize); 555*3117ece4Schristos } 556*3117ece4Schristos 557*3117ece4Schristos /* hill climbing value for part 1 */ 558*3117ece4Schristos /* Scoring here is a linear reward for all set constraints normalized between 0 and 1 559*3117ece4Schristos * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic 560*3117ece4Schristos * bonus to exceeding the constraint value. We also give linear ratio for compression ratio. 561*3117ece4Schristos * The constant factors are experimental. 
562*3117ece4Schristos */ 563*3117ece4Schristos static double 564*3117ece4Schristos resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target) 565*3117ece4Schristos { 566*3117ece4Schristos double cs = 0., ds = 0., rt, cm = 0.; 567*3117ece4Schristos const double r1 = 1, r2 = 0.1, rtr = 0.5; 568*3117ece4Schristos double ret; 569*3117ece4Schristos if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; } 570*3117ece4Schristos if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; } 571*3117ece4Schristos if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; } 572*3117ece4Schristos rt = ((double)srcSize / (double)res.cSize); 573*3117ece4Schristos 574*3117ece4Schristos ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr + 575*3117ece4Schristos (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2; 576*3117ece4Schristos 577*3117ece4Schristos return ret; 578*3117ece4Schristos } 579*3117ece4Schristos 580*3117ece4Schristos /* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */ 581*3117ece4Schristos static double 582*3117ece4Schristos resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes) 583*3117ece4Schristos { 584*3117ece4Schristos double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1; 585*3117ece4Schristos double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1; 586*3117ece4Schristos if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) { 587*3117ece4Schristos return 0.0; 588*3117ece4Schristos } 589*3117ece4Schristos return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1; 590*3117ece4Schristos } 591*3117ece4Schristos 592*3117ece4Schristos /* return true if r2 strictly better than r1 */ 593*3117ece4Schristos static int 594*3117ece4Schristos compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const 
constraint_t target, size_t srcSize) 595*3117ece4Schristos { 596*3117ece4Schristos if(feasible(result1, target) && feasible(result2, target)) { 597*3117ece4Schristos if(g_optmode) { 598*3117ece4Schristos return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget); 599*3117ece4Schristos } else { 600*3117ece4Schristos return (result1.cSize > result2.cSize) 601*3117ece4Schristos || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) 602*3117ece4Schristos || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed); 603*3117ece4Schristos } 604*3117ece4Schristos } 605*3117ece4Schristos return feasible(result2, target) 606*3117ece4Schristos || (!feasible(result1, target) 607*3117ece4Schristos && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target))); 608*3117ece4Schristos } 609*3117ece4Schristos 610*3117ece4Schristos static constraint_t relaxTarget(constraint_t target) { 611*3117ece4Schristos target.cMem = (U32)-1; 612*3117ece4Schristos target.cSpeed = (target.cSpeed * g_strictness) / 100; 613*3117ece4Schristos target.dSpeed = (target.dSpeed * g_strictness) / 100; 614*3117ece4Schristos return target; 615*3117ece4Schristos } 616*3117ece4Schristos 617*3117ece4Schristos static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize) 618*3117ece4Schristos { 619*3117ece4Schristos varInds_t v; 620*3117ece4Schristos for(v = 0; v < NUM_PARAMS; v++) { 621*3117ece4Schristos if(pc->vals[v] != PARAM_UNSET) { 622*3117ece4Schristos U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]); 623*3117ece4Schristos if(newval != pc->vals[v]) { 624*3117ece4Schristos pc->vals[v] = newval; 625*3117ece4Schristos DISPLAY("Warning: parameter %s not in valid range, adjusting to ", 626*3117ece4Schristos g_paramNames[v]); 627*3117ece4Schristos displayParamVal(stderr, v, newval, 0); DISPLAY("\n"); 628*3117ece4Schristos } 629*3117ece4Schristos } 630*3117ece4Schristos } 
631*3117ece4Schristos 632*3117ece4Schristos if(pc->vals[wlog_ind] != PARAM_UNSET) { 633*3117ece4Schristos 634*3117ece4Schristos U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1; 635*3117ece4Schristos /* edge case of highBit not working for 0 */ 636*3117ece4Schristos 637*3117ece4Schristos if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) { 638*3117ece4Schristos U32 adjust = MAX(mintable[wlog_ind], sshb); 639*3117ece4Schristos if(adjust != pc->vals[wlog_ind]) { 640*3117ece4Schristos pc->vals[wlog_ind] = adjust; 641*3117ece4Schristos DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n", 642*3117ece4Schristos (unsigned)pc->vals[wlog_ind]); 643*3117ece4Schristos } 644*3117ece4Schristos } 645*3117ece4Schristos } 646*3117ece4Schristos 647*3117ece4Schristos if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { 648*3117ece4Schristos U32 maxclog; 649*3117ece4Schristos if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) { 650*3117ece4Schristos maxclog = pc->vals[wlog_ind] + 1; 651*3117ece4Schristos } else { 652*3117ece4Schristos maxclog = pc->vals[wlog_ind]; 653*3117ece4Schristos } 654*3117ece4Schristos 655*3117ece4Schristos if(pc->vals[clog_ind] > maxclog) { 656*3117ece4Schristos pc->vals[clog_ind] = maxclog; 657*3117ece4Schristos DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n", 658*3117ece4Schristos (unsigned)pc->vals[clog_ind]); 659*3117ece4Schristos } 660*3117ece4Schristos } 661*3117ece4Schristos 662*3117ece4Schristos if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) { 663*3117ece4Schristos if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) { 664*3117ece4Schristos pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1; 665*3117ece4Schristos DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n", 666*3117ece4Schristos (unsigned)pc->vals[hlog_ind]); 667*3117ece4Schristos } 
668*3117ece4Schristos } 669*3117ece4Schristos 670*3117ece4Schristos if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { 671*3117ece4Schristos if(pc->vals[slog_ind] > pc->vals[clog_ind]) { 672*3117ece4Schristos pc->vals[clog_ind] = pc->vals[slog_ind]; 673*3117ece4Schristos DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n", 674*3117ece4Schristos (unsigned)pc->vals[slog_ind]); 675*3117ece4Schristos } 676*3117ece4Schristos } 677*3117ece4Schristos } 678*3117ece4Schristos 679*3117ece4Schristos static int 680*3117ece4Schristos redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize) 681*3117ece4Schristos { 682*3117ece4Schristos return 683*3117ece4Schristos (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */ 684*3117ece4Schristos || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */ 685*3117ece4Schristos || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/ 686*3117ece4Schristos || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */ 687*3117ece4Schristos || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */ 688*3117ece4Schristos } 689*3117ece4Schristos 690*3117ece4Schristos 691*3117ece4Schristos /*-************************************ 692*3117ece4Schristos * Display Functions 693*3117ece4Schristos **************************************/ 694*3117ece4Schristos 695*3117ece4Schristos /* BMK_paramValues_into_commandLine() : 696*3117ece4Schristos * transform a set of parameters paramValues_t 697*3117ece4Schristos * into a command line compatible with `zstd` syntax 698*3117ece4Schristos * and writes it into FILE* f. 
699*3117ece4Schristos * f must be already opened and writable */ 700*3117ece4Schristos static void 701*3117ece4Schristos BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params) 702*3117ece4Schristos { 703*3117ece4Schristos varInds_t v; 704*3117ece4Schristos int first = 1; 705*3117ece4Schristos fprintf(f,"--zstd="); 706*3117ece4Schristos for (v = 0; v < NUM_PARAMS; v++) { 707*3117ece4Schristos if (g_silenceParams[v]) { continue; } 708*3117ece4Schristos if (!first) { fprintf(f, ","); } 709*3117ece4Schristos fprintf(f,"%s=", g_paramNames[v]); 710*3117ece4Schristos 711*3117ece4Schristos if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); } 712*3117ece4Schristos else { displayParamVal(f, v, params.vals[v], 0); } 713*3117ece4Schristos first = 0; 714*3117ece4Schristos } 715*3117ece4Schristos fprintf(f, "\n"); 716*3117ece4Schristos } 717*3117ece4Schristos 718*3117ece4Schristos 719*3117ece4Schristos /* comparison function: */ 720*3117ece4Schristos /* strictly better, strictly worse, equal, speed-side adv, size-side adv */ 721*3117ece4Schristos #define WORSE_RESULT 0 722*3117ece4Schristos #define BETTER_RESULT 1 723*3117ece4Schristos #define ERROR_RESULT 2 724*3117ece4Schristos 725*3117ece4Schristos #define SPEED_RESULT 4 726*3117ece4Schristos #define SIZE_RESULT 5 727*3117ece4Schristos /* maybe have epsilon-eq to limit table size? */ 728*3117ece4Schristos static int 729*3117ece4Schristos speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2) 730*3117ece4Schristos { 731*3117ece4Schristos if(r1.cSpeed < r2.cSpeed) { 732*3117ece4Schristos if(r1.cSize >= r2.cSize) { 733*3117ece4Schristos return BETTER_RESULT; 734*3117ece4Schristos } 735*3117ece4Schristos return SPEED_RESULT; /* r2 is smaller but not faster. 
*/ 736*3117ece4Schristos } else { 737*3117ece4Schristos if(r1.cSize <= r2.cSize) { 738*3117ece4Schristos return WORSE_RESULT; 739*3117ece4Schristos } 740*3117ece4Schristos return SIZE_RESULT; /* r2 is faster but not smaller */ 741*3117ece4Schristos } 742*3117ece4Schristos } 743*3117ece4Schristos 744*3117ece4Schristos /* 0 for insertion, 1 for no insert */ 745*3117ece4Schristos /* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */ 746*3117ece4Schristos static int 747*3117ece4Schristos insertWinner(const winnerInfo_t w, const constraint_t targetConstraints) 748*3117ece4Schristos { 749*3117ece4Schristos BMK_benchResult_t r = w.result; 750*3117ece4Schristos winner_ll_node* cur_node = g_winners; 751*3117ece4Schristos /* first node to insert */ 752*3117ece4Schristos if(!feasible(r, targetConstraints)) { 753*3117ece4Schristos return 1; 754*3117ece4Schristos } 755*3117ece4Schristos 756*3117ece4Schristos if(g_winners == NULL) { 757*3117ece4Schristos winner_ll_node* first_node = malloc(sizeof(winner_ll_node)); 758*3117ece4Schristos if(first_node == NULL) { 759*3117ece4Schristos return 1; 760*3117ece4Schristos } 761*3117ece4Schristos first_node->next = NULL; 762*3117ece4Schristos first_node->res = w; 763*3117ece4Schristos g_winners = first_node; 764*3117ece4Schristos return 0; 765*3117ece4Schristos } 766*3117ece4Schristos 767*3117ece4Schristos while(cur_node->next != NULL) { 768*3117ece4Schristos switch(speedSizeCompare(cur_node->res.result, r)) { 769*3117ece4Schristos case WORSE_RESULT: 770*3117ece4Schristos { 771*3117ece4Schristos return 1; /* never insert if better */ 772*3117ece4Schristos } 773*3117ece4Schristos case BETTER_RESULT: 774*3117ece4Schristos { 775*3117ece4Schristos winner_ll_node* tmp; 776*3117ece4Schristos cur_node->res = cur_node->next->res; 777*3117ece4Schristos tmp = cur_node->next; 778*3117ece4Schristos cur_node->next = cur_node->next->next; 779*3117ece4Schristos free(tmp); 780*3117ece4Schristos break; 781*3117ece4Schristos } 
782*3117ece4Schristos case SIZE_RESULT: 783*3117ece4Schristos { 784*3117ece4Schristos cur_node = cur_node->next; 785*3117ece4Schristos break; 786*3117ece4Schristos } 787*3117ece4Schristos case SPEED_RESULT: /* insert after first size result, then return */ 788*3117ece4Schristos { 789*3117ece4Schristos winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); 790*3117ece4Schristos if(newnode == NULL) { 791*3117ece4Schristos return 1; 792*3117ece4Schristos } 793*3117ece4Schristos newnode->res = cur_node->res; 794*3117ece4Schristos cur_node->res = w; 795*3117ece4Schristos newnode->next = cur_node->next; 796*3117ece4Schristos cur_node->next = newnode; 797*3117ece4Schristos return 0; 798*3117ece4Schristos } 799*3117ece4Schristos } 800*3117ece4Schristos 801*3117ece4Schristos } 802*3117ece4Schristos 803*3117ece4Schristos assert(cur_node->next == NULL); 804*3117ece4Schristos switch(speedSizeCompare(cur_node->res.result, r)) { 805*3117ece4Schristos case WORSE_RESULT: 806*3117ece4Schristos { 807*3117ece4Schristos return 1; /* never insert if better */ 808*3117ece4Schristos } 809*3117ece4Schristos case BETTER_RESULT: 810*3117ece4Schristos { 811*3117ece4Schristos cur_node->res = w; 812*3117ece4Schristos return 0; 813*3117ece4Schristos } 814*3117ece4Schristos case SIZE_RESULT: 815*3117ece4Schristos { 816*3117ece4Schristos winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); 817*3117ece4Schristos if(newnode == NULL) { 818*3117ece4Schristos return 1; 819*3117ece4Schristos } 820*3117ece4Schristos newnode->res = w; 821*3117ece4Schristos newnode->next = NULL; 822*3117ece4Schristos cur_node->next = newnode; 823*3117ece4Schristos return 0; 824*3117ece4Schristos } 825*3117ece4Schristos case SPEED_RESULT: /* insert before first size result, then return */ 826*3117ece4Schristos { 827*3117ece4Schristos winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); 828*3117ece4Schristos if(newnode == NULL) { 829*3117ece4Schristos return 1; 830*3117ece4Schristos } 831*3117ece4Schristos 
newnode->res = cur_node->res; 832*3117ece4Schristos cur_node->res = w; 833*3117ece4Schristos newnode->next = cur_node->next; 834*3117ece4Schristos cur_node->next = newnode; 835*3117ece4Schristos return 0; 836*3117ece4Schristos } 837*3117ece4Schristos default: 838*3117ece4Schristos return 1; 839*3117ece4Schristos } 840*3117ece4Schristos } 841*3117ece4Schristos 842*3117ece4Schristos static void 843*3117ece4Schristos BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) 844*3117ece4Schristos { 845*3117ece4Schristos varInds_t v; 846*3117ece4Schristos int first = 1; 847*3117ece4Schristos res.params = cParamUnsetMin(res.params); 848*3117ece4Schristos fprintf(f, " {"); 849*3117ece4Schristos for (v = 0; v < NUM_PARAMS; v++) { 850*3117ece4Schristos if (g_silenceParams[v]) { continue; } 851*3117ece4Schristos if (!first) { fprintf(f, ","); } 852*3117ece4Schristos displayParamVal(f, v, res.params.vals[v], 3); 853*3117ece4Schristos first = 0; 854*3117ece4Schristos } 855*3117ece4Schristos 856*3117ece4Schristos { double const ratio = res.result.cSize ? 
857*3117ece4Schristos (double)srcSize / (double)res.result.cSize : 0; 858*3117ece4Schristos double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT; 859*3117ece4Schristos double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT; 860*3117ece4Schristos 861*3117ece4Schristos fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", 862*3117ece4Schristos ratio, cSpeedMBps, dSpeedMBps); 863*3117ece4Schristos } 864*3117ece4Schristos } 865*3117ece4Schristos 866*3117ece4Schristos /* Writes to f the results of a parameter benchmark */ 867*3117ece4Schristos /* when used with --optimize, will only print results better than previously discovered */ 868*3117ece4Schristos static void 869*3117ece4Schristos BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize) 870*3117ece4Schristos { 871*3117ece4Schristos char lvlstr[15] = "Custom Level"; 872*3117ece4Schristos winnerInfo_t w; 873*3117ece4Schristos w.params = params; 874*3117ece4Schristos w.result = result; 875*3117ece4Schristos 876*3117ece4Schristos fprintf(f, "\r%79s\r", ""); 877*3117ece4Schristos 878*3117ece4Schristos if(cLevel != CUSTOM_LEVEL) { 879*3117ece4Schristos snprintf(lvlstr, 15, " Level %2d ", cLevel); 880*3117ece4Schristos } 881*3117ece4Schristos 882*3117ece4Schristos if(TIMED) { 883*3117ece4Schristos const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC; 884*3117ece4Schristos const U64 time_ns = UTIL_clockSpanNano(g_time); 885*3117ece4Schristos const U64 minutes = time_ns / mn_in_ns; 886*3117ece4Schristos fprintf(f, "%1lu:%2lu:%05.2f - ", 887*3117ece4Schristos (unsigned long) minutes / 60, 888*3117ece4Schristos (unsigned long) minutes % 60, 889*3117ece4Schristos (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC ); 890*3117ece4Schristos } 891*3117ece4Schristos 892*3117ece4Schristos fprintf(f, "/* %s */ ", lvlstr); 893*3117ece4Schristos BMK_displayOneResult(f, w, srcSize); 894*3117ece4Schristos } 895*3117ece4Schristos 
896*3117ece4Schristos static void 897*3117ece4Schristos BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize) 898*3117ece4Schristos { 899*3117ece4Schristos /* global winner used for constraints */ 900*3117ece4Schristos /* cSize, cSpeed, dSpeed, cMem */ 901*3117ece4Schristos static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL }, 902*3117ece4Schristos { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } } 903*3117ece4Schristos }; 904*3117ece4Schristos if ( DEBUG 905*3117ece4Schristos || compareResultLT(g_winner.result, result, targetConstraints, srcSize) 906*3117ece4Schristos || g_displayLevel >= 4) { 907*3117ece4Schristos if ( DEBUG 908*3117ece4Schristos && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { 909*3117ece4Schristos DISPLAY("New Winner: \n"); 910*3117ece4Schristos } 911*3117ece4Schristos 912*3117ece4Schristos if(g_displayLevel >= 2) { 913*3117ece4Schristos BMK_printWinner(f, cLevel, result, params, srcSize); 914*3117ece4Schristos } 915*3117ece4Schristos 916*3117ece4Schristos if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { 917*3117ece4Schristos if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); } 918*3117ece4Schristos g_winner.result = result; 919*3117ece4Schristos g_winner.params = params; 920*3117ece4Schristos } 921*3117ece4Schristos } 922*3117ece4Schristos 923*3117ece4Schristos if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) { 924*3117ece4Schristos winnerInfo_t w; 925*3117ece4Schristos winner_ll_node* n; 926*3117ece4Schristos w.result = result; 927*3117ece4Schristos w.params = params; 928*3117ece4Schristos insertWinner(w, targetConstraints); 929*3117ece4Schristos 930*3117ece4Schristos if(!DEBUG) { fprintf(f, "\033c"); } 931*3117ece4Schristos fprintf(f, "\n"); 932*3117ece4Schristos 
933*3117ece4Schristos /* the table */ 934*3117ece4Schristos fprintf(f, "================================\n"); 935*3117ece4Schristos for(n = g_winners; n != NULL; n = n->next) { 936*3117ece4Schristos BMK_displayOneResult(f, n->res, srcSize); 937*3117ece4Schristos } 938*3117ece4Schristos fprintf(f, "================================\n"); 939*3117ece4Schristos fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", 940*3117ece4Schristos (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT); 941*3117ece4Schristos 942*3117ece4Schristos 943*3117ece4Schristos fprintf(f, "Overall Winner: \n"); 944*3117ece4Schristos BMK_displayOneResult(f, g_winner, srcSize); 945*3117ece4Schristos BMK_paramValues_into_commandLine(f, g_winner.params); 946*3117ece4Schristos 947*3117ece4Schristos fprintf(f, "Latest BMK: \n");\ 948*3117ece4Schristos BMK_displayOneResult(f, w, srcSize); 949*3117ece4Schristos } 950*3117ece4Schristos } 951*3117ece4Schristos 952*3117ece4Schristos 953*3117ece4Schristos /* BMK_print_cLevelEntry() : 954*3117ece4Schristos * Writes one cLevelTable entry, for one level. 955*3117ece4Schristos * f must exist, be already opened, and be seekable. 956*3117ece4Schristos * this function cannot error. 957*3117ece4Schristos */ 958*3117ece4Schristos static void 959*3117ece4Schristos BMK_print_cLevelEntry(FILE* f, const int cLevel, 960*3117ece4Schristos paramValues_t params, 961*3117ece4Schristos const BMK_benchResult_t result, const size_t srcSize) 962*3117ece4Schristos { 963*3117ece4Schristos varInds_t v; 964*3117ece4Schristos int first = 1; 965*3117ece4Schristos 966*3117ece4Schristos assert(cLevel >= 0); 967*3117ece4Schristos assert(cLevel <= NB_LEVELS_TRACKED); 968*3117ece4Schristos params = cParamUnsetMin(params); 969*3117ece4Schristos 970*3117ece4Schristos fprintf(f, " {"); 971*3117ece4Schristos /* print cParams. 
972*3117ece4Schristos * assumption : all cParams are present and in order in the following range */ 973*3117ece4Schristos for (v = 0; v <= strt_ind; v++) { 974*3117ece4Schristos if (!first) { fprintf(f, ","); } 975*3117ece4Schristos displayParamVal(f, v, params.vals[v], 3); 976*3117ece4Schristos first = 0; 977*3117ece4Schristos } 978*3117ece4Schristos /* print comment */ 979*3117ece4Schristos { double const ratio = result.cSize ? 980*3117ece4Schristos (double)srcSize / (double)result.cSize : 0; 981*3117ece4Schristos double const cSpeedMBps = (double)result.cSpeed / MB_UNIT; 982*3117ece4Schristos double const dSpeedMBps = (double)result.dSpeed / MB_UNIT; 983*3117ece4Schristos 984*3117ece4Schristos fprintf(f, " }, /* level %2i: R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n", 985*3117ece4Schristos cLevel, ratio, cSpeedMBps, dSpeedMBps); 986*3117ece4Schristos } 987*3117ece4Schristos } 988*3117ece4Schristos 989*3117ece4Schristos 990*3117ece4Schristos /* BMK_print_cLevelTable() : 991*3117ece4Schristos * print candidate compression table into proposed FILE* f. 992*3117ece4Schristos * f must exist, be already opened, and be seekable. 993*3117ece4Schristos * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized 994*3117ece4Schristos * this function cannot error. 
995*3117ece4Schristos */ 996*3117ece4Schristos static void 997*3117ece4Schristos BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize) 998*3117ece4Schristos { 999*3117ece4Schristos int cLevel; 1000*3117ece4Schristos 1001*3117ece4Schristos fprintf(f, "\n /* Proposed configurations : */ \n"); 1002*3117ece4Schristos fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); 1003*3117ece4Schristos 1004*3117ece4Schristos for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) 1005*3117ece4Schristos BMK_print_cLevelEntry(f, 1006*3117ece4Schristos cLevel, winners[cLevel].params, 1007*3117ece4Schristos winners[cLevel].result, srcSize); 1008*3117ece4Schristos } 1009*3117ece4Schristos 1010*3117ece4Schristos 1011*3117ece4Schristos /* BMK_saveAndPrint_cLevelTable() : 1012*3117ece4Schristos * save candidate compression table into FILE* f, 1013*3117ece4Schristos * and then to stdout. 1014*3117ece4Schristos * f must exist, be already opened, and be seekable. 1015*3117ece4Schristos * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized 1016*3117ece4Schristos * this function cannot error. 
1017*3117ece4Schristos */ 1018*3117ece4Schristos static void 1019*3117ece4Schristos BMK_saveAndPrint_cLevelTable(FILE* const f, 1020*3117ece4Schristos const winnerInfo_t* winners, 1021*3117ece4Schristos const size_t srcSize) 1022*3117ece4Schristos { 1023*3117ece4Schristos fseek(f, 0, SEEK_SET); 1024*3117ece4Schristos BMK_print_cLevelTable(f, winners, srcSize); 1025*3117ece4Schristos fflush(f); 1026*3117ece4Schristos BMK_print_cLevelTable(stdout, winners, srcSize); 1027*3117ece4Schristos } 1028*3117ece4Schristos 1029*3117ece4Schristos 1030*3117ece4Schristos /*-******************************************************* 1031*3117ece4Schristos * Functions to Benchmark 1032*3117ece4Schristos *********************************************************/ 1033*3117ece4Schristos 1034*3117ece4Schristos typedef struct { 1035*3117ece4Schristos ZSTD_CCtx* cctx; 1036*3117ece4Schristos const void* dictBuffer; 1037*3117ece4Schristos size_t dictBufferSize; 1038*3117ece4Schristos int cLevel; 1039*3117ece4Schristos const paramValues_t* comprParams; 1040*3117ece4Schristos } BMK_initCCtxArgs; 1041*3117ece4Schristos 1042*3117ece4Schristos static size_t local_initCCtx(void* payload) { 1043*3117ece4Schristos const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload; 1044*3117ece4Schristos varInds_t i; 1045*3117ece4Schristos ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters); 1046*3117ece4Schristos ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel); 1047*3117ece4Schristos 1048*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 1049*3117ece4Schristos if(ag->comprParams->vals[i] != PARAM_UNSET) 1050*3117ece4Schristos ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]); 1051*3117ece4Schristos } 1052*3117ece4Schristos ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize); 1053*3117ece4Schristos 1054*3117ece4Schristos return 0; 1055*3117ece4Schristos } 1056*3117ece4Schristos 1057*3117ece4Schristos typedef struct { 
1058*3117ece4Schristos ZSTD_DCtx* dctx; 1059*3117ece4Schristos const void* dictBuffer; 1060*3117ece4Schristos size_t dictBufferSize; 1061*3117ece4Schristos } BMK_initDCtxArgs; 1062*3117ece4Schristos 1063*3117ece4Schristos static size_t local_initDCtx(void* payload) { 1064*3117ece4Schristos const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload; 1065*3117ece4Schristos ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters); 1066*3117ece4Schristos ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize); 1067*3117ece4Schristos return 0; 1068*3117ece4Schristos } 1069*3117ece4Schristos 1070*3117ece4Schristos /* additional argument is just the context */ 1071*3117ece4Schristos static size_t local_defaultCompress( 1072*3117ece4Schristos const void* srcBuffer, size_t srcSize, 1073*3117ece4Schristos void* dstBuffer, size_t dstSize, 1074*3117ece4Schristos void* addArgs) 1075*3117ece4Schristos { 1076*3117ece4Schristos ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs; 1077*3117ece4Schristos assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */ 1078*3117ece4Schristos return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); 1079*3117ece4Schristos } 1080*3117ece4Schristos 1081*3117ece4Schristos /* additional argument is just the context */ 1082*3117ece4Schristos static size_t local_defaultDecompress( 1083*3117ece4Schristos const void* srcBuffer, size_t srcSize, 1084*3117ece4Schristos void* dstBuffer, size_t dstSize, 1085*3117ece4Schristos void* addArgs) { 1086*3117ece4Schristos size_t moreToFlush = 1; 1087*3117ece4Schristos ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; 1088*3117ece4Schristos ZSTD_inBuffer in; 1089*3117ece4Schristos ZSTD_outBuffer out; 1090*3117ece4Schristos in.src = srcBuffer; 1091*3117ece4Schristos in.size = srcSize; 1092*3117ece4Schristos in.pos = 0; 1093*3117ece4Schristos out.dst = dstBuffer; 1094*3117ece4Schristos out.size = dstSize; 1095*3117ece4Schristos out.pos = 0; 
1096*3117ece4Schristos while (moreToFlush) { 1097*3117ece4Schristos if(out.pos == out.size) { 1098*3117ece4Schristos return (size_t)-ZSTD_error_dstSize_tooSmall; 1099*3117ece4Schristos } 1100*3117ece4Schristos moreToFlush = ZSTD_decompressStream(dctx, 1101*3117ece4Schristos &out, &in); 1102*3117ece4Schristos if (ZSTD_isError(moreToFlush)) { 1103*3117ece4Schristos return moreToFlush; 1104*3117ece4Schristos } 1105*3117ece4Schristos } 1106*3117ece4Schristos return out.pos; 1107*3117ece4Schristos 1108*3117ece4Schristos } 1109*3117ece4Schristos 1110*3117ece4Schristos /*-************************************ 1111*3117ece4Schristos * Data Initialization Functions 1112*3117ece4Schristos **************************************/ 1113*3117ece4Schristos 1114*3117ece4Schristos typedef struct { 1115*3117ece4Schristos void* srcBuffer; 1116*3117ece4Schristos size_t srcSize; 1117*3117ece4Schristos const void** srcPtrs; 1118*3117ece4Schristos size_t* srcSizes; 1119*3117ece4Schristos void** dstPtrs; 1120*3117ece4Schristos size_t* dstCapacities; 1121*3117ece4Schristos size_t* dstSizes; 1122*3117ece4Schristos void** resPtrs; 1123*3117ece4Schristos size_t* resSizes; 1124*3117ece4Schristos size_t nbBlocks; 1125*3117ece4Schristos size_t maxBlockSize; 1126*3117ece4Schristos } buffers_t; 1127*3117ece4Schristos 1128*3117ece4Schristos typedef struct { 1129*3117ece4Schristos size_t dictSize; 1130*3117ece4Schristos void* dictBuffer; 1131*3117ece4Schristos ZSTD_CCtx* cctx; 1132*3117ece4Schristos ZSTD_DCtx* dctx; 1133*3117ece4Schristos } contexts_t; 1134*3117ece4Schristos 1135*3117ece4Schristos static void freeNonSrcBuffers(const buffers_t b) { 1136*3117ece4Schristos free((void*)b.srcPtrs); 1137*3117ece4Schristos free(b.srcSizes); 1138*3117ece4Schristos 1139*3117ece4Schristos if(b.dstPtrs != NULL) { 1140*3117ece4Schristos free(b.dstPtrs[0]); 1141*3117ece4Schristos } 1142*3117ece4Schristos free(b.dstPtrs); 1143*3117ece4Schristos free(b.dstCapacities); 1144*3117ece4Schristos free(b.dstSizes); 
1145*3117ece4Schristos 1146*3117ece4Schristos if(b.resPtrs != NULL) { 1147*3117ece4Schristos free(b.resPtrs[0]); 1148*3117ece4Schristos } 1149*3117ece4Schristos free(b.resPtrs); 1150*3117ece4Schristos free(b.resSizes); 1151*3117ece4Schristos } 1152*3117ece4Schristos 1153*3117ece4Schristos static void freeBuffers(const buffers_t b) { 1154*3117ece4Schristos if(b.srcPtrs != NULL) { 1155*3117ece4Schristos free(b.srcBuffer); 1156*3117ece4Schristos } 1157*3117ece4Schristos freeNonSrcBuffers(b); 1158*3117ece4Schristos } 1159*3117ece4Schristos 1160*3117ece4Schristos /* srcBuffer will be freed by freeBuffers now */ 1161*3117ece4Schristos static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles, 1162*3117ece4Schristos const size_t* fileSizes) 1163*3117ece4Schristos { 1164*3117ece4Schristos size_t pos = 0, n, blockSize; 1165*3117ece4Schristos U32 maxNbBlocks, blockNb = 0; 1166*3117ece4Schristos buff->srcSize = 0; 1167*3117ece4Schristos for(n = 0; n < nbFiles; n++) { 1168*3117ece4Schristos buff->srcSize += fileSizes[n]; 1169*3117ece4Schristos } 1170*3117ece4Schristos 1171*3117ece4Schristos if(buff->srcSize == 0) { 1172*3117ece4Schristos DISPLAY("No data to bench\n"); 1173*3117ece4Schristos return 1; 1174*3117ece4Schristos } 1175*3117ece4Schristos 1176*3117ece4Schristos blockSize = g_blockSize ? 
g_blockSize : buff->srcSize; 1177*3117ece4Schristos maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles; 1178*3117ece4Schristos 1179*3117ece4Schristos buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*)); 1180*3117ece4Schristos buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 1181*3117ece4Schristos 1182*3117ece4Schristos buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); 1183*3117ece4Schristos buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 1184*3117ece4Schristos buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 1185*3117ece4Schristos 1186*3117ece4Schristos buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); 1187*3117ece4Schristos buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 1188*3117ece4Schristos 1189*3117ece4Schristos if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) { 1190*3117ece4Schristos DISPLAY("alloc error\n"); 1191*3117ece4Schristos freeNonSrcBuffers(*buff); 1192*3117ece4Schristos return 1; 1193*3117ece4Schristos } 1194*3117ece4Schristos 1195*3117ece4Schristos buff->srcBuffer = srcBuffer; 1196*3117ece4Schristos buff->srcPtrs[0] = (const void*)buff->srcBuffer; 1197*3117ece4Schristos buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024)); 1198*3117ece4Schristos buff->resPtrs[0] = malloc(buff->srcSize); 1199*3117ece4Schristos 1200*3117ece4Schristos if(!buff->dstPtrs[0] || !buff->resPtrs[0]) { 1201*3117ece4Schristos DISPLAY("alloc error\n"); 1202*3117ece4Schristos freeNonSrcBuffers(*buff); 1203*3117ece4Schristos return 1; 1204*3117ece4Schristos } 1205*3117ece4Schristos 1206*3117ece4Schristos for(n = 0; n < nbFiles; n++) { 1207*3117ece4Schristos size_t pos_end = pos + fileSizes[n]; 1208*3117ece4Schristos for(; pos < pos_end; blockNb++) { 1209*3117ece4Schristos buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos); 
1210*3117ece4Schristos buff->srcSizes[blockNb] = blockSize; 1211*3117ece4Schristos pos += blockSize; 1212*3117ece4Schristos } 1213*3117ece4Schristos 1214*3117ece4Schristos if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; } 1215*3117ece4Schristos pos = pos_end; 1216*3117ece4Schristos } 1217*3117ece4Schristos 1218*3117ece4Schristos buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]); 1219*3117ece4Schristos buff->dstSizes[0] = buff->dstCapacities[0]; 1220*3117ece4Schristos buff->resSizes[0] = buff->srcSizes[0]; 1221*3117ece4Schristos buff->maxBlockSize = buff->srcSizes[0]; 1222*3117ece4Schristos 1223*3117ece4Schristos for(n = 1; n < blockNb; n++) { 1224*3117ece4Schristos buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1]; 1225*3117ece4Schristos buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1]; 1226*3117ece4Schristos buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]); 1227*3117ece4Schristos buff->dstSizes[n] = buff->dstCapacities[n]; 1228*3117ece4Schristos buff->resSizes[n] = buff->srcSizes[n]; 1229*3117ece4Schristos 1230*3117ece4Schristos buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]); 1231*3117ece4Schristos } 1232*3117ece4Schristos 1233*3117ece4Schristos buff->nbBlocks = blockNb; 1234*3117ece4Schristos 1235*3117ece4Schristos return 0; 1236*3117ece4Schristos } 1237*3117ece4Schristos 1238*3117ece4Schristos /* allocates buffer's arguments. 
returns success / failure */ 1239*3117ece4Schristos static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable, 1240*3117ece4Schristos size_t nbFiles) { 1241*3117ece4Schristos size_t pos = 0; 1242*3117ece4Schristos size_t n; 1243*3117ece4Schristos size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles); 1244*3117ece4Schristos size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad); 1245*3117ece4Schristos size_t* fileSizes = calloc(sizeof(size_t), nbFiles); 1246*3117ece4Schristos void* srcBuffer = NULL; 1247*3117ece4Schristos int ret = 0; 1248*3117ece4Schristos 1249*3117ece4Schristos if(!totalSizeToLoad || !benchedSize) { 1250*3117ece4Schristos ret = 1; 1251*3117ece4Schristos DISPLAY("Nothing to Bench\n"); 1252*3117ece4Schristos goto _cleanUp; 1253*3117ece4Schristos } 1254*3117ece4Schristos 1255*3117ece4Schristos srcBuffer = malloc(benchedSize); 1256*3117ece4Schristos 1257*3117ece4Schristos if(!fileSizes || !srcBuffer) { 1258*3117ece4Schristos ret = 1; 1259*3117ece4Schristos goto _cleanUp; 1260*3117ece4Schristos } 1261*3117ece4Schristos 1262*3117ece4Schristos for(n = 0; n < nbFiles; n++) { 1263*3117ece4Schristos FILE* f; 1264*3117ece4Schristos U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); 1265*3117ece4Schristos if (UTIL_isDirectory(fileNamesTable[n])) { 1266*3117ece4Schristos DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]); 1267*3117ece4Schristos continue; 1268*3117ece4Schristos } 1269*3117ece4Schristos if (fileSize == UTIL_FILESIZE_UNKNOWN) { 1270*3117ece4Schristos DISPLAY("Cannot evaluate size of %s, ignoring ... 
\n", fileNamesTable[n]); 1271*3117ece4Schristos continue; 1272*3117ece4Schristos } 1273*3117ece4Schristos f = fopen(fileNamesTable[n], "rb"); 1274*3117ece4Schristos if (f==NULL) { 1275*3117ece4Schristos DISPLAY("impossible to open file %s\n", fileNamesTable[n]); 1276*3117ece4Schristos fclose(f); 1277*3117ece4Schristos ret = 10; 1278*3117ece4Schristos goto _cleanUp; 1279*3117ece4Schristos } 1280*3117ece4Schristos 1281*3117ece4Schristos DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]); 1282*3117ece4Schristos 1283*3117ece4Schristos if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */ 1284*3117ece4Schristos { 1285*3117ece4Schristos char* buffer = (char*)(srcBuffer); 1286*3117ece4Schristos size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f); 1287*3117ece4Schristos fclose(f); 1288*3117ece4Schristos if (readSize != (size_t)fileSize) { 1289*3117ece4Schristos DISPLAY("could not read %s", fileNamesTable[n]); 1290*3117ece4Schristos ret = 1; 1291*3117ece4Schristos goto _cleanUp; 1292*3117ece4Schristos } 1293*3117ece4Schristos 1294*3117ece4Schristos fileSizes[n] = readSize; 1295*3117ece4Schristos pos += readSize; 1296*3117ece4Schristos } 1297*3117ece4Schristos } 1298*3117ece4Schristos 1299*3117ece4Schristos ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes); 1300*3117ece4Schristos 1301*3117ece4Schristos _cleanUp: 1302*3117ece4Schristos if(ret) { free(srcBuffer); } 1303*3117ece4Schristos free(fileSizes); 1304*3117ece4Schristos return ret; 1305*3117ece4Schristos } 1306*3117ece4Schristos 1307*3117ece4Schristos static void freeContexts(const contexts_t ctx) { 1308*3117ece4Schristos free(ctx.dictBuffer); 1309*3117ece4Schristos ZSTD_freeCCtx(ctx.cctx); 1310*3117ece4Schristos ZSTD_freeDCtx(ctx.dctx); 1311*3117ece4Schristos } 1312*3117ece4Schristos 1313*3117ece4Schristos static int createContexts(contexts_t* ctx, const char* dictFileName) { 1314*3117ece4Schristos FILE* f; 
1315*3117ece4Schristos size_t readSize; 1316*3117ece4Schristos ctx->cctx = ZSTD_createCCtx(); 1317*3117ece4Schristos ctx->dctx = ZSTD_createDCtx(); 1318*3117ece4Schristos assert(ctx->cctx != NULL); 1319*3117ece4Schristos assert(ctx->dctx != NULL); 1320*3117ece4Schristos 1321*3117ece4Schristos if(dictFileName == NULL) { 1322*3117ece4Schristos ctx->dictSize = 0; 1323*3117ece4Schristos ctx->dictBuffer = NULL; 1324*3117ece4Schristos return 0; 1325*3117ece4Schristos } 1326*3117ece4Schristos { U64 const dictFileSize = UTIL_getFileSize(dictFileName); 1327*3117ece4Schristos assert(dictFileSize != UTIL_FILESIZE_UNKNOWN); 1328*3117ece4Schristos ctx->dictSize = (size_t)dictFileSize; 1329*3117ece4Schristos assert((U64)ctx->dictSize == dictFileSize); /* check overflow */ 1330*3117ece4Schristos } 1331*3117ece4Schristos ctx->dictBuffer = malloc(ctx->dictSize); 1332*3117ece4Schristos 1333*3117ece4Schristos f = fopen(dictFileName, "rb"); 1334*3117ece4Schristos 1335*3117ece4Schristos if (f==NULL) { 1336*3117ece4Schristos DISPLAY("unable to open file\n"); 1337*3117ece4Schristos freeContexts(*ctx); 1338*3117ece4Schristos return 1; 1339*3117ece4Schristos } 1340*3117ece4Schristos 1341*3117ece4Schristos if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) { 1342*3117ece4Schristos DISPLAY("dictionary too large\n"); 1343*3117ece4Schristos fclose(f); 1344*3117ece4Schristos freeContexts(*ctx); 1345*3117ece4Schristos return 1; 1346*3117ece4Schristos } 1347*3117ece4Schristos readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f); 1348*3117ece4Schristos fclose(f); 1349*3117ece4Schristos if (readSize != ctx->dictSize) { 1350*3117ece4Schristos DISPLAY("unable to read file\n"); 1351*3117ece4Schristos freeContexts(*ctx); 1352*3117ece4Schristos return 1; 1353*3117ece4Schristos } 1354*3117ece4Schristos return 0; 1355*3117ece4Schristos } 1356*3117ece4Schristos 1357*3117ece4Schristos /*-************************************ 1358*3117ece4Schristos * Optimizer Memoization Functions 1359*3117ece4Schristos 
**************************************/ 1360*3117ece4Schristos 1361*3117ece4Schristos /* return: new length */ 1362*3117ece4Schristos /* keep old array, will need if iter over strategy. */ 1363*3117ece4Schristos /* prunes useless params */ 1364*3117ece4Schristos static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) { 1365*3117ece4Schristos size_t i, j = 0; 1366*3117ece4Schristos for(i = 0; i < varLength; i++) { 1367*3117ece4Schristos if( !((varArray[i] == clog_ind && strat == ZSTD_fast) 1368*3117ece4Schristos || (varArray[i] == slog_ind && strat == ZSTD_fast) 1369*3117ece4Schristos || (varArray[i] == slog_ind && strat == ZSTD_dfast) 1370*3117ece4Schristos || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) { 1371*3117ece4Schristos varNew[j] = varArray[i]; 1372*3117ece4Schristos j++; 1373*3117ece4Schristos } 1374*3117ece4Schristos } 1375*3117ece4Schristos return j; 1376*3117ece4Schristos } 1377*3117ece4Schristos 1378*3117ece4Schristos /* res should be NUM_PARAMS size */ 1379*3117ece4Schristos /* constructs varArray from paramValues_t style parameter */ 1380*3117ece4Schristos /* pass in using dict. 
*/ 1381*3117ece4Schristos static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) { 1382*3117ece4Schristos varInds_t i; 1383*3117ece4Schristos size_t j = 0; 1384*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 1385*3117ece4Schristos if(paramConstraints.vals[i] == PARAM_UNSET) { 1386*3117ece4Schristos if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */ 1387*3117ece4Schristos res[j] = i; j++; 1388*3117ece4Schristos } 1389*3117ece4Schristos } 1390*3117ece4Schristos return j; 1391*3117ece4Schristos } 1392*3117ece4Schristos 1393*3117ece4Schristos /* length of memo table given free variables */ 1394*3117ece4Schristos static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) { 1395*3117ece4Schristos size_t arrayLen = 1; 1396*3117ece4Schristos size_t i; 1397*3117ece4Schristos for(i = 0; i < varyLen; i++) { 1398*3117ece4Schristos if(varyParams[i] == strt_ind) continue; /* strategy separated by table */ 1399*3117ece4Schristos arrayLen *= rangetable[varyParams[i]]; 1400*3117ece4Schristos } 1401*3117ece4Schristos return arrayLen; 1402*3117ece4Schristos } 1403*3117ece4Schristos 1404*3117ece4Schristos /* returns unique index in memotable of compression parameters */ 1405*3117ece4Schristos static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) { 1406*3117ece4Schristos size_t i; 1407*3117ece4Schristos unsigned ind = 0; 1408*3117ece4Schristos for(i = 0; i < varyLen; i++) { 1409*3117ece4Schristos varInds_t v = varyParams[i]; 1410*3117ece4Schristos if(v == strt_ind) continue; /* exclude strategy from memotable */ 1411*3117ece4Schristos ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]); 1412*3117ece4Schristos } 1413*3117ece4Schristos return ind; 1414*3117ece4Schristos } 1415*3117ece4Schristos 1416*3117ece4Schristos static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) { 
1417*3117ece4Schristos const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; 1418*3117ece4Schristos switch(mt.tableType) { 1419*3117ece4Schristos case directMap: 1420*3117ece4Schristos return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)]; 1421*3117ece4Schristos case xxhashMap: 1422*3117ece4Schristos return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen]; 1423*3117ece4Schristos case noMemo: 1424*3117ece4Schristos return 0; 1425*3117ece4Schristos } 1426*3117ece4Schristos return 0; /* should never happen, stop compiler warnings */ 1427*3117ece4Schristos } 1428*3117ece4Schristos 1429*3117ece4Schristos static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) { 1430*3117ece4Schristos const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; 1431*3117ece4Schristos switch(mt.tableType) { 1432*3117ece4Schristos case directMap: 1433*3117ece4Schristos mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break; 1434*3117ece4Schristos case xxhashMap: 1435*3117ece4Schristos mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break; 1436*3117ece4Schristos case noMemo: 1437*3117ece4Schristos break; 1438*3117ece4Schristos } 1439*3117ece4Schristos } 1440*3117ece4Schristos 1441*3117ece4Schristos /* frees all allocated memotables */ 1442*3117ece4Schristos /* secret contract : 1443*3117ece4Schristos * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */ 1444*3117ece4Schristos static void freeMemoTableArray(memoTable_t* const mtAll) { 1445*3117ece4Schristos int i; 1446*3117ece4Schristos if(mtAll == NULL) { return; } 1447*3117ece4Schristos for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { 1448*3117ece4Schristos free(mtAll[i].table); 1449*3117ece4Schristos } 1450*3117ece4Schristos free(mtAll); 1451*3117ece4Schristos } 1452*3117ece4Schristos 1453*3117ece4Schristos /* inits memotables for all (including mallocs), all strategies */ 1454*3117ece4Schristos /* takes 
unsanitized varyParams */ 1455*3117ece4Schristos static memoTable_t* 1456*3117ece4Schristos createMemoTableArray(const paramValues_t p, 1457*3117ece4Schristos const varInds_t* const varyParams, 1458*3117ece4Schristos const size_t varyLen, 1459*3117ece4Schristos const U32 memoTableLog) 1460*3117ece4Schristos { 1461*3117ece4Schristos memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1)); 1462*3117ece4Schristos ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX; 1463*3117ece4Schristos 1464*3117ece4Schristos if(mtAll == NULL) { 1465*3117ece4Schristos return NULL; 1466*3117ece4Schristos } 1467*3117ece4Schristos 1468*3117ece4Schristos for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { 1469*3117ece4Schristos mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i); 1470*3117ece4Schristos } 1471*3117ece4Schristos 1472*3117ece4Schristos /* no memoization */ 1473*3117ece4Schristos if(memoTableLog == 0) { 1474*3117ece4Schristos for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { 1475*3117ece4Schristos mtAll[i].tableType = noMemo; 1476*3117ece4Schristos mtAll[i].table = NULL; 1477*3117ece4Schristos mtAll[i].tableLen = 0; 1478*3117ece4Schristos } 1479*3117ece4Schristos return mtAll; 1480*3117ece4Schristos } 1481*3117ece4Schristos 1482*3117ece4Schristos 1483*3117ece4Schristos if(p.vals[strt_ind] != PARAM_UNSET) { 1484*3117ece4Schristos stratMin = p.vals[strt_ind]; 1485*3117ece4Schristos stratMax = p.vals[strt_ind]; 1486*3117ece4Schristos } 1487*3117ece4Schristos 1488*3117ece4Schristos 1489*3117ece4Schristos for(i = stratMin; i <= stratMax; i++) { 1490*3117ece4Schristos size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen); 1491*3117ece4Schristos mtAll[i].tableType = directMap; 1492*3117ece4Schristos 1493*3117ece4Schristos if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? 
*/ 1494*3117ece4Schristos mtAll[i].tableType = xxhashMap; 1495*3117ece4Schristos mtl = ((size_t)1 << memoTableLog); 1496*3117ece4Schristos } 1497*3117ece4Schristos 1498*3117ece4Schristos mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl); 1499*3117ece4Schristos mtAll[i].tableLen = mtl; 1500*3117ece4Schristos 1501*3117ece4Schristos if(mtAll[i].table == NULL) { 1502*3117ece4Schristos freeMemoTableArray(mtAll); 1503*3117ece4Schristos return NULL; 1504*3117ece4Schristos } 1505*3117ece4Schristos } 1506*3117ece4Schristos 1507*3117ece4Schristos return mtAll; 1508*3117ece4Schristos } 1509*3117ece4Schristos 1510*3117ece4Schristos /* Sets pc to random unmeasured set of parameters */ 1511*3117ece4Schristos /* specify strategy */ 1512*3117ece4Schristos static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st) 1513*3117ece4Schristos { 1514*3117ece4Schristos size_t j; 1515*3117ece4Schristos const memoTable_t mt = memoTableArray[st]; 1516*3117ece4Schristos pc->vals[strt_ind] = st; 1517*3117ece4Schristos for(j = 0; j < mt.tableLen; j++) { 1518*3117ece4Schristos int i; 1519*3117ece4Schristos for(i = 0; i < NUM_PARAMS; i++) { 1520*3117ece4Schristos varInds_t v = mt.varArray[i]; 1521*3117ece4Schristos if(v == strt_ind) continue; 1522*3117ece4Schristos pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); 1523*3117ece4Schristos } 1524*3117ece4Schristos 1525*3117ece4Schristos if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. 
*/ 1526*3117ece4Schristos } 1527*3117ece4Schristos } 1528*3117ece4Schristos 1529*3117ece4Schristos /*-************************************ 1530*3117ece4Schristos * Benchmarking Functions 1531*3117ece4Schristos **************************************/ 1532*3117ece4Schristos 1533*3117ece4Schristos static void display_params_tested(paramValues_t cParams) 1534*3117ece4Schristos { 1535*3117ece4Schristos varInds_t vi; 1536*3117ece4Schristos DISPLAYLEVEL(3, "\r testing :"); 1537*3117ece4Schristos for (vi=0; vi < NUM_PARAMS; vi++) { 1538*3117ece4Schristos DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]); 1539*3117ece4Schristos } 1540*3117ece4Schristos DISPLAYLEVEL(3, "\b \r"); 1541*3117ece4Schristos } 1542*3117ece4Schristos 1543*3117ece4Schristos /* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */ 1544*3117ece4Schristos /* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */ 1545*3117ece4Schristos /* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */ 1546*3117ece4Schristos /* nbSeconds used in same way as in BMK_advancedParams_t */ 1547*3117ece4Schristos /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */ 1548*3117ece4Schristos /* dictionary nullable, nothing else though. 
*/ 1549*3117ece4Schristos /* note : it would be a lot better if this function was present in benchzstd.c, 1550*3117ece4Schristos * sharing code with benchMemAdvanced(), since it's technically a part of it */ 1551*3117ece4Schristos static BMK_benchOutcome_t 1552*3117ece4Schristos BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, 1553*3117ece4Schristos int cLevel, const paramValues_t* comprParams, 1554*3117ece4Schristos BMK_mode_t mode, unsigned nbSeconds) 1555*3117ece4Schristos { 1556*3117ece4Schristos U32 i; 1557*3117ece4Schristos BMK_benchResult_t bResult; 1558*3117ece4Schristos const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs; 1559*3117ece4Schristos size_t const *const srcSizes = buf.srcSizes; 1560*3117ece4Schristos void** const dstPtrs = buf.dstPtrs; 1561*3117ece4Schristos size_t const *const dstCapacities = buf.dstCapacities; 1562*3117ece4Schristos size_t* const dstSizes = buf.dstSizes; 1563*3117ece4Schristos void** const resPtrs = buf.resPtrs; 1564*3117ece4Schristos size_t const *const resSizes = buf.resSizes; 1565*3117ece4Schristos const void* dictBuffer = ctx.dictBuffer; 1566*3117ece4Schristos const size_t dictBufferSize = ctx.dictSize; 1567*3117ece4Schristos const size_t nbBlocks = buf.nbBlocks; 1568*3117ece4Schristos const size_t srcSize = buf.srcSize; 1569*3117ece4Schristos ZSTD_CCtx* cctx = ctx.cctx; 1570*3117ece4Schristos ZSTD_DCtx* dctx = ctx.dctx; 1571*3117ece4Schristos 1572*3117ece4Schristos /* init */ 1573*3117ece4Schristos display_params_tested(*comprParams); 1574*3117ece4Schristos memset(&bResult, 0, sizeof(bResult)); 1575*3117ece4Schristos 1576*3117ece4Schristos /* warming up memory */ 1577*3117ece4Schristos for (i = 0; i < buf.nbBlocks; i++) { 1578*3117ece4Schristos if (mode != BMK_decodeOnly) { 1579*3117ece4Schristos RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1); 1580*3117ece4Schristos } else { 1581*3117ece4Schristos RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1); 1582*3117ece4Schristos } 
1583*3117ece4Schristos } 1584*3117ece4Schristos 1585*3117ece4Schristos /* Bench */ 1586*3117ece4Schristos { 1587*3117ece4Schristos /* init args */ 1588*3117ece4Schristos int compressionCompleted = (mode == BMK_decodeOnly); 1589*3117ece4Schristos int decompressionCompleted = (mode == BMK_compressOnly); 1590*3117ece4Schristos BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); 1591*3117ece4Schristos BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); 1592*3117ece4Schristos BMK_benchParams_t cbp, dbp; 1593*3117ece4Schristos BMK_initCCtxArgs cctxprep; 1594*3117ece4Schristos BMK_initDCtxArgs dctxprep; 1595*3117ece4Schristos 1596*3117ece4Schristos cbp.benchFn = local_defaultCompress; 1597*3117ece4Schristos cbp.benchPayload = cctx; 1598*3117ece4Schristos cbp.initFn = local_initCCtx; 1599*3117ece4Schristos cbp.initPayload = &cctxprep; 1600*3117ece4Schristos cbp.errorFn = ZSTD_isError; 1601*3117ece4Schristos cbp.blockCount = nbBlocks; 1602*3117ece4Schristos cbp.srcBuffers = srcPtrs; 1603*3117ece4Schristos cbp.srcSizes = srcSizes; 1604*3117ece4Schristos cbp.dstBuffers = dstPtrs; 1605*3117ece4Schristos cbp.dstCapacities = dstCapacities; 1606*3117ece4Schristos cbp.blockResults = dstSizes; 1607*3117ece4Schristos 1608*3117ece4Schristos cctxprep.cctx = cctx; 1609*3117ece4Schristos cctxprep.dictBuffer = dictBuffer; 1610*3117ece4Schristos cctxprep.dictBufferSize = dictBufferSize; 1611*3117ece4Schristos cctxprep.cLevel = cLevel; 1612*3117ece4Schristos cctxprep.comprParams = comprParams; 1613*3117ece4Schristos 1614*3117ece4Schristos dbp.benchFn = local_defaultDecompress; 1615*3117ece4Schristos dbp.benchPayload = dctx; 1616*3117ece4Schristos dbp.initFn = local_initDCtx; 1617*3117ece4Schristos dbp.initPayload = &dctxprep; 1618*3117ece4Schristos dbp.errorFn = ZSTD_isError; 1619*3117ece4Schristos dbp.blockCount = nbBlocks; 1620*3117ece4Schristos dbp.srcBuffers = (const void* const *) dstPtrs; 1621*3117ece4Schristos 
dbp.srcSizes = dstCapacities; 1622*3117ece4Schristos dbp.dstBuffers = resPtrs; 1623*3117ece4Schristos dbp.dstCapacities = resSizes; 1624*3117ece4Schristos dbp.blockResults = NULL; 1625*3117ece4Schristos 1626*3117ece4Schristos dctxprep.dctx = dctx; 1627*3117ece4Schristos dctxprep.dictBuffer = dictBuffer; 1628*3117ece4Schristos dctxprep.dictBufferSize = dictBufferSize; 1629*3117ece4Schristos 1630*3117ece4Schristos assert(timeStateCompress != NULL); 1631*3117ece4Schristos assert(timeStateDecompress != NULL); 1632*3117ece4Schristos while(!compressionCompleted) { 1633*3117ece4Schristos BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp); 1634*3117ece4Schristos 1635*3117ece4Schristos if (!BMK_isSuccessful_runOutcome(cOutcome)) { 1636*3117ece4Schristos BMK_benchOutcome_t bOut; 1637*3117ece4Schristos memset(&bOut, 0, sizeof(bOut)); 1638*3117ece4Schristos bOut.tag = 1; /* should rather be a function or a constant */ 1639*3117ece4Schristos BMK_freeTimedFnState(timeStateCompress); 1640*3117ece4Schristos BMK_freeTimedFnState(timeStateDecompress); 1641*3117ece4Schristos return bOut; 1642*3117ece4Schristos } 1643*3117ece4Schristos { BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome); 1644*3117ece4Schristos bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun); 1645*3117ece4Schristos bResult.cSize = rResult.sumOfReturn; 1646*3117ece4Schristos } 1647*3117ece4Schristos compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress); 1648*3117ece4Schristos } 1649*3117ece4Schristos 1650*3117ece4Schristos while (!decompressionCompleted) { 1651*3117ece4Schristos BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); 1652*3117ece4Schristos 1653*3117ece4Schristos if (!BMK_isSuccessful_runOutcome(dOutcome)) { 1654*3117ece4Schristos BMK_benchOutcome_t bOut; 1655*3117ece4Schristos memset(&bOut, 0, sizeof(bOut)); 1656*3117ece4Schristos bOut.tag = 1; /* should rather be a function or a 
constant */ 1657*3117ece4Schristos BMK_freeTimedFnState(timeStateCompress); 1658*3117ece4Schristos BMK_freeTimedFnState(timeStateDecompress); 1659*3117ece4Schristos return bOut; 1660*3117ece4Schristos } 1661*3117ece4Schristos { BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome); 1662*3117ece4Schristos bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun); 1663*3117ece4Schristos } 1664*3117ece4Schristos decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); 1665*3117ece4Schristos } 1666*3117ece4Schristos 1667*3117ece4Schristos BMK_freeTimedFnState(timeStateCompress); 1668*3117ece4Schristos BMK_freeTimedFnState(timeStateDecompress); 1669*3117ece4Schristos } 1670*3117ece4Schristos 1671*3117ece4Schristos /* Bench */ 1672*3117ece4Schristos bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx); 1673*3117ece4Schristos 1674*3117ece4Schristos { BMK_benchOutcome_t bOut; 1675*3117ece4Schristos bOut.tag = 0; 1676*3117ece4Schristos bOut.internal_never_use_directly = bResult; /* should be a function */ 1677*3117ece4Schristos return bOut; 1678*3117ece4Schristos } 1679*3117ece4Schristos } 1680*3117ece4Schristos 1681*3117ece4Schristos /* BMK_benchParam() : 1682*3117ece4Schristos * benchmark a set of `cParams` over sample `buf`, 1683*3117ece4Schristos * store the result in `resultPtr`. 
1684*3117ece4Schristos * @return : 0 if success, 1 if error */ 1685*3117ece4Schristos static int BMK_benchParam ( BMK_benchResult_t* resultPtr, 1686*3117ece4Schristos buffers_t buf, contexts_t ctx, 1687*3117ece4Schristos paramValues_t cParams) 1688*3117ece4Schristos { 1689*3117ece4Schristos BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, 1690*3117ece4Schristos BASE_CLEVEL, &cParams, 1691*3117ece4Schristos BMK_both, 3); 1692*3117ece4Schristos if (!BMK_isSuccessful_benchOutcome(outcome)) return 1; 1693*3117ece4Schristos *resultPtr = BMK_extract_benchResult(outcome); 1694*3117ece4Schristos return 0; 1695*3117ece4Schristos } 1696*3117ece4Schristos 1697*3117ece4Schristos 1698*3117ece4Schristos /* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */ 1699*3117ece4Schristos #define VARIANCE 1.2 1700*3117ece4Schristos static int allBench(BMK_benchResult_t* resultPtr, 1701*3117ece4Schristos const buffers_t buf, const contexts_t ctx, 1702*3117ece4Schristos const paramValues_t cParams, 1703*3117ece4Schristos const constraint_t target, 1704*3117ece4Schristos BMK_benchResult_t* winnerResult, int feas) 1705*3117ece4Schristos { 1706*3117ece4Schristos BMK_benchResult_t benchres; 1707*3117ece4Schristos double uncertaintyConstantC = 3., uncertaintyConstantD = 3.; 1708*3117ece4Schristos double winnerRS; 1709*3117ece4Schristos 1710*3117ece4Schristos BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2); 1711*3117ece4Schristos if (!BMK_isSuccessful_benchOutcome(outcome)) { 1712*3117ece4Schristos DEBUGOUTPUT("Benchmarking failed \n"); 1713*3117ece4Schristos return ERROR_RESULT; 1714*3117ece4Schristos } 1715*3117ece4Schristos benchres = BMK_extract_benchResult(outcome); 1716*3117ece4Schristos 1717*3117ece4Schristos winnerRS = resultScore(*winnerResult, buf.srcSize, target); 1718*3117ece4Schristos DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS); 1719*3117ece4Schristos 
1720*3117ece4Schristos *resultPtr = benchres; 1721*3117ece4Schristos 1722*3117ece4Schristos /* anything with worse ratio in feas is definitely worse, discard */ 1723*3117ece4Schristos if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) { 1724*3117ece4Schristos return WORSE_RESULT; 1725*3117ece4Schristos } 1726*3117ece4Schristos 1727*3117ece4Schristos /* calculate uncertainty in compression / decompression runs */ 1728*3117ece4Schristos if (benchres.cSpeed) { 1729*3117ece4Schristos double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed); 1730*3117ece4Schristos uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC); 1731*3117ece4Schristos } 1732*3117ece4Schristos 1733*3117ece4Schristos if (benchres.dSpeed) { 1734*3117ece4Schristos double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed); 1735*3117ece4Schristos uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD); 1736*3117ece4Schristos } 1737*3117ece4Schristos 1738*3117ece4Schristos /* optimistic assumption of benchres */ 1739*3117ece4Schristos { BMK_benchResult_t resultMax = benchres; 1740*3117ece4Schristos resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE); 1741*3117ece4Schristos resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE); 1742*3117ece4Schristos 1743*3117ece4Schristos /* disregard infeasible results in feas mode */ 1744*3117ece4Schristos /* disregard if resultMax < winner in infeas mode */ 1745*3117ece4Schristos if((feas && !feasible(resultMax, target)) || 1746*3117ece4Schristos (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) { 1747*3117ece4Schristos return WORSE_RESULT; 1748*3117ece4Schristos } 1749*3117ece4Schristos } 1750*3117ece4Schristos 1751*3117ece4Schristos /* compare by resultScore when in infeas */ 1752*3117ece4Schristos /* 
compare by compareResultLT when in feas */ 1753*3117ece4Schristos if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) || 1754*3117ece4Schristos (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) ) { 1755*3117ece4Schristos return BETTER_RESULT; 1756*3117ece4Schristos } else { 1757*3117ece4Schristos return WORSE_RESULT; 1758*3117ece4Schristos } 1759*3117ece4Schristos } 1760*3117ece4Schristos 1761*3117ece4Schristos 1762*3117ece4Schristos #define INFEASIBLE_THRESHOLD 200 1763*3117ece4Schristos /* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */ 1764*3117ece4Schristos static int benchMemo(BMK_benchResult_t* resultPtr, 1765*3117ece4Schristos const buffers_t buf, const contexts_t ctx, 1766*3117ece4Schristos const paramValues_t cParams, 1767*3117ece4Schristos const constraint_t target, 1768*3117ece4Schristos BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray, 1769*3117ece4Schristos const int feas) { 1770*3117ece4Schristos static int bmcount = 0; 1771*3117ece4Schristos int res; 1772*3117ece4Schristos 1773*3117ece4Schristos if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD 1774*3117ece4Schristos || redundantParams(cParams, target, buf.maxBlockSize) ) { 1775*3117ece4Schristos return WORSE_RESULT; 1776*3117ece4Schristos } 1777*3117ece4Schristos 1778*3117ece4Schristos res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas); 1779*3117ece4Schristos 1780*3117ece4Schristos if(DEBUG && !(bmcount % 250)) { 1781*3117ece4Schristos DISPLAY("Count: %d\n", bmcount); 1782*3117ece4Schristos bmcount++; 1783*3117ece4Schristos } 1784*3117ece4Schristos BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize); 1785*3117ece4Schristos 1786*3117ece4Schristos if(res == BETTER_RESULT || feas) { 1787*3117ece4Schristos memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent 
*/ 1788*3117ece4Schristos } 1789*3117ece4Schristos return res; 1790*3117ece4Schristos } 1791*3117ece4Schristos 1792*3117ece4Schristos 1793*3117ece4Schristos typedef struct { 1794*3117ece4Schristos U64 cSpeed_min; 1795*3117ece4Schristos U64 dSpeed_min; 1796*3117ece4Schristos U32 windowLog_max; 1797*3117ece4Schristos ZSTD_strategy strategy_max; 1798*3117ece4Schristos } level_constraints_t; 1799*3117ece4Schristos 1800*3117ece4Schristos static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1]; 1801*3117ece4Schristos 1802*3117ece4Schristos static void BMK_init_level_constraints(int bytePerSec_level1) 1803*3117ece4Schristos { 1804*3117ece4Schristos assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); 1805*3117ece4Schristos memset(g_level_constraint, 0, sizeof(g_level_constraint)); 1806*3117ece4Schristos g_level_constraint[1].cSpeed_min = bytePerSec_level1; 1807*3117ece4Schristos g_level_constraint[1].dSpeed_min = 0; 1808*3117ece4Schristos g_level_constraint[1].windowLog_max = 19; 1809*3117ece4Schristos g_level_constraint[1].strategy_max = ZSTD_fast; 1810*3117ece4Schristos 1811*3117ece4Schristos /* establish speed objectives (relative to level 1) */ 1812*3117ece4Schristos { int l; 1813*3117ece4Schristos for (l=2; l<=NB_LEVELS_TRACKED; l++) { 1814*3117ece4Schristos g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64; 1815*3117ece4Schristos g_level_constraint[l].dSpeed_min = 0; 1816*3117ece4Schristos g_level_constraint[l].windowLog_max = (l<20) ? 
23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ 1817*3117ece4Schristos g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX; 1818*3117ece4Schristos } } 1819*3117ece4Schristos } 1820*3117ece4Schristos 1821*3117ece4Schristos static int BMK_seed(winnerInfo_t* winners, 1822*3117ece4Schristos const paramValues_t params, 1823*3117ece4Schristos const buffers_t buf, 1824*3117ece4Schristos const contexts_t ctx) 1825*3117ece4Schristos { 1826*3117ece4Schristos BMK_benchResult_t testResult; 1827*3117ece4Schristos int better = 0; 1828*3117ece4Schristos int cLevel; 1829*3117ece4Schristos 1830*3117ece4Schristos BMK_benchParam(&testResult, buf, ctx, params); 1831*3117ece4Schristos 1832*3117ece4Schristos for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { 1833*3117ece4Schristos 1834*3117ece4Schristos if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) 1835*3117ece4Schristos continue; /* not fast enough for this level */ 1836*3117ece4Schristos if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) 1837*3117ece4Schristos continue; /* not fast enough for this level */ 1838*3117ece4Schristos if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max) 1839*3117ece4Schristos continue; /* too much memory for this level */ 1840*3117ece4Schristos if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max) 1841*3117ece4Schristos continue; /* forbidden strategy for this level */ 1842*3117ece4Schristos if (winners[cLevel].result.cSize==0) { 1843*3117ece4Schristos /* first solution for this cLevel */ 1844*3117ece4Schristos winners[cLevel].result = testResult; 1845*3117ece4Schristos winners[cLevel].params = params; 1846*3117ece4Schristos BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); 1847*3117ece4Schristos better = 1; 1848*3117ece4Schristos continue; 1849*3117ece4Schristos } 1850*3117ece4Schristos 1851*3117ece4Schristos if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. 
+ (0.02 / cLevel))) ) { 1852*3117ece4Schristos /* Validate solution is "good enough" */ 1853*3117ece4Schristos double W_ratio = (double)buf.srcSize / (double)testResult.cSize; 1854*3117ece4Schristos double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize; 1855*3117ece4Schristos double W_ratioNote = log (W_ratio); 1856*3117ece4Schristos double O_ratioNote = log (O_ratio); 1857*3117ece4Schristos size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB); 1858*3117ece4Schristos size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB); 1859*3117ece4Schristos double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); 1860*3117ece4Schristos double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); 1861*3117ece4Schristos 1862*3117ece4Schristos size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params)); 1863*3117ece4Schristos size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params)); 1864*3117ece4Schristos double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); 1865*3117ece4Schristos double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); 1866*3117ece4Schristos 1867*3117ece4Schristos double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed); 1868*3117ece4Schristos double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed); 1869*3117ece4Schristos 1870*3117ece4Schristos double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed); 1871*3117ece4Schristos double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed); 1872*3117ece4Schristos 1873*3117ece4Schristos if (W_DMemUsed_note < O_DMemUsed_note) { 1874*3117ece4Schristos 
/* uses too much Decompression memory for too little benefit */ 1875*3117ece4Schristos if (W_ratio > O_ratio) 1876*3117ece4Schristos DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", 1877*3117ece4Schristos W_ratio, (double)(W_DMemUsed) / 1024 / 1024, 1878*3117ece4Schristos O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); 1879*3117ece4Schristos continue; 1880*3117ece4Schristos } 1881*3117ece4Schristos if (W_CMemUsed_note < O_CMemUsed_note) { 1882*3117ece4Schristos /* uses too much memory for compression for too little benefit */ 1883*3117ece4Schristos if (W_ratio > O_ratio) 1884*3117ece4Schristos DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", 1885*3117ece4Schristos W_ratio, (double)(W_CMemUsed) / 1024 / 1024, 1886*3117ece4Schristos O_ratio, (double)(O_CMemUsed) / 1024 / 1024, 1887*3117ece4Schristos cLevel); 1888*3117ece4Schristos continue; 1889*3117ece4Schristos } 1890*3117ece4Schristos if (W_CSpeed_note < O_CSpeed_note ) { 1891*3117ece4Schristos /* too large compression speed difference for the compression benefit */ 1892*3117ece4Schristos if (W_ratio > O_ratio) 1893*3117ece4Schristos DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", 1894*3117ece4Schristos W_ratio, (double)testResult.cSpeed / MB_UNIT, 1895*3117ece4Schristos O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT, 1896*3117ece4Schristos cLevel); 1897*3117ece4Schristos continue; 1898*3117ece4Schristos } 1899*3117ece4Schristos if (W_DSpeed_note < O_DSpeed_note ) { 1900*3117ece4Schristos /* too large decompression speed difference for the compression benefit */ 1901*3117ece4Schristos if (W_ratio > O_ratio) 1902*3117ece4Schristos DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", 1903*3117ece4Schristos W_ratio, (double)testResult.dSpeed / MB_UNIT, 1904*3117ece4Schristos 
O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT, 1905*3117ece4Schristos cLevel); 1906*3117ece4Schristos continue; 1907*3117ece4Schristos } 1908*3117ece4Schristos 1909*3117ece4Schristos if (W_ratio < O_ratio) 1910*3117ece4Schristos DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", 1911*3117ece4Schristos W_ratio, O_ratio, cLevel); 1912*3117ece4Schristos 1913*3117ece4Schristos winners[cLevel].result = testResult; 1914*3117ece4Schristos winners[cLevel].params = params; 1915*3117ece4Schristos BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); 1916*3117ece4Schristos 1917*3117ece4Schristos better = 1; 1918*3117ece4Schristos } } 1919*3117ece4Schristos 1920*3117ece4Schristos return better; 1921*3117ece4Schristos } 1922*3117ece4Schristos 1923*3117ece4Schristos /*-************************************ 1924*3117ece4Schristos * Compression Level Table Generation Functions 1925*3117ece4Schristos **************************************/ 1926*3117ece4Schristos 1927*3117ece4Schristos #define PARAMTABLELOG 25 1928*3117ece4Schristos #define PARAMTABLESIZE (1<<PARAMTABLELOG) 1929*3117ece4Schristos #define PARAMTABLEMASK (PARAMTABLESIZE-1) 1930*3117ece4Schristos static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */ 1931*3117ece4Schristos 1932*3117ece4Schristos static BYTE* NB_TESTS_PLAYED(paramValues_t p) 1933*3117ece4Schristos { 1934*3117ece4Schristos ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p)); 1935*3117ece4Schristos unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0); 1936*3117ece4Schristos return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK]; 1937*3117ece4Schristos } 1938*3117ece4Schristos 1939*3117ece4Schristos static void playAround(FILE* f, 1940*3117ece4Schristos winnerInfo_t* winners, 1941*3117ece4Schristos paramValues_t p, 1942*3117ece4Schristos const buffers_t buf, const contexts_t ctx) 1943*3117ece4Schristos { 
1944*3117ece4Schristos int nbVariations = 0; 1945*3117ece4Schristos UTIL_time_t const clockStart = UTIL_getTime(); 1946*3117ece4Schristos 1947*3117ece4Schristos while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { 1948*3117ece4Schristos if (nbVariations++ > g_maxNbVariations) break; 1949*3117ece4Schristos 1950*3117ece4Schristos do { 1951*3117ece4Schristos int i; 1952*3117ece4Schristos for(i = 0; i < 4; i++) { 1953*3117ece4Schristos paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1), 1954*3117ece4Schristos ((FUZ_rand(&g_rand) & 1) << 1) - 1, 1955*3117ece4Schristos &p); 1956*3117ece4Schristos } 1957*3117ece4Schristos } while (!paramValid(p)); 1958*3117ece4Schristos 1959*3117ece4Schristos /* exclude faster if already played params */ 1960*3117ece4Schristos if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1)) 1961*3117ece4Schristos continue; 1962*3117ece4Schristos 1963*3117ece4Schristos /* test */ 1964*3117ece4Schristos { BYTE* const b = NB_TESTS_PLAYED(p); 1965*3117ece4Schristos (*b)++; 1966*3117ece4Schristos } 1967*3117ece4Schristos if (!BMK_seed(winners, p, buf, ctx)) continue; 1968*3117ece4Schristos 1969*3117ece4Schristos /* improvement found => search more */ 1970*3117ece4Schristos BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); 1971*3117ece4Schristos playAround(f, winners, p, buf, ctx); 1972*3117ece4Schristos } 1973*3117ece4Schristos 1974*3117ece4Schristos } 1975*3117ece4Schristos 1976*3117ece4Schristos static void 1977*3117ece4Schristos BMK_selectRandomStart( FILE* f, 1978*3117ece4Schristos winnerInfo_t* winners, 1979*3117ece4Schristos const buffers_t buf, const contexts_t ctx) 1980*3117ece4Schristos { 1981*3117ece4Schristos U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); 1982*3117ece4Schristos if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) { 1983*3117ece4Schristos /* use some random entry */ 1984*3117ece4Schristos paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression 
parameters */ 1985*3117ece4Schristos buf.srcSize, 0); 1986*3117ece4Schristos playAround(f, winners, p, buf, ctx); 1987*3117ece4Schristos } else { 1988*3117ece4Schristos playAround(f, winners, winners[id].params, buf, ctx); 1989*3117ece4Schristos } 1990*3117ece4Schristos } 1991*3117ece4Schristos 1992*3117ece4Schristos 1993*3117ece4Schristos /* BMK_generate_cLevelTable() : 1994*3117ece4Schristos * test a large number of configurations 1995*3117ece4Schristos * and distribute them across compression levels according to speed conditions. 1996*3117ece4Schristos * display and save all intermediate results into rfName = "grillResults.txt". 1997*3117ece4Schristos * the function automatically stops after g_timeLimit_s. 1998*3117ece4Schristos * this function cannot error, it directly exit() in case of problem. 1999*3117ece4Schristos */ 2000*3117ece4Schristos static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx) 2001*3117ece4Schristos { 2002*3117ece4Schristos paramValues_t params; 2003*3117ece4Schristos winnerInfo_t winners[NB_LEVELS_TRACKED+1]; 2004*3117ece4Schristos const char* const rfName = "grillResults.txt"; 2005*3117ece4Schristos FILE* const f = fopen(rfName, "w"); 2006*3117ece4Schristos 2007*3117ece4Schristos /* init */ 2008*3117ece4Schristos assert(g_singleRun==0); 2009*3117ece4Schristos memset(winners, 0, sizeof(winners)); 2010*3117ece4Schristos if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } 2011*3117ece4Schristos 2012*3117ece4Schristos if (g_target) { 2013*3117ece4Schristos BMK_init_level_constraints(g_target * MB_UNIT); 2014*3117ece4Schristos } else { 2015*3117ece4Schristos /* baseline config for level 1 */ 2016*3117ece4Schristos paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize)); 2017*3117ece4Schristos BMK_benchResult_t testResult; 2018*3117ece4Schristos BMK_benchParam(&testResult, buf, ctx, l1params); 2019*3117ece4Schristos 
BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); 2020*3117ece4Schristos } 2021*3117ece4Schristos 2022*3117ece4Schristos /* populate initial solution */ 2023*3117ece4Schristos { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); 2024*3117ece4Schristos int i; 2025*3117ece4Schristos for (i=0; i<=maxSeeds; i++) { 2026*3117ece4Schristos params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0)); 2027*3117ece4Schristos BMK_seed(winners, params, buf, ctx); 2028*3117ece4Schristos } } 2029*3117ece4Schristos BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); 2030*3117ece4Schristos 2031*3117ece4Schristos /* start tests */ 2032*3117ece4Schristos { const UTIL_time_t grillStart = UTIL_getTime(); 2033*3117ece4Schristos do { 2034*3117ece4Schristos BMK_selectRandomStart(f, winners, buf, ctx); 2035*3117ece4Schristos } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s); 2036*3117ece4Schristos } 2037*3117ece4Schristos 2038*3117ece4Schristos /* end summary */ 2039*3117ece4Schristos BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); 2040*3117ece4Schristos DISPLAY("grillParams operations completed \n"); 2041*3117ece4Schristos 2042*3117ece4Schristos /* clean up*/ 2043*3117ece4Schristos fclose(f); 2044*3117ece4Schristos } 2045*3117ece4Schristos 2046*3117ece4Schristos 2047*3117ece4Schristos /*-************************************ 2048*3117ece4Schristos * Single Benchmark Functions 2049*3117ece4Schristos **************************************/ 2050*3117ece4Schristos 2051*3117ece4Schristos static int 2052*3117ece4Schristos benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel) 2053*3117ece4Schristos { 2054*3117ece4Schristos BMK_benchResult_t testResult; 2055*3117ece4Schristos g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); 2056*3117ece4Schristos 2057*3117ece4Schristos if (BMK_benchParam(&testResult, buf, ctx, g_params)) { 
2058*3117ece4Schristos DISPLAY("Error during benchmarking\n"); 2059*3117ece4Schristos return 1; 2060*3117ece4Schristos } 2061*3117ece4Schristos 2062*3117ece4Schristos BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize); 2063*3117ece4Schristos 2064*3117ece4Schristos return 0; 2065*3117ece4Schristos } 2066*3117ece4Schristos 2067*3117ece4Schristos static int benchSample(double compressibility, int cLevel) 2068*3117ece4Schristos { 2069*3117ece4Schristos const char* const name = "Sample 10MB"; 2070*3117ece4Schristos size_t const benchedSize = 10 MB; 2071*3117ece4Schristos void* const srcBuffer = malloc(benchedSize); 2072*3117ece4Schristos int ret = 0; 2073*3117ece4Schristos 2074*3117ece4Schristos buffers_t buf; 2075*3117ece4Schristos contexts_t ctx; 2076*3117ece4Schristos 2077*3117ece4Schristos if(srcBuffer == NULL) { 2078*3117ece4Schristos DISPLAY("Out of Memory\n"); 2079*3117ece4Schristos return 2; 2080*3117ece4Schristos } 2081*3117ece4Schristos 2082*3117ece4Schristos RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); 2083*3117ece4Schristos 2084*3117ece4Schristos if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) { 2085*3117ece4Schristos DISPLAY("Buffer Creation Error\n"); 2086*3117ece4Schristos free(srcBuffer); 2087*3117ece4Schristos return 3; 2088*3117ece4Schristos } 2089*3117ece4Schristos 2090*3117ece4Schristos if(createContexts(&ctx, NULL)) { 2091*3117ece4Schristos DISPLAY("Context Creation Error\n"); 2092*3117ece4Schristos freeBuffers(buf); 2093*3117ece4Schristos return 1; 2094*3117ece4Schristos } 2095*3117ece4Schristos 2096*3117ece4Schristos /* bench */ 2097*3117ece4Schristos DISPLAY("\r%79s\r", ""); 2098*3117ece4Schristos DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100)); 2099*3117ece4Schristos 2100*3117ece4Schristos if(g_singleRun) { 2101*3117ece4Schristos ret = benchOnce(buf, ctx, cLevel); 2102*3117ece4Schristos } else { 2103*3117ece4Schristos BMK_generate_cLevelTable(buf, ctx); 2104*3117ece4Schristos 
} 2105*3117ece4Schristos 2106*3117ece4Schristos freeBuffers(buf); 2107*3117ece4Schristos freeContexts(ctx); 2108*3117ece4Schristos 2109*3117ece4Schristos return ret; 2110*3117ece4Schristos } 2111*3117ece4Schristos 2112*3117ece4Schristos /* benchFiles() : 2113*3117ece4Schristos * note: while this function takes a table of filenames, 2114*3117ece4Schristos * in practice, only the first filename will be used */ 2115*3117ece4Schristos static int benchFiles(const char** fileNamesTable, int nbFiles, 2116*3117ece4Schristos const char* dictFileName, int cLevel) 2117*3117ece4Schristos { 2118*3117ece4Schristos buffers_t buf; 2119*3117ece4Schristos contexts_t ctx; 2120*3117ece4Schristos int ret = 0; 2121*3117ece4Schristos 2122*3117ece4Schristos if (createBuffers(&buf, fileNamesTable, nbFiles)) { 2123*3117ece4Schristos DISPLAY("unable to load files\n"); 2124*3117ece4Schristos return 1; 2125*3117ece4Schristos } 2126*3117ece4Schristos 2127*3117ece4Schristos if (createContexts(&ctx, dictFileName)) { 2128*3117ece4Schristos DISPLAY("unable to load dictionary\n"); 2129*3117ece4Schristos freeBuffers(buf); 2130*3117ece4Schristos return 2; 2131*3117ece4Schristos } 2132*3117ece4Schristos 2133*3117ece4Schristos DISPLAY("\r%79s\r", ""); 2134*3117ece4Schristos if (nbFiles == 1) { 2135*3117ece4Schristos DISPLAY("using %s : \n", fileNamesTable[0]); 2136*3117ece4Schristos } else { 2137*3117ece4Schristos DISPLAY("using %d Files : \n", nbFiles); 2138*3117ece4Schristos } 2139*3117ece4Schristos 2140*3117ece4Schristos if (g_singleRun) { 2141*3117ece4Schristos ret = benchOnce(buf, ctx, cLevel); 2142*3117ece4Schristos } else { 2143*3117ece4Schristos BMK_generate_cLevelTable(buf, ctx); 2144*3117ece4Schristos } 2145*3117ece4Schristos 2146*3117ece4Schristos freeBuffers(buf); 2147*3117ece4Schristos freeContexts(ctx); 2148*3117ece4Schristos return ret; 2149*3117ece4Schristos } 2150*3117ece4Schristos 2151*3117ece4Schristos 2152*3117ece4Schristos /*-************************************ 
2153*3117ece4Schristos * Local Optimization Functions 2154*3117ece4Schristos **************************************/ 2155*3117ece4Schristos 2156*3117ece4Schristos /* One iteration of hill climbing. Specifically, it first tries all 2157*3117ece4Schristos * valid parameter configurations w/ manhattan distance 1 and picks the best one 2158*3117ece4Schristos * failing that, it progressively tries candidates further and further away (up to #dim + 2) 2159*3117ece4Schristos * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the 2160*3117ece4Schristos * current stage of hill climbing. 2161*3117ece4Schristos * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to 2162*3117ece4Schristos * the resultScore function, which is effectively a linear increase in reward until it reaches 2163*3117ece4Schristos * the constraint-satisfying value, it which point any excess results in only logarithmic reward. 2164*3117ece4Schristos * This aims to find some constraint-satisfying point. 2165*3117ece4Schristos * Phase 2 optimizes in accordance with what the original function sets out to maximize, with 2166*3117ece4Schristos * all feasible solutions valued over all infeasible solutions. 2167*3117ece4Schristos */ 2168*3117ece4Schristos 2169*3117ece4Schristos /* sanitize all params here. 2170*3117ece4Schristos * all generation after random should be sanitized. (maybe sanitize random) 2171*3117ece4Schristos */ 2172*3117ece4Schristos static winnerInfo_t climbOnce(const constraint_t target, 2173*3117ece4Schristos memoTable_t* mtAll, 2174*3117ece4Schristos const buffers_t buf, const contexts_t ctx, 2175*3117ece4Schristos const paramValues_t init) 2176*3117ece4Schristos { 2177*3117ece4Schristos /* 2178*3117ece4Schristos * cparam - currently considered 'center' 2179*3117ece4Schristos * candidate - params to benchmark/results 2180*3117ece4Schristos * winner - best option found so far. 
2181*3117ece4Schristos */ 2182*3117ece4Schristos paramValues_t cparam = init; 2183*3117ece4Schristos winnerInfo_t candidateInfo, winnerInfo; 2184*3117ece4Schristos int better = 1; 2185*3117ece4Schristos int feas = 0; 2186*3117ece4Schristos 2187*3117ece4Schristos winnerInfo = initWinnerInfo(init); 2188*3117ece4Schristos candidateInfo = winnerInfo; 2189*3117ece4Schristos 2190*3117ece4Schristos { winnerInfo_t bestFeasible1 = initWinnerInfo(cparam); 2191*3117ece4Schristos DEBUGOUTPUT("Climb Part 1\n"); 2192*3117ece4Schristos while(better) { 2193*3117ece4Schristos int offset; 2194*3117ece4Schristos size_t i, dist; 2195*3117ece4Schristos const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen; 2196*3117ece4Schristos better = 0; 2197*3117ece4Schristos DEBUGOUTPUT("Start\n"); 2198*3117ece4Schristos cparam = winnerInfo.params; 2199*3117ece4Schristos candidateInfo.params = cparam; 2200*3117ece4Schristos /* all dist-1 candidates */ 2201*3117ece4Schristos for (i = 0; i < varLen; i++) { 2202*3117ece4Schristos for (offset = -1; offset <= 1; offset += 2) { 2203*3117ece4Schristos CHECKTIME(winnerInfo); 2204*3117ece4Schristos candidateInfo.params = cparam; 2205*3117ece4Schristos paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i], 2206*3117ece4Schristos offset, 2207*3117ece4Schristos &candidateInfo.params); 2208*3117ece4Schristos 2209*3117ece4Schristos if(paramValid(candidateInfo.params)) { 2210*3117ece4Schristos int res; 2211*3117ece4Schristos res = benchMemo(&candidateInfo.result, buf, ctx, 2212*3117ece4Schristos sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas); 2213*3117ece4Schristos DEBUGOUTPUT("Res: %d\n", res); 2214*3117ece4Schristos if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */ 2215*3117ece4Schristos winnerInfo = candidateInfo; 2216*3117ece4Schristos better = 1; 2217*3117ece4Schristos if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) { 2218*3117ece4Schristos bestFeasible1 
= winnerInfo; 2219*3117ece4Schristos } 2220*3117ece4Schristos } 2221*3117ece4Schristos } 2222*3117ece4Schristos } /* for (offset = -1; offset <= 1; offset += 2) */ 2223*3117ece4Schristos } /* for (i = 0; i < varLen; i++) */ 2224*3117ece4Schristos 2225*3117ece4Schristos if(better) { 2226*3117ece4Schristos continue; 2227*3117ece4Schristos } 2228*3117ece4Schristos 2229*3117ece4Schristos for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */ 2230*3117ece4Schristos for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) { 2231*3117ece4Schristos int res; 2232*3117ece4Schristos CHECKTIME(winnerInfo); 2233*3117ece4Schristos candidateInfo.params = cparam; 2234*3117ece4Schristos /* param error checking already done here */ 2235*3117ece4Schristos paramVariation(&candidateInfo.params, mtAll, (U32)dist); 2236*3117ece4Schristos 2237*3117ece4Schristos res = benchMemo(&candidateInfo.result, 2238*3117ece4Schristos buf, ctx, 2239*3117ece4Schristos sanitizeParams(candidateInfo.params), target, 2240*3117ece4Schristos &winnerInfo.result, mtAll, feas); 2241*3117ece4Schristos DEBUGOUTPUT("Res: %d\n", res); 2242*3117ece4Schristos if (res == BETTER_RESULT) { /* synonymous with better in this case*/ 2243*3117ece4Schristos winnerInfo = candidateInfo; 2244*3117ece4Schristos better = 1; 2245*3117ece4Schristos if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) { 2246*3117ece4Schristos bestFeasible1 = winnerInfo; 2247*3117ece4Schristos } 2248*3117ece4Schristos break; 2249*3117ece4Schristos } 2250*3117ece4Schristos } 2251*3117ece4Schristos 2252*3117ece4Schristos if (better) { 2253*3117ece4Schristos break; 2254*3117ece4Schristos } 2255*3117ece4Schristos } /* for(dist = 2; dist < varLen + 2; dist++) */ 2256*3117ece4Schristos 2257*3117ece4Schristos if (!better) { /* infeas -> feas -> stop */ 2258*3117ece4Schristos if (feas) return winnerInfo; 2259*3117ece4Schristos feas = 1; 2260*3117ece4Schristos better = 1; 2261*3117ece4Schristos winnerInfo = 
bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */ 2262*3117ece4Schristos DEBUGOUTPUT("Climb Part 2\n"); 2263*3117ece4Schristos } 2264*3117ece4Schristos } 2265*3117ece4Schristos winnerInfo = bestFeasible1; 2266*3117ece4Schristos } 2267*3117ece4Schristos 2268*3117ece4Schristos return winnerInfo; 2269*3117ece4Schristos } 2270*3117ece4Schristos 2271*3117ece4Schristos /* Optimizes for a fixed strategy */ 2272*3117ece4Schristos 2273*3117ece4Schristos /* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited) 2274*3117ece4Schristos weight more on visit for bad results, less on good results/more on later results / ones with more failures. 2275*3117ece4Schristos allocate memoTable here. 2276*3117ece4Schristos */ 2277*3117ece4Schristos static winnerInfo_t 2278*3117ece4Schristos optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx, 2279*3117ece4Schristos const constraint_t target, paramValues_t paramTarget, 2280*3117ece4Schristos const ZSTD_strategy strat, 2281*3117ece4Schristos memoTable_t* memoTableArray, const int tries) 2282*3117ece4Schristos { 2283*3117ece4Schristos int i = 0; 2284*3117ece4Schristos 2285*3117ece4Schristos paramValues_t init; 2286*3117ece4Schristos winnerInfo_t winnerInfo, candidateInfo; 2287*3117ece4Schristos winnerInfo = initWinnerInfo(emptyParams()); 2288*3117ece4Schristos /* so climb is given the right fixed strategy */ 2289*3117ece4Schristos paramTarget.vals[strt_ind] = strat; 2290*3117ece4Schristos /* to pass ZSTD_checkCParams */ 2291*3117ece4Schristos paramTarget = cParamUnsetMin(paramTarget); 2292*3117ece4Schristos 2293*3117ece4Schristos init = paramTarget; 2294*3117ece4Schristos 2295*3117ece4Schristos for(i = 0; i < tries; i++) { 2296*3117ece4Schristos DEBUGOUTPUT("Restart\n"); 2297*3117ece4Schristos do { 2298*3117ece4Schristos randomConstrainedParams(&init, memoTableArray, strat); 
2299*3117ece4Schristos } while(redundantParams(init, target, buf.maxBlockSize)); 2300*3117ece4Schristos candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init); 2301*3117ece4Schristos if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) { 2302*3117ece4Schristos winnerInfo = candidateInfo; 2303*3117ece4Schristos BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize); 2304*3117ece4Schristos i = 0; 2305*3117ece4Schristos continue; 2306*3117ece4Schristos } 2307*3117ece4Schristos CHECKTIME(winnerInfo); 2308*3117ece4Schristos i++; 2309*3117ece4Schristos } 2310*3117ece4Schristos return winnerInfo; 2311*3117ece4Schristos } 2312*3117ece4Schristos 2313*3117ece4Schristos /* goes best, best-1, best+1, best-2, ... */ 2314*3117ece4Schristos /* return 0 if nothing remaining */ 2315*3117ece4Schristos static int nextStrategy(const int currentStrategy, const int bestStrategy) 2316*3117ece4Schristos { 2317*3117ece4Schristos if(bestStrategy <= currentStrategy) { 2318*3117ece4Schristos int candidate = 2 * bestStrategy - currentStrategy - 1; 2319*3117ece4Schristos if(candidate < 1) { 2320*3117ece4Schristos candidate = currentStrategy + 1; 2321*3117ece4Schristos if(candidate > (int)ZSTD_STRATEGY_MAX) { 2322*3117ece4Schristos return 0; 2323*3117ece4Schristos } else { 2324*3117ece4Schristos return candidate; 2325*3117ece4Schristos } 2326*3117ece4Schristos } else { 2327*3117ece4Schristos return candidate; 2328*3117ece4Schristos } 2329*3117ece4Schristos } else { /* bestStrategy >= currentStrategy */ 2330*3117ece4Schristos int candidate = 2 * bestStrategy - currentStrategy; 2331*3117ece4Schristos if(candidate > (int)ZSTD_STRATEGY_MAX) { 2332*3117ece4Schristos candidate = currentStrategy - 1; 2333*3117ece4Schristos if(candidate < 1) { 2334*3117ece4Schristos return 0; 2335*3117ece4Schristos } else { 2336*3117ece4Schristos return candidate; 2337*3117ece4Schristos } 2338*3117ece4Schristos } else { 
2339*3117ece4Schristos return candidate; 2340*3117ece4Schristos } 2341*3117ece4Schristos } 2342*3117ece4Schristos } 2343*3117ece4Schristos 2344*3117ece4Schristos /* experiment with playing with this and decay value */ 2345*3117ece4Schristos 2346*3117ece4Schristos /* main fn called when using --optimize */ 2347*3117ece4Schristos /* Does strategy selection by benchmarking default compression levels 2348*3117ece4Schristos * then optimizes by strategy, starting with the best one and moving 2349*3117ece4Schristos * progressively moving further away by number 2350*3117ece4Schristos * args: 2351*3117ece4Schristos * fileNamesTable - list of files to benchmark 2352*3117ece4Schristos * nbFiles - length of fileNamesTable 2353*3117ece4Schristos * dictFileName - name of dictionary file if one, else NULL 2354*3117ece4Schristos * target - performance constraints (cSpeed, dSpeed, cMem) 2355*3117ece4Schristos * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast) 2356*3117ece4Schristos * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio) 2357*3117ece4Schristos */ 2358*3117ece4Schristos 2359*3117ece4Schristos static unsigned g_maxTries = 5; 2360*3117ece4Schristos #define TRY_DECAY 1 2361*3117ece4Schristos 2362*3117ece4Schristos static int 2363*3117ece4Schristos optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, 2364*3117ece4Schristos const char* dictFileName, 2365*3117ece4Schristos constraint_t target, paramValues_t paramTarget, 2366*3117ece4Schristos const int cLevelOpt, const int cLevelRun, 2367*3117ece4Schristos const U32 memoTableLog) 2368*3117ece4Schristos { 2369*3117ece4Schristos varInds_t varArray [NUM_PARAMS]; 2370*3117ece4Schristos int ret = 0; 2371*3117ece4Schristos const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL); 2372*3117ece4Schristos winnerInfo_t winner = initWinnerInfo(emptyParams()); 2373*3117ece4Schristos memoTable_t* allMT = 
NULL; 2374*3117ece4Schristos paramValues_t paramBase; 2375*3117ece4Schristos contexts_t ctx; 2376*3117ece4Schristos buffers_t buf; 2377*3117ece4Schristos g_time = UTIL_getTime(); 2378*3117ece4Schristos 2379*3117ece4Schristos if (createBuffers(&buf, fileNamesTable, nbFiles)) { 2380*3117ece4Schristos DISPLAY("unable to load files\n"); 2381*3117ece4Schristos return 1; 2382*3117ece4Schristos } 2383*3117ece4Schristos 2384*3117ece4Schristos if (createContexts(&ctx, dictFileName)) { 2385*3117ece4Schristos DISPLAY("unable to load dictionary\n"); 2386*3117ece4Schristos freeBuffers(buf); 2387*3117ece4Schristos return 2; 2388*3117ece4Schristos } 2389*3117ece4Schristos 2390*3117ece4Schristos if (nbFiles == 1) { 2391*3117ece4Schristos DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[0]); 2392*3117ece4Schristos } else { 2393*3117ece4Schristos DISPLAYLEVEL(2, "Loading %lu Files... \r", (unsigned long)nbFiles); 2394*3117ece4Schristos } 2395*3117ece4Schristos 2396*3117ece4Schristos /* sanitize paramTarget */ 2397*3117ece4Schristos optimizerAdjustInput(¶mTarget, buf.maxBlockSize); 2398*3117ece4Schristos paramBase = cParamUnsetMin(paramTarget); 2399*3117ece4Schristos 2400*3117ece4Schristos allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog); 2401*3117ece4Schristos 2402*3117ece4Schristos if (!allMT) { 2403*3117ece4Schristos DISPLAY("MemoTable Init Error\n"); 2404*3117ece4Schristos ret = 2; 2405*3117ece4Schristos goto _cleanUp; 2406*3117ece4Schristos } 2407*3117ece4Schristos 2408*3117ece4Schristos /* default strictnesses */ 2409*3117ece4Schristos if (g_strictness == PARAM_UNSET) { 2410*3117ece4Schristos if(g_optmode) { 2411*3117ece4Schristos g_strictness = 100; 2412*3117ece4Schristos } else { 2413*3117ece4Schristos g_strictness = 90; 2414*3117ece4Schristos } 2415*3117ece4Schristos } else { 2416*3117ece4Schristos if(0 >= g_strictness || g_strictness > 100) { 2417*3117ece4Schristos DISPLAY("Strictness Outside of Bounds\n"); 2418*3117ece4Schristos ret = 4; 
2419*3117ece4Schristos goto _cleanUp; 2420*3117ece4Schristos } 2421*3117ece4Schristos } 2422*3117ece4Schristos 2423*3117ece4Schristos /* use level'ing mode instead of normal target mode */ 2424*3117ece4Schristos if (g_optmode) { 2425*3117ece4Schristos winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize)); 2426*3117ece4Schristos if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) { 2427*3117ece4Schristos ret = 3; 2428*3117ece4Schristos goto _cleanUp; 2429*3117ece4Schristos } 2430*3117ece4Schristos 2431*3117ece4Schristos g_lvltarget = winner.result; 2432*3117ece4Schristos g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100; 2433*3117ece4Schristos g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100; 2434*3117ece4Schristos g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness; 2435*3117ece4Schristos 2436*3117ece4Schristos target.cSpeed = (U32)g_lvltarget.cSpeed; 2437*3117ece4Schristos target.dSpeed = (U32)g_lvltarget.dSpeed; 2438*3117ece4Schristos 2439*3117ece4Schristos BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize); 2440*3117ece4Schristos } 2441*3117ece4Schristos 2442*3117ece4Schristos /* Don't want it to return anything worse than the best known result */ 2443*3117ece4Schristos if (g_singleRun) { 2444*3117ece4Schristos BMK_benchResult_t res; 2445*3117ece4Schristos g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); 2446*3117ece4Schristos if (BMK_benchParam(&res, buf, ctx, g_params)) { 2447*3117ece4Schristos ret = 45; 2448*3117ece4Schristos goto _cleanUp; 2449*3117ece4Schristos } 2450*3117ece4Schristos if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) { 2451*3117ece4Schristos winner.result = res; 2452*3117ece4Schristos winner.params = g_params; 2453*3117ece4Schristos } 2454*3117ece4Schristos } 2455*3117ece4Schristos 
2456*3117ece4Schristos /* bench */ 2457*3117ece4Schristos DISPLAYLEVEL(2, "\r%79s\r", ""); 2458*3117ece4Schristos if(nbFiles == 1) { 2459*3117ece4Schristos DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]); 2460*3117ece4Schristos } else { 2461*3117ece4Schristos DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles); 2462*3117ece4Schristos } 2463*3117ece4Schristos 2464*3117ece4Schristos if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); } 2465*3117ece4Schristos if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); } 2466*3117ece4Schristos if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); } 2467*3117ece4Schristos 2468*3117ece4Schristos DISPLAYLEVEL(2, "\n"); 2469*3117ece4Schristos init_clockGranularity(); 2470*3117ece4Schristos 2471*3117ece4Schristos { paramValues_t CParams; 2472*3117ece4Schristos 2473*3117ece4Schristos /* find best solution from default params */ 2474*3117ece4Schristos { const int maxSeeds = g_noSeed ? 
1 : ZSTD_maxCLevel(); 2475*3117ece4Schristos DEBUGOUTPUT("Strategy Selection\n"); 2476*3117ece4Schristos if (paramTarget.vals[strt_ind] == PARAM_UNSET) { 2477*3117ece4Schristos BMK_benchResult_t candidate; 2478*3117ece4Schristos int i; 2479*3117ece4Schristos for (i=1; i<=maxSeeds; i++) { 2480*3117ece4Schristos int ec; 2481*3117ece4Schristos CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget); 2482*3117ece4Schristos ec = BMK_benchParam(&candidate, buf, ctx, CParams); 2483*3117ece4Schristos BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize); 2484*3117ece4Schristos 2485*3117ece4Schristos if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) { 2486*3117ece4Schristos winner.result = candidate; 2487*3117ece4Schristos winner.params = CParams; 2488*3117ece4Schristos } 2489*3117ece4Schristos 2490*3117ece4Schristos CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */ 2491*3117ece4Schristos /* if the current params are too slow, just stop. */ 2492*3117ece4Schristos if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; } 2493*3117ece4Schristos } 2494*3117ece4Schristos 2495*3117ece4Schristos BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize); 2496*3117ece4Schristos } 2497*3117ece4Schristos } 2498*3117ece4Schristos 2499*3117ece4Schristos DEBUGOUTPUT("Real Opt\n"); 2500*3117ece4Schristos /* start 'real' optimization */ 2501*3117ece4Schristos { int bestStrategy = (int)winner.params.vals[strt_ind]; 2502*3117ece4Schristos if (paramTarget.vals[strt_ind] == PARAM_UNSET) { 2503*3117ece4Schristos int st = bestStrategy; 2504*3117ece4Schristos int tries = g_maxTries; 2505*3117ece4Schristos 2506*3117ece4Schristos /* one iterations of hill climbing with the level-defined parameters. 
*/ 2507*3117ece4Schristos { winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params); 2508*3117ece4Schristos if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) { 2509*3117ece4Schristos winner = w1; 2510*3117ece4Schristos } 2511*3117ece4Schristos CHECKTIMEGT(ret, 0, _displayCleanUp); 2512*3117ece4Schristos } 2513*3117ece4Schristos 2514*3117ece4Schristos while(st && tries > 0) { 2515*3117ece4Schristos winnerInfo_t wc; 2516*3117ece4Schristos DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]); 2517*3117ece4Schristos 2518*3117ece4Schristos wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries); 2519*3117ece4Schristos 2520*3117ece4Schristos if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) { 2521*3117ece4Schristos winner = wc; 2522*3117ece4Schristos tries = g_maxTries; 2523*3117ece4Schristos bestStrategy = st; 2524*3117ece4Schristos } else { 2525*3117ece4Schristos st = nextStrategy(st, bestStrategy); 2526*3117ece4Schristos tries -= TRY_DECAY; 2527*3117ece4Schristos } 2528*3117ece4Schristos CHECKTIMEGT(ret, 0, _displayCleanUp); 2529*3117ece4Schristos } 2530*3117ece4Schristos } else { 2531*3117ece4Schristos winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries); 2532*3117ece4Schristos } 2533*3117ece4Schristos 2534*3117ece4Schristos } 2535*3117ece4Schristos 2536*3117ece4Schristos /* no solution found */ 2537*3117ece4Schristos if(winner.result.cSize == (size_t)-1) { 2538*3117ece4Schristos ret = 1; 2539*3117ece4Schristos DISPLAY("No feasible solution found\n"); 2540*3117ece4Schristos goto _cleanUp; 2541*3117ece4Schristos } 2542*3117ece4Schristos 2543*3117ece4Schristos /* end summary */ 2544*3117ece4Schristos _displayCleanUp: 2545*3117ece4Schristos if (g_displayLevel >= 0) { 2546*3117ece4Schristos BMK_displayOneResult(stdout, winner, buf.srcSize); 2547*3117ece4Schristos } 2548*3117ece4Schristos BMK_paramValues_into_commandLine(stdout, winner.params); 
2549*3117ece4Schristos DISPLAYLEVEL(1, "grillParams size - optimizer completed \n"); 2550*3117ece4Schristos } 2551*3117ece4Schristos 2552*3117ece4Schristos _cleanUp: 2553*3117ece4Schristos freeContexts(ctx); 2554*3117ece4Schristos freeBuffers(buf); 2555*3117ece4Schristos freeMemoTableArray(allMT); 2556*3117ece4Schristos return ret; 2557*3117ece4Schristos } 2558*3117ece4Schristos 2559*3117ece4Schristos /*-************************************ 2560*3117ece4Schristos * CLI parsing functions 2561*3117ece4Schristos **************************************/ 2562*3117ece4Schristos 2563*3117ece4Schristos /** longCommandWArg() : 2564*3117ece4Schristos * check if *stringPtr is the same as longCommand. 2565*3117ece4Schristos * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. 2566*3117ece4Schristos * @return 0 and doesn't modify *stringPtr otherwise. 2567*3117ece4Schristos * from zstdcli.c 2568*3117ece4Schristos */ 2569*3117ece4Schristos static int longCommandWArg(const char** stringPtr, const char* longCommand) 2570*3117ece4Schristos { 2571*3117ece4Schristos size_t const comSize = strlen(longCommand); 2572*3117ece4Schristos int const result = !strncmp(*stringPtr, longCommand, comSize); 2573*3117ece4Schristos if (result) *stringPtr += comSize; 2574*3117ece4Schristos return result; 2575*3117ece4Schristos } 2576*3117ece4Schristos 2577*3117ece4Schristos static void errorOut(const char* msg) 2578*3117ece4Schristos { 2579*3117ece4Schristos DISPLAY("%s \n", msg); exit(1); 2580*3117ece4Schristos } 2581*3117ece4Schristos 2582*3117ece4Schristos /*! readU32FromChar() : 2583*3117ece4Schristos * @return : unsigned integer value read from input in `char` format. 2584*3117ece4Schristos * allows and interprets K, KB, KiB, M, MB and MiB suffix. 2585*3117ece4Schristos * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
2586*3117ece4Schristos * Note : function will exit() program if digit sequence overflows */ 2587*3117ece4Schristos static unsigned readU32FromChar(const char** stringPtr) 2588*3117ece4Schristos { 2589*3117ece4Schristos const char errorMsg[] = "error: numeric value too large"; 2590*3117ece4Schristos unsigned sign = 1; 2591*3117ece4Schristos unsigned result = 0; 2592*3117ece4Schristos if(**stringPtr == '-') { sign = (unsigned)-1; (*stringPtr)++; } 2593*3117ece4Schristos while ((**stringPtr >='0') && (**stringPtr <='9')) { 2594*3117ece4Schristos unsigned const max = (((unsigned)(-1)) / 10) - 1; 2595*3117ece4Schristos if (result > max) errorOut(errorMsg); 2596*3117ece4Schristos result *= 10; 2597*3117ece4Schristos assert(**stringPtr >= '0'); 2598*3117ece4Schristos result += (unsigned)(**stringPtr - '0'); 2599*3117ece4Schristos (*stringPtr)++ ; 2600*3117ece4Schristos } 2601*3117ece4Schristos if ((**stringPtr=='K') || (**stringPtr=='M')) { 2602*3117ece4Schristos unsigned const maxK = ((unsigned)(-1)) >> 10; 2603*3117ece4Schristos if (result > maxK) errorOut(errorMsg); 2604*3117ece4Schristos result <<= 10; 2605*3117ece4Schristos if (**stringPtr=='M') { 2606*3117ece4Schristos if (result > maxK) errorOut(errorMsg); 2607*3117ece4Schristos result <<= 10; 2608*3117ece4Schristos } 2609*3117ece4Schristos (*stringPtr)++; /* skip `K` or `M` */ 2610*3117ece4Schristos if (**stringPtr=='i') (*stringPtr)++; 2611*3117ece4Schristos if (**stringPtr=='B') (*stringPtr)++; 2612*3117ece4Schristos } 2613*3117ece4Schristos return result * sign; 2614*3117ece4Schristos } 2615*3117ece4Schristos 2616*3117ece4Schristos static double readDoubleFromChar(const char** stringPtr) 2617*3117ece4Schristos { 2618*3117ece4Schristos double result = 0, divide = 10; 2619*3117ece4Schristos while ((**stringPtr >='0') && (**stringPtr <='9')) { 2620*3117ece4Schristos result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; 2621*3117ece4Schristos } 2622*3117ece4Schristos if(**stringPtr!='.') { 
2623*3117ece4Schristos return result; 2624*3117ece4Schristos } 2625*3117ece4Schristos (*stringPtr)++; 2626*3117ece4Schristos while ((**stringPtr >='0') && (**stringPtr <='9')) { 2627*3117ece4Schristos result += (double)(**stringPtr - '0') / divide, divide *= 10, (*stringPtr)++ ; 2628*3117ece4Schristos } 2629*3117ece4Schristos return result; 2630*3117ece4Schristos } 2631*3117ece4Schristos 2632*3117ece4Schristos static int usage(const char* exename) 2633*3117ece4Schristos { 2634*3117ece4Schristos DISPLAY( "Usage :\n"); 2635*3117ece4Schristos DISPLAY( " %s [arg] file\n", exename); 2636*3117ece4Schristos DISPLAY( "Arguments :\n"); 2637*3117ece4Schristos DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n"); 2638*3117ece4Schristos DISPLAY( " -H/-h : Help (this text + advanced options)\n"); 2639*3117ece4Schristos return 0; 2640*3117ece4Schristos } 2641*3117ece4Schristos 2642*3117ece4Schristos static int usage_advanced(void) 2643*3117ece4Schristos { 2644*3117ece4Schristos DISPLAY( "\nAdvanced options :\n"); 2645*3117ece4Schristos DISPLAY( " -T# : set level 1 speed objective \n"); 2646*3117ece4Schristos DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); 2647*3117ece4Schristos DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n"); 2648*3117ece4Schristos DISPLAY( " -S : Single run \n"); 2649*3117ece4Schristos DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); 2650*3117ece4Schristos DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); 2651*3117ece4Schristos DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n", 2652*3117ece4Schristos (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600); 2653*3117ece4Schristos DISPLAY( " -v : Prints Benchmarking output\n"); 2654*3117ece4Schristos DISPLAY( " -D : Next argument dictionary file\n"); 2655*3117ece4Schristos DISPLAY( 
" -s : Separate Files\n"); 2656*3117ece4Schristos return 0; 2657*3117ece4Schristos } 2658*3117ece4Schristos 2659*3117ece4Schristos static int badusage(const char* exename) 2660*3117ece4Schristos { 2661*3117ece4Schristos DISPLAY("Wrong parameters\n"); 2662*3117ece4Schristos usage(exename); 2663*3117ece4Schristos return 1; 2664*3117ece4Schristos } 2665*3117ece4Schristos 2666*3117ece4Schristos #define PARSE_SUB_ARGS(stringLong, stringShort, variable) { \ 2667*3117ece4Schristos if ( longCommandWArg(&argument, stringLong) \ 2668*3117ece4Schristos || longCommandWArg(&argument, stringShort) ) { \ 2669*3117ece4Schristos variable = readU32FromChar(&argument); \ 2670*3117ece4Schristos if (argument[0]==',') { \ 2671*3117ece4Schristos argument++; continue; \ 2672*3117ece4Schristos } else break; \ 2673*3117ece4Schristos } } 2674*3117ece4Schristos 2675*3117ece4Schristos /* 1 if successful parse, 0 otherwise */ 2676*3117ece4Schristos static int parse_params(const char** argptr, paramValues_t* pv) { 2677*3117ece4Schristos int matched = 0; 2678*3117ece4Schristos const char* argOrig = *argptr; 2679*3117ece4Schristos varInds_t v; 2680*3117ece4Schristos for(v = 0; v < NUM_PARAMS; v++) { 2681*3117ece4Schristos if ( longCommandWArg(argptr,g_shortParamNames[v]) 2682*3117ece4Schristos || longCommandWArg(argptr, g_paramNames[v]) ) { 2683*3117ece4Schristos if(**argptr == '=') { 2684*3117ece4Schristos (*argptr)++; 2685*3117ece4Schristos pv->vals[v] = readU32FromChar(argptr); 2686*3117ece4Schristos matched = 1; 2687*3117ece4Schristos break; 2688*3117ece4Schristos } 2689*3117ece4Schristos } 2690*3117ece4Schristos /* reset and try again */ 2691*3117ece4Schristos *argptr = argOrig; 2692*3117ece4Schristos } 2693*3117ece4Schristos return matched; 2694*3117ece4Schristos } 2695*3117ece4Schristos 2696*3117ece4Schristos /*-************************************ 2697*3117ece4Schristos * Main 2698*3117ece4Schristos **************************************/ 2699*3117ece4Schristos 2700*3117ece4Schristos int 
main(int argc, const char** argv) 2701*3117ece4Schristos { 2702*3117ece4Schristos int i, 2703*3117ece4Schristos filenamesStart=0, 2704*3117ece4Schristos result; 2705*3117ece4Schristos const char* exename=argv[0]; 2706*3117ece4Schristos const char* input_filename = NULL; 2707*3117ece4Schristos const char* dictFileName = NULL; 2708*3117ece4Schristos U32 main_pause = 0; 2709*3117ece4Schristos int cLevelOpt = 0, cLevelRun = 0; 2710*3117ece4Schristos int separateFiles = 0; 2711*3117ece4Schristos double compressibility = COMPRESSIBILITY_DEFAULT; 2712*3117ece4Schristos U32 memoTableLog = PARAM_UNSET; 2713*3117ece4Schristos constraint_t target = { 0, 0, (U32)-1 }; 2714*3117ece4Schristos 2715*3117ece4Schristos paramValues_t paramTarget = emptyParams(); 2716*3117ece4Schristos g_params = emptyParams(); 2717*3117ece4Schristos 2718*3117ece4Schristos assert(argc>=1); /* for exename */ 2719*3117ece4Schristos 2720*3117ece4Schristos for(i=1; i<argc; i++) { 2721*3117ece4Schristos const char* argument = argv[i]; 2722*3117ece4Schristos DEBUGOUTPUT("%d: %s\n", i, argument); 2723*3117ece4Schristos assert(argument != NULL); 2724*3117ece4Schristos 2725*3117ece4Schristos if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; } 2726*3117ece4Schristos 2727*3117ece4Schristos if (longCommandWArg(&argument, "--optimize=")) { 2728*3117ece4Schristos g_optimizer = 1; 2729*3117ece4Schristos for ( ; ;) { 2730*3117ece4Schristos if(parse_params(&argument, ¶mTarget)) { if(argument[0] == ',') { argument++; continue; } else break; } 2731*3117ece4Schristos PARSE_SUB_ARGS("compressionSpeed=" , "cSpeed=", target.cSpeed); 2732*3117ece4Schristos PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed); 2733*3117ece4Schristos PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem); 2734*3117ece4Schristos PARSE_SUB_ARGS("strict=", "stc=", g_strictness); 2735*3117ece4Schristos PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries); 2736*3117ece4Schristos PARSE_SUB_ARGS("memoLimitLog=", "memLog=", 
memoTableLog); 2737*3117ece4Schristos if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; } 2738*3117ece4Schristos if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } 2739*3117ece4Schristos 2740*3117ece4Schristos DISPLAY("invalid optimization parameter \n"); 2741*3117ece4Schristos return 1; 2742*3117ece4Schristos } 2743*3117ece4Schristos 2744*3117ece4Schristos if (argument[0] != 0) { 2745*3117ece4Schristos DISPLAY("invalid --optimize= format\n"); 2746*3117ece4Schristos return 1; /* check the end of string */ 2747*3117ece4Schristos } 2748*3117ece4Schristos continue; 2749*3117ece4Schristos } else if (longCommandWArg(&argument, "--zstd=")) { 2750*3117ece4Schristos /* Decode command (note : aggregated commands are allowed) */ 2751*3117ece4Schristos g_singleRun = 1; 2752*3117ece4Schristos for ( ; ;) { 2753*3117ece4Schristos if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; } 2754*3117ece4Schristos if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; } 2755*3117ece4Schristos 2756*3117ece4Schristos DISPLAY("invalid compression parameter \n"); 2757*3117ece4Schristos return 1; 2758*3117ece4Schristos } 2759*3117ece4Schristos 2760*3117ece4Schristos if (argument[0] != 0) { 2761*3117ece4Schristos DISPLAY("invalid --zstd= format\n"); 2762*3117ece4Schristos return 1; /* check the end of string */ 2763*3117ece4Schristos } 2764*3117ece4Schristos continue; 2765*3117ece4Schristos /* if not return, success */ 2766*3117ece4Schristos 2767*3117ece4Schristos } else if 
(longCommandWArg(&argument, "--display=")) { 2768*3117ece4Schristos /* Decode command (note : aggregated commands are allowed) */ 2769*3117ece4Schristos memset(g_silenceParams, 1, sizeof(g_silenceParams)); 2770*3117ece4Schristos for ( ; ;) { 2771*3117ece4Schristos int found = 0; 2772*3117ece4Schristos varInds_t v; 2773*3117ece4Schristos for(v = 0; v < NUM_PARAMS; v++) { 2774*3117ece4Schristos if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) { 2775*3117ece4Schristos g_silenceParams[v] = 0; 2776*3117ece4Schristos found = 1; 2777*3117ece4Schristos } 2778*3117ece4Schristos } 2779*3117ece4Schristos if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) { 2780*3117ece4Schristos for(v = 0; v <= strt_ind; v++) { 2781*3117ece4Schristos g_silenceParams[v] = 0; 2782*3117ece4Schristos } 2783*3117ece4Schristos found = 1; 2784*3117ece4Schristos } 2785*3117ece4Schristos 2786*3117ece4Schristos 2787*3117ece4Schristos if(found) { 2788*3117ece4Schristos if(argument[0]==',') { 2789*3117ece4Schristos continue; 2790*3117ece4Schristos } else { 2791*3117ece4Schristos break; 2792*3117ece4Schristos } 2793*3117ece4Schristos } 2794*3117ece4Schristos DISPLAY("invalid parameter name parameter \n"); 2795*3117ece4Schristos return 1; 2796*3117ece4Schristos } 2797*3117ece4Schristos 2798*3117ece4Schristos if (argument[0] != 0) { 2799*3117ece4Schristos DISPLAY("invalid --display format\n"); 2800*3117ece4Schristos return 1; /* check the end of string */ 2801*3117ece4Schristos } 2802*3117ece4Schristos continue; 2803*3117ece4Schristos } else if (argument[0]=='-') { 2804*3117ece4Schristos argument++; 2805*3117ece4Schristos 2806*3117ece4Schristos while (argument[0]!=0) { 2807*3117ece4Schristos 2808*3117ece4Schristos switch(argument[0]) 2809*3117ece4Schristos { 2810*3117ece4Schristos /* Display help on usage */ 2811*3117ece4Schristos case 'h' : 2812*3117ece4Schristos case 'H': usage(exename); usage_advanced(); 
return 0; 2813*3117ece4Schristos 2814*3117ece4Schristos /* Pause at the end (hidden option) */ 2815*3117ece4Schristos case 'p': main_pause = 1; argument++; break; 2816*3117ece4Schristos 2817*3117ece4Schristos /* Sample compressibility (when no file provided) */ 2818*3117ece4Schristos case 'P': 2819*3117ece4Schristos argument++; 2820*3117ece4Schristos { U32 const proba32 = readU32FromChar(&argument); 2821*3117ece4Schristos compressibility = (double)proba32 / 100.; 2822*3117ece4Schristos } 2823*3117ece4Schristos break; 2824*3117ece4Schristos 2825*3117ece4Schristos /* Run Single conf */ 2826*3117ece4Schristos case 'S': 2827*3117ece4Schristos g_singleRun = 1; 2828*3117ece4Schristos argument++; 2829*3117ece4Schristos for ( ; ; ) { 2830*3117ece4Schristos switch(*argument) 2831*3117ece4Schristos { 2832*3117ece4Schristos case 'w': 2833*3117ece4Schristos argument++; 2834*3117ece4Schristos g_params.vals[wlog_ind] = readU32FromChar(&argument); 2835*3117ece4Schristos continue; 2836*3117ece4Schristos case 'c': 2837*3117ece4Schristos argument++; 2838*3117ece4Schristos g_params.vals[clog_ind] = readU32FromChar(&argument); 2839*3117ece4Schristos continue; 2840*3117ece4Schristos case 'h': 2841*3117ece4Schristos argument++; 2842*3117ece4Schristos g_params.vals[hlog_ind] = readU32FromChar(&argument); 2843*3117ece4Schristos continue; 2844*3117ece4Schristos case 's': 2845*3117ece4Schristos argument++; 2846*3117ece4Schristos g_params.vals[slog_ind] = readU32FromChar(&argument); 2847*3117ece4Schristos continue; 2848*3117ece4Schristos case 'l': /* search length */ 2849*3117ece4Schristos argument++; 2850*3117ece4Schristos g_params.vals[mml_ind] = readU32FromChar(&argument); 2851*3117ece4Schristos continue; 2852*3117ece4Schristos case 't': /* target length */ 2853*3117ece4Schristos argument++; 2854*3117ece4Schristos g_params.vals[tlen_ind] = readU32FromChar(&argument); 2855*3117ece4Schristos continue; 2856*3117ece4Schristos case 'S': /* strategy */ 2857*3117ece4Schristos argument++; 
2858*3117ece4Schristos g_params.vals[strt_ind] = readU32FromChar(&argument); 2859*3117ece4Schristos continue; 2860*3117ece4Schristos case 'f': /* forceAttachDict */ 2861*3117ece4Schristos argument++; 2862*3117ece4Schristos g_params.vals[fadt_ind] = readU32FromChar(&argument); 2863*3117ece4Schristos continue; 2864*3117ece4Schristos case 'L': 2865*3117ece4Schristos { argument++; 2866*3117ece4Schristos cLevelRun = (int)readU32FromChar(&argument); 2867*3117ece4Schristos g_params = emptyParams(); 2868*3117ece4Schristos continue; 2869*3117ece4Schristos } 2870*3117ece4Schristos default : ; 2871*3117ece4Schristos } 2872*3117ece4Schristos break; 2873*3117ece4Schristos } 2874*3117ece4Schristos 2875*3117ece4Schristos break; 2876*3117ece4Schristos 2877*3117ece4Schristos /* target level1 speed objective, in MB/s */ 2878*3117ece4Schristos case 'T': 2879*3117ece4Schristos argument++; 2880*3117ece4Schristos g_target = readU32FromChar(&argument); 2881*3117ece4Schristos break; 2882*3117ece4Schristos 2883*3117ece4Schristos /* cut input into blocks */ 2884*3117ece4Schristos case 'B': 2885*3117ece4Schristos argument++; 2886*3117ece4Schristos g_blockSize = readU32FromChar(&argument); 2887*3117ece4Schristos DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10)); 2888*3117ece4Schristos break; 2889*3117ece4Schristos 2890*3117ece4Schristos /* caps runtime (in seconds) */ 2891*3117ece4Schristos case 't': 2892*3117ece4Schristos argument++; 2893*3117ece4Schristos g_timeLimit_s = readU32FromChar(&argument); 2894*3117ece4Schristos break; 2895*3117ece4Schristos 2896*3117ece4Schristos case 's': 2897*3117ece4Schristos argument++; 2898*3117ece4Schristos separateFiles = 1; 2899*3117ece4Schristos break; 2900*3117ece4Schristos 2901*3117ece4Schristos case 'q': 2902*3117ece4Schristos while (argument[0] == 'q') { argument++; g_displayLevel--; } 2903*3117ece4Schristos break; 2904*3117ece4Schristos 2905*3117ece4Schristos case 'v': 2906*3117ece4Schristos while (argument[0] == 'v') { argument++; 
g_displayLevel++; } 2907*3117ece4Schristos break; 2908*3117ece4Schristos 2909*3117ece4Schristos /* load dictionary file (only applicable for optimizer rn) */ 2910*3117ece4Schristos case 'D': 2911*3117ece4Schristos if(i == argc - 1) { /* last argument, return error. */ 2912*3117ece4Schristos DISPLAY("Dictionary file expected but not given : %d\n", i); 2913*3117ece4Schristos return 1; 2914*3117ece4Schristos } else { 2915*3117ece4Schristos i++; 2916*3117ece4Schristos dictFileName = argv[i]; 2917*3117ece4Schristos argument += strlen(argument); 2918*3117ece4Schristos } 2919*3117ece4Schristos break; 2920*3117ece4Schristos 2921*3117ece4Schristos /* Unknown command */ 2922*3117ece4Schristos default : return badusage(exename); 2923*3117ece4Schristos } 2924*3117ece4Schristos } 2925*3117ece4Schristos continue; 2926*3117ece4Schristos } /* if (argument[0]=='-') */ 2927*3117ece4Schristos 2928*3117ece4Schristos /* first provided filename is input */ 2929*3117ece4Schristos if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } 2930*3117ece4Schristos } 2931*3117ece4Schristos 2932*3117ece4Schristos /* Welcome message */ 2933*3117ece4Schristos DISPLAYLEVEL(2, WELCOME_MESSAGE); 2934*3117ece4Schristos 2935*3117ece4Schristos if (filenamesStart==0) { 2936*3117ece4Schristos if (g_optimizer) { 2937*3117ece4Schristos DISPLAY("Optimizer Expects File\n"); 2938*3117ece4Schristos return 1; 2939*3117ece4Schristos } else { 2940*3117ece4Schristos result = benchSample(compressibility, cLevelRun); 2941*3117ece4Schristos } 2942*3117ece4Schristos } else { 2943*3117ece4Schristos if(separateFiles) { 2944*3117ece4Schristos for(i = 0; i < argc - filenamesStart; i++) { 2945*3117ece4Schristos if (g_optimizer) { 2946*3117ece4Schristos result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); 2947*3117ece4Schristos if(result) { DISPLAY("Error on File %d", i); return result; } 2948*3117ece4Schristos } else { 
2949*3117ece4Schristos result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun); 2950*3117ece4Schristos if(result) { DISPLAY("Error on File %d", i); return result; } 2951*3117ece4Schristos } 2952*3117ece4Schristos } 2953*3117ece4Schristos } else { 2954*3117ece4Schristos if (g_optimizer) { 2955*3117ece4Schristos assert(filenamesStart < argc); 2956*3117ece4Schristos result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); 2957*3117ece4Schristos } else { 2958*3117ece4Schristos result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun); 2959*3117ece4Schristos } 2960*3117ece4Schristos } 2961*3117ece4Schristos } 2962*3117ece4Schristos 2963*3117ece4Schristos if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } 2964*3117ece4Schristos 2965*3117ece4Schristos return result; 2966*3117ece4Schristos } 2967