xref: /netbsd-src/external/bsd/zstd/dist/tests/paramgrill.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos 
12*3117ece4Schristos /*-************************************
13*3117ece4Schristos *  Dependencies
14*3117ece4Schristos **************************************/
15*3117ece4Schristos #include "util.h"      /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */
16*3117ece4Schristos #include <stdlib.h>    /* malloc */
17*3117ece4Schristos #include <stdio.h>     /* fprintf, fopen, ftello64 */
18*3117ece4Schristos #include <string.h>    /* strcmp */
19*3117ece4Schristos #include <math.h>      /* log */
20*3117ece4Schristos #include <assert.h>
21*3117ece4Schristos 
22*3117ece4Schristos #include "timefn.h"    /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
23*3117ece4Schristos #include "mem.h"
24*3117ece4Schristos #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_estimateCCtxSize */
25*3117ece4Schristos #include "zstd.h"
26*3117ece4Schristos #include "datagen.h"
27*3117ece4Schristos #include "xxhash.h"
28*3117ece4Schristos #include "benchfn.h"
29*3117ece4Schristos #include "benchzstd.h"
30*3117ece4Schristos #include "zstd_errors.h"
31*3117ece4Schristos #include "zstd_internal.h"     /* should not be needed */
32*3117ece4Schristos 
33*3117ece4Schristos 
34*3117ece4Schristos /*-************************************
35*3117ece4Schristos *  Constants
36*3117ece4Schristos **************************************/
/* Banner pieces. WELCOME_MESSAGE expands to a printf-style format string
 * followed by its arguments, so it is meant to be dropped directly into a
 * variadic display call. The "%i-bits" field is derived from sizeof(void*). */
#define PROGRAM_DESCRIPTION "ZSTD parameters tester"
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR

#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
#define NB_LEVELS_TRACKED 22   /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */

/* Upper bound applied by BMK_findMaxMem() to the amount of memory it probes :
 * (2 GB - 64 MB) when size_t is 4 bytes wide,
 * otherwise 1 << (bits_in_size_t - 31), i.e. 8 GB for a 64-bit size_t. */
static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));

#define COMPRESSIBILITY_DEFAULT 0.50

/* Limits on the variation search.
 * NOTE(review): the unit of g_maxVariationTime is presumably microseconds
 * (60 * SEC_TO_MICRO) - confirm against its users, which are not in this section. */
static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
static const int g_maxNbVariations = 64;
50*3117ece4Schristos 
51*3117ece4Schristos 
52*3117ece4Schristos /*-************************************
53*3117ece4Schristos *  Macros
54*3117ece4Schristos **************************************/
/* DISPLAY() : unconditional message on stderr. */
#define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : message on stderr, only when g_displayLevel >= n.
 * Expands to a bare `if (...) { ... }` : do not use it as the body of an
 * un-braced if/else, the else would bind to the macro's own `if`. */
#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
/* DEBUGOUTPUT() : message on stderr, only when DEBUG is non-zero. */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

#define TIMED 0
/* DEBUG can be provided by the build; it defaults to 0 (DEBUGOUTPUT silent). */
#ifndef DEBUG
#  define DEBUG 0
#endif

/* Local MIN/MAX replace any definition inherited from the included headers.
 * Both evaluate their selected argument twice : avoid arguments with side effects. */
#undef MIN
#undef MAX
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
#define CUSTOM_LEVEL 99
#define BASE_CLEVEL 1

/* forceAttachDict bounds, as unsigned values : -1 is stored as (U32)-1,
 * which is therefore the largest representable value. */
#define FADT_MIN 0
#define FADT_MAX ((U32)-1)

/* Number of distinct values each parameter can take (see rangetable[]). */
#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
#define MML_RANGE  (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
/* must equal the number of entries of tlen_table[] */
#define TLEN_RANGE  17
#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
/* forceAttachDict takes 3 values : -1, 0, 1 (see rangeMap()) */
#define FADT_RANGE   3

/* CHECKTIME() : makes the *enclosing function* return `r` once more than
 * g_timeLimit_s seconds have elapsed since g_time. */
#define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } }
/* CHECKTIMEGT() : same time test, but assigns `val` to `ret` and jumps to
 * label `_gototag` instead of returning. */
#define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } }

#define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */
87*3117ece4Schristos 
/* Display names of strategies, indexed directly by the strategy value
 * (displayParamVal() reads g_stratName[value]).
 * Entry 0 is a placeholder; all names share the same width for aligned output. */
static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
                "(none)       ", "ZSTD_fast    ", "ZSTD_dfast   ",
                "ZSTD_greedy  ", "ZSTD_lazy    ", "ZSTD_lazy2   ",
                "ZSTD_btlazy2 ", "ZSTD_btopt   ", "ZSTD_btultra ",
                "ZSTD_btultra2"};

/* Candidate targetLength values, in strictly ascending order.
 * rangeMap() indexes this table and invRangeMap() binary-searches it,
 * so it must remain sorted and hold exactly TLEN_RANGE entries. */
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
95*3117ece4Schristos 
96*3117ece4Schristos 
97*3117ece4Schristos /*-************************************
98*3117ece4Schristos *  Setup for Adding new params
99*3117ece4Schristos **************************************/
100*3117ece4Schristos 
/* indices for each of the variables */
/* The order matters : entries up to strt_ind mirror the fields of
 * ZSTD_compressionParameters (see pvalsToCParams() / cParamsToPVals()),
 * and every per-parameter table below is laid out in this same order.
 * Parameters located after strt_ind are not part of ZSTD_compressionParameters. */
typedef enum {
    wlog_ind = 0,
    clog_ind = 1,
    hlog_ind = 2,
    slog_ind = 3,
    mml_ind  = 4,
    tlen_ind = 5,
    strt_ind = 6,
    fadt_ind = 7, /* forceAttachDict */
    NUM_PARAMS = 8
} varInds_t;

/* One value per parameter, indexed by varInds_t.
 * A slot may hold PARAM_UNSET to mean "not specified". */
typedef struct {
    U32 vals[NUM_PARAMS];
} paramValues_t;
117*3117ece4Schristos 
/* All tables below are indexed by varInds_t and must list their entries
 * in the same order as that enum. */

/* minimum value of parameters */
static const U32 mintable[NUM_PARAMS] =
        { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN };

/* maximum value of parameters */
/* comparisons are unsigned : forceAttachDict == -1 is stored as (U32)-1 == FADT_MAX */
static const U32 maxtable[NUM_PARAMS] =
        { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX };

/* # of values parameters can take on */
static const U32 rangetable[NUM_PARAMS] =
        { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE };

/* ZSTD_cctxSetParameter() index to set */
static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] =
        { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict };

/* names of parameters */
static const char* g_paramNames[NUM_PARAMS] =
        { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" };

/* shortened names of parameters */
static const char* g_shortParamNames[NUM_PARAMS] =
        { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" };
141*3117ece4Schristos 
142*3117ece4Schristos /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */
143*3117ece4Schristos static U32 rangeMap(varInds_t param, int ind)
144*3117ece4Schristos {
145*3117ece4Schristos     U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0);
146*3117ece4Schristos     switch(param) {
147*3117ece4Schristos         case wlog_ind: /* using default: triggers -Wswitch-enum */
148*3117ece4Schristos         case clog_ind:
149*3117ece4Schristos         case hlog_ind:
150*3117ece4Schristos         case slog_ind:
151*3117ece4Schristos         case mml_ind:
152*3117ece4Schristos         case strt_ind:
153*3117ece4Schristos             return mintable[param] + uind;
154*3117ece4Schristos         case tlen_ind:
155*3117ece4Schristos             return tlen_table[uind];
156*3117ece4Schristos         case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */
157*3117ece4Schristos             return uind - 1;
158*3117ece4Schristos         case NUM_PARAMS:
159*3117ece4Schristos         default:;
160*3117ece4Schristos     }
161*3117ece4Schristos     DISPLAY("Error, not a valid param\n ");
162*3117ece4Schristos     assert(0);
163*3117ece4Schristos     return (U32)-1;
164*3117ece4Schristos }
165*3117ece4Schristos 
166*3117ece4Schristos /* inverse of rangeMap */
167*3117ece4Schristos static int invRangeMap(varInds_t param, U32 value)
168*3117ece4Schristos {
169*3117ece4Schristos     value = MIN(MAX(mintable[param], value), maxtable[param]);
170*3117ece4Schristos     switch(param) {
171*3117ece4Schristos         case wlog_ind:
172*3117ece4Schristos         case clog_ind:
173*3117ece4Schristos         case hlog_ind:
174*3117ece4Schristos         case slog_ind:
175*3117ece4Schristos         case mml_ind:
176*3117ece4Schristos         case strt_ind:
177*3117ece4Schristos             return (int)(value - mintable[param]);
178*3117ece4Schristos         case tlen_ind: /* bin search */
179*3117ece4Schristos         {
180*3117ece4Schristos             int lo = 0;
181*3117ece4Schristos             int hi = TLEN_RANGE;
182*3117ece4Schristos             while(lo < hi) {
183*3117ece4Schristos                 int mid = (lo + hi) / 2;
184*3117ece4Schristos                 if(tlen_table[mid] < value) {
185*3117ece4Schristos                     lo = mid + 1;
186*3117ece4Schristos                 } if(tlen_table[mid] == value) {
187*3117ece4Schristos                     return mid;
188*3117ece4Schristos                 } else {
189*3117ece4Schristos                     hi = mid;
190*3117ece4Schristos                 }
191*3117ece4Schristos             }
192*3117ece4Schristos             return lo;
193*3117ece4Schristos         }
194*3117ece4Schristos         case fadt_ind:
195*3117ece4Schristos             return (int)value + 1;
196*3117ece4Schristos         case NUM_PARAMS:
197*3117ece4Schristos         default:;
198*3117ece4Schristos     }
199*3117ece4Schristos     DISPLAY("Error, not a valid param\n ");
200*3117ece4Schristos     assert(0);
201*3117ece4Schristos     return -2;
202*3117ece4Schristos }
203*3117ece4Schristos 
204*3117ece4Schristos /* display of params */
205*3117ece4Schristos static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width)
206*3117ece4Schristos {
207*3117ece4Schristos     switch(param) {
208*3117ece4Schristos         case wlog_ind:
209*3117ece4Schristos         case clog_ind:
210*3117ece4Schristos         case hlog_ind:
211*3117ece4Schristos         case slog_ind:
212*3117ece4Schristos         case mml_ind:
213*3117ece4Schristos         case tlen_ind:
214*3117ece4Schristos             if(width) {
215*3117ece4Schristos                 fprintf(f, "%*u", width, value);
216*3117ece4Schristos             } else {
217*3117ece4Schristos                 fprintf(f, "%u", value);
218*3117ece4Schristos             }
219*3117ece4Schristos             break;
220*3117ece4Schristos         case strt_ind:
221*3117ece4Schristos             if(width) {
222*3117ece4Schristos                 fprintf(f, "%*s", width, g_stratName[value]);
223*3117ece4Schristos             } else {
224*3117ece4Schristos                 fprintf(f, "%s", g_stratName[value]);
225*3117ece4Schristos             }
226*3117ece4Schristos             break;
227*3117ece4Schristos         case fadt_ind:   /* force attach dict */
228*3117ece4Schristos             if(width) {
229*3117ece4Schristos                 fprintf(f, "%*d", width, (int)value);
230*3117ece4Schristos             } else {
231*3117ece4Schristos                 fprintf(f, "%d", (int)value);
232*3117ece4Schristos             }
233*3117ece4Schristos             break;
234*3117ece4Schristos         case NUM_PARAMS:
235*3117ece4Schristos         default:
236*3117ece4Schristos             DISPLAY("Error, not a valid param\n ");
237*3117ece4Schristos             assert(0);
238*3117ece4Schristos             break;
239*3117ece4Schristos     }
240*3117ece4Schristos }
241*3117ece4Schristos 
242*3117ece4Schristos 
243*3117ece4Schristos /*-************************************
244*3117ece4Schristos *  Benchmark Parameters/Global Variables
245*3117ece4Schristos **************************************/
246*3117ece4Schristos 
/* General Utility */
/* g_timeLimit_s is compared by CHECKTIME / CHECKTIMEGT against the number
 * of seconds elapsed since g_time. */
static U32 g_timeLimit_s = 99999;   /* about 27 hours */
static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */
static U32 g_blockSize = 0;
static U32 g_rand = 1;     /* state of the FUZ_rand() generator used for parameter variations */

/* Display */
static int g_displayLevel = 3;   /* DISPLAYLEVEL(n, ...) prints when g_displayLevel >= n */
static BYTE g_silenceParams[NUM_PARAMS];   /* can selectively silence some params when displaying them */

/* Mode Selection */
static U32 g_singleRun = 0;
static U32 g_optimizer = 0;
/* when non-zero, feasible() additionally requires cSize <= g_lvltarget.cSize,
 * and compareResultLT() ranks feasible results with resultDistLvl() */
static int g_optmode = 0;

/* For cLevel Table generation */
static U32 g_target = 0;
static U32 g_noSeed = 0;

/* For optimizer */
static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */
static double g_ratioMultiplier = 5.;   /* weight of the ratio gain in resultDistLvl() */
/* percentage applied to the speed targets by relaxTarget() */
static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict  */
static BMK_benchResult_t g_lvltarget;   /* reference result used when g_optmode is set */
271*3117ece4Schristos 
/* Kind of memoization table.
 * NOTE(review): the exact storage scheme behind each kind is implemented
 * outside this section - confirm there before relying on it. */
typedef enum {
    directMap,
    xxhashMap,
    noMemo
} memoTableType_t;

/* Memoization table, together with the list of parameters allowed to vary.
 * paramVariation() receives an array of these, indexed by strategy value,
 * and picks the parameter to change among varArray[0 .. varLen-1]. */
typedef struct {
    memoTableType_t tableType;
    BYTE* table;
    size_t tableLen;
    varInds_t varArray[NUM_PARAMS];
    size_t varLen;   /* number of valid entries in varArray */
} memoTable_t;

/* A benchmark result paired with the parameters that produced it. */
typedef struct {
    BMK_benchResult_t result;
    paramValues_t params;
} winnerInfo_t;

/* Targets a result is checked against by feasible() :
 * speeds are minimums, cMem is a maximum.
 * cMem == (U32)-1 is treated as "no memory constraint" by resultScore(). */
typedef struct {
    U32 cSpeed;  /* bytes / sec */
    U32 dSpeed;
    U32 cMem;    /* bytes */
} constraint_t;

/* Singly linked list node holding one winner. */
typedef struct winner_ll_node winner_ll_node;
struct winner_ll_node {
    winnerInfo_t res;
    winner_ll_node* next;
};

static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */
304*3117ece4Schristos 
305*3117ece4Schristos /*
306*3117ece4Schristos  * Additional Global Variables (Defined Above Use)
307*3117ece4Schristos  * g_level_constraint
308*3117ece4Schristos  * g_alreadyTested
309*3117ece4Schristos  * g_maxTries
310*3117ece4Schristos  * g_clockGranularity
311*3117ece4Schristos  */
312*3117ece4Schristos 
313*3117ece4Schristos 
314*3117ece4Schristos /*-*******************************************************
315*3117ece4Schristos *  General Util Functions
316*3117ece4Schristos *********************************************************/
317*3117ece4Schristos 
318*3117ece4Schristos /* nullified useless params, to ensure count stats */
319*3117ece4Schristos /* cleans up params for memoizing / display */
320*3117ece4Schristos static paramValues_t sanitizeParams(paramValues_t params)
321*3117ece4Schristos {
322*3117ece4Schristos     if (params.vals[strt_ind] == ZSTD_fast)
323*3117ece4Schristos         params.vals[clog_ind] = 0, params.vals[slog_ind] = 0;
324*3117ece4Schristos     if (params.vals[strt_ind] == ZSTD_dfast)
325*3117ece4Schristos         params.vals[slog_ind] = 0;
326*3117ece4Schristos     if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) )
327*3117ece4Schristos         params.vals[tlen_ind] = 0;
328*3117ece4Schristos 
329*3117ece4Schristos     return params;
330*3117ece4Schristos }
331*3117ece4Schristos 
332*3117ece4Schristos static ZSTD_compressionParameters pvalsToCParams(paramValues_t p)
333*3117ece4Schristos {
334*3117ece4Schristos     ZSTD_compressionParameters c;
335*3117ece4Schristos     memset(&c, 0, sizeof(ZSTD_compressionParameters));
336*3117ece4Schristos     c.windowLog = p.vals[wlog_ind];
337*3117ece4Schristos     c.chainLog = p.vals[clog_ind];
338*3117ece4Schristos     c.hashLog = p.vals[hlog_ind];
339*3117ece4Schristos     c.searchLog = p.vals[slog_ind];
340*3117ece4Schristos     c.minMatch = p.vals[mml_ind];
341*3117ece4Schristos     c.targetLength = p.vals[tlen_ind];
342*3117ece4Schristos     c.strategy = p.vals[strt_ind];
343*3117ece4Schristos     /* no forceAttachDict */
344*3117ece4Schristos     return c;
345*3117ece4Schristos }
346*3117ece4Schristos 
347*3117ece4Schristos static paramValues_t cParamsToPVals(ZSTD_compressionParameters c)
348*3117ece4Schristos {
349*3117ece4Schristos     paramValues_t p;
350*3117ece4Schristos     varInds_t i;
351*3117ece4Schristos     p.vals[wlog_ind] = c.windowLog;
352*3117ece4Schristos     p.vals[clog_ind] = c.chainLog;
353*3117ece4Schristos     p.vals[hlog_ind] = c.hashLog;
354*3117ece4Schristos     p.vals[slog_ind] = c.searchLog;
355*3117ece4Schristos     p.vals[mml_ind]  = c.minMatch;
356*3117ece4Schristos     p.vals[tlen_ind] = c.targetLength;
357*3117ece4Schristos     p.vals[strt_ind] = c.strategy;
358*3117ece4Schristos 
359*3117ece4Schristos     /* set all other params to their minimum value */
360*3117ece4Schristos     for (i = strt_ind + 1; i < NUM_PARAMS; i++) {
361*3117ece4Schristos         p.vals[i] = mintable[i];
362*3117ece4Schristos     }
363*3117ece4Schristos     return p;
364*3117ece4Schristos }
365*3117ece4Schristos 
366*3117ece4Schristos /* equivalent of ZSTD_adjustCParams for paramValues_t */
367*3117ece4Schristos static paramValues_t
368*3117ece4Schristos adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize)
369*3117ece4Schristos {
370*3117ece4Schristos     paramValues_t ot = p;
371*3117ece4Schristos     varInds_t i;
372*3117ece4Schristos     p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize));
373*3117ece4Schristos     if (!dictSize) { p.vals[fadt_ind] = 0; }
374*3117ece4Schristos     /* retain value of all other parameters */
375*3117ece4Schristos     for(i = strt_ind + 1; i < NUM_PARAMS; i++) {
376*3117ece4Schristos         p.vals[i] = ot.vals[i];
377*3117ece4Schristos     }
378*3117ece4Schristos     return p;
379*3117ece4Schristos }
380*3117ece4Schristos 
381*3117ece4Schristos static size_t BMK_findMaxMem(U64 requiredMem)
382*3117ece4Schristos {
383*3117ece4Schristos     size_t const step = 64 MB;
384*3117ece4Schristos     void* testmem = NULL;
385*3117ece4Schristos 
386*3117ece4Schristos     requiredMem = (((requiredMem >> 26) + 1) << 26);
387*3117ece4Schristos     if (requiredMem > maxMemory) requiredMem = maxMemory;
388*3117ece4Schristos 
389*3117ece4Schristos     requiredMem += 2 * step;
390*3117ece4Schristos     while (!testmem && requiredMem > 0) {
391*3117ece4Schristos         testmem = malloc ((size_t)requiredMem);
392*3117ece4Schristos         requiredMem -= step;
393*3117ece4Schristos     }
394*3117ece4Schristos 
395*3117ece4Schristos     free (testmem);
396*3117ece4Schristos     return (size_t) requiredMem;
397*3117ece4Schristos }
398*3117ece4Schristos 
399*3117ece4Schristos /* accuracy in seconds only, span can be multiple years */
400*3117ece4Schristos static U32 BMK_timeSpan_s(const UTIL_time_t tStart)
401*3117ece4Schristos {
402*3117ece4Schristos     return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL);
403*3117ece4Schristos }
404*3117ece4Schristos 
405*3117ece4Schristos static U32 FUZ_rotl32(U32 x, U32 r)
406*3117ece4Schristos {
407*3117ece4Schristos     return ((x << r) | (x >> (32 - r)));
408*3117ece4Schristos }
409*3117ece4Schristos 
410*3117ece4Schristos static U32 FUZ_rand(U32* src)
411*3117ece4Schristos {
412*3117ece4Schristos     const U32 prime1 = 2654435761U;
413*3117ece4Schristos     const U32 prime2 = 2246822519U;
414*3117ece4Schristos     U32 rand32 = *src;
415*3117ece4Schristos     rand32 *= prime1;
416*3117ece4Schristos     rand32 += prime2;
417*3117ece4Schristos     rand32  = FUZ_rotl32(rand32, 13);
418*3117ece4Schristos     *src = rand32;
419*3117ece4Schristos     return rand32 >> 5;
420*3117ece4Schristos }
421*3117ece4Schristos 
422*3117ece4Schristos #define BOUNDCHECK(val,min,max) {                     \
423*3117ece4Schristos     if (((val)<(min)) | ((val)>(max))) {              \
424*3117ece4Schristos         DISPLAY("INVALID PARAMETER CONSTRAINTS\n");   \
425*3117ece4Schristos         return 0;                                     \
426*3117ece4Schristos }   }
427*3117ece4Schristos 
428*3117ece4Schristos static int paramValid(const paramValues_t paramTarget)
429*3117ece4Schristos {
430*3117ece4Schristos     U32 i;
431*3117ece4Schristos     for(i = 0; i < NUM_PARAMS; i++) {
432*3117ece4Schristos         BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]);
433*3117ece4Schristos     }
434*3117ece4Schristos     return 1;
435*3117ece4Schristos }
436*3117ece4Schristos 
437*3117ece4Schristos /* cParamUnsetMin() :
438*3117ece4Schristos  * if any parameter in paramTarget is not yet set,
439*3117ece4Schristos  * it will receive its corresponding minimal value.
440*3117ece4Schristos  * This function never fails */
441*3117ece4Schristos static paramValues_t cParamUnsetMin(paramValues_t paramTarget)
442*3117ece4Schristos {
443*3117ece4Schristos     varInds_t vi;
444*3117ece4Schristos     for (vi = 0; vi < NUM_PARAMS; vi++) {
445*3117ece4Schristos         if (paramTarget.vals[vi] == PARAM_UNSET) {
446*3117ece4Schristos             paramTarget.vals[vi] = mintable[vi];
447*3117ece4Schristos         }
448*3117ece4Schristos     }
449*3117ece4Schristos     return paramTarget;
450*3117ece4Schristos }
451*3117ece4Schristos 
452*3117ece4Schristos static paramValues_t emptyParams(void)
453*3117ece4Schristos {
454*3117ece4Schristos     U32 i;
455*3117ece4Schristos     paramValues_t p;
456*3117ece4Schristos     for(i = 0; i < NUM_PARAMS; i++) {
457*3117ece4Schristos         p.vals[i] = PARAM_UNSET;
458*3117ece4Schristos     }
459*3117ece4Schristos     return p;
460*3117ece4Schristos }
461*3117ece4Schristos 
462*3117ece4Schristos static winnerInfo_t initWinnerInfo(const paramValues_t p)
463*3117ece4Schristos {
464*3117ece4Schristos     winnerInfo_t w1;
465*3117ece4Schristos     w1.result.cSpeed = 0;
466*3117ece4Schristos     w1.result.dSpeed = 0;
467*3117ece4Schristos     w1.result.cMem = (size_t)-1;
468*3117ece4Schristos     w1.result.cSize = (size_t)-1;
469*3117ece4Schristos     w1.params = p;
470*3117ece4Schristos     return w1;
471*3117ece4Schristos }
472*3117ece4Schristos 
473*3117ece4Schristos static paramValues_t
474*3117ece4Schristos overwriteParams(paramValues_t base, const paramValues_t mask)
475*3117ece4Schristos {
476*3117ece4Schristos     U32 i;
477*3117ece4Schristos     for(i = 0; i < NUM_PARAMS; i++) {
478*3117ece4Schristos         if(mask.vals[i] != PARAM_UNSET) {
479*3117ece4Schristos             base.vals[i] = mask.vals[i];
480*3117ece4Schristos         }
481*3117ece4Schristos     }
482*3117ece4Schristos     return base;
483*3117ece4Schristos }
484*3117ece4Schristos 
485*3117ece4Schristos static void
486*3117ece4Schristos paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr)
487*3117ece4Schristos {
488*3117ece4Schristos     ptr->vals[paramIndex] = rangeMap(paramIndex,
489*3117ece4Schristos                                      invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt);
490*3117ece4Schristos }
491*3117ece4Schristos 
492*3117ece4Schristos /* varies ptr by nbChanges respecting varyParams*/
493*3117ece4Schristos static void
494*3117ece4Schristos paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges)
495*3117ece4Schristos {
496*3117ece4Schristos     paramValues_t p;
497*3117ece4Schristos     int validated = 0;
498*3117ece4Schristos     while (!validated) {
499*3117ece4Schristos         U32 i;
500*3117ece4Schristos         p = *ptr;
501*3117ece4Schristos         for (i = 0 ; i < nbChanges ; i++) {
502*3117ece4Schristos             const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1);
503*3117ece4Schristos             paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1],
504*3117ece4Schristos                           (int)((changeID & 1) << 1) - 1,
505*3117ece4Schristos                           &p);
506*3117ece4Schristos         }
507*3117ece4Schristos         validated = paramValid(p);
508*3117ece4Schristos     }
509*3117ece4Schristos     *ptr = p;
510*3117ece4Schristos }
511*3117ece4Schristos 
512*3117ece4Schristos /* Completely random parameter selection */
513*3117ece4Schristos static paramValues_t randomParams(void)
514*3117ece4Schristos {
515*3117ece4Schristos     varInds_t v; paramValues_t p;
516*3117ece4Schristos     for(v = 0; v < NUM_PARAMS; v++) {
517*3117ece4Schristos         p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v]));
518*3117ece4Schristos     }
519*3117ece4Schristos     return p;
520*3117ece4Schristos }
521*3117ece4Schristos 
522*3117ece4Schristos static U64 g_clockGranularity = 100000000ULL;
523*3117ece4Schristos 
524*3117ece4Schristos static void init_clockGranularity(void)
525*3117ece4Schristos {
526*3117ece4Schristos     UTIL_time_t const clockStart = UTIL_getTime();
527*3117ece4Schristos     U64 el1 = 0, el2 = 0;
528*3117ece4Schristos     int i = 0;
529*3117ece4Schristos     do {
530*3117ece4Schristos         el1 = el2;
531*3117ece4Schristos         el2 = UTIL_clockSpanNano(clockStart);
532*3117ece4Schristos         if(el1 < el2) {
533*3117ece4Schristos             U64 iv = el2 - el1;
534*3117ece4Schristos             if(g_clockGranularity > iv) {
535*3117ece4Schristos                 g_clockGranularity = iv;
536*3117ece4Schristos                 i = 0;
537*3117ece4Schristos             } else {
538*3117ece4Schristos                 i++;
539*3117ece4Schristos             }
540*3117ece4Schristos         }
541*3117ece4Schristos     } while(i < 10);
542*3117ece4Schristos     DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity);
543*3117ece4Schristos }
544*3117ece4Schristos 
545*3117ece4Schristos /*-************************************
546*3117ece4Schristos *  Optimizer Util Functions
547*3117ece4Schristos **************************************/
548*3117ece4Schristos 
549*3117ece4Schristos /* checks results are feasible */
550*3117ece4Schristos static int feasible(const BMK_benchResult_t results, const constraint_t target) {
551*3117ece4Schristos     return (results.cSpeed >= target.cSpeed)
552*3117ece4Schristos         && (results.dSpeed >= target.dSpeed)
553*3117ece4Schristos         && (results.cMem <= target.cMem)
554*3117ece4Schristos         && (!g_optmode || results.cSize <= g_lvltarget.cSize);
555*3117ece4Schristos }
556*3117ece4Schristos 
557*3117ece4Schristos /* hill climbing value for part 1 */
558*3117ece4Schristos /* Scoring here is a linear reward for all set constraints normalized between 0 and 1
559*3117ece4Schristos  * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic
560*3117ece4Schristos  * bonus to exceeding the constraint value. We also give linear ratio for compression ratio.
561*3117ece4Schristos  * The constant factors are experimental.
562*3117ece4Schristos  */
563*3117ece4Schristos static double
564*3117ece4Schristos resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target)
565*3117ece4Schristos {
566*3117ece4Schristos     double cs = 0., ds = 0., rt, cm = 0.;
567*3117ece4Schristos     const double r1 = 1, r2 = 0.1, rtr = 0.5;
568*3117ece4Schristos     double ret;
569*3117ece4Schristos     if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; }
570*3117ece4Schristos     if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; }
571*3117ece4Schristos     if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; }
572*3117ece4Schristos     rt = ((double)srcSize / (double)res.cSize);
573*3117ece4Schristos 
574*3117ece4Schristos     ret = (MIN(1, cs) + MIN(1, ds)  + MIN(1, cm))*r1 + rt * rtr +
575*3117ece4Schristos          (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2;
576*3117ece4Schristos 
577*3117ece4Schristos     return ret;
578*3117ece4Schristos }
579*3117ece4Schristos 
580*3117ece4Schristos /* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */
581*3117ece4Schristos static double
582*3117ece4Schristos resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes)
583*3117ece4Schristos {
584*3117ece4Schristos     double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1;
585*3117ece4Schristos     double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1;
586*3117ece4Schristos     if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) {
587*3117ece4Schristos         return 0.0;
588*3117ece4Schristos     }
589*3117ece4Schristos     return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1;
590*3117ece4Schristos }
591*3117ece4Schristos 
592*3117ece4Schristos /* return true if r2 strictly better than r1 */
593*3117ece4Schristos static int
594*3117ece4Schristos compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize)
595*3117ece4Schristos {
596*3117ece4Schristos     if(feasible(result1, target) && feasible(result2, target)) {
597*3117ece4Schristos         if(g_optmode) {
598*3117ece4Schristos             return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget);
599*3117ece4Schristos         } else {
600*3117ece4Schristos             return (result1.cSize > result2.cSize)
601*3117ece4Schristos                 || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
602*3117ece4Schristos                 || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
603*3117ece4Schristos         }
604*3117ece4Schristos     }
605*3117ece4Schristos     return feasible(result2, target)
606*3117ece4Schristos         || (!feasible(result1, target)
607*3117ece4Schristos             && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
608*3117ece4Schristos }
609*3117ece4Schristos 
610*3117ece4Schristos static constraint_t relaxTarget(constraint_t target) {
611*3117ece4Schristos     target.cMem = (U32)-1;
612*3117ece4Schristos     target.cSpeed = (target.cSpeed * g_strictness) / 100;
613*3117ece4Schristos     target.dSpeed = (target.dSpeed * g_strictness) / 100;
614*3117ece4Schristos     return target;
615*3117ece4Schristos }
616*3117ece4Schristos 
617*3117ece4Schristos static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize)
618*3117ece4Schristos {
619*3117ece4Schristos     varInds_t v;
620*3117ece4Schristos     for(v = 0; v < NUM_PARAMS; v++) {
621*3117ece4Schristos         if(pc->vals[v] != PARAM_UNSET) {
622*3117ece4Schristos             U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]);
623*3117ece4Schristos             if(newval != pc->vals[v]) {
624*3117ece4Schristos                 pc->vals[v] = newval;
625*3117ece4Schristos                 DISPLAY("Warning: parameter %s not in valid range, adjusting to ",
626*3117ece4Schristos                         g_paramNames[v]);
627*3117ece4Schristos                 displayParamVal(stderr, v, newval, 0); DISPLAY("\n");
628*3117ece4Schristos             }
629*3117ece4Schristos         }
630*3117ece4Schristos     }
631*3117ece4Schristos 
632*3117ece4Schristos     if(pc->vals[wlog_ind] != PARAM_UNSET) {
633*3117ece4Schristos 
634*3117ece4Schristos         U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1;
635*3117ece4Schristos         /* edge case of highBit not working for 0 */
636*3117ece4Schristos 
637*3117ece4Schristos         if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) {
638*3117ece4Schristos             U32 adjust = MAX(mintable[wlog_ind], sshb);
639*3117ece4Schristos             if(adjust != pc->vals[wlog_ind]) {
640*3117ece4Schristos                 pc->vals[wlog_ind] = adjust;
641*3117ece4Schristos                 DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n",
642*3117ece4Schristos                         (unsigned)pc->vals[wlog_ind]);
643*3117ece4Schristos             }
644*3117ece4Schristos         }
645*3117ece4Schristos     }
646*3117ece4Schristos 
647*3117ece4Schristos     if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
648*3117ece4Schristos         U32 maxclog;
649*3117ece4Schristos         if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) {
650*3117ece4Schristos             maxclog = pc->vals[wlog_ind] + 1;
651*3117ece4Schristos         } else {
652*3117ece4Schristos             maxclog = pc->vals[wlog_ind];
653*3117ece4Schristos         }
654*3117ece4Schristos 
655*3117ece4Schristos         if(pc->vals[clog_ind] > maxclog) {
656*3117ece4Schristos             pc->vals[clog_ind] = maxclog;
657*3117ece4Schristos             DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n",
658*3117ece4Schristos                     (unsigned)pc->vals[clog_ind]);
659*3117ece4Schristos         }
660*3117ece4Schristos     }
661*3117ece4Schristos 
662*3117ece4Schristos     if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) {
663*3117ece4Schristos         if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) {
664*3117ece4Schristos             pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1;
665*3117ece4Schristos             DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n",
666*3117ece4Schristos                     (unsigned)pc->vals[hlog_ind]);
667*3117ece4Schristos         }
668*3117ece4Schristos     }
669*3117ece4Schristos 
670*3117ece4Schristos     if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
671*3117ece4Schristos         if(pc->vals[slog_ind] > pc->vals[clog_ind]) {
672*3117ece4Schristos             pc->vals[clog_ind] = pc->vals[slog_ind];
673*3117ece4Schristos             DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n",
674*3117ece4Schristos                     (unsigned)pc->vals[slog_ind]);
675*3117ece4Schristos         }
676*3117ece4Schristos     }
677*3117ece4Schristos }
678*3117ece4Schristos 
679*3117ece4Schristos static int
680*3117ece4Schristos redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize)
681*3117ece4Schristos {
682*3117ece4Schristos     return
683*3117ece4Schristos        (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */
684*3117ece4Schristos     || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */
685*3117ece4Schristos     || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/
686*3117ece4Schristos     || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */
687*3117ece4Schristos     || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */
688*3117ece4Schristos }
689*3117ece4Schristos 
690*3117ece4Schristos 
691*3117ece4Schristos /*-************************************
692*3117ece4Schristos *  Display Functions
693*3117ece4Schristos **************************************/
694*3117ece4Schristos 
695*3117ece4Schristos /* BMK_paramValues_into_commandLine() :
696*3117ece4Schristos  * transform a set of parameters paramValues_t
697*3117ece4Schristos  * into a command line compatible with `zstd` syntax
698*3117ece4Schristos  * and writes it into FILE* f.
699*3117ece4Schristos  * f must be already opened and writable */
700*3117ece4Schristos static void
701*3117ece4Schristos BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params)
702*3117ece4Schristos {
703*3117ece4Schristos     varInds_t v;
704*3117ece4Schristos     int first = 1;
705*3117ece4Schristos     fprintf(f,"--zstd=");
706*3117ece4Schristos     for (v = 0; v < NUM_PARAMS; v++) {
707*3117ece4Schristos         if (g_silenceParams[v]) { continue; }
708*3117ece4Schristos         if (!first) { fprintf(f, ","); }
709*3117ece4Schristos         fprintf(f,"%s=", g_paramNames[v]);
710*3117ece4Schristos 
711*3117ece4Schristos         if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); }
712*3117ece4Schristos         else { displayParamVal(f, v, params.vals[v], 0); }
713*3117ece4Schristos         first = 0;
714*3117ece4Schristos     }
715*3117ece4Schristos     fprintf(f, "\n");
716*3117ece4Schristos }
717*3117ece4Schristos 
718*3117ece4Schristos 
719*3117ece4Schristos /* comparison function: */
720*3117ece4Schristos /* strictly better, strictly worse, equal, speed-side adv, size-side adv */
721*3117ece4Schristos #define WORSE_RESULT 0
722*3117ece4Schristos #define BETTER_RESULT 1
723*3117ece4Schristos #define ERROR_RESULT 2
724*3117ece4Schristos 
725*3117ece4Schristos #define SPEED_RESULT 4
726*3117ece4Schristos #define SIZE_RESULT 5
727*3117ece4Schristos /* maybe have epsilon-eq to limit table size? */
728*3117ece4Schristos static int
729*3117ece4Schristos speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2)
730*3117ece4Schristos {
731*3117ece4Schristos     if(r1.cSpeed < r2.cSpeed) {
732*3117ece4Schristos         if(r1.cSize >= r2.cSize) {
733*3117ece4Schristos             return BETTER_RESULT;
734*3117ece4Schristos         }
735*3117ece4Schristos         return SPEED_RESULT; /* r2 is smaller but not faster. */
736*3117ece4Schristos     } else {
737*3117ece4Schristos         if(r1.cSize <= r2.cSize) {
738*3117ece4Schristos             return WORSE_RESULT;
739*3117ece4Schristos         }
740*3117ece4Schristos         return SIZE_RESULT; /* r2 is faster but not smaller */
741*3117ece4Schristos     }
742*3117ece4Schristos }
743*3117ece4Schristos 
744*3117ece4Schristos /* 0 for insertion, 1 for no insert */
745*3117ece4Schristos /* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */
746*3117ece4Schristos static int
747*3117ece4Schristos insertWinner(const winnerInfo_t w, const constraint_t targetConstraints)
748*3117ece4Schristos {
749*3117ece4Schristos     BMK_benchResult_t r = w.result;
750*3117ece4Schristos     winner_ll_node* cur_node = g_winners;
751*3117ece4Schristos     /* first node to insert */
752*3117ece4Schristos     if(!feasible(r, targetConstraints)) {
753*3117ece4Schristos         return 1;
754*3117ece4Schristos     }
755*3117ece4Schristos 
756*3117ece4Schristos     if(g_winners == NULL) {
757*3117ece4Schristos         winner_ll_node* first_node = malloc(sizeof(winner_ll_node));
758*3117ece4Schristos         if(first_node == NULL) {
759*3117ece4Schristos             return 1;
760*3117ece4Schristos         }
761*3117ece4Schristos         first_node->next = NULL;
762*3117ece4Schristos         first_node->res = w;
763*3117ece4Schristos         g_winners = first_node;
764*3117ece4Schristos         return 0;
765*3117ece4Schristos     }
766*3117ece4Schristos 
767*3117ece4Schristos     while(cur_node->next != NULL) {
768*3117ece4Schristos         switch(speedSizeCompare(cur_node->res.result, r)) {
769*3117ece4Schristos             case WORSE_RESULT:
770*3117ece4Schristos             {
771*3117ece4Schristos                 return 1; /* never insert if better */
772*3117ece4Schristos             }
773*3117ece4Schristos             case BETTER_RESULT:
774*3117ece4Schristos             {
775*3117ece4Schristos                 winner_ll_node* tmp;
776*3117ece4Schristos                 cur_node->res = cur_node->next->res;
777*3117ece4Schristos                 tmp = cur_node->next;
778*3117ece4Schristos                 cur_node->next = cur_node->next->next;
779*3117ece4Schristos                 free(tmp);
780*3117ece4Schristos                 break;
781*3117ece4Schristos             }
782*3117ece4Schristos             case SIZE_RESULT:
783*3117ece4Schristos             {
784*3117ece4Schristos                 cur_node = cur_node->next;
785*3117ece4Schristos                 break;
786*3117ece4Schristos             }
787*3117ece4Schristos             case SPEED_RESULT: /* insert after first size result, then return */
788*3117ece4Schristos             {
789*3117ece4Schristos                 winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
790*3117ece4Schristos                 if(newnode == NULL) {
791*3117ece4Schristos                     return 1;
792*3117ece4Schristos                 }
793*3117ece4Schristos                 newnode->res = cur_node->res;
794*3117ece4Schristos                 cur_node->res = w;
795*3117ece4Schristos                 newnode->next = cur_node->next;
796*3117ece4Schristos                 cur_node->next = newnode;
797*3117ece4Schristos                 return 0;
798*3117ece4Schristos             }
799*3117ece4Schristos         }
800*3117ece4Schristos 
801*3117ece4Schristos     }
802*3117ece4Schristos 
803*3117ece4Schristos     assert(cur_node->next == NULL);
804*3117ece4Schristos     switch(speedSizeCompare(cur_node->res.result, r)) {
805*3117ece4Schristos         case WORSE_RESULT:
806*3117ece4Schristos         {
807*3117ece4Schristos             return 1; /* never insert if better */
808*3117ece4Schristos         }
809*3117ece4Schristos         case BETTER_RESULT:
810*3117ece4Schristos         {
811*3117ece4Schristos             cur_node->res = w;
812*3117ece4Schristos             return 0;
813*3117ece4Schristos         }
814*3117ece4Schristos         case SIZE_RESULT:
815*3117ece4Schristos         {
816*3117ece4Schristos             winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
817*3117ece4Schristos             if(newnode == NULL) {
818*3117ece4Schristos                 return 1;
819*3117ece4Schristos             }
820*3117ece4Schristos             newnode->res = w;
821*3117ece4Schristos             newnode->next = NULL;
822*3117ece4Schristos             cur_node->next = newnode;
823*3117ece4Schristos             return 0;
824*3117ece4Schristos         }
825*3117ece4Schristos         case SPEED_RESULT: /* insert before first size result, then return */
826*3117ece4Schristos         {
827*3117ece4Schristos             winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
828*3117ece4Schristos             if(newnode == NULL) {
829*3117ece4Schristos                 return 1;
830*3117ece4Schristos             }
831*3117ece4Schristos             newnode->res = cur_node->res;
832*3117ece4Schristos             cur_node->res = w;
833*3117ece4Schristos             newnode->next = cur_node->next;
834*3117ece4Schristos             cur_node->next = newnode;
835*3117ece4Schristos             return 0;
836*3117ece4Schristos         }
837*3117ece4Schristos         default:
838*3117ece4Schristos             return 1;
839*3117ece4Schristos     }
840*3117ece4Schristos }
841*3117ece4Schristos 
842*3117ece4Schristos static void
843*3117ece4Schristos BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize)
844*3117ece4Schristos {
845*3117ece4Schristos     varInds_t v;
846*3117ece4Schristos     int first = 1;
847*3117ece4Schristos     res.params = cParamUnsetMin(res.params);
848*3117ece4Schristos     fprintf(f, "    {");
849*3117ece4Schristos     for (v = 0; v < NUM_PARAMS; v++) {
850*3117ece4Schristos         if (g_silenceParams[v]) { continue; }
851*3117ece4Schristos         if (!first) { fprintf(f, ","); }
852*3117ece4Schristos         displayParamVal(f, v, res.params.vals[v], 3);
853*3117ece4Schristos         first = 0;
854*3117ece4Schristos     }
855*3117ece4Schristos 
856*3117ece4Schristos     {   double const ratio = res.result.cSize ?
857*3117ece4Schristos                             (double)srcSize / (double)res.result.cSize : 0;
858*3117ece4Schristos         double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
859*3117ece4Schristos         double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
860*3117ece4Schristos 
861*3117ece4Schristos         fprintf(f, " },     /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
862*3117ece4Schristos                             ratio, cSpeedMBps, dSpeedMBps);
863*3117ece4Schristos     }
864*3117ece4Schristos }
865*3117ece4Schristos 
866*3117ece4Schristos /* Writes to f the results of a parameter benchmark */
867*3117ece4Schristos /* when used with --optimize, will only print results better than previously discovered */
868*3117ece4Schristos static void
869*3117ece4Schristos BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize)
870*3117ece4Schristos {
871*3117ece4Schristos     char lvlstr[15] = "Custom Level";
872*3117ece4Schristos     winnerInfo_t w;
873*3117ece4Schristos     w.params = params;
874*3117ece4Schristos     w.result = result;
875*3117ece4Schristos 
876*3117ece4Schristos     fprintf(f, "\r%79s\r", "");
877*3117ece4Schristos 
878*3117ece4Schristos     if(cLevel != CUSTOM_LEVEL) {
879*3117ece4Schristos         snprintf(lvlstr, 15, "  Level %2d  ", cLevel);
880*3117ece4Schristos     }
881*3117ece4Schristos 
882*3117ece4Schristos     if(TIMED) {
883*3117ece4Schristos         const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC;
884*3117ece4Schristos         const U64 time_ns = UTIL_clockSpanNano(g_time);
885*3117ece4Schristos         const U64 minutes = time_ns / mn_in_ns;
886*3117ece4Schristos         fprintf(f, "%1lu:%2lu:%05.2f - ",
887*3117ece4Schristos                 (unsigned long) minutes / 60,
888*3117ece4Schristos                 (unsigned long) minutes % 60,
889*3117ece4Schristos                 (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC );
890*3117ece4Schristos     }
891*3117ece4Schristos 
892*3117ece4Schristos     fprintf(f, "/* %s */   ", lvlstr);
893*3117ece4Schristos     BMK_displayOneResult(f, w, srcSize);
894*3117ece4Schristos }
895*3117ece4Schristos 
896*3117ece4Schristos static void
897*3117ece4Schristos BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize)
898*3117ece4Schristos {
899*3117ece4Schristos     /* global winner used for constraints */
900*3117ece4Schristos                                     /* cSize, cSpeed, dSpeed, cMem */
901*3117ece4Schristos     static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL },
902*3117ece4Schristos                                      { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } }
903*3117ece4Schristos                                    };
904*3117ece4Schristos     if ( DEBUG
905*3117ece4Schristos       || compareResultLT(g_winner.result, result, targetConstraints, srcSize)
906*3117ece4Schristos       || g_displayLevel >= 4) {
907*3117ece4Schristos         if ( DEBUG
908*3117ece4Schristos           && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
909*3117ece4Schristos             DISPLAY("New Winner: \n");
910*3117ece4Schristos         }
911*3117ece4Schristos 
912*3117ece4Schristos         if(g_displayLevel >= 2) {
913*3117ece4Schristos             BMK_printWinner(f, cLevel, result, params, srcSize);
914*3117ece4Schristos         }
915*3117ece4Schristos 
916*3117ece4Schristos         if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
917*3117ece4Schristos             if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); }
918*3117ece4Schristos             g_winner.result = result;
919*3117ece4Schristos             g_winner.params = params;
920*3117ece4Schristos         }
921*3117ece4Schristos     }
922*3117ece4Schristos 
923*3117ece4Schristos     if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) {
924*3117ece4Schristos         winnerInfo_t w;
925*3117ece4Schristos         winner_ll_node* n;
926*3117ece4Schristos         w.result = result;
927*3117ece4Schristos         w.params = params;
928*3117ece4Schristos         insertWinner(w, targetConstraints);
929*3117ece4Schristos 
930*3117ece4Schristos         if(!DEBUG) { fprintf(f, "\033c"); }
931*3117ece4Schristos         fprintf(f, "\n");
932*3117ece4Schristos 
933*3117ece4Schristos         /* the table */
934*3117ece4Schristos         fprintf(f, "================================\n");
935*3117ece4Schristos         for(n = g_winners; n != NULL; n = n->next) {
936*3117ece4Schristos             BMK_displayOneResult(f, n->res, srcSize);
937*3117ece4Schristos         }
938*3117ece4Schristos         fprintf(f, "================================\n");
939*3117ece4Schristos         fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
940*3117ece4Schristos             (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
941*3117ece4Schristos 
942*3117ece4Schristos 
943*3117ece4Schristos         fprintf(f, "Overall Winner: \n");
944*3117ece4Schristos         BMK_displayOneResult(f, g_winner, srcSize);
945*3117ece4Schristos         BMK_paramValues_into_commandLine(f, g_winner.params);
946*3117ece4Schristos 
947*3117ece4Schristos         fprintf(f, "Latest BMK: \n");\
948*3117ece4Schristos         BMK_displayOneResult(f, w, srcSize);
949*3117ece4Schristos     }
950*3117ece4Schristos }
951*3117ece4Schristos 
952*3117ece4Schristos 
953*3117ece4Schristos /* BMK_print_cLevelEntry() :
954*3117ece4Schristos  * Writes one cLevelTable entry, for one level.
955*3117ece4Schristos  * f must exist, be already opened, and be seekable.
956*3117ece4Schristos  * this function cannot error.
957*3117ece4Schristos  */
958*3117ece4Schristos static void
959*3117ece4Schristos BMK_print_cLevelEntry(FILE* f, const int cLevel,
960*3117ece4Schristos                       paramValues_t params,
961*3117ece4Schristos                       const BMK_benchResult_t result, const size_t srcSize)
962*3117ece4Schristos {
963*3117ece4Schristos     varInds_t v;
964*3117ece4Schristos     int first = 1;
965*3117ece4Schristos 
966*3117ece4Schristos     assert(cLevel >= 0);
967*3117ece4Schristos     assert(cLevel <= NB_LEVELS_TRACKED);
968*3117ece4Schristos     params = cParamUnsetMin(params);
969*3117ece4Schristos 
970*3117ece4Schristos     fprintf(f, "   {");
971*3117ece4Schristos     /* print cParams.
972*3117ece4Schristos      * assumption : all cParams are present and in order in the following range */
973*3117ece4Schristos     for (v = 0; v <= strt_ind; v++) {
974*3117ece4Schristos         if (!first) { fprintf(f, ","); }
975*3117ece4Schristos         displayParamVal(f, v, params.vals[v], 3);
976*3117ece4Schristos         first = 0;
977*3117ece4Schristos     }
978*3117ece4Schristos     /* print comment */
979*3117ece4Schristos     {   double const ratio = result.cSize ?
980*3117ece4Schristos                             (double)srcSize / (double)result.cSize : 0;
981*3117ece4Schristos         double const cSpeedMBps = (double)result.cSpeed / MB_UNIT;
982*3117ece4Schristos         double const dSpeedMBps = (double)result.dSpeed / MB_UNIT;
983*3117ece4Schristos 
984*3117ece4Schristos         fprintf(f, " },   /* level %2i:  R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
985*3117ece4Schristos                              cLevel, ratio, cSpeedMBps, dSpeedMBps);
986*3117ece4Schristos     }
987*3117ece4Schristos }
988*3117ece4Schristos 
989*3117ece4Schristos 
990*3117ece4Schristos /* BMK_print_cLevelTable() :
991*3117ece4Schristos  * print candidate compression table into proposed FILE* f.
992*3117ece4Schristos  * f must exist, be already opened, and be seekable.
993*3117ece4Schristos  * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
994*3117ece4Schristos  * this function cannot error.
995*3117ece4Schristos  */
996*3117ece4Schristos static void
997*3117ece4Schristos BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize)
998*3117ece4Schristos {
999*3117ece4Schristos     int cLevel;
1000*3117ece4Schristos 
1001*3117ece4Schristos     fprintf(f, "\n /* Proposed configurations : */ \n");
1002*3117ece4Schristos     fprintf(f, "   /* W,  C,  H,  S,  L,  T, strat */ \n");
1003*3117ece4Schristos 
1004*3117ece4Schristos     for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++)
1005*3117ece4Schristos         BMK_print_cLevelEntry(f,
1006*3117ece4Schristos                               cLevel, winners[cLevel].params,
1007*3117ece4Schristos                               winners[cLevel].result, srcSize);
1008*3117ece4Schristos }
1009*3117ece4Schristos 
1010*3117ece4Schristos 
1011*3117ece4Schristos /* BMK_saveAndPrint_cLevelTable() :
1012*3117ece4Schristos  * save candidate compression table into FILE* f,
1013*3117ece4Schristos  * and then to stdout.
1014*3117ece4Schristos  * f must exist, be already opened, and be seekable.
1015*3117ece4Schristos  * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
1016*3117ece4Schristos  * this function cannot error.
1017*3117ece4Schristos  */
1018*3117ece4Schristos static void
1019*3117ece4Schristos BMK_saveAndPrint_cLevelTable(FILE* const f,
1020*3117ece4Schristos                        const winnerInfo_t* winners,
1021*3117ece4Schristos                        const size_t srcSize)
1022*3117ece4Schristos {
1023*3117ece4Schristos     fseek(f, 0, SEEK_SET);
1024*3117ece4Schristos     BMK_print_cLevelTable(f, winners, srcSize);
1025*3117ece4Schristos     fflush(f);
1026*3117ece4Schristos     BMK_print_cLevelTable(stdout, winners, srcSize);
1027*3117ece4Schristos }
1028*3117ece4Schristos 
1029*3117ece4Schristos 
1030*3117ece4Schristos /*-*******************************************************
1031*3117ece4Schristos *  Functions to Benchmark
1032*3117ece4Schristos *********************************************************/
1033*3117ece4Schristos 
1034*3117ece4Schristos typedef struct {
1035*3117ece4Schristos     ZSTD_CCtx* cctx;
1036*3117ece4Schristos     const void* dictBuffer;
1037*3117ece4Schristos     size_t dictBufferSize;
1038*3117ece4Schristos     int cLevel;
1039*3117ece4Schristos     const paramValues_t* comprParams;
1040*3117ece4Schristos } BMK_initCCtxArgs;
1041*3117ece4Schristos 
1042*3117ece4Schristos static size_t local_initCCtx(void* payload) {
1043*3117ece4Schristos     const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload;
1044*3117ece4Schristos     varInds_t i;
1045*3117ece4Schristos     ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters);
1046*3117ece4Schristos     ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel);
1047*3117ece4Schristos 
1048*3117ece4Schristos     for(i = 0; i < NUM_PARAMS; i++) {
1049*3117ece4Schristos         if(ag->comprParams->vals[i] != PARAM_UNSET)
1050*3117ece4Schristos         ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]);
1051*3117ece4Schristos     }
1052*3117ece4Schristos     ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize);
1053*3117ece4Schristos 
1054*3117ece4Schristos     return 0;
1055*3117ece4Schristos }
1056*3117ece4Schristos 
1057*3117ece4Schristos typedef struct {
1058*3117ece4Schristos     ZSTD_DCtx* dctx;
1059*3117ece4Schristos     const void* dictBuffer;
1060*3117ece4Schristos     size_t dictBufferSize;
1061*3117ece4Schristos } BMK_initDCtxArgs;
1062*3117ece4Schristos 
1063*3117ece4Schristos static size_t local_initDCtx(void* payload) {
1064*3117ece4Schristos     const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload;
1065*3117ece4Schristos     ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters);
1066*3117ece4Schristos     ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
1067*3117ece4Schristos     return 0;
1068*3117ece4Schristos }
1069*3117ece4Schristos 
1070*3117ece4Schristos /* additional argument is just the context */
1071*3117ece4Schristos static size_t local_defaultCompress(
1072*3117ece4Schristos                             const void* srcBuffer, size_t srcSize,
1073*3117ece4Schristos                             void* dstBuffer, size_t dstSize,
1074*3117ece4Schristos                             void* addArgs)
1075*3117ece4Schristos {
1076*3117ece4Schristos     ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs;
1077*3117ece4Schristos     assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */
1078*3117ece4Schristos     return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
1079*3117ece4Schristos }
1080*3117ece4Schristos 
1081*3117ece4Schristos /* additional argument is just the context */
1082*3117ece4Schristos static size_t local_defaultDecompress(
1083*3117ece4Schristos     const void* srcBuffer, size_t srcSize,
1084*3117ece4Schristos     void* dstBuffer, size_t dstSize,
1085*3117ece4Schristos     void* addArgs) {
1086*3117ece4Schristos     size_t moreToFlush = 1;
1087*3117ece4Schristos     ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs;
1088*3117ece4Schristos     ZSTD_inBuffer in;
1089*3117ece4Schristos     ZSTD_outBuffer out;
1090*3117ece4Schristos     in.src = srcBuffer;
1091*3117ece4Schristos     in.size = srcSize;
1092*3117ece4Schristos     in.pos = 0;
1093*3117ece4Schristos     out.dst = dstBuffer;
1094*3117ece4Schristos     out.size = dstSize;
1095*3117ece4Schristos     out.pos = 0;
1096*3117ece4Schristos     while (moreToFlush) {
1097*3117ece4Schristos         if(out.pos == out.size) {
1098*3117ece4Schristos             return (size_t)-ZSTD_error_dstSize_tooSmall;
1099*3117ece4Schristos         }
1100*3117ece4Schristos         moreToFlush = ZSTD_decompressStream(dctx,
1101*3117ece4Schristos                             &out, &in);
1102*3117ece4Schristos         if (ZSTD_isError(moreToFlush)) {
1103*3117ece4Schristos             return moreToFlush;
1104*3117ece4Schristos         }
1105*3117ece4Schristos     }
1106*3117ece4Schristos     return out.pos;
1107*3117ece4Schristos 
1108*3117ece4Schristos }
1109*3117ece4Schristos 
1110*3117ece4Schristos /*-************************************
1111*3117ece4Schristos *  Data Initialization Functions
1112*3117ece4Schristos **************************************/
1113*3117ece4Schristos 
/* buffers_t :
 * The set of buffers used for a benchmark, filled by createBuffersFromMemory().
 * Element [0] of dstPtrs and resPtrs is the only one passed to free()
 * by freeNonSrcBuffers(). */
typedef struct {
    void* srcBuffer;         /* source content ; released by freeBuffers() */
    size_t srcSize;          /* sum of the sizes of all input files */
    const void** srcPtrs;    /* start of each block within srcBuffer */
    size_t* srcSizes;        /* size of each source block */
    void** dstPtrs;          /* destination of each block */
    size_t* dstCapacities;   /* capacity of each destination block */
    size_t* dstSizes;        /* presumably the compressed size of each block -- TODO confirm */
    void** resPtrs;          /* presumably where each block is regenerated -- TODO confirm */
    size_t* resSizes;        /* presumably the regenerated size of each block -- TODO confirm */
    size_t nbBlocks;         /* presumably the number of valid entries in the tables above -- TODO confirm */
    size_t maxBlockSize;     /* presumably the largest value within srcSizes -- TODO confirm */
} buffers_t;
1127*3117ece4Schristos 
1128*3117ece4Schristos typedef struct {
1129*3117ece4Schristos     size_t dictSize;
1130*3117ece4Schristos     void* dictBuffer;
1131*3117ece4Schristos     ZSTD_CCtx* cctx;
1132*3117ece4Schristos     ZSTD_DCtx* dctx;
1133*3117ece4Schristos } contexts_t;
1134*3117ece4Schristos 
1135*3117ece4Schristos static void freeNonSrcBuffers(const buffers_t b) {
1136*3117ece4Schristos     free((void*)b.srcPtrs);
1137*3117ece4Schristos     free(b.srcSizes);
1138*3117ece4Schristos 
1139*3117ece4Schristos     if(b.dstPtrs != NULL) {
1140*3117ece4Schristos         free(b.dstPtrs[0]);
1141*3117ece4Schristos     }
1142*3117ece4Schristos     free(b.dstPtrs);
1143*3117ece4Schristos     free(b.dstCapacities);
1144*3117ece4Schristos     free(b.dstSizes);
1145*3117ece4Schristos 
1146*3117ece4Schristos     if(b.resPtrs != NULL) {
1147*3117ece4Schristos         free(b.resPtrs[0]);
1148*3117ece4Schristos     }
1149*3117ece4Schristos     free(b.resPtrs);
1150*3117ece4Schristos     free(b.resSizes);
1151*3117ece4Schristos }
1152*3117ece4Schristos 
1153*3117ece4Schristos static void freeBuffers(const buffers_t b) {
1154*3117ece4Schristos     if(b.srcPtrs != NULL) {
1155*3117ece4Schristos         free(b.srcBuffer);
1156*3117ece4Schristos     }
1157*3117ece4Schristos     freeNonSrcBuffers(b);
1158*3117ece4Schristos }
1159*3117ece4Schristos 
1160*3117ece4Schristos /* srcBuffer will be freed by freeBuffers now */
1161*3117ece4Schristos static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles,
1162*3117ece4Schristos     const size_t* fileSizes)
1163*3117ece4Schristos {
1164*3117ece4Schristos     size_t pos = 0, n, blockSize;
1165*3117ece4Schristos     U32 maxNbBlocks, blockNb = 0;
1166*3117ece4Schristos     buff->srcSize = 0;
1167*3117ece4Schristos     for(n = 0; n < nbFiles; n++) {
1168*3117ece4Schristos         buff->srcSize += fileSizes[n];
1169*3117ece4Schristos     }
1170*3117ece4Schristos 
1171*3117ece4Schristos     if(buff->srcSize == 0) {
1172*3117ece4Schristos         DISPLAY("No data to bench\n");
1173*3117ece4Schristos         return 1;
1174*3117ece4Schristos     }
1175*3117ece4Schristos 
1176*3117ece4Schristos     blockSize = g_blockSize ? g_blockSize : buff->srcSize;
1177*3117ece4Schristos     maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles;
1178*3117ece4Schristos 
1179*3117ece4Schristos     buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*));
1180*3117ece4Schristos     buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1181*3117ece4Schristos 
1182*3117ece4Schristos     buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1183*3117ece4Schristos     buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1184*3117ece4Schristos     buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1185*3117ece4Schristos 
1186*3117ece4Schristos     buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1187*3117ece4Schristos     buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1188*3117ece4Schristos 
1189*3117ece4Schristos     if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) {
1190*3117ece4Schristos         DISPLAY("alloc error\n");
1191*3117ece4Schristos         freeNonSrcBuffers(*buff);
1192*3117ece4Schristos         return 1;
1193*3117ece4Schristos     }
1194*3117ece4Schristos 
1195*3117ece4Schristos     buff->srcBuffer = srcBuffer;
1196*3117ece4Schristos     buff->srcPtrs[0] = (const void*)buff->srcBuffer;
1197*3117ece4Schristos     buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024));
1198*3117ece4Schristos     buff->resPtrs[0] = malloc(buff->srcSize);
1199*3117ece4Schristos 
1200*3117ece4Schristos     if(!buff->dstPtrs[0] || !buff->resPtrs[0]) {
1201*3117ece4Schristos         DISPLAY("alloc error\n");
1202*3117ece4Schristos         freeNonSrcBuffers(*buff);
1203*3117ece4Schristos         return 1;
1204*3117ece4Schristos     }
1205*3117ece4Schristos 
1206*3117ece4Schristos     for(n = 0; n < nbFiles; n++) {
1207*3117ece4Schristos         size_t pos_end = pos + fileSizes[n];
1208*3117ece4Schristos         for(; pos < pos_end; blockNb++) {
1209*3117ece4Schristos             buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos);
1210*3117ece4Schristos             buff->srcSizes[blockNb] = blockSize;
1211*3117ece4Schristos             pos += blockSize;
1212*3117ece4Schristos         }
1213*3117ece4Schristos 
1214*3117ece4Schristos         if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; }
1215*3117ece4Schristos         pos = pos_end;
1216*3117ece4Schristos     }
1217*3117ece4Schristos 
1218*3117ece4Schristos     buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]);
1219*3117ece4Schristos     buff->dstSizes[0] = buff->dstCapacities[0];
1220*3117ece4Schristos     buff->resSizes[0] = buff->srcSizes[0];
1221*3117ece4Schristos     buff->maxBlockSize = buff->srcSizes[0];
1222*3117ece4Schristos 
1223*3117ece4Schristos     for(n = 1; n < blockNb; n++) {
1224*3117ece4Schristos         buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1];
1225*3117ece4Schristos         buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1];
1226*3117ece4Schristos         buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]);
1227*3117ece4Schristos         buff->dstSizes[n] = buff->dstCapacities[n];
1228*3117ece4Schristos         buff->resSizes[n] = buff->srcSizes[n];
1229*3117ece4Schristos 
1230*3117ece4Schristos         buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]);
1231*3117ece4Schristos     }
1232*3117ece4Schristos 
1233*3117ece4Schristos     buff->nbBlocks = blockNb;
1234*3117ece4Schristos 
1235*3117ece4Schristos     return 0;
1236*3117ece4Schristos }
1237*3117ece4Schristos 
1238*3117ece4Schristos /* allocates buffer's arguments. returns success / failure */
1239*3117ece4Schristos static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable,
1240*3117ece4Schristos                           size_t nbFiles) {
1241*3117ece4Schristos     size_t pos = 0;
1242*3117ece4Schristos     size_t n;
1243*3117ece4Schristos     size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
1244*3117ece4Schristos     size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad);
1245*3117ece4Schristos     size_t* fileSizes = calloc(sizeof(size_t), nbFiles);
1246*3117ece4Schristos     void* srcBuffer = NULL;
1247*3117ece4Schristos     int ret = 0;
1248*3117ece4Schristos 
1249*3117ece4Schristos     if(!totalSizeToLoad || !benchedSize) {
1250*3117ece4Schristos         ret = 1;
1251*3117ece4Schristos         DISPLAY("Nothing to Bench\n");
1252*3117ece4Schristos         goto _cleanUp;
1253*3117ece4Schristos     }
1254*3117ece4Schristos 
1255*3117ece4Schristos     srcBuffer = malloc(benchedSize);
1256*3117ece4Schristos 
1257*3117ece4Schristos     if(!fileSizes || !srcBuffer) {
1258*3117ece4Schristos         ret = 1;
1259*3117ece4Schristos         goto _cleanUp;
1260*3117ece4Schristos     }
1261*3117ece4Schristos 
1262*3117ece4Schristos     for(n = 0; n < nbFiles; n++) {
1263*3117ece4Schristos         FILE* f;
1264*3117ece4Schristos         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
1265*3117ece4Schristos         if (UTIL_isDirectory(fileNamesTable[n])) {
1266*3117ece4Schristos             DISPLAY("Ignoring %s directory...       \n", fileNamesTable[n]);
1267*3117ece4Schristos             continue;
1268*3117ece4Schristos         }
1269*3117ece4Schristos         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1270*3117ece4Schristos             DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
1271*3117ece4Schristos             continue;
1272*3117ece4Schristos         }
1273*3117ece4Schristos         f = fopen(fileNamesTable[n], "rb");
1274*3117ece4Schristos         if (f==NULL) {
1275*3117ece4Schristos             DISPLAY("impossible to open file %s\n", fileNamesTable[n]);
1276*3117ece4Schristos             fclose(f);
1277*3117ece4Schristos             ret = 10;
1278*3117ece4Schristos             goto _cleanUp;
1279*3117ece4Schristos         }
1280*3117ece4Schristos 
1281*3117ece4Schristos         DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
1282*3117ece4Schristos 
1283*3117ece4Schristos         if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n;   /* buffer too small - stop after this file */
1284*3117ece4Schristos         {
1285*3117ece4Schristos             char* buffer = (char*)(srcBuffer);
1286*3117ece4Schristos             size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f);
1287*3117ece4Schristos             fclose(f);
1288*3117ece4Schristos             if (readSize != (size_t)fileSize) {
1289*3117ece4Schristos                 DISPLAY("could not read %s", fileNamesTable[n]);
1290*3117ece4Schristos                 ret = 1;
1291*3117ece4Schristos                 goto _cleanUp;
1292*3117ece4Schristos             }
1293*3117ece4Schristos 
1294*3117ece4Schristos             fileSizes[n] = readSize;
1295*3117ece4Schristos             pos += readSize;
1296*3117ece4Schristos         }
1297*3117ece4Schristos     }
1298*3117ece4Schristos 
1299*3117ece4Schristos     ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes);
1300*3117ece4Schristos 
1301*3117ece4Schristos _cleanUp:
1302*3117ece4Schristos     if(ret) { free(srcBuffer); }
1303*3117ece4Schristos     free(fileSizes);
1304*3117ece4Schristos     return ret;
1305*3117ece4Schristos }
1306*3117ece4Schristos 
1307*3117ece4Schristos static void freeContexts(const contexts_t ctx) {
1308*3117ece4Schristos     free(ctx.dictBuffer);
1309*3117ece4Schristos     ZSTD_freeCCtx(ctx.cctx);
1310*3117ece4Schristos     ZSTD_freeDCtx(ctx.dctx);
1311*3117ece4Schristos }
1312*3117ece4Schristos 
1313*3117ece4Schristos static int createContexts(contexts_t* ctx, const char* dictFileName) {
1314*3117ece4Schristos     FILE* f;
1315*3117ece4Schristos     size_t readSize;
1316*3117ece4Schristos     ctx->cctx = ZSTD_createCCtx();
1317*3117ece4Schristos     ctx->dctx = ZSTD_createDCtx();
1318*3117ece4Schristos     assert(ctx->cctx != NULL);
1319*3117ece4Schristos     assert(ctx->dctx != NULL);
1320*3117ece4Schristos 
1321*3117ece4Schristos     if(dictFileName == NULL) {
1322*3117ece4Schristos         ctx->dictSize = 0;
1323*3117ece4Schristos         ctx->dictBuffer = NULL;
1324*3117ece4Schristos         return 0;
1325*3117ece4Schristos     }
1326*3117ece4Schristos     {   U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1327*3117ece4Schristos         assert(dictFileSize != UTIL_FILESIZE_UNKNOWN);
1328*3117ece4Schristos         ctx->dictSize = (size_t)dictFileSize;
1329*3117ece4Schristos         assert((U64)ctx->dictSize == dictFileSize); /* check overflow */
1330*3117ece4Schristos     }
1331*3117ece4Schristos     ctx->dictBuffer = malloc(ctx->dictSize);
1332*3117ece4Schristos 
1333*3117ece4Schristos     f = fopen(dictFileName, "rb");
1334*3117ece4Schristos 
1335*3117ece4Schristos     if (f==NULL) {
1336*3117ece4Schristos         DISPLAY("unable to open file\n");
1337*3117ece4Schristos         freeContexts(*ctx);
1338*3117ece4Schristos         return 1;
1339*3117ece4Schristos     }
1340*3117ece4Schristos 
1341*3117ece4Schristos     if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) {
1342*3117ece4Schristos         DISPLAY("dictionary too large\n");
1343*3117ece4Schristos         fclose(f);
1344*3117ece4Schristos         freeContexts(*ctx);
1345*3117ece4Schristos         return 1;
1346*3117ece4Schristos     }
1347*3117ece4Schristos     readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f);
1348*3117ece4Schristos     fclose(f);
1349*3117ece4Schristos     if (readSize != ctx->dictSize) {
1350*3117ece4Schristos         DISPLAY("unable to read file\n");
1351*3117ece4Schristos         freeContexts(*ctx);
1352*3117ece4Schristos         return 1;
1353*3117ece4Schristos     }
1354*3117ece4Schristos     return 0;
1355*3117ece4Schristos }
1356*3117ece4Schristos 
1357*3117ece4Schristos /*-************************************
1358*3117ece4Schristos *  Optimizer Memoization Functions
1359*3117ece4Schristos **************************************/
1360*3117ece4Schristos 
1361*3117ece4Schristos /* return: new length */
1362*3117ece4Schristos /* keep old array, will need if iter over strategy. */
1363*3117ece4Schristos /* prunes useless params */
1364*3117ece4Schristos static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) {
1365*3117ece4Schristos     size_t i, j = 0;
1366*3117ece4Schristos     for(i = 0; i < varLength; i++) {
1367*3117ece4Schristos         if( !((varArray[i] == clog_ind && strat == ZSTD_fast)
1368*3117ece4Schristos             || (varArray[i] == slog_ind && strat == ZSTD_fast)
1369*3117ece4Schristos             || (varArray[i] == slog_ind && strat == ZSTD_dfast)
1370*3117ece4Schristos             || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) {
1371*3117ece4Schristos             varNew[j] = varArray[i];
1372*3117ece4Schristos             j++;
1373*3117ece4Schristos         }
1374*3117ece4Schristos     }
1375*3117ece4Schristos     return j;
1376*3117ece4Schristos }
1377*3117ece4Schristos 
1378*3117ece4Schristos /* res should be NUM_PARAMS size */
1379*3117ece4Schristos /* constructs varArray from paramValues_t style parameter */
1380*3117ece4Schristos /* pass in using dict. */
1381*3117ece4Schristos static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) {
1382*3117ece4Schristos     varInds_t i;
1383*3117ece4Schristos     size_t j = 0;
1384*3117ece4Schristos     for(i = 0; i < NUM_PARAMS; i++) {
1385*3117ece4Schristos         if(paramConstraints.vals[i] == PARAM_UNSET) {
1386*3117ece4Schristos             if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */
1387*3117ece4Schristos             res[j] = i; j++;
1388*3117ece4Schristos         }
1389*3117ece4Schristos     }
1390*3117ece4Schristos     return j;
1391*3117ece4Schristos }
1392*3117ece4Schristos 
1393*3117ece4Schristos /* length of memo table given free variables */
1394*3117ece4Schristos static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) {
1395*3117ece4Schristos     size_t arrayLen = 1;
1396*3117ece4Schristos     size_t i;
1397*3117ece4Schristos     for(i = 0; i < varyLen; i++) {
1398*3117ece4Schristos         if(varyParams[i] == strt_ind) continue; /* strategy separated by table */
1399*3117ece4Schristos         arrayLen *= rangetable[varyParams[i]];
1400*3117ece4Schristos     }
1401*3117ece4Schristos     return arrayLen;
1402*3117ece4Schristos }
1403*3117ece4Schristos 
1404*3117ece4Schristos /* returns unique index in memotable of compression parameters */
1405*3117ece4Schristos static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) {
1406*3117ece4Schristos     size_t i;
1407*3117ece4Schristos     unsigned ind = 0;
1408*3117ece4Schristos     for(i = 0; i < varyLen; i++) {
1409*3117ece4Schristos         varInds_t v = varyParams[i];
1410*3117ece4Schristos         if(v == strt_ind) continue; /* exclude strategy from memotable */
1411*3117ece4Schristos         ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]);
1412*3117ece4Schristos     }
1413*3117ece4Schristos     return ind;
1414*3117ece4Schristos }
1415*3117ece4Schristos 
1416*3117ece4Schristos static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) {
1417*3117ece4Schristos     const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1418*3117ece4Schristos     switch(mt.tableType) {
1419*3117ece4Schristos         case directMap:
1420*3117ece4Schristos             return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)];
1421*3117ece4Schristos         case xxhashMap:
1422*3117ece4Schristos             return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen];
1423*3117ece4Schristos         case noMemo:
1424*3117ece4Schristos             return 0;
1425*3117ece4Schristos     }
1426*3117ece4Schristos     return 0; /* should never happen, stop compiler warnings */
1427*3117ece4Schristos }
1428*3117ece4Schristos 
1429*3117ece4Schristos static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) {
1430*3117ece4Schristos     const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1431*3117ece4Schristos     switch(mt.tableType) {
1432*3117ece4Schristos         case directMap:
1433*3117ece4Schristos             mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break;
1434*3117ece4Schristos         case xxhashMap:
1435*3117ece4Schristos             mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break;
1436*3117ece4Schristos         case noMemo:
1437*3117ece4Schristos             break;
1438*3117ece4Schristos     }
1439*3117ece4Schristos }
1440*3117ece4Schristos 
1441*3117ece4Schristos /* frees all allocated memotables */
1442*3117ece4Schristos /* secret contract :
1443*3117ece4Schristos  * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */
1444*3117ece4Schristos static void freeMemoTableArray(memoTable_t* const mtAll) {
1445*3117ece4Schristos     int i;
1446*3117ece4Schristos     if(mtAll == NULL) { return; }
1447*3117ece4Schristos     for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1448*3117ece4Schristos         free(mtAll[i].table);
1449*3117ece4Schristos     }
1450*3117ece4Schristos     free(mtAll);
1451*3117ece4Schristos }
1452*3117ece4Schristos 
1453*3117ece4Schristos /* inits memotables for all (including mallocs), all strategies */
1454*3117ece4Schristos /* takes unsanitized varyParams */
1455*3117ece4Schristos static memoTable_t*
1456*3117ece4Schristos createMemoTableArray(const paramValues_t p,
1457*3117ece4Schristos                      const varInds_t* const varyParams,
1458*3117ece4Schristos                      const size_t varyLen,
1459*3117ece4Schristos                      const U32 memoTableLog)
1460*3117ece4Schristos {
1461*3117ece4Schristos     memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1));
1462*3117ece4Schristos     ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX;
1463*3117ece4Schristos 
1464*3117ece4Schristos     if(mtAll == NULL) {
1465*3117ece4Schristos         return NULL;
1466*3117ece4Schristos     }
1467*3117ece4Schristos 
1468*3117ece4Schristos     for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1469*3117ece4Schristos         mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i);
1470*3117ece4Schristos     }
1471*3117ece4Schristos 
1472*3117ece4Schristos     /* no memoization */
1473*3117ece4Schristos     if(memoTableLog == 0) {
1474*3117ece4Schristos         for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1475*3117ece4Schristos             mtAll[i].tableType = noMemo;
1476*3117ece4Schristos             mtAll[i].table = NULL;
1477*3117ece4Schristos             mtAll[i].tableLen = 0;
1478*3117ece4Schristos         }
1479*3117ece4Schristos         return mtAll;
1480*3117ece4Schristos     }
1481*3117ece4Schristos 
1482*3117ece4Schristos 
1483*3117ece4Schristos     if(p.vals[strt_ind] != PARAM_UNSET) {
1484*3117ece4Schristos         stratMin = p.vals[strt_ind];
1485*3117ece4Schristos         stratMax = p.vals[strt_ind];
1486*3117ece4Schristos     }
1487*3117ece4Schristos 
1488*3117ece4Schristos 
1489*3117ece4Schristos     for(i = stratMin; i <= stratMax; i++) {
1490*3117ece4Schristos         size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen);
1491*3117ece4Schristos         mtAll[i].tableType = directMap;
1492*3117ece4Schristos 
1493*3117ece4Schristos         if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */
1494*3117ece4Schristos             mtAll[i].tableType = xxhashMap;
1495*3117ece4Schristos             mtl = ((size_t)1 << memoTableLog);
1496*3117ece4Schristos         }
1497*3117ece4Schristos 
1498*3117ece4Schristos         mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl);
1499*3117ece4Schristos         mtAll[i].tableLen = mtl;
1500*3117ece4Schristos 
1501*3117ece4Schristos         if(mtAll[i].table == NULL) {
1502*3117ece4Schristos             freeMemoTableArray(mtAll);
1503*3117ece4Schristos             return NULL;
1504*3117ece4Schristos         }
1505*3117ece4Schristos     }
1506*3117ece4Schristos 
1507*3117ece4Schristos     return mtAll;
1508*3117ece4Schristos }
1509*3117ece4Schristos 
1510*3117ece4Schristos /* Sets pc to random unmeasured set of parameters */
1511*3117ece4Schristos /* specify strategy */
1512*3117ece4Schristos static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st)
1513*3117ece4Schristos {
1514*3117ece4Schristos     size_t j;
1515*3117ece4Schristos     const memoTable_t mt = memoTableArray[st];
1516*3117ece4Schristos     pc->vals[strt_ind] = st;
1517*3117ece4Schristos     for(j = 0; j < mt.tableLen; j++) {
1518*3117ece4Schristos         int i;
1519*3117ece4Schristos         for(i = 0; i < NUM_PARAMS; i++) {
1520*3117ece4Schristos             varInds_t v = mt.varArray[i];
1521*3117ece4Schristos             if(v == strt_ind) continue;
1522*3117ece4Schristos             pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
1523*3117ece4Schristos         }
1524*3117ece4Schristos 
1525*3117ece4Schristos         if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */
1526*3117ece4Schristos     }
1527*3117ece4Schristos }
1528*3117ece4Schristos 
1529*3117ece4Schristos /*-************************************
1530*3117ece4Schristos *  Benchmarking Functions
1531*3117ece4Schristos **************************************/
1532*3117ece4Schristos 
1533*3117ece4Schristos static void display_params_tested(paramValues_t cParams)
1534*3117ece4Schristos {
1535*3117ece4Schristos     varInds_t vi;
1536*3117ece4Schristos     DISPLAYLEVEL(3, "\r testing :");
1537*3117ece4Schristos     for (vi=0; vi < NUM_PARAMS; vi++) {
1538*3117ece4Schristos         DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]);
1539*3117ece4Schristos     }
1540*3117ece4Schristos     DISPLAYLEVEL(3, "\b    \r");
1541*3117ece4Schristos }
1542*3117ece4Schristos 
1543*3117ece4Schristos /* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */
1544*3117ece4Schristos /* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */
1545*3117ece4Schristos /* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */
1546*3117ece4Schristos /* nbSeconds used in same way as in BMK_advancedParams_t */
1547*3117ece4Schristos /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */
1548*3117ece4Schristos /* dictionary nullable, nothing else though. */
1549*3117ece4Schristos /* note : it would be a lot better if this function was present in benchzstd.c,
1550*3117ece4Schristos  * sharing code with benchMemAdvanced(), since it's technically a part of it */
1551*3117ece4Schristos static BMK_benchOutcome_t
1552*3117ece4Schristos BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
1553*3117ece4Schristos                         int cLevel, const paramValues_t* comprParams,
1554*3117ece4Schristos                         BMK_mode_t mode, unsigned nbSeconds)
1555*3117ece4Schristos {
1556*3117ece4Schristos     U32 i;
1557*3117ece4Schristos     BMK_benchResult_t bResult;
1558*3117ece4Schristos     const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs;
1559*3117ece4Schristos     size_t const *const srcSizes = buf.srcSizes;
1560*3117ece4Schristos     void** const dstPtrs = buf.dstPtrs;
1561*3117ece4Schristos     size_t const *const dstCapacities = buf.dstCapacities;
1562*3117ece4Schristos     size_t* const dstSizes = buf.dstSizes;
1563*3117ece4Schristos     void** const resPtrs = buf.resPtrs;
1564*3117ece4Schristos     size_t const *const resSizes = buf.resSizes;
1565*3117ece4Schristos     const void* dictBuffer = ctx.dictBuffer;
1566*3117ece4Schristos     const size_t dictBufferSize = ctx.dictSize;
1567*3117ece4Schristos     const size_t nbBlocks = buf.nbBlocks;
1568*3117ece4Schristos     const size_t srcSize = buf.srcSize;
1569*3117ece4Schristos     ZSTD_CCtx* cctx = ctx.cctx;
1570*3117ece4Schristos     ZSTD_DCtx* dctx = ctx.dctx;
1571*3117ece4Schristos 
1572*3117ece4Schristos     /* init */
1573*3117ece4Schristos     display_params_tested(*comprParams);
1574*3117ece4Schristos     memset(&bResult, 0, sizeof(bResult));
1575*3117ece4Schristos 
1576*3117ece4Schristos     /* warming up memory */
1577*3117ece4Schristos     for (i = 0; i < buf.nbBlocks; i++) {
1578*3117ece4Schristos         if (mode != BMK_decodeOnly) {
1579*3117ece4Schristos             RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1);
1580*3117ece4Schristos         } else {
1581*3117ece4Schristos             RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1);
1582*3117ece4Schristos         }
1583*3117ece4Schristos     }
1584*3117ece4Schristos 
1585*3117ece4Schristos     /* Bench */
1586*3117ece4Schristos     {
1587*3117ece4Schristos         /* init args */
1588*3117ece4Schristos         int compressionCompleted = (mode == BMK_decodeOnly);
1589*3117ece4Schristos         int decompressionCompleted = (mode == BMK_compressOnly);
1590*3117ece4Schristos         BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1591*3117ece4Schristos         BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1592*3117ece4Schristos         BMK_benchParams_t cbp, dbp;
1593*3117ece4Schristos         BMK_initCCtxArgs cctxprep;
1594*3117ece4Schristos         BMK_initDCtxArgs dctxprep;
1595*3117ece4Schristos 
1596*3117ece4Schristos         cbp.benchFn = local_defaultCompress;
1597*3117ece4Schristos         cbp.benchPayload = cctx;
1598*3117ece4Schristos         cbp.initFn = local_initCCtx;
1599*3117ece4Schristos         cbp.initPayload = &cctxprep;
1600*3117ece4Schristos         cbp.errorFn = ZSTD_isError;
1601*3117ece4Schristos         cbp.blockCount = nbBlocks;
1602*3117ece4Schristos         cbp.srcBuffers = srcPtrs;
1603*3117ece4Schristos         cbp.srcSizes = srcSizes;
1604*3117ece4Schristos         cbp.dstBuffers = dstPtrs;
1605*3117ece4Schristos         cbp.dstCapacities = dstCapacities;
1606*3117ece4Schristos         cbp.blockResults = dstSizes;
1607*3117ece4Schristos 
1608*3117ece4Schristos         cctxprep.cctx = cctx;
1609*3117ece4Schristos         cctxprep.dictBuffer = dictBuffer;
1610*3117ece4Schristos         cctxprep.dictBufferSize = dictBufferSize;
1611*3117ece4Schristos         cctxprep.cLevel = cLevel;
1612*3117ece4Schristos         cctxprep.comprParams = comprParams;
1613*3117ece4Schristos 
1614*3117ece4Schristos         dbp.benchFn = local_defaultDecompress;
1615*3117ece4Schristos         dbp.benchPayload = dctx;
1616*3117ece4Schristos         dbp.initFn = local_initDCtx;
1617*3117ece4Schristos         dbp.initPayload = &dctxprep;
1618*3117ece4Schristos         dbp.errorFn = ZSTD_isError;
1619*3117ece4Schristos         dbp.blockCount = nbBlocks;
1620*3117ece4Schristos         dbp.srcBuffers = (const void* const *) dstPtrs;
1621*3117ece4Schristos         dbp.srcSizes = dstCapacities;
1622*3117ece4Schristos         dbp.dstBuffers = resPtrs;
1623*3117ece4Schristos         dbp.dstCapacities = resSizes;
1624*3117ece4Schristos         dbp.blockResults = NULL;
1625*3117ece4Schristos 
1626*3117ece4Schristos         dctxprep.dctx = dctx;
1627*3117ece4Schristos         dctxprep.dictBuffer = dictBuffer;
1628*3117ece4Schristos         dctxprep.dictBufferSize = dictBufferSize;
1629*3117ece4Schristos 
1630*3117ece4Schristos         assert(timeStateCompress != NULL);
1631*3117ece4Schristos         assert(timeStateDecompress != NULL);
1632*3117ece4Schristos         while(!compressionCompleted) {
1633*3117ece4Schristos             BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);
1634*3117ece4Schristos 
1635*3117ece4Schristos             if (!BMK_isSuccessful_runOutcome(cOutcome)) {
1636*3117ece4Schristos                 BMK_benchOutcome_t bOut;
1637*3117ece4Schristos                 memset(&bOut, 0, sizeof(bOut));
1638*3117ece4Schristos                 bOut.tag = 1;   /* should rather be a function or a constant */
1639*3117ece4Schristos                 BMK_freeTimedFnState(timeStateCompress);
1640*3117ece4Schristos                 BMK_freeTimedFnState(timeStateDecompress);
1641*3117ece4Schristos                 return bOut;
1642*3117ece4Schristos             }
1643*3117ece4Schristos             {   BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
1644*3117ece4Schristos                 bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1645*3117ece4Schristos                 bResult.cSize = rResult.sumOfReturn;
1646*3117ece4Schristos             }
1647*3117ece4Schristos             compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
1648*3117ece4Schristos         }
1649*3117ece4Schristos 
1650*3117ece4Schristos         while (!decompressionCompleted) {
1651*3117ece4Schristos             BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
1652*3117ece4Schristos 
1653*3117ece4Schristos             if (!BMK_isSuccessful_runOutcome(dOutcome)) {
1654*3117ece4Schristos                 BMK_benchOutcome_t bOut;
1655*3117ece4Schristos                 memset(&bOut, 0, sizeof(bOut));
1656*3117ece4Schristos                 bOut.tag = 1;   /* should rather be a function or a constant */
1657*3117ece4Schristos                 BMK_freeTimedFnState(timeStateCompress);
1658*3117ece4Schristos                 BMK_freeTimedFnState(timeStateDecompress);
1659*3117ece4Schristos                 return bOut;
1660*3117ece4Schristos             }
1661*3117ece4Schristos             {   BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
1662*3117ece4Schristos                 bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1663*3117ece4Schristos             }
1664*3117ece4Schristos             decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
1665*3117ece4Schristos         }
1666*3117ece4Schristos 
1667*3117ece4Schristos         BMK_freeTimedFnState(timeStateCompress);
1668*3117ece4Schristos         BMK_freeTimedFnState(timeStateDecompress);
1669*3117ece4Schristos     }
1670*3117ece4Schristos 
1671*3117ece4Schristos    /* Bench */
1672*3117ece4Schristos     bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
1673*3117ece4Schristos 
1674*3117ece4Schristos     {   BMK_benchOutcome_t bOut;
1675*3117ece4Schristos         bOut.tag = 0;
1676*3117ece4Schristos         bOut.internal_never_use_directly = bResult;  /* should be a function */
1677*3117ece4Schristos         return bOut;
1678*3117ece4Schristos     }
1679*3117ece4Schristos }
1680*3117ece4Schristos 
1681*3117ece4Schristos /* BMK_benchParam() :
1682*3117ece4Schristos  * benchmark a set of `cParams` over sample `buf`,
1683*3117ece4Schristos  * store the result in `resultPtr`.
1684*3117ece4Schristos  * @return : 0 if success, 1 if error */
1685*3117ece4Schristos static int BMK_benchParam ( BMK_benchResult_t* resultPtr,
1686*3117ece4Schristos                             buffers_t buf, contexts_t ctx,
1687*3117ece4Schristos                             paramValues_t cParams)
1688*3117ece4Schristos {
1689*3117ece4Schristos     BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx,
1690*3117ece4Schristos                                                         BASE_CLEVEL, &cParams,
1691*3117ece4Schristos                                                         BMK_both, 3);
1692*3117ece4Schristos     if (!BMK_isSuccessful_benchOutcome(outcome)) return 1;
1693*3117ece4Schristos     *resultPtr = BMK_extract_benchResult(outcome);
1694*3117ece4Schristos     return 0;
1695*3117ece4Schristos }
1696*3117ece4Schristos 
1697*3117ece4Schristos 
1698*3117ece4Schristos /* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */
1699*3117ece4Schristos #define VARIANCE 1.2
1700*3117ece4Schristos static int allBench(BMK_benchResult_t* resultPtr,
1701*3117ece4Schristos                 const buffers_t buf, const contexts_t ctx,
1702*3117ece4Schristos                 const paramValues_t cParams,
1703*3117ece4Schristos                 const constraint_t target,
1704*3117ece4Schristos                 BMK_benchResult_t* winnerResult, int feas)
1705*3117ece4Schristos {
1706*3117ece4Schristos     BMK_benchResult_t benchres;
1707*3117ece4Schristos     double uncertaintyConstantC = 3., uncertaintyConstantD = 3.;
1708*3117ece4Schristos     double winnerRS;
1709*3117ece4Schristos 
1710*3117ece4Schristos     BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2);
1711*3117ece4Schristos     if (!BMK_isSuccessful_benchOutcome(outcome)) {
1712*3117ece4Schristos         DEBUGOUTPUT("Benchmarking failed \n");
1713*3117ece4Schristos         return ERROR_RESULT;
1714*3117ece4Schristos     }
1715*3117ece4Schristos     benchres = BMK_extract_benchResult(outcome);
1716*3117ece4Schristos 
1717*3117ece4Schristos     winnerRS = resultScore(*winnerResult, buf.srcSize, target);
1718*3117ece4Schristos     DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS);
1719*3117ece4Schristos 
1720*3117ece4Schristos     *resultPtr = benchres;
1721*3117ece4Schristos 
1722*3117ece4Schristos     /* anything with worse ratio in feas is definitely worse, discard */
1723*3117ece4Schristos     if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) {
1724*3117ece4Schristos         return WORSE_RESULT;
1725*3117ece4Schristos     }
1726*3117ece4Schristos 
1727*3117ece4Schristos     /* calculate uncertainty in compression / decompression runs */
1728*3117ece4Schristos     if (benchres.cSpeed) {
1729*3117ece4Schristos         double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
1730*3117ece4Schristos         uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
1731*3117ece4Schristos     }
1732*3117ece4Schristos 
1733*3117ece4Schristos     if (benchres.dSpeed) {
1734*3117ece4Schristos         double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
1735*3117ece4Schristos         uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
1736*3117ece4Schristos     }
1737*3117ece4Schristos 
1738*3117ece4Schristos     /* optimistic assumption of benchres */
1739*3117ece4Schristos     {   BMK_benchResult_t resultMax = benchres;
1740*3117ece4Schristos         resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
1741*3117ece4Schristos         resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
1742*3117ece4Schristos 
1743*3117ece4Schristos         /* disregard infeasible results in feas mode */
1744*3117ece4Schristos         /* disregard if resultMax < winner in infeas mode */
1745*3117ece4Schristos         if((feas && !feasible(resultMax, target)) ||
1746*3117ece4Schristos           (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
1747*3117ece4Schristos             return WORSE_RESULT;
1748*3117ece4Schristos         }
1749*3117ece4Schristos     }
1750*3117ece4Schristos 
1751*3117ece4Schristos     /* compare by resultScore when in infeas */
1752*3117ece4Schristos     /* compare by compareResultLT when in feas */
1753*3117ece4Schristos     if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) ||
1754*3117ece4Schristos        (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) )  {
1755*3117ece4Schristos         return BETTER_RESULT;
1756*3117ece4Schristos     } else {
1757*3117ece4Schristos         return WORSE_RESULT;
1758*3117ece4Schristos     }
1759*3117ece4Schristos }
1760*3117ece4Schristos 
1761*3117ece4Schristos 
1762*3117ece4Schristos #define INFEASIBLE_THRESHOLD 200
1763*3117ece4Schristos /* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */
1764*3117ece4Schristos static int benchMemo(BMK_benchResult_t* resultPtr,
1765*3117ece4Schristos                 const buffers_t buf, const contexts_t ctx,
1766*3117ece4Schristos                 const paramValues_t cParams,
1767*3117ece4Schristos                 const constraint_t target,
1768*3117ece4Schristos                 BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray,
1769*3117ece4Schristos                 const int feas) {
1770*3117ece4Schristos     static int bmcount = 0;
1771*3117ece4Schristos     int res;
1772*3117ece4Schristos 
1773*3117ece4Schristos     if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD
1774*3117ece4Schristos       || redundantParams(cParams, target, buf.maxBlockSize) ) {
1775*3117ece4Schristos         return WORSE_RESULT;
1776*3117ece4Schristos     }
1777*3117ece4Schristos 
1778*3117ece4Schristos     res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas);
1779*3117ece4Schristos 
1780*3117ece4Schristos     if(DEBUG && !(bmcount % 250)) {
1781*3117ece4Schristos         DISPLAY("Count: %d\n", bmcount);
1782*3117ece4Schristos         bmcount++;
1783*3117ece4Schristos     }
1784*3117ece4Schristos     BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize);
1785*3117ece4Schristos 
1786*3117ece4Schristos     if(res == BETTER_RESULT || feas) {
1787*3117ece4Schristos         memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */
1788*3117ece4Schristos     }
1789*3117ece4Schristos     return res;
1790*3117ece4Schristos }
1791*3117ece4Schristos 
1792*3117ece4Schristos 
1793*3117ece4Schristos typedef struct {
1794*3117ece4Schristos     U64 cSpeed_min;
1795*3117ece4Schristos     U64 dSpeed_min;
1796*3117ece4Schristos     U32 windowLog_max;
1797*3117ece4Schristos     ZSTD_strategy strategy_max;
1798*3117ece4Schristos } level_constraints_t;
1799*3117ece4Schristos 
1800*3117ece4Schristos static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1];
1801*3117ece4Schristos 
1802*3117ece4Schristos static void BMK_init_level_constraints(int bytePerSec_level1)
1803*3117ece4Schristos {
1804*3117ece4Schristos     assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel());
1805*3117ece4Schristos     memset(g_level_constraint, 0, sizeof(g_level_constraint));
1806*3117ece4Schristos     g_level_constraint[1].cSpeed_min = bytePerSec_level1;
1807*3117ece4Schristos     g_level_constraint[1].dSpeed_min = 0;
1808*3117ece4Schristos     g_level_constraint[1].windowLog_max = 19;
1809*3117ece4Schristos     g_level_constraint[1].strategy_max = ZSTD_fast;
1810*3117ece4Schristos 
1811*3117ece4Schristos     /* establish speed objectives (relative to level 1) */
1812*3117ece4Schristos     {   int l;
1813*3117ece4Schristos         for (l=2; l<=NB_LEVELS_TRACKED; l++) {
1814*3117ece4Schristos             g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64;
1815*3117ece4Schristos             g_level_constraint[l].dSpeed_min = 0;
1816*3117ece4Schristos             g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5;   /* only --ultra levels >= 20 can use windowlog > 23 */
1817*3117ece4Schristos             g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX;
1818*3117ece4Schristos     }   }
1819*3117ece4Schristos }
1820*3117ece4Schristos 
1821*3117ece4Schristos static int BMK_seed(winnerInfo_t* winners,
1822*3117ece4Schristos                     const paramValues_t params,
1823*3117ece4Schristos                     const buffers_t buf,
1824*3117ece4Schristos                     const contexts_t ctx)
1825*3117ece4Schristos {
1826*3117ece4Schristos     BMK_benchResult_t testResult;
1827*3117ece4Schristos     int better = 0;
1828*3117ece4Schristos     int cLevel;
1829*3117ece4Schristos 
1830*3117ece4Schristos     BMK_benchParam(&testResult, buf, ctx, params);
1831*3117ece4Schristos 
1832*3117ece4Schristos     for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) {
1833*3117ece4Schristos 
1834*3117ece4Schristos         if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min)
1835*3117ece4Schristos             continue;   /* not fast enough for this level */
1836*3117ece4Schristos         if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min)
1837*3117ece4Schristos             continue;   /* not fast enough for this level */
1838*3117ece4Schristos         if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max)
1839*3117ece4Schristos             continue;   /* too much memory for this level */
1840*3117ece4Schristos         if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max)
1841*3117ece4Schristos             continue;   /* forbidden strategy for this level */
1842*3117ece4Schristos         if (winners[cLevel].result.cSize==0) {
1843*3117ece4Schristos             /* first solution for this cLevel */
1844*3117ece4Schristos             winners[cLevel].result = testResult;
1845*3117ece4Schristos             winners[cLevel].params = params;
1846*3117ece4Schristos             BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1847*3117ece4Schristos             better = 1;
1848*3117ece4Schristos             continue;
1849*3117ece4Schristos         }
1850*3117ece4Schristos 
1851*3117ece4Schristos         if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
1852*3117ece4Schristos             /* Validate solution is "good enough" */
1853*3117ece4Schristos             double W_ratio = (double)buf.srcSize / (double)testResult.cSize;
1854*3117ece4Schristos             double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize;
1855*3117ece4Schristos             double W_ratioNote = log (W_ratio);
1856*3117ece4Schristos             double O_ratioNote = log (O_ratio);
1857*3117ece4Schristos             size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB);
1858*3117ece4Schristos             size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB);
1859*3117ece4Schristos             double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
1860*3117ece4Schristos             double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
1861*3117ece4Schristos 
1862*3117ece4Schristos             size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
1863*3117ece4Schristos             size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
1864*3117ece4Schristos             double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
1865*3117ece4Schristos             double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
1866*3117ece4Schristos 
1867*3117ece4Schristos             double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed);
1868*3117ece4Schristos             double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);
1869*3117ece4Schristos 
1870*3117ece4Schristos             double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed);
1871*3117ece4Schristos             double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
1872*3117ece4Schristos 
1873*3117ece4Schristos             if (W_DMemUsed_note < O_DMemUsed_note) {
1874*3117ece4Schristos                 /* uses too much Decompression memory for too little benefit */
1875*3117ece4Schristos                 if (W_ratio > O_ratio)
1876*3117ece4Schristos                 DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
1877*3117ece4Schristos                          W_ratio, (double)(W_DMemUsed) / 1024 / 1024,
1878*3117ece4Schristos                          O_ratio, (double)(O_DMemUsed) / 1024 / 1024,   cLevel);
1879*3117ece4Schristos                 continue;
1880*3117ece4Schristos             }
1881*3117ece4Schristos             if (W_CMemUsed_note < O_CMemUsed_note) {
1882*3117ece4Schristos                 /* uses too much memory for compression for too little benefit */
1883*3117ece4Schristos                 if (W_ratio > O_ratio)
1884*3117ece4Schristos                 DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
1885*3117ece4Schristos                          W_ratio, (double)(W_CMemUsed) / 1024 / 1024,
1886*3117ece4Schristos                          O_ratio, (double)(O_CMemUsed) / 1024 / 1024,
1887*3117ece4Schristos                          cLevel);
1888*3117ece4Schristos                 continue;
1889*3117ece4Schristos             }
1890*3117ece4Schristos             if (W_CSpeed_note   < O_CSpeed_note  ) {
1891*3117ece4Schristos                 /* too large compression speed difference for the compression benefit */
1892*3117ece4Schristos                 if (W_ratio > O_ratio)
1893*3117ece4Schristos                 DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
1894*3117ece4Schristos                          W_ratio, (double)testResult.cSpeed / MB_UNIT,
1895*3117ece4Schristos                          O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT,
1896*3117ece4Schristos                          cLevel);
1897*3117ece4Schristos                 continue;
1898*3117ece4Schristos             }
1899*3117ece4Schristos             if (W_DSpeed_note   < O_DSpeed_note  ) {
1900*3117ece4Schristos                 /* too large decompression speed difference for the compression benefit */
1901*3117ece4Schristos                 if (W_ratio > O_ratio)
1902*3117ece4Schristos                 DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
1903*3117ece4Schristos                          W_ratio, (double)testResult.dSpeed / MB_UNIT,
1904*3117ece4Schristos                          O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT,
1905*3117ece4Schristos                          cLevel);
1906*3117ece4Schristos                 continue;
1907*3117ece4Schristos             }
1908*3117ece4Schristos 
1909*3117ece4Schristos             if (W_ratio < O_ratio)
1910*3117ece4Schristos                 DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n",
1911*3117ece4Schristos                                 W_ratio, O_ratio, cLevel);
1912*3117ece4Schristos 
1913*3117ece4Schristos             winners[cLevel].result = testResult;
1914*3117ece4Schristos             winners[cLevel].params = params;
1915*3117ece4Schristos             BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1916*3117ece4Schristos 
1917*3117ece4Schristos             better = 1;
1918*3117ece4Schristos     }   }
1919*3117ece4Schristos 
1920*3117ece4Schristos     return better;
1921*3117ece4Schristos }
1922*3117ece4Schristos 
1923*3117ece4Schristos /*-************************************
1924*3117ece4Schristos *  Compression Level Table Generation Functions
1925*3117ece4Schristos **************************************/
1926*3117ece4Schristos 
1927*3117ece4Schristos #define PARAMTABLELOG   25
1928*3117ece4Schristos #define PARAMTABLESIZE (1<<PARAMTABLELOG)
1929*3117ece4Schristos #define PARAMTABLEMASK (PARAMTABLESIZE-1)
1930*3117ece4Schristos static BYTE g_alreadyTested[PARAMTABLESIZE] = {0};   /* init to zero */
1931*3117ece4Schristos 
1932*3117ece4Schristos static BYTE* NB_TESTS_PLAYED(paramValues_t p)
1933*3117ece4Schristos {
1934*3117ece4Schristos     ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p));
1935*3117ece4Schristos     unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0);
1936*3117ece4Schristos     return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK];
1937*3117ece4Schristos }
1938*3117ece4Schristos 
1939*3117ece4Schristos static void playAround(FILE* f,
1940*3117ece4Schristos                        winnerInfo_t* winners,
1941*3117ece4Schristos                        paramValues_t p,
1942*3117ece4Schristos                        const buffers_t buf, const contexts_t ctx)
1943*3117ece4Schristos {
1944*3117ece4Schristos     int nbVariations = 0;
1945*3117ece4Schristos     UTIL_time_t const clockStart = UTIL_getTime();
1946*3117ece4Schristos 
1947*3117ece4Schristos     while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) {
1948*3117ece4Schristos         if (nbVariations++ > g_maxNbVariations) break;
1949*3117ece4Schristos 
1950*3117ece4Schristos         do {
1951*3117ece4Schristos             int i;
1952*3117ece4Schristos             for(i = 0; i < 4; i++) {
1953*3117ece4Schristos                 paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1),
1954*3117ece4Schristos                               ((FUZ_rand(&g_rand) & 1) << 1) - 1,
1955*3117ece4Schristos                               &p);
1956*3117ece4Schristos             }
1957*3117ece4Schristos         } while (!paramValid(p));
1958*3117ece4Schristos 
1959*3117ece4Schristos         /* exclude faster if already played params */
1960*3117ece4Schristos         if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1))
1961*3117ece4Schristos             continue;
1962*3117ece4Schristos 
1963*3117ece4Schristos         /* test */
1964*3117ece4Schristos         {   BYTE* const b = NB_TESTS_PLAYED(p);
1965*3117ece4Schristos             (*b)++;
1966*3117ece4Schristos         }
1967*3117ece4Schristos         if (!BMK_seed(winners, p, buf, ctx)) continue;
1968*3117ece4Schristos 
1969*3117ece4Schristos         /* improvement found => search more */
1970*3117ece4Schristos         BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
1971*3117ece4Schristos         playAround(f, winners, p, buf, ctx);
1972*3117ece4Schristos     }
1973*3117ece4Schristos 
1974*3117ece4Schristos }
1975*3117ece4Schristos 
1976*3117ece4Schristos static void
1977*3117ece4Schristos BMK_selectRandomStart( FILE* f,
1978*3117ece4Schristos                        winnerInfo_t* winners,
1979*3117ece4Schristos                        const buffers_t buf, const contexts_t ctx)
1980*3117ece4Schristos {
1981*3117ece4Schristos     U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1);
1982*3117ece4Schristos     if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) {
1983*3117ece4Schristos         /* use some random entry */
1984*3117ece4Schristos         paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */
1985*3117ece4Schristos                                              buf.srcSize, 0);
1986*3117ece4Schristos         playAround(f, winners, p, buf, ctx);
1987*3117ece4Schristos     } else {
1988*3117ece4Schristos         playAround(f, winners, winners[id].params, buf, ctx);
1989*3117ece4Schristos     }
1990*3117ece4Schristos }
1991*3117ece4Schristos 
1992*3117ece4Schristos 
1993*3117ece4Schristos /* BMK_generate_cLevelTable() :
1994*3117ece4Schristos  * test a large number of configurations
1995*3117ece4Schristos  * and distribute them across compression levels according to speed conditions.
1996*3117ece4Schristos  * display and save all intermediate results into rfName = "grillResults.txt".
1997*3117ece4Schristos  * the function automatically stops after g_timeLimit_s.
1998*3117ece4Schristos  * this function cannot error, it directly exit() in case of problem.
1999*3117ece4Schristos  */
2000*3117ece4Schristos static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx)
2001*3117ece4Schristos {
2002*3117ece4Schristos     paramValues_t params;
2003*3117ece4Schristos     winnerInfo_t winners[NB_LEVELS_TRACKED+1];
2004*3117ece4Schristos     const char* const rfName = "grillResults.txt";
2005*3117ece4Schristos     FILE* const f = fopen(rfName, "w");
2006*3117ece4Schristos 
2007*3117ece4Schristos     /* init */
2008*3117ece4Schristos     assert(g_singleRun==0);
2009*3117ece4Schristos     memset(winners, 0, sizeof(winners));
2010*3117ece4Schristos     if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
2011*3117ece4Schristos 
2012*3117ece4Schristos     if (g_target) {
2013*3117ece4Schristos         BMK_init_level_constraints(g_target * MB_UNIT);
2014*3117ece4Schristos     } else {
2015*3117ece4Schristos         /* baseline config for level 1 */
2016*3117ece4Schristos         paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));
2017*3117ece4Schristos         BMK_benchResult_t testResult;
2018*3117ece4Schristos         BMK_benchParam(&testResult, buf, ctx, l1params);
2019*3117ece4Schristos         BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32));
2020*3117ece4Schristos     }
2021*3117ece4Schristos 
2022*3117ece4Schristos     /* populate initial solution */
2023*3117ece4Schristos     {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2024*3117ece4Schristos         int i;
2025*3117ece4Schristos         for (i=0; i<=maxSeeds; i++) {
2026*3117ece4Schristos             params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0));
2027*3117ece4Schristos             BMK_seed(winners, params, buf, ctx);
2028*3117ece4Schristos     }   }
2029*3117ece4Schristos     BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2030*3117ece4Schristos 
2031*3117ece4Schristos     /* start tests */
2032*3117ece4Schristos     {   const UTIL_time_t grillStart = UTIL_getTime();
2033*3117ece4Schristos         do {
2034*3117ece4Schristos             BMK_selectRandomStart(f, winners, buf, ctx);
2035*3117ece4Schristos         } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s);
2036*3117ece4Schristos     }
2037*3117ece4Schristos 
2038*3117ece4Schristos     /* end summary */
2039*3117ece4Schristos     BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2040*3117ece4Schristos     DISPLAY("grillParams operations completed \n");
2041*3117ece4Schristos 
2042*3117ece4Schristos     /* clean up*/
2043*3117ece4Schristos     fclose(f);
2044*3117ece4Schristos }
2045*3117ece4Schristos 
2046*3117ece4Schristos 
2047*3117ece4Schristos /*-************************************
2048*3117ece4Schristos *  Single Benchmark Functions
2049*3117ece4Schristos **************************************/
2050*3117ece4Schristos 
2051*3117ece4Schristos static int
2052*3117ece4Schristos benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel)
2053*3117ece4Schristos {
2054*3117ece4Schristos     BMK_benchResult_t testResult;
2055*3117ece4Schristos     g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2056*3117ece4Schristos 
2057*3117ece4Schristos     if (BMK_benchParam(&testResult, buf, ctx, g_params)) {
2058*3117ece4Schristos         DISPLAY("Error during benchmarking\n");
2059*3117ece4Schristos         return 1;
2060*3117ece4Schristos     }
2061*3117ece4Schristos 
2062*3117ece4Schristos     BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize);
2063*3117ece4Schristos 
2064*3117ece4Schristos     return 0;
2065*3117ece4Schristos }
2066*3117ece4Schristos 
2067*3117ece4Schristos static int benchSample(double compressibility, int cLevel)
2068*3117ece4Schristos {
2069*3117ece4Schristos     const char* const name = "Sample 10MB";
2070*3117ece4Schristos     size_t const benchedSize = 10 MB;
2071*3117ece4Schristos     void* const srcBuffer = malloc(benchedSize);
2072*3117ece4Schristos     int ret = 0;
2073*3117ece4Schristos 
2074*3117ece4Schristos     buffers_t buf;
2075*3117ece4Schristos     contexts_t ctx;
2076*3117ece4Schristos 
2077*3117ece4Schristos     if(srcBuffer == NULL) {
2078*3117ece4Schristos         DISPLAY("Out of Memory\n");
2079*3117ece4Schristos         return 2;
2080*3117ece4Schristos     }
2081*3117ece4Schristos 
2082*3117ece4Schristos     RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
2083*3117ece4Schristos 
2084*3117ece4Schristos     if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) {
2085*3117ece4Schristos         DISPLAY("Buffer Creation Error\n");
2086*3117ece4Schristos         free(srcBuffer);
2087*3117ece4Schristos         return 3;
2088*3117ece4Schristos     }
2089*3117ece4Schristos 
2090*3117ece4Schristos     if(createContexts(&ctx, NULL)) {
2091*3117ece4Schristos         DISPLAY("Context Creation Error\n");
2092*3117ece4Schristos         freeBuffers(buf);
2093*3117ece4Schristos         return 1;
2094*3117ece4Schristos     }
2095*3117ece4Schristos 
2096*3117ece4Schristos     /* bench */
2097*3117ece4Schristos     DISPLAY("\r%79s\r", "");
2098*3117ece4Schristos     DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100));
2099*3117ece4Schristos 
2100*3117ece4Schristos     if(g_singleRun) {
2101*3117ece4Schristos         ret = benchOnce(buf, ctx, cLevel);
2102*3117ece4Schristos     } else {
2103*3117ece4Schristos         BMK_generate_cLevelTable(buf, ctx);
2104*3117ece4Schristos     }
2105*3117ece4Schristos 
2106*3117ece4Schristos     freeBuffers(buf);
2107*3117ece4Schristos     freeContexts(ctx);
2108*3117ece4Schristos 
2109*3117ece4Schristos     return ret;
2110*3117ece4Schristos }
2111*3117ece4Schristos 
2112*3117ece4Schristos /* benchFiles() :
2113*3117ece4Schristos  * note: while this function takes a table of filenames,
2114*3117ece4Schristos  * in practice, only the first filename will be used */
2115*3117ece4Schristos static int benchFiles(const char** fileNamesTable, int nbFiles,
2116*3117ece4Schristos                       const char* dictFileName, int cLevel)
2117*3117ece4Schristos {
2118*3117ece4Schristos     buffers_t buf;
2119*3117ece4Schristos     contexts_t ctx;
2120*3117ece4Schristos     int ret = 0;
2121*3117ece4Schristos 
2122*3117ece4Schristos     if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2123*3117ece4Schristos         DISPLAY("unable to load files\n");
2124*3117ece4Schristos         return 1;
2125*3117ece4Schristos     }
2126*3117ece4Schristos 
2127*3117ece4Schristos     if (createContexts(&ctx, dictFileName)) {
2128*3117ece4Schristos         DISPLAY("unable to load dictionary\n");
2129*3117ece4Schristos         freeBuffers(buf);
2130*3117ece4Schristos         return 2;
2131*3117ece4Schristos     }
2132*3117ece4Schristos 
2133*3117ece4Schristos     DISPLAY("\r%79s\r", "");
2134*3117ece4Schristos     if (nbFiles == 1) {
2135*3117ece4Schristos         DISPLAY("using %s : \n", fileNamesTable[0]);
2136*3117ece4Schristos     } else {
2137*3117ece4Schristos         DISPLAY("using %d Files : \n", nbFiles);
2138*3117ece4Schristos     }
2139*3117ece4Schristos 
2140*3117ece4Schristos     if (g_singleRun) {
2141*3117ece4Schristos         ret = benchOnce(buf, ctx, cLevel);
2142*3117ece4Schristos     } else {
2143*3117ece4Schristos         BMK_generate_cLevelTable(buf, ctx);
2144*3117ece4Schristos     }
2145*3117ece4Schristos 
2146*3117ece4Schristos     freeBuffers(buf);
2147*3117ece4Schristos     freeContexts(ctx);
2148*3117ece4Schristos     return ret;
2149*3117ece4Schristos }
2150*3117ece4Schristos 
2151*3117ece4Schristos 
2152*3117ece4Schristos /*-************************************
2153*3117ece4Schristos *  Local Optimization Functions
2154*3117ece4Schristos **************************************/
2155*3117ece4Schristos 
2156*3117ece4Schristos /* One iteration of hill climbing. Specifically, it first tries all
2157*3117ece4Schristos  * valid parameter configurations w/ manhattan distance 1 and picks the best one
2158*3117ece4Schristos  * failing that, it progressively tries candidates further and further away (up to #dim + 2)
2159*3117ece4Schristos  * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the
2160*3117ece4Schristos  * current stage of hill climbing.
2161*3117ece4Schristos  * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to
2162*3117ece4Schristos  * the resultScore function, which is effectively a linear increase in reward until it reaches
2163*3117ece4Schristos  * the constraint-satisfying value, it which point any excess results in only logarithmic reward.
2164*3117ece4Schristos  * This aims to find some constraint-satisfying point.
2165*3117ece4Schristos  * Phase 2 optimizes in accordance with what the original function sets out to maximize, with
2166*3117ece4Schristos  * all feasible solutions valued over all infeasible solutions.
2167*3117ece4Schristos  */
2168*3117ece4Schristos 
2169*3117ece4Schristos /* sanitize all params here.
2170*3117ece4Schristos  * all generation after random should be sanitized. (maybe sanitize random)
2171*3117ece4Schristos  */
2172*3117ece4Schristos static winnerInfo_t climbOnce(const constraint_t target,
2173*3117ece4Schristos                 memoTable_t* mtAll,
2174*3117ece4Schristos                 const buffers_t buf, const contexts_t ctx,
2175*3117ece4Schristos                 const paramValues_t init)
2176*3117ece4Schristos {
2177*3117ece4Schristos     /*
2178*3117ece4Schristos      * cparam - currently considered 'center'
2179*3117ece4Schristos      * candidate - params to benchmark/results
2180*3117ece4Schristos      * winner - best option found so far.
2181*3117ece4Schristos      */
2182*3117ece4Schristos     paramValues_t cparam = init;
2183*3117ece4Schristos     winnerInfo_t candidateInfo, winnerInfo;
2184*3117ece4Schristos     int better = 1;
2185*3117ece4Schristos     int feas = 0;
2186*3117ece4Schristos 
2187*3117ece4Schristos     winnerInfo = initWinnerInfo(init);
2188*3117ece4Schristos     candidateInfo = winnerInfo;
2189*3117ece4Schristos 
2190*3117ece4Schristos     {   winnerInfo_t bestFeasible1 = initWinnerInfo(cparam);
2191*3117ece4Schristos         DEBUGOUTPUT("Climb Part 1\n");
2192*3117ece4Schristos         while(better) {
2193*3117ece4Schristos             int offset;
2194*3117ece4Schristos             size_t i, dist;
2195*3117ece4Schristos             const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen;
2196*3117ece4Schristos             better = 0;
2197*3117ece4Schristos             DEBUGOUTPUT("Start\n");
2198*3117ece4Schristos             cparam = winnerInfo.params;
2199*3117ece4Schristos             candidateInfo.params = cparam;
2200*3117ece4Schristos              /* all dist-1 candidates */
2201*3117ece4Schristos             for (i = 0; i < varLen; i++) {
2202*3117ece4Schristos                 for (offset = -1; offset <= 1; offset += 2) {
2203*3117ece4Schristos                     CHECKTIME(winnerInfo);
2204*3117ece4Schristos                     candidateInfo.params = cparam;
2205*3117ece4Schristos                     paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
2206*3117ece4Schristos                                   offset,
2207*3117ece4Schristos                                   &candidateInfo.params);
2208*3117ece4Schristos 
2209*3117ece4Schristos                     if(paramValid(candidateInfo.params)) {
2210*3117ece4Schristos                         int res;
2211*3117ece4Schristos                         res = benchMemo(&candidateInfo.result, buf, ctx,
2212*3117ece4Schristos                             sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas);
2213*3117ece4Schristos                         DEBUGOUTPUT("Res: %d\n", res);
2214*3117ece4Schristos                         if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */
2215*3117ece4Schristos                             winnerInfo = candidateInfo;
2216*3117ece4Schristos                             better = 1;
2217*3117ece4Schristos                             if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2218*3117ece4Schristos                                 bestFeasible1 = winnerInfo;
2219*3117ece4Schristos                             }
2220*3117ece4Schristos                         }
2221*3117ece4Schristos                     }
2222*3117ece4Schristos                 }  /* for (offset = -1; offset <= 1; offset += 2) */
2223*3117ece4Schristos             }   /* for (i = 0; i < varLen; i++) */
2224*3117ece4Schristos 
2225*3117ece4Schristos             if(better) {
2226*3117ece4Schristos                 continue;
2227*3117ece4Schristos             }
2228*3117ece4Schristos 
2229*3117ece4Schristos             for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */
2230*3117ece4Schristos                 for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) {
2231*3117ece4Schristos                     int res;
2232*3117ece4Schristos                     CHECKTIME(winnerInfo);
2233*3117ece4Schristos                     candidateInfo.params = cparam;
2234*3117ece4Schristos                     /* param error checking already done here */
2235*3117ece4Schristos                     paramVariation(&candidateInfo.params, mtAll, (U32)dist);
2236*3117ece4Schristos 
2237*3117ece4Schristos                     res = benchMemo(&candidateInfo.result,
2238*3117ece4Schristos                                 buf, ctx,
2239*3117ece4Schristos                                 sanitizeParams(candidateInfo.params), target,
2240*3117ece4Schristos                                 &winnerInfo.result, mtAll, feas);
2241*3117ece4Schristos                     DEBUGOUTPUT("Res: %d\n", res);
2242*3117ece4Schristos                     if (res == BETTER_RESULT) { /* synonymous with better in this case*/
2243*3117ece4Schristos                         winnerInfo = candidateInfo;
2244*3117ece4Schristos                         better = 1;
2245*3117ece4Schristos                         if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2246*3117ece4Schristos                             bestFeasible1 = winnerInfo;
2247*3117ece4Schristos                         }
2248*3117ece4Schristos                         break;
2249*3117ece4Schristos                     }
2250*3117ece4Schristos                 }
2251*3117ece4Schristos 
2252*3117ece4Schristos                 if (better) {
2253*3117ece4Schristos                     break;
2254*3117ece4Schristos                 }
2255*3117ece4Schristos             }   /* for(dist = 2; dist < varLen + 2; dist++) */
2256*3117ece4Schristos 
2257*3117ece4Schristos             if (!better) { /* infeas -> feas -> stop */
2258*3117ece4Schristos                 if (feas) return winnerInfo;
2259*3117ece4Schristos                 feas = 1;
2260*3117ece4Schristos                 better = 1;
2261*3117ece4Schristos                 winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */
2262*3117ece4Schristos                 DEBUGOUTPUT("Climb Part 2\n");
2263*3117ece4Schristos             }
2264*3117ece4Schristos         }
2265*3117ece4Schristos         winnerInfo = bestFeasible1;
2266*3117ece4Schristos     }
2267*3117ece4Schristos 
2268*3117ece4Schristos     return winnerInfo;
2269*3117ece4Schristos }
2270*3117ece4Schristos 
2271*3117ece4Schristos /* Optimizes for a fixed strategy */
2272*3117ece4Schristos 
2273*3117ece4Schristos /* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited)
2274*3117ece4Schristos    weight more on visit for bad results, less on good results/more on later results / ones with more failures.
2275*3117ece4Schristos    allocate memoTable here.
2276*3117ece4Schristos  */
2277*3117ece4Schristos static winnerInfo_t
2278*3117ece4Schristos optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx,
2279*3117ece4Schristos                       const constraint_t target, paramValues_t paramTarget,
2280*3117ece4Schristos                       const ZSTD_strategy strat,
2281*3117ece4Schristos                       memoTable_t* memoTableArray, const int tries)
2282*3117ece4Schristos {
2283*3117ece4Schristos     int i = 0;
2284*3117ece4Schristos 
2285*3117ece4Schristos     paramValues_t init;
2286*3117ece4Schristos     winnerInfo_t winnerInfo, candidateInfo;
2287*3117ece4Schristos     winnerInfo = initWinnerInfo(emptyParams());
2288*3117ece4Schristos     /* so climb is given the right fixed strategy */
2289*3117ece4Schristos     paramTarget.vals[strt_ind] = strat;
2290*3117ece4Schristos     /* to pass ZSTD_checkCParams */
2291*3117ece4Schristos     paramTarget = cParamUnsetMin(paramTarget);
2292*3117ece4Schristos 
2293*3117ece4Schristos     init = paramTarget;
2294*3117ece4Schristos 
2295*3117ece4Schristos     for(i = 0; i < tries; i++) {
2296*3117ece4Schristos         DEBUGOUTPUT("Restart\n");
2297*3117ece4Schristos         do {
2298*3117ece4Schristos             randomConstrainedParams(&init, memoTableArray, strat);
2299*3117ece4Schristos         } while(redundantParams(init, target, buf.maxBlockSize));
2300*3117ece4Schristos         candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init);
2301*3117ece4Schristos         if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) {
2302*3117ece4Schristos             winnerInfo = candidateInfo;
2303*3117ece4Schristos             BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize);
2304*3117ece4Schristos             i = 0;
2305*3117ece4Schristos             continue;
2306*3117ece4Schristos         }
2307*3117ece4Schristos         CHECKTIME(winnerInfo);
2308*3117ece4Schristos         i++;
2309*3117ece4Schristos     }
2310*3117ece4Schristos     return winnerInfo;
2311*3117ece4Schristos }
2312*3117ece4Schristos 
2313*3117ece4Schristos /* goes best, best-1, best+1, best-2, ... */
2314*3117ece4Schristos /* return 0 if nothing remaining */
2315*3117ece4Schristos static int nextStrategy(const int currentStrategy, const int bestStrategy)
2316*3117ece4Schristos {
2317*3117ece4Schristos     if(bestStrategy <= currentStrategy) {
2318*3117ece4Schristos         int candidate = 2 * bestStrategy - currentStrategy - 1;
2319*3117ece4Schristos         if(candidate < 1) {
2320*3117ece4Schristos             candidate = currentStrategy + 1;
2321*3117ece4Schristos             if(candidate > (int)ZSTD_STRATEGY_MAX) {
2322*3117ece4Schristos                 return 0;
2323*3117ece4Schristos             } else {
2324*3117ece4Schristos                 return candidate;
2325*3117ece4Schristos             }
2326*3117ece4Schristos         } else {
2327*3117ece4Schristos             return candidate;
2328*3117ece4Schristos         }
2329*3117ece4Schristos     } else { /* bestStrategy >= currentStrategy */
2330*3117ece4Schristos         int candidate = 2 * bestStrategy - currentStrategy;
2331*3117ece4Schristos         if(candidate > (int)ZSTD_STRATEGY_MAX) {
2332*3117ece4Schristos             candidate = currentStrategy - 1;
2333*3117ece4Schristos             if(candidate < 1) {
2334*3117ece4Schristos                 return 0;
2335*3117ece4Schristos             } else {
2336*3117ece4Schristos                 return candidate;
2337*3117ece4Schristos             }
2338*3117ece4Schristos         } else {
2339*3117ece4Schristos             return candidate;
2340*3117ece4Schristos         }
2341*3117ece4Schristos     }
2342*3117ece4Schristos }
2343*3117ece4Schristos 
2344*3117ece4Schristos /* experiment with playing with this and decay value */
2345*3117ece4Schristos 
2346*3117ece4Schristos /* main fn called when using --optimize */
2347*3117ece4Schristos /* Does strategy selection by benchmarking default compression levels
2348*3117ece4Schristos  * then optimizes by strategy, starting with the best one and moving
2349*3117ece4Schristos  * progressively moving further away by number
2350*3117ece4Schristos  * args:
2351*3117ece4Schristos  * fileNamesTable - list of files to benchmark
2352*3117ece4Schristos  * nbFiles - length of fileNamesTable
2353*3117ece4Schristos  * dictFileName - name of dictionary file if one, else NULL
2354*3117ece4Schristos  * target - performance constraints (cSpeed, dSpeed, cMem)
2355*3117ece4Schristos  * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast)
2356*3117ece4Schristos  * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio)
2357*3117ece4Schristos  */
2358*3117ece4Schristos 
2359*3117ece4Schristos static unsigned g_maxTries = 5;
2360*3117ece4Schristos #define TRY_DECAY 1
2361*3117ece4Schristos 
2362*3117ece4Schristos static int
2363*3117ece4Schristos optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
2364*3117ece4Schristos                 const char* dictFileName,
2365*3117ece4Schristos                 constraint_t target, paramValues_t paramTarget,
2366*3117ece4Schristos                 const int cLevelOpt, const int cLevelRun,
2367*3117ece4Schristos                 const U32 memoTableLog)
2368*3117ece4Schristos {
2369*3117ece4Schristos     varInds_t varArray [NUM_PARAMS];
2370*3117ece4Schristos     int ret = 0;
2371*3117ece4Schristos     const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL);
2372*3117ece4Schristos     winnerInfo_t winner = initWinnerInfo(emptyParams());
2373*3117ece4Schristos     memoTable_t* allMT = NULL;
2374*3117ece4Schristos     paramValues_t paramBase;
2375*3117ece4Schristos     contexts_t ctx;
2376*3117ece4Schristos     buffers_t buf;
2377*3117ece4Schristos     g_time = UTIL_getTime();
2378*3117ece4Schristos 
2379*3117ece4Schristos     if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2380*3117ece4Schristos         DISPLAY("unable to load files\n");
2381*3117ece4Schristos         return 1;
2382*3117ece4Schristos     }
2383*3117ece4Schristos 
2384*3117ece4Schristos     if (createContexts(&ctx, dictFileName)) {
2385*3117ece4Schristos         DISPLAY("unable to load dictionary\n");
2386*3117ece4Schristos         freeBuffers(buf);
2387*3117ece4Schristos         return 2;
2388*3117ece4Schristos     }
2389*3117ece4Schristos 
2390*3117ece4Schristos     if (nbFiles == 1) {
2391*3117ece4Schristos         DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[0]);
2392*3117ece4Schristos     } else {
2393*3117ece4Schristos         DISPLAYLEVEL(2, "Loading %lu Files...       \r", (unsigned long)nbFiles);
2394*3117ece4Schristos     }
2395*3117ece4Schristos 
2396*3117ece4Schristos     /* sanitize paramTarget */
2397*3117ece4Schristos     optimizerAdjustInput(&paramTarget, buf.maxBlockSize);
2398*3117ece4Schristos     paramBase = cParamUnsetMin(paramTarget);
2399*3117ece4Schristos 
2400*3117ece4Schristos     allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog);
2401*3117ece4Schristos 
2402*3117ece4Schristos     if (!allMT) {
2403*3117ece4Schristos         DISPLAY("MemoTable Init Error\n");
2404*3117ece4Schristos         ret = 2;
2405*3117ece4Schristos         goto _cleanUp;
2406*3117ece4Schristos     }
2407*3117ece4Schristos 
2408*3117ece4Schristos     /* default strictnesses */
2409*3117ece4Schristos     if (g_strictness == PARAM_UNSET) {
2410*3117ece4Schristos         if(g_optmode) {
2411*3117ece4Schristos             g_strictness = 100;
2412*3117ece4Schristos         } else {
2413*3117ece4Schristos             g_strictness = 90;
2414*3117ece4Schristos         }
2415*3117ece4Schristos     } else {
2416*3117ece4Schristos         if(0 >= g_strictness || g_strictness > 100) {
2417*3117ece4Schristos             DISPLAY("Strictness Outside of Bounds\n");
2418*3117ece4Schristos             ret = 4;
2419*3117ece4Schristos             goto _cleanUp;
2420*3117ece4Schristos         }
2421*3117ece4Schristos     }
2422*3117ece4Schristos 
2423*3117ece4Schristos     /* use level'ing mode instead of normal target mode */
2424*3117ece4Schristos     if (g_optmode) {
2425*3117ece4Schristos         winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize));
2426*3117ece4Schristos         if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) {
2427*3117ece4Schristos             ret = 3;
2428*3117ece4Schristos             goto _cleanUp;
2429*3117ece4Schristos         }
2430*3117ece4Schristos 
2431*3117ece4Schristos         g_lvltarget = winner.result;
2432*3117ece4Schristos         g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
2433*3117ece4Schristos         g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
2434*3117ece4Schristos         g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
2435*3117ece4Schristos 
2436*3117ece4Schristos         target.cSpeed = (U32)g_lvltarget.cSpeed;
2437*3117ece4Schristos         target.dSpeed = (U32)g_lvltarget.dSpeed;
2438*3117ece4Schristos 
2439*3117ece4Schristos         BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize);
2440*3117ece4Schristos     }
2441*3117ece4Schristos 
2442*3117ece4Schristos     /* Don't want it to return anything worse than the best known result */
2443*3117ece4Schristos     if (g_singleRun) {
2444*3117ece4Schristos         BMK_benchResult_t res;
2445*3117ece4Schristos         g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2446*3117ece4Schristos         if (BMK_benchParam(&res, buf, ctx, g_params)) {
2447*3117ece4Schristos             ret = 45;
2448*3117ece4Schristos             goto _cleanUp;
2449*3117ece4Schristos         }
2450*3117ece4Schristos         if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) {
2451*3117ece4Schristos             winner.result = res;
2452*3117ece4Schristos             winner.params = g_params;
2453*3117ece4Schristos         }
2454*3117ece4Schristos     }
2455*3117ece4Schristos 
2456*3117ece4Schristos     /* bench */
2457*3117ece4Schristos     DISPLAYLEVEL(2, "\r%79s\r", "");
2458*3117ece4Schristos     if(nbFiles == 1) {
2459*3117ece4Schristos         DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]);
2460*3117ece4Schristos     } else {
2461*3117ece4Schristos         DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles);
2462*3117ece4Schristos     }
2463*3117ece4Schristos 
2464*3117ece4Schristos     if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); }
2465*3117ece4Schristos     if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); }
2466*3117ece4Schristos     if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); }
2467*3117ece4Schristos 
2468*3117ece4Schristos     DISPLAYLEVEL(2, "\n");
2469*3117ece4Schristos     init_clockGranularity();
2470*3117ece4Schristos 
2471*3117ece4Schristos     {   paramValues_t CParams;
2472*3117ece4Schristos 
2473*3117ece4Schristos         /* find best solution from default params */
2474*3117ece4Schristos         {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2475*3117ece4Schristos             DEBUGOUTPUT("Strategy Selection\n");
2476*3117ece4Schristos             if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2477*3117ece4Schristos                 BMK_benchResult_t candidate;
2478*3117ece4Schristos                 int i;
2479*3117ece4Schristos                 for (i=1; i<=maxSeeds; i++) {
2480*3117ece4Schristos                     int ec;
2481*3117ece4Schristos                     CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget);
2482*3117ece4Schristos                     ec = BMK_benchParam(&candidate, buf, ctx, CParams);
2483*3117ece4Schristos                     BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize);
2484*3117ece4Schristos 
2485*3117ece4Schristos                     if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) {
2486*3117ece4Schristos                         winner.result = candidate;
2487*3117ece4Schristos                         winner.params = CParams;
2488*3117ece4Schristos                     }
2489*3117ece4Schristos 
2490*3117ece4Schristos                     CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */
2491*3117ece4Schristos                     /* if the current params are too slow, just stop. */
2492*3117ece4Schristos                     if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; }
2493*3117ece4Schristos                 }
2494*3117ece4Schristos 
2495*3117ece4Schristos                 BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize);
2496*3117ece4Schristos             }
2497*3117ece4Schristos         }
2498*3117ece4Schristos 
2499*3117ece4Schristos         DEBUGOUTPUT("Real Opt\n");
2500*3117ece4Schristos         /* start 'real' optimization */
2501*3117ece4Schristos         {   int bestStrategy = (int)winner.params.vals[strt_ind];
2502*3117ece4Schristos             if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2503*3117ece4Schristos                 int st = bestStrategy;
2504*3117ece4Schristos                 int tries = g_maxTries;
2505*3117ece4Schristos 
2506*3117ece4Schristos                 /* one iterations of hill climbing with the level-defined parameters. */
2507*3117ece4Schristos                 {   winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params);
2508*3117ece4Schristos                     if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) {
2509*3117ece4Schristos                         winner = w1;
2510*3117ece4Schristos                     }
2511*3117ece4Schristos                     CHECKTIMEGT(ret, 0, _displayCleanUp);
2512*3117ece4Schristos                 }
2513*3117ece4Schristos 
2514*3117ece4Schristos                 while(st && tries > 0) {
2515*3117ece4Schristos                     winnerInfo_t wc;
2516*3117ece4Schristos                     DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]);
2517*3117ece4Schristos 
2518*3117ece4Schristos                     wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries);
2519*3117ece4Schristos 
2520*3117ece4Schristos                     if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) {
2521*3117ece4Schristos                         winner = wc;
2522*3117ece4Schristos                         tries = g_maxTries;
2523*3117ece4Schristos                         bestStrategy = st;
2524*3117ece4Schristos                     } else {
2525*3117ece4Schristos                         st = nextStrategy(st, bestStrategy);
2526*3117ece4Schristos                         tries -= TRY_DECAY;
2527*3117ece4Schristos                     }
2528*3117ece4Schristos                     CHECKTIMEGT(ret, 0, _displayCleanUp);
2529*3117ece4Schristos                 }
2530*3117ece4Schristos             } else {
2531*3117ece4Schristos                 winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries);
2532*3117ece4Schristos             }
2533*3117ece4Schristos 
2534*3117ece4Schristos         }
2535*3117ece4Schristos 
2536*3117ece4Schristos         /* no solution found */
2537*3117ece4Schristos         if(winner.result.cSize == (size_t)-1) {
2538*3117ece4Schristos             ret = 1;
2539*3117ece4Schristos             DISPLAY("No feasible solution found\n");
2540*3117ece4Schristos             goto _cleanUp;
2541*3117ece4Schristos         }
2542*3117ece4Schristos 
2543*3117ece4Schristos         /* end summary */
2544*3117ece4Schristos _displayCleanUp:
2545*3117ece4Schristos         if (g_displayLevel >= 0) {
2546*3117ece4Schristos             BMK_displayOneResult(stdout, winner, buf.srcSize);
2547*3117ece4Schristos         }
2548*3117ece4Schristos         BMK_paramValues_into_commandLine(stdout, winner.params);
2549*3117ece4Schristos         DISPLAYLEVEL(1, "grillParams size - optimizer completed \n");
2550*3117ece4Schristos     }
2551*3117ece4Schristos 
2552*3117ece4Schristos _cleanUp:
2553*3117ece4Schristos     freeContexts(ctx);
2554*3117ece4Schristos     freeBuffers(buf);
2555*3117ece4Schristos     freeMemoTableArray(allMT);
2556*3117ece4Schristos     return ret;
2557*3117ece4Schristos }
2558*3117ece4Schristos 
2559*3117ece4Schristos /*-************************************
2560*3117ece4Schristos *  CLI parsing functions
2561*3117ece4Schristos **************************************/
2562*3117ece4Schristos 
/** longCommandWArg() :
 *  Tests whether the text at *stringPtr begins with longCommand.
 *  On a match, *stringPtr is moved just past longCommand and 1 is returned.
 *  Otherwise, *stringPtr is left untouched and 0 is returned.
 *  from zstdcli.c
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }
    *stringPtr += prefixLen;
    return 1;
}
2576*3117ece4Schristos 
/* errorOut() :
 * Displays msg, then terminates the program with exit code 1.
 * Does not return. */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
2581*3117ece4Schristos 
/*! readU32FromChar() :
 *  Parses an optional leading '-', a run of decimal digits, and an optional
 *  K, KB, KiB, M, MB or MiB suffix (x1024 or x1024x1024).
 * @return : the value read, as an unsigned; a leading '-' negates it
 *           modulo 2^N (unsigned arithmetic).
 *  `*stringPtr` is advanced to the position where reading stopped.
 *  Note : function will exit() program if digit sequence overflows */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char errorMsg[] = "error: numeric value too large";
    const char* cursor = *stringPtr;
    unsigned sign = 1;
    unsigned value = 0;

    if (*cursor == '-') {
        sign = (unsigned)-1;
        cursor++;
    }

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        /* largest accumulator for which one more digit is accepted */
        unsigned const lastSafe = (((unsigned)(-1)) / 10) - 1;
        if (value > lastSafe) errorOut(errorMsg);
        value *= 10;
        assert(*cursor >= '0');
        value += (unsigned)(*cursor - '0');
    }

    if ((*cursor == 'K') || (*cursor == 'M')) {
        unsigned const maxShiftable = ((unsigned)(-1)) >> 10;
        int const nbShifts = (*cursor == 'M') ? 2 : 1;   /* K : x2^10 ; M : x2^20 */
        int n;
        for (n = 0; n < nbShifts; n++) {
            if (value > maxShiftable) errorOut(errorMsg);
            value <<= 10;
        }
        cursor++;  /* skip `K` or `M` */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value * sign;
}
2615*3117ece4Schristos 
/* readDoubleFromChar() :
 * Parses an unsigned decimal number of the form `digits[.digits]`
 * (no sign, no exponent).
 * @return : the value read (0 when there is no leading digit nor '.').
 * `*stringPtr` is advanced past every character consumed. */
static double readDoubleFromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    double value = 0;
    double scale = 10;

    /* integer part */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value *= 10;
        value += *cursor - '0';
    }

    /* optional fractional part */
    if (*cursor == '.') {
        for (cursor++; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
            value += (double)(*cursor - '0') / scale;
            scale *= 10;
        }
    }

    *stringPtr = cursor;
    return value;
}
2631*3117ece4Schristos 
/* usage() :
 * Displays the short help text for program `exename`.
 * @return : 0 */
static int usage(const char* exename)
{
    DISPLAY("Usage :\n");
    DISPLAY("      %s [arg] file\n", exename);
    DISPLAY("Arguments :\n");
    DISPLAY(" file : path to the file used as reference (if none, generates a compressible sample)\n");
    DISPLAY(" -H/-h  : Help (this text + advanced options)\n");
    return 0;
}
2641*3117ece4Schristos 
2642*3117ece4Schristos static int usage_advanced(void)
2643*3117ece4Schristos {
2644*3117ece4Schristos     DISPLAY( "\nAdvanced options :\n");
2645*3117ece4Schristos     DISPLAY( " -T#          : set level 1 speed objective \n");
2646*3117ece4Schristos     DISPLAY( " -B#          : cut input into blocks of size # (default : single block) \n");
2647*3117ece4Schristos     DISPLAY( " --optimize=  : same as -O with more verbose syntax (see README.md)\n");
2648*3117ece4Schristos     DISPLAY( " -S           : Single run \n");
2649*3117ece4Schristos     DISPLAY( " --zstd       : Single run, parameter selection same as zstdcli \n");
2650*3117ece4Schristos     DISPLAY( " -P#          : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
2651*3117ece4Schristos     DISPLAY( " -t#          : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n",
2652*3117ece4Schristos                                 (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600);
2653*3117ece4Schristos     DISPLAY( " -v           : Prints Benchmarking output\n");
2654*3117ece4Schristos     DISPLAY( " -D           : Next argument dictionary file\n");
2655*3117ece4Schristos     DISPLAY( " -s           : Separate Files\n");
2656*3117ece4Schristos     return 0;
2657*3117ece4Schristos }
2658*3117ece4Schristos 
/* badusage() :
 * Reports a command line error, then displays the short help text.
 * @return : 1 */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n");
    (void)usage(exename);
    return 1;
}
2665*3117ece4Schristos 
/* PARSE_SUB_ARGS() :
 * If `argument` starts with stringLong or stringShort, reads the unsigned
 * value which follows into `variable`, then :
 *  - when a ',' follows, skips it and `continue`s the enclosing loop ;
 *  - otherwise `break`s out of the enclosing loop.
 * Does nothing when neither name matches.
 * Must be expanded inside a loop, with a `const char* argument` in scope :
 * the break / continue statements apply to that loop, which is why the body
 * is a plain { } block and not a do { } while(0). */
#define PARSE_SUB_ARGS(stringLong, stringShort, variable) {   \
    if ( longCommandWArg(&argument, stringLong)               \
      || longCommandWArg(&argument, stringShort) ) {          \
        variable = readU32FromChar(&argument);                \
        if (argument[0] != ',') break;                        \
        argument++;                                           \
        continue;                                             \
    }                                                         \
}
2674*3117ece4Schristos 
2675*3117ece4Schristos /* 1 if successful parse, 0 otherwise */
2676*3117ece4Schristos static int parse_params(const char** argptr, paramValues_t* pv) {
2677*3117ece4Schristos     int matched = 0;
2678*3117ece4Schristos     const char* argOrig = *argptr;
2679*3117ece4Schristos     varInds_t v;
2680*3117ece4Schristos     for(v = 0; v < NUM_PARAMS; v++) {
2681*3117ece4Schristos         if ( longCommandWArg(argptr,g_shortParamNames[v])
2682*3117ece4Schristos           || longCommandWArg(argptr, g_paramNames[v]) ) {
2683*3117ece4Schristos             if(**argptr == '=') {
2684*3117ece4Schristos                 (*argptr)++;
2685*3117ece4Schristos                 pv->vals[v] = readU32FromChar(argptr);
2686*3117ece4Schristos                 matched = 1;
2687*3117ece4Schristos                 break;
2688*3117ece4Schristos             }
2689*3117ece4Schristos         }
2690*3117ece4Schristos         /* reset and try again */
2691*3117ece4Schristos         *argptr = argOrig;
2692*3117ece4Schristos     }
2693*3117ece4Schristos     return matched;
2694*3117ece4Schristos }
2695*3117ece4Schristos 
2696*3117ece4Schristos /*-************************************
2697*3117ece4Schristos *  Main
2698*3117ece4Schristos **************************************/
2699*3117ece4Schristos 
2700*3117ece4Schristos int main(int argc, const char** argv)
2701*3117ece4Schristos {
2702*3117ece4Schristos     int i,
2703*3117ece4Schristos         filenamesStart=0,
2704*3117ece4Schristos         result;
2705*3117ece4Schristos     const char* exename=argv[0];
2706*3117ece4Schristos     const char* input_filename = NULL;
2707*3117ece4Schristos     const char* dictFileName = NULL;
2708*3117ece4Schristos     U32 main_pause = 0;
2709*3117ece4Schristos     int cLevelOpt = 0, cLevelRun = 0;
2710*3117ece4Schristos     int separateFiles = 0;
2711*3117ece4Schristos     double compressibility = COMPRESSIBILITY_DEFAULT;
2712*3117ece4Schristos     U32 memoTableLog = PARAM_UNSET;
2713*3117ece4Schristos     constraint_t target = { 0, 0, (U32)-1 };
2714*3117ece4Schristos 
2715*3117ece4Schristos     paramValues_t paramTarget = emptyParams();
2716*3117ece4Schristos     g_params = emptyParams();
2717*3117ece4Schristos 
2718*3117ece4Schristos     assert(argc>=1);   /* for exename */
2719*3117ece4Schristos 
2720*3117ece4Schristos     for(i=1; i<argc; i++) {
2721*3117ece4Schristos         const char* argument = argv[i];
2722*3117ece4Schristos         DEBUGOUTPUT("%d: %s\n", i, argument);
2723*3117ece4Schristos         assert(argument != NULL);
2724*3117ece4Schristos 
2725*3117ece4Schristos         if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
2726*3117ece4Schristos 
2727*3117ece4Schristos         if (longCommandWArg(&argument, "--optimize=")) {
2728*3117ece4Schristos             g_optimizer = 1;
2729*3117ece4Schristos             for ( ; ;) {
2730*3117ece4Schristos                 if(parse_params(&argument, &paramTarget)) { if(argument[0] == ',') { argument++; continue; } else break; }
2731*3117ece4Schristos                 PARSE_SUB_ARGS("compressionSpeed=" ,  "cSpeed=", target.cSpeed);
2732*3117ece4Schristos                 PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed);
2733*3117ece4Schristos                 PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem);
2734*3117ece4Schristos                 PARSE_SUB_ARGS("strict=", "stc=", g_strictness);
2735*3117ece4Schristos                 PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries);
2736*3117ece4Schristos                 PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog);
2737*3117ece4Schristos                 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
2738*3117ece4Schristos                 if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; }
2739*3117ece4Schristos 
2740*3117ece4Schristos                 DISPLAY("invalid optimization parameter \n");
2741*3117ece4Schristos                 return 1;
2742*3117ece4Schristos             }
2743*3117ece4Schristos 
2744*3117ece4Schristos             if (argument[0] != 0) {
2745*3117ece4Schristos                 DISPLAY("invalid --optimize= format\n");
2746*3117ece4Schristos                 return 1; /* check the end of string */
2747*3117ece4Schristos             }
2748*3117ece4Schristos             continue;
2749*3117ece4Schristos         } else if (longCommandWArg(&argument, "--zstd=")) {
2750*3117ece4Schristos         /* Decode command (note : aggregated commands are allowed) */
2751*3117ece4Schristos             g_singleRun = 1;
2752*3117ece4Schristos             for ( ; ;) {
2753*3117ece4Schristos                 if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; }
2754*3117ece4Schristos                 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
2755*3117ece4Schristos 
2756*3117ece4Schristos                 DISPLAY("invalid compression parameter \n");
2757*3117ece4Schristos                 return 1;
2758*3117ece4Schristos             }
2759*3117ece4Schristos 
2760*3117ece4Schristos             if (argument[0] != 0) {
2761*3117ece4Schristos                 DISPLAY("invalid --zstd= format\n");
2762*3117ece4Schristos                 return 1; /* check the end of string */
2763*3117ece4Schristos             }
2764*3117ece4Schristos             continue;
2765*3117ece4Schristos             /* if not return, success */
2766*3117ece4Schristos 
2767*3117ece4Schristos         } else if (longCommandWArg(&argument, "--display=")) {
2768*3117ece4Schristos             /* Decode command (note : aggregated commands are allowed) */
2769*3117ece4Schristos             memset(g_silenceParams, 1, sizeof(g_silenceParams));
2770*3117ece4Schristos             for ( ; ;) {
2771*3117ece4Schristos                 int found = 0;
2772*3117ece4Schristos                 varInds_t v;
2773*3117ece4Schristos                 for(v = 0; v < NUM_PARAMS; v++) {
2774*3117ece4Schristos                     if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) {
2775*3117ece4Schristos                         g_silenceParams[v] = 0;
2776*3117ece4Schristos                         found = 1;
2777*3117ece4Schristos                     }
2778*3117ece4Schristos                 }
2779*3117ece4Schristos                 if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) {
2780*3117ece4Schristos                     for(v = 0; v <= strt_ind; v++) {
2781*3117ece4Schristos                         g_silenceParams[v] = 0;
2782*3117ece4Schristos                     }
2783*3117ece4Schristos                     found = 1;
2784*3117ece4Schristos                 }
2785*3117ece4Schristos 
2786*3117ece4Schristos 
2787*3117ece4Schristos                 if(found) {
2788*3117ece4Schristos                     if(argument[0]==',') {
2789*3117ece4Schristos                         continue;
2790*3117ece4Schristos                     } else {
2791*3117ece4Schristos                         break;
2792*3117ece4Schristos                     }
2793*3117ece4Schristos                 }
2794*3117ece4Schristos                 DISPLAY("invalid parameter name parameter \n");
2795*3117ece4Schristos                 return 1;
2796*3117ece4Schristos             }
2797*3117ece4Schristos 
2798*3117ece4Schristos             if (argument[0] != 0) {
2799*3117ece4Schristos                 DISPLAY("invalid --display format\n");
2800*3117ece4Schristos                 return 1; /* check the end of string */
2801*3117ece4Schristos             }
2802*3117ece4Schristos             continue;
2803*3117ece4Schristos         } else if (argument[0]=='-') {
2804*3117ece4Schristos             argument++;
2805*3117ece4Schristos 
2806*3117ece4Schristos             while (argument[0]!=0) {
2807*3117ece4Schristos 
2808*3117ece4Schristos                 switch(argument[0])
2809*3117ece4Schristos                 {
2810*3117ece4Schristos                     /* Display help on usage */
2811*3117ece4Schristos                 case 'h' :
2812*3117ece4Schristos                 case 'H': usage(exename); usage_advanced(); return 0;
2813*3117ece4Schristos 
2814*3117ece4Schristos                     /* Pause at the end (hidden option) */
2815*3117ece4Schristos                 case 'p': main_pause = 1; argument++; break;
2816*3117ece4Schristos 
2817*3117ece4Schristos                     /* Sample compressibility (when no file provided) */
2818*3117ece4Schristos                 case 'P':
2819*3117ece4Schristos                     argument++;
2820*3117ece4Schristos                     {   U32 const proba32 = readU32FromChar(&argument);
2821*3117ece4Schristos                         compressibility = (double)proba32 / 100.;
2822*3117ece4Schristos                     }
2823*3117ece4Schristos                     break;
2824*3117ece4Schristos 
2825*3117ece4Schristos                     /* Run Single conf */
2826*3117ece4Schristos                 case 'S':
2827*3117ece4Schristos                     g_singleRun = 1;
2828*3117ece4Schristos                     argument++;
2829*3117ece4Schristos                     for ( ; ; ) {
2830*3117ece4Schristos                         switch(*argument)
2831*3117ece4Schristos                         {
2832*3117ece4Schristos                         case 'w':
2833*3117ece4Schristos                             argument++;
2834*3117ece4Schristos                             g_params.vals[wlog_ind] = readU32FromChar(&argument);
2835*3117ece4Schristos                             continue;
2836*3117ece4Schristos                         case 'c':
2837*3117ece4Schristos                             argument++;
2838*3117ece4Schristos                             g_params.vals[clog_ind] = readU32FromChar(&argument);
2839*3117ece4Schristos                             continue;
2840*3117ece4Schristos                         case 'h':
2841*3117ece4Schristos                             argument++;
2842*3117ece4Schristos                             g_params.vals[hlog_ind] = readU32FromChar(&argument);
2843*3117ece4Schristos                             continue;
2844*3117ece4Schristos                         case 's':
2845*3117ece4Schristos                             argument++;
2846*3117ece4Schristos                             g_params.vals[slog_ind] = readU32FromChar(&argument);
2847*3117ece4Schristos                             continue;
2848*3117ece4Schristos                         case 'l':  /* search length */
2849*3117ece4Schristos                             argument++;
2850*3117ece4Schristos                             g_params.vals[mml_ind] = readU32FromChar(&argument);
2851*3117ece4Schristos                             continue;
2852*3117ece4Schristos                         case 't':  /* target length */
2853*3117ece4Schristos                             argument++;
2854*3117ece4Schristos                             g_params.vals[tlen_ind] = readU32FromChar(&argument);
2855*3117ece4Schristos                             continue;
2856*3117ece4Schristos                         case 'S':  /* strategy */
2857*3117ece4Schristos                             argument++;
2858*3117ece4Schristos                             g_params.vals[strt_ind] = readU32FromChar(&argument);
2859*3117ece4Schristos                             continue;
2860*3117ece4Schristos                         case 'f':  /* forceAttachDict */
2861*3117ece4Schristos                             argument++;
2862*3117ece4Schristos                             g_params.vals[fadt_ind] = readU32FromChar(&argument);
2863*3117ece4Schristos                             continue;
2864*3117ece4Schristos                         case 'L':
2865*3117ece4Schristos                             {   argument++;
2866*3117ece4Schristos                                 cLevelRun = (int)readU32FromChar(&argument);
2867*3117ece4Schristos                                 g_params = emptyParams();
2868*3117ece4Schristos                                 continue;
2869*3117ece4Schristos                             }
2870*3117ece4Schristos                         default : ;
2871*3117ece4Schristos                         }
2872*3117ece4Schristos                         break;
2873*3117ece4Schristos                     }
2874*3117ece4Schristos 
2875*3117ece4Schristos                     break;
2876*3117ece4Schristos 
2877*3117ece4Schristos                     /* target level1 speed objective, in MB/s */
2878*3117ece4Schristos                 case 'T':
2879*3117ece4Schristos                     argument++;
2880*3117ece4Schristos                     g_target = readU32FromChar(&argument);
2881*3117ece4Schristos                     break;
2882*3117ece4Schristos 
2883*3117ece4Schristos                     /* cut input into blocks */
2884*3117ece4Schristos                 case 'B':
2885*3117ece4Schristos                     argument++;
2886*3117ece4Schristos                     g_blockSize = readU32FromChar(&argument);
2887*3117ece4Schristos                     DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10));
2888*3117ece4Schristos                     break;
2889*3117ece4Schristos 
2890*3117ece4Schristos                     /* caps runtime (in seconds) */
2891*3117ece4Schristos                 case 't':
2892*3117ece4Schristos                     argument++;
2893*3117ece4Schristos                     g_timeLimit_s = readU32FromChar(&argument);
2894*3117ece4Schristos                     break;
2895*3117ece4Schristos 
2896*3117ece4Schristos                 case 's':
2897*3117ece4Schristos                     argument++;
2898*3117ece4Schristos                     separateFiles = 1;
2899*3117ece4Schristos                     break;
2900*3117ece4Schristos 
2901*3117ece4Schristos                 case 'q':
2902*3117ece4Schristos                     while (argument[0] == 'q') { argument++; g_displayLevel--; }
2903*3117ece4Schristos                     break;
2904*3117ece4Schristos 
2905*3117ece4Schristos                 case 'v':
2906*3117ece4Schristos                     while (argument[0] == 'v') { argument++; g_displayLevel++; }
2907*3117ece4Schristos                     break;
2908*3117ece4Schristos 
2909*3117ece4Schristos                 /* load dictionary file (only applicable to the optimizer right now) */
2910*3117ece4Schristos                 case 'D':
2911*3117ece4Schristos                     if(i == argc - 1) { /* last argument, return error. */
2912*3117ece4Schristos                         DISPLAY("Dictionary file expected but not given : %d\n", i);
2913*3117ece4Schristos                         return 1;
2914*3117ece4Schristos                     } else {
2915*3117ece4Schristos                         i++;
2916*3117ece4Schristos                         dictFileName = argv[i];
2917*3117ece4Schristos                         argument += strlen(argument);
2918*3117ece4Schristos                     }
2919*3117ece4Schristos                     break;
2920*3117ece4Schristos 
2921*3117ece4Schristos                     /* Unknown command */
2922*3117ece4Schristos                 default : return badusage(exename);
2923*3117ece4Schristos                 }
2924*3117ece4Schristos             }
2925*3117ece4Schristos             continue;
2926*3117ece4Schristos         }   /* if (argument[0]=='-') */
2927*3117ece4Schristos 
2928*3117ece4Schristos         /* first provided filename is input */
2929*3117ece4Schristos         if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
2930*3117ece4Schristos     }
2931*3117ece4Schristos 
2932*3117ece4Schristos     /* Welcome message */
2933*3117ece4Schristos     DISPLAYLEVEL(2, WELCOME_MESSAGE);
2934*3117ece4Schristos 
2935*3117ece4Schristos     if (filenamesStart==0) {
2936*3117ece4Schristos         if (g_optimizer) {
2937*3117ece4Schristos             DISPLAY("Optimizer Expects File\n");
2938*3117ece4Schristos             return 1;
2939*3117ece4Schristos         } else {
2940*3117ece4Schristos             result = benchSample(compressibility, cLevelRun);
2941*3117ece4Schristos         }
2942*3117ece4Schristos     } else {
2943*3117ece4Schristos         if(separateFiles) {
2944*3117ece4Schristos             for(i = 0; i < argc - filenamesStart; i++) {
2945*3117ece4Schristos                 if (g_optimizer) {
2946*3117ece4Schristos                     result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2947*3117ece4Schristos                     if(result) { DISPLAY("Error on File %d", i); return result; }
2948*3117ece4Schristos                 } else {
2949*3117ece4Schristos                     result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun);
2950*3117ece4Schristos                     if(result) { DISPLAY("Error on File %d", i); return result; }
2951*3117ece4Schristos                 }
2952*3117ece4Schristos             }
2953*3117ece4Schristos         } else {
2954*3117ece4Schristos             if (g_optimizer) {
2955*3117ece4Schristos                 assert(filenamesStart < argc);
2956*3117ece4Schristos                 result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2957*3117ece4Schristos             } else {
2958*3117ece4Schristos                 result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun);
2959*3117ece4Schristos             }
2960*3117ece4Schristos         }
2961*3117ece4Schristos     }
2962*3117ece4Schristos 
2963*3117ece4Schristos     if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
2964*3117ece4Schristos 
2965*3117ece4Schristos     return result;
2966*3117ece4Schristos }
2967