xref: /netbsd-src/external/bsd/zstd/dist/programs/benchfn.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos 
12*3117ece4Schristos 
13*3117ece4Schristos /* *************************************
14*3117ece4Schristos *  Includes
15*3117ece4Schristos ***************************************/
16*3117ece4Schristos #include <stdlib.h>      /* malloc, free */
17*3117ece4Schristos #include <string.h>      /* memset */
18*3117ece4Schristos #include <assert.h>      /* assert */
19*3117ece4Schristos 
20*3117ece4Schristos #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
21*3117ece4Schristos #include "benchfn.h"
22*3117ece4Schristos 
23*3117ece4Schristos 
24*3117ece4Schristos /* *************************************
25*3117ece4Schristos *  Constants
26*3117ece4Schristos ***************************************/
27*3117ece4Schristos #define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
28*3117ece4Schristos #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
29*3117ece4Schristos 
30*3117ece4Schristos #define KB *(1 <<10)
31*3117ece4Schristos #define MB *(1 <<20)
32*3117ece4Schristos #define GB *(1U<<30)
33*3117ece4Schristos 
34*3117ece4Schristos 
35*3117ece4Schristos /* *************************************
36*3117ece4Schristos *  Debug errors
37*3117ece4Schristos ***************************************/
38*3117ece4Schristos #if defined(DEBUG) && (DEBUG >= 1)
39*3117ece4Schristos #  include <stdio.h>       /* fprintf */
40*3117ece4Schristos #  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
41*3117ece4Schristos #  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
42*3117ece4Schristos #else
43*3117ece4Schristos #  define DEBUGOUTPUT(...)
44*3117ece4Schristos #endif
45*3117ece4Schristos 
46*3117ece4Schristos 
47*3117ece4Schristos /* error without displaying */
48*3117ece4Schristos #define RETURN_QUIET_ERROR(retValue, ...) {           \
49*3117ece4Schristos     DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
50*3117ece4Schristos     DEBUGOUTPUT("Error : ");                          \
51*3117ece4Schristos     DEBUGOUTPUT(__VA_ARGS__);                         \
52*3117ece4Schristos     DEBUGOUTPUT(" \n");                               \
53*3117ece4Schristos     return retValue;                                  \
54*3117ece4Schristos }
55*3117ece4Schristos 
56*3117ece4Schristos /* Abort execution if a condition is not met */
57*3117ece4Schristos #define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
58*3117ece4Schristos 
59*3117ece4Schristos 
60*3117ece4Schristos /* *************************************
61*3117ece4Schristos *  Benchmarking an arbitrary function
62*3117ece4Schristos ***************************************/
63*3117ece4Schristos 
64*3117ece4Schristos int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
65*3117ece4Schristos {
66*3117ece4Schristos     return outcome.error_tag_never_ever_use_directly == 0;
67*3117ece4Schristos }
68*3117ece4Schristos 
69*3117ece4Schristos /* warning : this function will stop program execution if outcome is invalid !
70*3117ece4Schristos  *           check outcome validity first, using BMK_isValid_runResult() */
71*3117ece4Schristos BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72*3117ece4Schristos {
73*3117ece4Schristos     CONTROL(outcome.error_tag_never_ever_use_directly == 0);
74*3117ece4Schristos     return outcome.internal_never_ever_use_directly;
75*3117ece4Schristos }
76*3117ece4Schristos 
77*3117ece4Schristos size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
78*3117ece4Schristos {
79*3117ece4Schristos     CONTROL(outcome.error_tag_never_ever_use_directly != 0);
80*3117ece4Schristos     return outcome.error_result_never_ever_use_directly;
81*3117ece4Schristos }
82*3117ece4Schristos 
83*3117ece4Schristos static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
84*3117ece4Schristos {
85*3117ece4Schristos     BMK_runOutcome_t b;
86*3117ece4Schristos     memset(&b, 0, sizeof(b));
87*3117ece4Schristos     b.error_tag_never_ever_use_directly = 1;
88*3117ece4Schristos     b.error_result_never_ever_use_directly = errorResult;
89*3117ece4Schristos     return b;
90*3117ece4Schristos }
91*3117ece4Schristos 
92*3117ece4Schristos static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93*3117ece4Schristos {
94*3117ece4Schristos     BMK_runOutcome_t outcome;
95*3117ece4Schristos     outcome.error_tag_never_ever_use_directly = 0;
96*3117ece4Schristos     outcome.internal_never_ever_use_directly = runTime;
97*3117ece4Schristos     return outcome;
98*3117ece4Schristos }
99*3117ece4Schristos 
100*3117ece4Schristos 
101*3117ece4Schristos /* initFn will be measured once, benchFn will be measured `nbLoops` times */
102*3117ece4Schristos /* initFn is optional, provide NULL if none */
103*3117ece4Schristos /* benchFn must return a size_t value that errorFn can interpret */
104*3117ece4Schristos /* takes # of blocks and list of size & stuff for each. */
105*3117ece4Schristos /* can report result of benchFn for each block into blockResult. */
106*3117ece4Schristos /* blockResult is optional, provide NULL if this information is not required */
107*3117ece4Schristos /* note : time per loop can be reported as zero if run time < timer resolution */
108*3117ece4Schristos BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109*3117ece4Schristos                                    unsigned nbLoops)
110*3117ece4Schristos {
111*3117ece4Schristos     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
112*3117ece4Schristos 
113*3117ece4Schristos     /* init */
114*3117ece4Schristos     {   size_t i;
115*3117ece4Schristos         for(i = 0; i < p.blockCount; i++) {
116*3117ece4Schristos             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
117*3117ece4Schristos     }   }
118*3117ece4Schristos 
119*3117ece4Schristos     /* benchmark */
120*3117ece4Schristos     {   size_t dstSize = 0;
121*3117ece4Schristos         UTIL_time_t const clockStart = UTIL_getTime();
122*3117ece4Schristos         unsigned loopNb, blockNb;
123*3117ece4Schristos         if (p.initFn != NULL) p.initFn(p.initPayload);
124*3117ece4Schristos         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
125*3117ece4Schristos             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126*3117ece4Schristos                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127*3117ece4Schristos                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128*3117ece4Schristos                                    p.benchPayload);
129*3117ece4Schristos                 if (loopNb == 0) {
130*3117ece4Schristos                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131*3117ece4Schristos                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
132*3117ece4Schristos                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
133*3117ece4Schristos                             "Function benchmark failed on block %u (of size %u) with error %i",
134*3117ece4Schristos                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
135*3117ece4Schristos                     }
136*3117ece4Schristos                     dstSize += res;
137*3117ece4Schristos             }   }
138*3117ece4Schristos         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139*3117ece4Schristos 
140*3117ece4Schristos         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
141*3117ece4Schristos             BMK_runTime_t rt;
142*3117ece4Schristos             rt.nanoSecPerRun = (double)totalTime / nbLoops;
143*3117ece4Schristos             rt.sumOfReturn = dstSize;
144*3117ece4Schristos             return BMK_setValid_runTime(rt);
145*3117ece4Schristos     }   }
146*3117ece4Schristos }
147*3117ece4Schristos 
148*3117ece4Schristos 
149*3117ece4Schristos /* ====  Benchmarking any function, providing intermediate results  ==== */
150*3117ece4Schristos 
151*3117ece4Schristos struct BMK_timedFnState_s {
152*3117ece4Schristos     PTime timeSpent_ns;
153*3117ece4Schristos     PTime timeBudget_ns;
154*3117ece4Schristos     PTime runBudget_ns;
155*3117ece4Schristos     BMK_runTime_t fastestRun;
156*3117ece4Schristos     unsigned nbLoops;
157*3117ece4Schristos     UTIL_time_t coolTime;
158*3117ece4Schristos };  /* typedef'd to BMK_timedFnState_t within bench.h */
159*3117ece4Schristos 
160*3117ece4Schristos BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161*3117ece4Schristos {
162*3117ece4Schristos     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163*3117ece4Schristos     if (r == NULL) return NULL;   /* malloc() error */
164*3117ece4Schristos     BMK_resetTimedFnState(r, total_ms, run_ms);
165*3117ece4Schristos     return r;
166*3117ece4Schristos }
167*3117ece4Schristos 
168*3117ece4Schristos void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
169*3117ece4Schristos 
170*3117ece4Schristos BMK_timedFnState_t*
171*3117ece4Schristos BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
172*3117ece4Schristos {
173*3117ece4Schristos     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
174*3117ece4Schristos     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
175*3117ece4Schristos     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
176*3117ece4Schristos     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
177*3117ece4Schristos     if (buffer == NULL) return NULL;
178*3117ece4Schristos     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
179*3117ece4Schristos     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
180*3117ece4Schristos     BMK_resetTimedFnState(r, total_ms, run_ms);
181*3117ece4Schristos     return r;
182*3117ece4Schristos }
183*3117ece4Schristos 
184*3117ece4Schristos void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
185*3117ece4Schristos {
186*3117ece4Schristos     if (!total_ms) total_ms = 1 ;
187*3117ece4Schristos     if (!run_ms) run_ms = 1;
188*3117ece4Schristos     if (run_ms > total_ms) run_ms = total_ms;
189*3117ece4Schristos     timedFnState->timeSpent_ns = 0;
190*3117ece4Schristos     timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
191*3117ece4Schristos     timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
192*3117ece4Schristos     timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
193*3117ece4Schristos     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
194*3117ece4Schristos     timedFnState->nbLoops = 1;
195*3117ece4Schristos     timedFnState->coolTime = UTIL_getTime();
196*3117ece4Schristos }
197*3117ece4Schristos 
198*3117ece4Schristos /* Tells if nb of seconds set in timedFnState for all runs is spent.
199*3117ece4Schristos  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
200*3117ece4Schristos int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201*3117ece4Schristos {
202*3117ece4Schristos     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203*3117ece4Schristos }
204*3117ece4Schristos 
205*3117ece4Schristos 
206*3117ece4Schristos #undef MIN
207*3117ece4Schristos #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
208*3117ece4Schristos 
209*3117ece4Schristos #define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
210*3117ece4Schristos 
211*3117ece4Schristos BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212*3117ece4Schristos                                   BMK_benchParams_t p)
213*3117ece4Schristos {
214*3117ece4Schristos     PTime const runBudget_ns = cont->runBudget_ns;
215*3117ece4Schristos     PTime const runTimeMin_ns = runBudget_ns / 2;
216*3117ece4Schristos     int completed = 0;
217*3117ece4Schristos     BMK_runTime_t bestRunTime = cont->fastestRun;
218*3117ece4Schristos 
219*3117ece4Schristos     while (!completed) {
220*3117ece4Schristos         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
221*3117ece4Schristos 
222*3117ece4Schristos         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
223*3117ece4Schristos             return runResult;
224*3117ece4Schristos         }
225*3117ece4Schristos 
226*3117ece4Schristos         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
227*3117ece4Schristos             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
228*3117ece4Schristos 
229*3117ece4Schristos             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
230*3117ece4Schristos 
231*3117ece4Schristos             /* estimate nbLoops for next run to last approximately 1 second */
232*3117ece4Schristos             if (loopDuration_ns > ((double)runBudget_ns / 50)) {
233*3117ece4Schristos                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
234*3117ece4Schristos                 cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
235*3117ece4Schristos             } else {
236*3117ece4Schristos                 /* previous run was too short : blindly increase workload by x multiplier */
237*3117ece4Schristos                 const unsigned multiplier = 10;
238*3117ece4Schristos                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
239*3117ece4Schristos                 cont->nbLoops *= multiplier;
240*3117ece4Schristos             }
241*3117ece4Schristos 
242*3117ece4Schristos             if(loopDuration_ns < (double)runTimeMin_ns) {
243*3117ece4Schristos                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244*3117ece4Schristos                 assert(completed == 0);
245*3117ece4Schristos                 continue;
246*3117ece4Schristos             } else {
247*3117ece4Schristos                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248*3117ece4Schristos                     bestRunTime = newRunTime;
249*3117ece4Schristos                 }
250*3117ece4Schristos                 completed = 1;
251*3117ece4Schristos             }
252*3117ece4Schristos         }
253*3117ece4Schristos     }   /* while (!completed) */
254*3117ece4Schristos 
255*3117ece4Schristos     return BMK_setValid_runTime(bestRunTime);
256*3117ece4Schristos }
257