1a28cd43dSSascha Wildner /*
2a28cd43dSSascha Wildner * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3a28cd43dSSascha Wildner * All rights reserved.
4a28cd43dSSascha Wildner *
5a28cd43dSSascha Wildner * This source code is licensed under both the BSD-style license (found in the
6a28cd43dSSascha Wildner * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7a28cd43dSSascha Wildner * in the COPYING file in the root directory of this source tree).
8a28cd43dSSascha Wildner * You may select, at your option, one of the above-listed licenses.
9a28cd43dSSascha Wildner */
10a28cd43dSSascha Wildner
11a28cd43dSSascha Wildner
12a28cd43dSSascha Wildner
13a28cd43dSSascha Wildner /* *************************************
14a28cd43dSSascha Wildner * Includes
15a28cd43dSSascha Wildner ***************************************/
16a28cd43dSSascha Wildner #include <stdlib.h> /* malloc, free */
17a28cd43dSSascha Wildner #include <string.h> /* memset */
18a28cd43dSSascha Wildner #include <assert.h> /* assert */
19a28cd43dSSascha Wildner
20a28cd43dSSascha Wildner #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
21a28cd43dSSascha Wildner #include "benchfn.h"
22a28cd43dSSascha Wildner
23a28cd43dSSascha Wildner
24a28cd43dSSascha Wildner /* *************************************
25a28cd43dSSascha Wildner * Constants
26a28cd43dSSascha Wildner ***************************************/
/* Duration of one second, expressed in microseconds and in nanoseconds.
 * SEC_TO_MICRO is not defined in this file (presumably supplied by timefn.h - TODO confirm). */
#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */

/* Postfix size multipliers : written after a number,
 * e.g. `4 KB` expands to `4 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
33a28cd43dSSascha Wildner
34a28cd43dSSascha Wildner
35a28cd43dSSascha Wildner /* *************************************
36a28cd43dSSascha Wildner * Debug errors
37a28cd43dSSascha Wildner ***************************************/
/* Debug traces are only compiled in when DEBUG is defined to a value >= 1.
 * Otherwise DEBUGOUTPUT() expands to nothing and its arguments are never evaluated.
 * All statement-like macros below are wrapped in do { } while (0),
 * so that `MACRO(...);` behaves as exactly one statement,
 * including as the body of an un-braced if / else. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>   /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)
#endif


/* error without displaying :
 * traces file, line and the formatted message (debug builds only),
 * then makes the enclosing function return `retValue`. */
#define RETURN_QUIET_ERROR(retValue, ...)              \
    do {                                               \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DEBUGOUTPUT("Error : ");                       \
        DEBUGOUTPUT(__VA_ARGS__);                      \
        DEBUGOUTPUT(" \n");                            \
        return retValue;                               \
    } while (0)

/* Abort execution if a condition is not met.
 * The failed condition is traced in debug builds ; abort() is called in all builds. */
#define CONTROL(c)                             \
    do {                                       \
        if (!(c)) {                            \
            DEBUGOUTPUT("error: %s \n", #c);   \
            abort();                           \
        }                                      \
    } while (0)
58a28cd43dSSascha Wildner
59a28cd43dSSascha Wildner
60a28cd43dSSascha Wildner /* *************************************
61a28cd43dSSascha Wildner * Benchmarking an arbitrary function
62a28cd43dSSascha Wildner ***************************************/
63a28cd43dSSascha Wildner
/* Tells whether `outcome` carries a valid measurement.
 * @return : 1 when the error tag is clear, 0 when it is set. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const hasError = (outcome.error_tag_never_ever_use_directly != 0);
    return !hasError;
}
68a28cd43dSSascha Wildner
69a28cd43dSSascha Wildner /* warning : this function will stop program execution if outcome is invalid !
70a28cd43dSSascha Wildner * check outcome validity first, using BMK_isValid_runResult() */
BMK_extract_runTime(BMK_runOutcome_t outcome)71a28cd43dSSascha Wildner BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72a28cd43dSSascha Wildner {
73a28cd43dSSascha Wildner CONTROL(outcome.error_tag_never_ever_use_directly == 0);
74a28cd43dSSascha Wildner return outcome.internal_never_ever_use_directly;
75a28cd43dSSascha Wildner }
76a28cd43dSSascha Wildner
/* Returns the error value stored inside `outcome`.
 * warning : this function aborts the program (through CONTROL) if `outcome` is NOT an error.
 * Only call it after BMK_isSuccessful_runOutcome() returned 0. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    CONTROL(outcome.error_tag_never_ever_use_directly != 0);
    {   size_t const errorValue = outcome.error_result_never_ever_use_directly;
        return errorValue;
    }
}
82a28cd43dSSascha Wildner
BMK_runOutcome_error(size_t errorResult)83a28cd43dSSascha Wildner static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
84a28cd43dSSascha Wildner {
85a28cd43dSSascha Wildner BMK_runOutcome_t b;
86a28cd43dSSascha Wildner memset(&b, 0, sizeof(b));
87a28cd43dSSascha Wildner b.error_tag_never_ever_use_directly = 1;
88a28cd43dSSascha Wildner b.error_result_never_ever_use_directly = errorResult;
89a28cd43dSSascha Wildner return b;
90a28cd43dSSascha Wildner }
91a28cd43dSSascha Wildner
BMK_setValid_runTime(BMK_runTime_t runTime)92a28cd43dSSascha Wildner static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93a28cd43dSSascha Wildner {
94a28cd43dSSascha Wildner BMK_runOutcome_t outcome;
95a28cd43dSSascha Wildner outcome.error_tag_never_ever_use_directly = 0;
96a28cd43dSSascha Wildner outcome.internal_never_ever_use_directly = runTime;
97a28cd43dSSascha Wildner return outcome;
98a28cd43dSSascha Wildner }
99a28cd43dSSascha Wildner
100a28cd43dSSascha Wildner
101a28cd43dSSascha Wildner /* initFn will be measured once, benchFn will be measured `nbLoops` times */
102a28cd43dSSascha Wildner /* initFn is optional, provide NULL if none */
103a28cd43dSSascha Wildner /* benchFn must return a size_t value that errorFn can interpret */
104a28cd43dSSascha Wildner /* takes # of blocks and list of size & stuff for each. */
105a28cd43dSSascha Wildner /* can report result of benchFn for each block into blockResult. */
106a28cd43dSSascha Wildner /* blockResult is optional, provide NULL if this information is not required */
107a28cd43dSSascha Wildner /* note : time per loop can be reported as zero if run time < timer resolution */
BMK_benchFunction(BMK_benchParams_t p,unsigned nbLoops)108a28cd43dSSascha Wildner BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109a28cd43dSSascha Wildner unsigned nbLoops)
110a28cd43dSSascha Wildner {
111a28cd43dSSascha Wildner size_t dstSize = 0;
112a28cd43dSSascha Wildner nbLoops += !nbLoops; /* minimum nbLoops is 1 */
113a28cd43dSSascha Wildner
114a28cd43dSSascha Wildner /* init */
115a28cd43dSSascha Wildner { size_t i;
116a28cd43dSSascha Wildner for(i = 0; i < p.blockCount; i++) {
117a28cd43dSSascha Wildner memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
118a28cd43dSSascha Wildner } }
119a28cd43dSSascha Wildner
120a28cd43dSSascha Wildner /* benchmark */
121a28cd43dSSascha Wildner { UTIL_time_t const clockStart = UTIL_getTime();
122a28cd43dSSascha Wildner unsigned loopNb, blockNb;
123a28cd43dSSascha Wildner if (p.initFn != NULL) p.initFn(p.initPayload);
124a28cd43dSSascha Wildner for (loopNb = 0; loopNb < nbLoops; loopNb++) {
125a28cd43dSSascha Wildner for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126a28cd43dSSascha Wildner size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127a28cd43dSSascha Wildner p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128a28cd43dSSascha Wildner p.benchPayload);
129a28cd43dSSascha Wildner if (loopNb == 0) {
130a28cd43dSSascha Wildner if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131a28cd43dSSascha Wildner if ((p.errorFn != NULL) && (p.errorFn(res))) {
132a28cd43dSSascha Wildner RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
133a28cd43dSSascha Wildner "Function benchmark failed on block %u (of size %u) with error %i",
134a28cd43dSSascha Wildner blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
135a28cd43dSSascha Wildner }
136a28cd43dSSascha Wildner dstSize += res;
137a28cd43dSSascha Wildner } }
138a28cd43dSSascha Wildner } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139a28cd43dSSascha Wildner
140a28cd43dSSascha Wildner { PTime const totalTime = UTIL_clockSpanNano(clockStart);
141a28cd43dSSascha Wildner BMK_runTime_t rt;
142a28cd43dSSascha Wildner rt.nanoSecPerRun = (double)totalTime / nbLoops;
143a28cd43dSSascha Wildner rt.sumOfReturn = dstSize;
144a28cd43dSSascha Wildner return BMK_setValid_runTime(rt);
145a28cd43dSSascha Wildner } }
146a28cd43dSSascha Wildner }
147a28cd43dSSascha Wildner
148a28cd43dSSascha Wildner
149a28cd43dSSascha Wildner /* ==== Benchmarking any function, providing intermediate results ==== */
150a28cd43dSSascha Wildner
/* State carried across successive BMK_benchTimedFn() calls.
 * Fields suffixed _ns are expressed in nanoseconds.
 * Layout matters : BMK_initStatic_timedFnState() statically checks
 * that BMK_timedFnState_shell is at least as large as this structure. */
struct BMK_timedFnState_s {
    PTime timeSpent_ns;   /* cumulated duration of all runs performed so far */
    PTime timeBudget_ns;   /* total budget ; BMK_isCompleted_TimedFn() compares timeSpent_ns against it */
    PTime runBudget_ns;   /* target duration of a single BMK_benchTimedFn() run */
    BMK_runTime_t fastestRun;   /* initial "best" value read by BMK_benchTimedFn() ; reset to a very large sentinel */
    unsigned nbLoops;   /* nb of loops requested from BMK_benchFunction() at next run */
    UTIL_time_t coolTime;   /* timestamp taken by BMK_resetTimedFnState() ; not read by any code visible in this file */
}; /* typedef'd to BMK_timedFnState_t in a header (original note says bench.h ; this file includes benchfn.h - TODO confirm) */
159a28cd43dSSascha Wildner
BMK_createTimedFnState(unsigned total_ms,unsigned run_ms)160a28cd43dSSascha Wildner BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161a28cd43dSSascha Wildner {
162a28cd43dSSascha Wildner BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163a28cd43dSSascha Wildner if (r == NULL) return NULL; /* malloc() error */
164a28cd43dSSascha Wildner BMK_resetTimedFnState(r, total_ms, run_ms);
165a28cd43dSSascha Wildner return r;
166a28cd43dSSascha Wildner }
167a28cd43dSSascha Wildner
/* Releases a state with free() ; passing NULL is allowed (free(NULL) is a no-op). */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}
169a28cd43dSSascha Wildner
170a28cd43dSSascha Wildner BMK_timedFnState_t*
BMK_initStatic_timedFnState(void * buffer,size_t size,unsigned total_ms,unsigned run_ms)171a28cd43dSSascha Wildner BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
172a28cd43dSSascha Wildner {
173a28cd43dSSascha Wildner typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
174a28cd43dSSascha Wildner typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
175a28cd43dSSascha Wildner size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
176a28cd43dSSascha Wildner BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
177a28cd43dSSascha Wildner if (buffer == NULL) return NULL;
178a28cd43dSSascha Wildner if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
179a28cd43dSSascha Wildner if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
180a28cd43dSSascha Wildner BMK_resetTimedFnState(r, total_ms, run_ms);
181a28cd43dSSascha Wildner return r;
182a28cd43dSSascha Wildner }
183a28cd43dSSascha Wildner
/* (Re)initializes `timedFnState` for a new measurement session.
 * `total_ms` : total time budget, in milliseconds ; 0 is replaced by 1.
 * `run_ms`   : budget of a single run, in milliseconds ; 0 is replaced by 1,
 *              and the value is capped to the total budget.
 * Time spent goes back to 0, nbLoops to 1,
 * and the fastest run is set to a sentinel meant to exceed any real measurement. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    unsigned const totalBudget_ms = (total_ms == 0) ? 1 : total_ms;
    unsigned runBudget_ms = (run_ms == 0) ? 1 : run_ms;
    if (runBudget_ms > totalBudget_ms) runBudget_ms = totalBudget_ms;

    /* budgets : milliseconds converted to nanoseconds */
    timedFnState->timeBudget_ns = (PTime)totalBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)runBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->timeSpent_ns = 0;

    /* hopefully large enough : must be larger than any potential measurement */
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);

    timedFnState->nbLoops = 1;
    timedFnState->coolTime = UTIL_getTime();
}
197a28cd43dSSascha Wildner
198a28cd43dSSascha Wildner /* Tells if nb of seconds set in timedFnState for all runs is spent.
199a28cd43dSSascha Wildner * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
BMK_isCompleted_TimedFn(const BMK_timedFnState_t * timedFnState)200a28cd43dSSascha Wildner int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201a28cd43dSSascha Wildner {
202a28cd43dSSascha Wildner return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203a28cd43dSSascha Wildner }
204a28cd43dSSascha Wildner
205a28cd43dSSascha Wildner
/* Local MIN() : any previous definition is discarded first.
 * Beware : the selected argument is evaluated twice, so avoid arguments with side effects. */
#undef MIN
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )

/* NOTE(review): MINUSABLETIME is not referenced by any code visible in this file */
#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
210a28cd43dSSascha Wildner
BMK_benchTimedFn(BMK_timedFnState_t * cont,BMK_benchParams_t p)211a28cd43dSSascha Wildner BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212a28cd43dSSascha Wildner BMK_benchParams_t p)
213a28cd43dSSascha Wildner {
214a28cd43dSSascha Wildner PTime const runBudget_ns = cont->runBudget_ns;
215a28cd43dSSascha Wildner PTime const runTimeMin_ns = runBudget_ns / 2;
216a28cd43dSSascha Wildner int completed = 0;
217a28cd43dSSascha Wildner BMK_runTime_t bestRunTime = cont->fastestRun;
218a28cd43dSSascha Wildner
219a28cd43dSSascha Wildner while (!completed) {
220a28cd43dSSascha Wildner BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
221a28cd43dSSascha Wildner
222a28cd43dSSascha Wildner if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
223a28cd43dSSascha Wildner return runResult;
224a28cd43dSSascha Wildner }
225a28cd43dSSascha Wildner
226a28cd43dSSascha Wildner { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
227a28cd43dSSascha Wildner double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
228a28cd43dSSascha Wildner
229a28cd43dSSascha Wildner cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
230a28cd43dSSascha Wildner
231a28cd43dSSascha Wildner /* estimate nbLoops for next run to last approximately 1 second */
232a28cd43dSSascha Wildner if (loopDuration_ns > (runBudget_ns / 50)) {
233a28cd43dSSascha Wildner double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
234a28cd43dSSascha Wildner cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
235a28cd43dSSascha Wildner } else {
236a28cd43dSSascha Wildner /* previous run was too short : blindly increase workload by x multiplier */
237a28cd43dSSascha Wildner const unsigned multiplier = 10;
238a28cd43dSSascha Wildner assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
239a28cd43dSSascha Wildner cont->nbLoops *= multiplier;
240a28cd43dSSascha Wildner }
241a28cd43dSSascha Wildner
242a28cd43dSSascha Wildner if(loopDuration_ns < runTimeMin_ns) {
243a28cd43dSSascha Wildner /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244a28cd43dSSascha Wildner assert(completed == 0);
245a28cd43dSSascha Wildner continue;
246a28cd43dSSascha Wildner } else {
247a28cd43dSSascha Wildner if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248a28cd43dSSascha Wildner bestRunTime = newRunTime;
249a28cd43dSSascha Wildner }
250a28cd43dSSascha Wildner completed = 1;
251a28cd43dSSascha Wildner }
252a28cd43dSSascha Wildner }
253a28cd43dSSascha Wildner } /* while (!completed) */
254a28cd43dSSascha Wildner
255a28cd43dSSascha Wildner return BMK_setValid_runTime(bestRunTime);
256a28cd43dSSascha Wildner }
257