1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos 12*3117ece4Schristos 13*3117ece4Schristos /* ************************************* 14*3117ece4Schristos * Includes 15*3117ece4Schristos ***************************************/ 16*3117ece4Schristos #include <stdlib.h> /* malloc, free */ 17*3117ece4Schristos #include <string.h> /* memset */ 18*3117ece4Schristos #include <assert.h> /* assert */ 19*3117ece4Schristos 20*3117ece4Schristos #include "timefn.h" /* UTIL_time_t, UTIL_getTime */ 21*3117ece4Schristos #include "benchfn.h" 22*3117ece4Schristos 23*3117ece4Schristos 24*3117ece4Schristos /* ************************************* 25*3117ece4Schristos * Constants 26*3117ece4Schristos ***************************************/ 27*3117ece4Schristos #define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */ 28*3117ece4Schristos #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ 29*3117ece4Schristos 30*3117ece4Schristos #define KB *(1 <<10) 31*3117ece4Schristos #define MB *(1 <<20) 32*3117ece4Schristos #define GB *(1U<<30) 33*3117ece4Schristos 34*3117ece4Schristos 35*3117ece4Schristos /* ************************************* 36*3117ece4Schristos * Debug errors 37*3117ece4Schristos ***************************************/ 38*3117ece4Schristos #if defined(DEBUG) && (DEBUG >= 1) 39*3117ece4Schristos # include <stdio.h> /* fprintf */ 40*3117ece4Schristos # define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 41*3117ece4Schristos # define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } 42*3117ece4Schristos #else 43*3117ece4Schristos # define DEBUGOUTPUT(...) 44*3117ece4Schristos #endif 45*3117ece4Schristos 46*3117ece4Schristos 47*3117ece4Schristos /* error without displaying */ 48*3117ece4Schristos #define RETURN_QUIET_ERROR(retValue, ...) { \ 49*3117ece4Schristos DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 50*3117ece4Schristos DEBUGOUTPUT("Error : "); \ 51*3117ece4Schristos DEBUGOUTPUT(__VA_ARGS__); \ 52*3117ece4Schristos DEBUGOUTPUT(" \n"); \ 53*3117ece4Schristos return retValue; \ 54*3117ece4Schristos } 55*3117ece4Schristos 56*3117ece4Schristos /* Abort execution if a condition is not met */ 57*3117ece4Schristos #define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } 58*3117ece4Schristos 59*3117ece4Schristos 60*3117ece4Schristos /* ************************************* 61*3117ece4Schristos * Benchmarking an arbitrary function 62*3117ece4Schristos ***************************************/ 63*3117ece4Schristos 64*3117ece4Schristos int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) 65*3117ece4Schristos { 66*3117ece4Schristos return outcome.error_tag_never_ever_use_directly == 0; 67*3117ece4Schristos } 68*3117ece4Schristos 69*3117ece4Schristos /* warning : this function will stop program execution if outcome is invalid ! 70*3117ece4Schristos * check outcome validity first, using BMK_isValid_runResult() */ 71*3117ece4Schristos BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) 72*3117ece4Schristos { 73*3117ece4Schristos CONTROL(outcome.error_tag_never_ever_use_directly == 0); 74*3117ece4Schristos return outcome.internal_never_ever_use_directly; 75*3117ece4Schristos } 76*3117ece4Schristos 77*3117ece4Schristos size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) 78*3117ece4Schristos { 79*3117ece4Schristos CONTROL(outcome.error_tag_never_ever_use_directly != 0); 80*3117ece4Schristos return outcome.error_result_never_ever_use_directly; 81*3117ece4Schristos } 82*3117ece4Schristos 83*3117ece4Schristos static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) 84*3117ece4Schristos { 85*3117ece4Schristos BMK_runOutcome_t b; 86*3117ece4Schristos memset(&b, 0, sizeof(b)); 87*3117ece4Schristos b.error_tag_never_ever_use_directly = 1; 88*3117ece4Schristos b.error_result_never_ever_use_directly = errorResult; 89*3117ece4Schristos return b; 90*3117ece4Schristos } 91*3117ece4Schristos 92*3117ece4Schristos static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) 93*3117ece4Schristos { 94*3117ece4Schristos BMK_runOutcome_t outcome; 95*3117ece4Schristos outcome.error_tag_never_ever_use_directly = 0; 96*3117ece4Schristos outcome.internal_never_ever_use_directly = runTime; 97*3117ece4Schristos return outcome; 98*3117ece4Schristos } 99*3117ece4Schristos 100*3117ece4Schristos 101*3117ece4Schristos /* initFn will be measured once, benchFn will be measured `nbLoops` times */ 102*3117ece4Schristos /* initFn is optional, provide NULL if none */ 103*3117ece4Schristos /* benchFn must return a size_t value that errorFn can interpret */ 104*3117ece4Schristos /* takes # of blocks and list of size & stuff for each. */ 105*3117ece4Schristos /* can report result of benchFn for each block into blockResult. */ 106*3117ece4Schristos /* blockResult is optional, provide NULL if this information is not required */ 107*3117ece4Schristos /* note : time per loop can be reported as zero if run time < timer resolution */ 108*3117ece4Schristos BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, 109*3117ece4Schristos unsigned nbLoops) 110*3117ece4Schristos { 111*3117ece4Schristos nbLoops += !nbLoops; /* minimum nbLoops is 1 */ 112*3117ece4Schristos 113*3117ece4Schristos /* init */ 114*3117ece4Schristos { size_t i; 115*3117ece4Schristos for(i = 0; i < p.blockCount; i++) { 116*3117ece4Schristos memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ 117*3117ece4Schristos } } 118*3117ece4Schristos 119*3117ece4Schristos /* benchmark */ 120*3117ece4Schristos { size_t dstSize = 0; 121*3117ece4Schristos UTIL_time_t const clockStart = UTIL_getTime(); 122*3117ece4Schristos unsigned loopNb, blockNb; 123*3117ece4Schristos if (p.initFn != NULL) p.initFn(p.initPayload); 124*3117ece4Schristos for (loopNb = 0; loopNb < nbLoops; loopNb++) { 125*3117ece4Schristos for (blockNb = 0; blockNb < p.blockCount; blockNb++) { 126*3117ece4Schristos size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], 127*3117ece4Schristos p.dstBuffers[blockNb], p.dstCapacities[blockNb], 128*3117ece4Schristos p.benchPayload); 129*3117ece4Schristos if (loopNb == 0) { 130*3117ece4Schristos if (p.blockResults != NULL) p.blockResults[blockNb] = res; 131*3117ece4Schristos if ((p.errorFn != NULL) && (p.errorFn(res))) { 132*3117ece4Schristos RETURN_QUIET_ERROR(BMK_runOutcome_error(res), 133*3117ece4Schristos "Function benchmark failed on block %u (of size %u) with error %i", 134*3117ece4Schristos blockNb, (unsigned)p.srcSizes[blockNb], (int)res); 135*3117ece4Schristos } 136*3117ece4Schristos dstSize += res; 137*3117ece4Schristos } } 138*3117ece4Schristos } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ 139*3117ece4Schristos 140*3117ece4Schristos { PTime const totalTime = UTIL_clockSpanNano(clockStart); 141*3117ece4Schristos BMK_runTime_t rt; 142*3117ece4Schristos rt.nanoSecPerRun = (double)totalTime / nbLoops; 143*3117ece4Schristos rt.sumOfReturn = dstSize; 144*3117ece4Schristos return BMK_setValid_runTime(rt); 145*3117ece4Schristos } } 146*3117ece4Schristos } 147*3117ece4Schristos 148*3117ece4Schristos 149*3117ece4Schristos /* ==== Benchmarking any function, providing intermediate results ==== */ 150*3117ece4Schristos 151*3117ece4Schristos struct BMK_timedFnState_s { 152*3117ece4Schristos PTime timeSpent_ns; 153*3117ece4Schristos PTime timeBudget_ns; 154*3117ece4Schristos PTime runBudget_ns; 155*3117ece4Schristos BMK_runTime_t fastestRun; 156*3117ece4Schristos unsigned nbLoops; 157*3117ece4Schristos UTIL_time_t coolTime; 158*3117ece4Schristos }; /* typedef'd to BMK_timedFnState_t within bench.h */ 159*3117ece4Schristos 160*3117ece4Schristos BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) 161*3117ece4Schristos { 162*3117ece4Schristos BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); 163*3117ece4Schristos if (r == NULL) return NULL; /* malloc() error */ 164*3117ece4Schristos BMK_resetTimedFnState(r, total_ms, run_ms); 165*3117ece4Schristos return r; 166*3117ece4Schristos } 167*3117ece4Schristos 168*3117ece4Schristos void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } 169*3117ece4Schristos 170*3117ece4Schristos BMK_timedFnState_t* 171*3117ece4Schristos BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) 172*3117ece4Schristos { 173*3117ece4Schristos typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ 174*3117ece4Schristos typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ 175*3117ece4Schristos size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ 176*3117ece4Schristos BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; 177*3117ece4Schristos if (buffer == NULL) return NULL; 178*3117ece4Schristos if (size < sizeof(struct BMK_timedFnState_s)) return NULL; 179*3117ece4Schristos if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ 180*3117ece4Schristos BMK_resetTimedFnState(r, total_ms, run_ms); 181*3117ece4Schristos return r; 182*3117ece4Schristos } 183*3117ece4Schristos 184*3117ece4Schristos void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) 185*3117ece4Schristos { 186*3117ece4Schristos if (!total_ms) total_ms = 1 ; 187*3117ece4Schristos if (!run_ms) run_ms = 1; 188*3117ece4Schristos if (run_ms > total_ms) run_ms = total_ms; 189*3117ece4Schristos timedFnState->timeSpent_ns = 0; 190*3117ece4Schristos timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; 191*3117ece4Schristos timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; 192*3117ece4Schristos timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ 193*3117ece4Schristos timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); 194*3117ece4Schristos timedFnState->nbLoops = 1; 195*3117ece4Schristos timedFnState->coolTime = UTIL_getTime(); 196*3117ece4Schristos } 197*3117ece4Schristos 198*3117ece4Schristos /* Tells if nb of seconds set in timedFnState for all runs is spent. 199*3117ece4Schristos * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ 200*3117ece4Schristos int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) 201*3117ece4Schristos { 202*3117ece4Schristos return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); 203*3117ece4Schristos } 204*3117ece4Schristos 205*3117ece4Schristos 206*3117ece4Schristos #undef MIN 207*3117ece4Schristos #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 208*3117ece4Schristos 209*3117ece4Schristos #define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ 210*3117ece4Schristos 211*3117ece4Schristos BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, 212*3117ece4Schristos BMK_benchParams_t p) 213*3117ece4Schristos { 214*3117ece4Schristos PTime const runBudget_ns = cont->runBudget_ns; 215*3117ece4Schristos PTime const runTimeMin_ns = runBudget_ns / 2; 216*3117ece4Schristos int completed = 0; 217*3117ece4Schristos BMK_runTime_t bestRunTime = cont->fastestRun; 218*3117ece4Schristos 219*3117ece4Schristos while (!completed) { 220*3117ece4Schristos BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); 221*3117ece4Schristos 222*3117ece4Schristos if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ 223*3117ece4Schristos return runResult; 224*3117ece4Schristos } 225*3117ece4Schristos 226*3117ece4Schristos { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); 227*3117ece4Schristos double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; 228*3117ece4Schristos 229*3117ece4Schristos cont->timeSpent_ns += (unsigned long long)loopDuration_ns; 230*3117ece4Schristos 231*3117ece4Schristos /* estimate nbLoops for next run to last approximately 1 second */ 232*3117ece4Schristos if (loopDuration_ns > ((double)runBudget_ns / 50)) { 233*3117ece4Schristos double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); 234*3117ece4Schristos cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1; 235*3117ece4Schristos } else { 236*3117ece4Schristos /* previous run was too short : blindly increase workload by x multiplier */ 237*3117ece4Schristos const unsigned multiplier = 10; 238*3117ece4Schristos assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ 239*3117ece4Schristos cont->nbLoops *= multiplier; 240*3117ece4Schristos } 241*3117ece4Schristos 242*3117ece4Schristos if(loopDuration_ns < (double)runTimeMin_ns) { 243*3117ece4Schristos /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ 244*3117ece4Schristos assert(completed == 0); 245*3117ece4Schristos continue; 246*3117ece4Schristos } else { 247*3117ece4Schristos if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { 248*3117ece4Schristos bestRunTime = newRunTime; 249*3117ece4Schristos } 250*3117ece4Schristos completed = 1; 251*3117ece4Schristos } 252*3117ece4Schristos } 253*3117ece4Schristos } /* while (!completed) */ 254*3117ece4Schristos 255*3117ece4Schristos return BMK_setValid_runTime(bestRunTime); 256*3117ece4Schristos } 257