131914882SAlex Richardson /* 231914882SAlex Richardson * Microbenchmark for math functions. 331914882SAlex Richardson * 4*072a4ba8SAndrew Turner * Copyright (c) 2018-2022, Arm Limited. 5*072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 631914882SAlex Richardson */ 731914882SAlex Richardson 831914882SAlex Richardson #undef _GNU_SOURCE 931914882SAlex Richardson #define _GNU_SOURCE 1 1031914882SAlex Richardson #include <stdint.h> 1131914882SAlex Richardson #include <stdlib.h> 1231914882SAlex Richardson #include <stdio.h> 1331914882SAlex Richardson #include <string.h> 1431914882SAlex Richardson #include <time.h> 1531914882SAlex Richardson #include <math.h> 1631914882SAlex Richardson #include "mathlib.h" 1731914882SAlex Richardson 1831914882SAlex Richardson #ifndef WANT_VMATH 1931914882SAlex Richardson /* Enable the build of vector math code. */ 2031914882SAlex Richardson # define WANT_VMATH 1 2131914882SAlex Richardson #endif 2231914882SAlex Richardson 2331914882SAlex Richardson /* Number of measurements, best result is reported. */ 2431914882SAlex Richardson #define MEASURE 60 2531914882SAlex Richardson /* Array size. */ 2631914882SAlex Richardson #define N 8000 2731914882SAlex Richardson /* Iterations over the array. */ 2831914882SAlex Richardson #define ITER 125 2931914882SAlex Richardson 3031914882SAlex Richardson static double *Trace; 3131914882SAlex Richardson static size_t trace_size; 3231914882SAlex Richardson static double A[N]; 3331914882SAlex Richardson static float Af[N]; 3431914882SAlex Richardson static long measurecount = MEASURE; 3531914882SAlex Richardson static long itercount = ITER; 3631914882SAlex Richardson 3731914882SAlex Richardson #if __aarch64__ && WANT_VMATH 3831914882SAlex Richardson typedef __f64x2_t v_double; 3931914882SAlex Richardson 4031914882SAlex Richardson #define v_double_len() 2 4131914882SAlex Richardson 4231914882SAlex Richardson static inline v_double 4331914882SAlex Richardson v_double_load (const double *p) 4431914882SAlex Richardson { 4531914882SAlex Richardson return (v_double){p[0], p[1]}; 4631914882SAlex Richardson } 4731914882SAlex Richardson 4831914882SAlex Richardson static inline v_double 4931914882SAlex Richardson v_double_dup (double x) 5031914882SAlex Richardson { 5131914882SAlex Richardson return (v_double){x, x}; 5231914882SAlex Richardson } 5331914882SAlex Richardson 5431914882SAlex Richardson typedef __f32x4_t v_float; 5531914882SAlex Richardson 5631914882SAlex Richardson #define v_float_len() 4 5731914882SAlex Richardson 5831914882SAlex Richardson static inline v_float 5931914882SAlex Richardson v_float_load (const float *p) 6031914882SAlex Richardson { 6131914882SAlex Richardson return (v_float){p[0], p[1], p[2], p[3]}; 6231914882SAlex Richardson } 6331914882SAlex Richardson 6431914882SAlex Richardson static inline v_float 6531914882SAlex Richardson v_float_dup (float x) 6631914882SAlex Richardson { 6731914882SAlex Richardson return (v_float){x, x, x, x}; 6831914882SAlex Richardson } 69*072a4ba8SAndrew Turner #if WANT_SVE_MATH 70*072a4ba8SAndrew Turner #include <arm_sve.h> 71*072a4ba8SAndrew Turner typedef svbool_t sv_bool; 72*072a4ba8SAndrew Turner typedef svfloat64_t sv_double; 73*072a4ba8SAndrew Turner 74*072a4ba8SAndrew Turner #define sv_double_len() svcntd() 75*072a4ba8SAndrew Turner 76*072a4ba8SAndrew Turner static inline sv_double 77*072a4ba8SAndrew Turner sv_double_load (const double *p) 78*072a4ba8SAndrew Turner { 79*072a4ba8SAndrew Turner svbool_t pg = svptrue_b64(); 80*072a4ba8SAndrew Turner return svld1(pg, p); 81*072a4ba8SAndrew Turner } 82*072a4ba8SAndrew Turner 83*072a4ba8SAndrew Turner static inline sv_double 84*072a4ba8SAndrew Turner sv_double_dup (double x) 85*072a4ba8SAndrew Turner { 86*072a4ba8SAndrew Turner return svdup_n_f64(x); 87*072a4ba8SAndrew Turner } 88*072a4ba8SAndrew Turner 89*072a4ba8SAndrew Turner typedef svfloat32_t sv_float; 90*072a4ba8SAndrew Turner 91*072a4ba8SAndrew Turner #define sv_float_len() svcntw() 92*072a4ba8SAndrew Turner 93*072a4ba8SAndrew Turner static inline sv_float 94*072a4ba8SAndrew Turner sv_float_load (const float *p) 95*072a4ba8SAndrew Turner { 96*072a4ba8SAndrew Turner svbool_t pg = svptrue_b32(); 97*072a4ba8SAndrew Turner return svld1(pg, p); 98*072a4ba8SAndrew Turner } 99*072a4ba8SAndrew Turner 100*072a4ba8SAndrew Turner static inline sv_float 101*072a4ba8SAndrew Turner sv_float_dup (float x) 102*072a4ba8SAndrew Turner { 103*072a4ba8SAndrew Turner return svdup_n_f32(x); 104*072a4ba8SAndrew Turner } 105*072a4ba8SAndrew Turner #endif 10631914882SAlex Richardson #else 10731914882SAlex Richardson /* dummy definitions to make things compile. */ 10831914882SAlex Richardson typedef double v_double; 10931914882SAlex Richardson typedef float v_float; 11031914882SAlex Richardson #define v_double_len(x) 1 11131914882SAlex Richardson #define v_double_load(x) (x)[0] 11231914882SAlex Richardson #define v_double_dup(x) (x) 11331914882SAlex Richardson #define v_float_len(x) 1 11431914882SAlex Richardson #define v_float_load(x) (x)[0] 11531914882SAlex Richardson #define v_float_dup(x) (x) 11631914882SAlex Richardson #endif 11731914882SAlex Richardson 11831914882SAlex Richardson static double 11931914882SAlex Richardson dummy (double x) 12031914882SAlex Richardson { 12131914882SAlex Richardson return x; 12231914882SAlex Richardson } 12331914882SAlex Richardson 12431914882SAlex Richardson static float 12531914882SAlex Richardson dummyf (float x) 12631914882SAlex Richardson { 12731914882SAlex Richardson return x; 12831914882SAlex Richardson } 12931914882SAlex Richardson #if WANT_VMATH 13031914882SAlex Richardson #if __aarch64__ 13131914882SAlex Richardson static v_double 13231914882SAlex Richardson __v_dummy (v_double x) 13331914882SAlex Richardson { 13431914882SAlex Richardson return x; 13531914882SAlex Richardson } 13631914882SAlex Richardson 13731914882SAlex Richardson static v_float 13831914882SAlex Richardson __v_dummyf (v_float x) 13931914882SAlex Richardson { 14031914882SAlex Richardson return x; 14131914882SAlex Richardson } 14231914882SAlex Richardson 14331914882SAlex Richardson #ifdef __vpcs 14431914882SAlex Richardson __vpcs static v_double 14531914882SAlex Richardson __vn_dummy (v_double x) 14631914882SAlex Richardson { 14731914882SAlex Richardson return x; 14831914882SAlex Richardson } 14931914882SAlex Richardson 15031914882SAlex Richardson __vpcs static v_float 15131914882SAlex Richardson __vn_dummyf (v_float x) 15231914882SAlex Richardson { 15331914882SAlex Richardson return x; 15431914882SAlex Richardson } 155*072a4ba8SAndrew Turner #endif 156*072a4ba8SAndrew Turner #if WANT_SVE_MATH 157*072a4ba8SAndrew Turner static sv_double 158*072a4ba8SAndrew Turner __sv_dummy (sv_double x, sv_bool pg) 15931914882SAlex Richardson { 160*072a4ba8SAndrew Turner return x; 16131914882SAlex Richardson } 16231914882SAlex Richardson 163*072a4ba8SAndrew Turner static sv_float 164*072a4ba8SAndrew Turner __sv_dummyf (sv_float x, sv_bool pg) 16531914882SAlex Richardson { 166*072a4ba8SAndrew Turner return x; 16731914882SAlex Richardson } 16831914882SAlex Richardson 169*072a4ba8SAndrew Turner #endif 170*072a4ba8SAndrew Turner #endif 17131914882SAlex Richardson #endif 17231914882SAlex Richardson 173*072a4ba8SAndrew Turner #include "test/mathbench_wrappers.h" 17431914882SAlex Richardson 17531914882SAlex Richardson static const struct fun 17631914882SAlex Richardson { 17731914882SAlex Richardson const char *name; 17831914882SAlex Richardson int prec; 17931914882SAlex Richardson int vec; 18031914882SAlex Richardson double lo; 18131914882SAlex Richardson double hi; 18231914882SAlex Richardson union 18331914882SAlex Richardson { 18431914882SAlex Richardson double (*d) (double); 18531914882SAlex Richardson float (*f) (float); 18631914882SAlex Richardson v_double (*vd) (v_double); 18731914882SAlex Richardson v_float (*vf) (v_float); 18831914882SAlex Richardson #ifdef __vpcs 18931914882SAlex Richardson __vpcs v_double (*vnd) (v_double); 19031914882SAlex Richardson __vpcs v_float (*vnf) (v_float); 19131914882SAlex Richardson #endif 192*072a4ba8SAndrew Turner #if WANT_SVE_MATH 193*072a4ba8SAndrew Turner sv_double (*svd) (sv_double, sv_bool); 194*072a4ba8SAndrew Turner sv_float (*svf) (sv_float, sv_bool); 195*072a4ba8SAndrew Turner #endif 19631914882SAlex Richardson } fun; 19731914882SAlex Richardson } funtab[] = { 19831914882SAlex Richardson #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, 19931914882SAlex Richardson #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}}, 20031914882SAlex Richardson #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}}, 20131914882SAlex Richardson #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}}, 20231914882SAlex Richardson #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, 20331914882SAlex Richardson #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, 204*072a4ba8SAndrew Turner #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, 205*072a4ba8SAndrew Turner #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, 20631914882SAlex Richardson D (dummy, 1.0, 2.0) 20731914882SAlex Richardson F (dummyf, 1.0, 2.0) 20831914882SAlex Richardson #if WANT_VMATH 20931914882SAlex Richardson #if __aarch64__ 21031914882SAlex Richardson VD (__v_dummy, 1.0, 2.0) 21131914882SAlex Richardson VF (__v_dummyf, 1.0, 2.0) 21231914882SAlex Richardson #ifdef __vpcs 21331914882SAlex Richardson VND (__vn_dummy, 1.0, 2.0) 21431914882SAlex Richardson VNF (__vn_dummyf, 1.0, 2.0) 215*072a4ba8SAndrew Turner #endif 216*072a4ba8SAndrew Turner #if WANT_SVE_MATH 217*072a4ba8SAndrew Turner SVD (__sv_dummy, 1.0, 2.0) 218*072a4ba8SAndrew Turner SVF (__sv_dummyf, 1.0, 2.0) 21931914882SAlex Richardson #endif 22031914882SAlex Richardson #endif 22131914882SAlex Richardson #endif 222*072a4ba8SAndrew Turner #include "test/mathbench_funcs.h" 22331914882SAlex Richardson {0}, 22431914882SAlex Richardson #undef F 22531914882SAlex Richardson #undef D 22631914882SAlex Richardson #undef VF 22731914882SAlex Richardson #undef VD 22831914882SAlex Richardson #undef VNF 22931914882SAlex Richardson #undef VND 230*072a4ba8SAndrew Turner #undef SVF 231*072a4ba8SAndrew Turner #undef SVD 23231914882SAlex Richardson }; 23331914882SAlex Richardson 23431914882SAlex Richardson static void 23531914882SAlex Richardson gen_linear (double lo, double hi) 23631914882SAlex Richardson { 23731914882SAlex Richardson for (int i = 0; i < N; i++) 23831914882SAlex Richardson A[i] = (lo * (N - i) + hi * i) / N; 23931914882SAlex Richardson } 24031914882SAlex Richardson 24131914882SAlex Richardson static void 24231914882SAlex Richardson genf_linear (double lo, double hi) 24331914882SAlex Richardson { 24431914882SAlex Richardson for (int i = 0; i < N; i++) 24531914882SAlex Richardson Af[i] = (float)(lo * (N - i) + hi * i) / N; 24631914882SAlex Richardson } 24731914882SAlex Richardson 24831914882SAlex Richardson static inline double 24931914882SAlex Richardson asdouble (uint64_t i) 25031914882SAlex Richardson { 25131914882SAlex Richardson union 25231914882SAlex Richardson { 25331914882SAlex Richardson uint64_t i; 25431914882SAlex Richardson double f; 25531914882SAlex Richardson } u = {i}; 25631914882SAlex Richardson return u.f; 25731914882SAlex Richardson } 25831914882SAlex Richardson 25931914882SAlex Richardson static uint64_t seed = 0x0123456789abcdef; 26031914882SAlex Richardson 26131914882SAlex Richardson static double 26231914882SAlex Richardson frand (double lo, double hi) 26331914882SAlex Richardson { 26431914882SAlex Richardson seed = 6364136223846793005ULL * seed + 1; 26531914882SAlex Richardson return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0); 26631914882SAlex Richardson } 26731914882SAlex Richardson 26831914882SAlex Richardson static void 26931914882SAlex Richardson gen_rand (double lo, double hi) 27031914882SAlex Richardson { 27131914882SAlex Richardson for (int i = 0; i < N; i++) 27231914882SAlex Richardson A[i] = frand (lo, hi); 27331914882SAlex Richardson } 27431914882SAlex Richardson 27531914882SAlex Richardson static void 27631914882SAlex Richardson genf_rand (double lo, double hi) 27731914882SAlex Richardson { 27831914882SAlex Richardson for (int i = 0; i < N; i++) 27931914882SAlex Richardson Af[i] = (float)frand (lo, hi); 28031914882SAlex Richardson } 28131914882SAlex Richardson 28231914882SAlex Richardson static void 28331914882SAlex Richardson gen_trace (int index) 28431914882SAlex Richardson { 28531914882SAlex Richardson for (int i = 0; i < N; i++) 28631914882SAlex Richardson A[i] = Trace[index + i]; 28731914882SAlex Richardson } 28831914882SAlex Richardson 28931914882SAlex Richardson static void 29031914882SAlex Richardson genf_trace (int index) 29131914882SAlex Richardson { 29231914882SAlex Richardson for (int i = 0; i < N; i++) 29331914882SAlex Richardson Af[i] = (float)Trace[index + i]; 29431914882SAlex Richardson } 29531914882SAlex Richardson 29631914882SAlex Richardson static void 29731914882SAlex Richardson run_thruput (double f (double)) 29831914882SAlex Richardson { 29931914882SAlex Richardson for (int i = 0; i < N; i++) 30031914882SAlex Richardson f (A[i]); 30131914882SAlex Richardson } 30231914882SAlex Richardson 30331914882SAlex Richardson static void 30431914882SAlex Richardson runf_thruput (float f (float)) 30531914882SAlex Richardson { 30631914882SAlex Richardson for (int i = 0; i < N; i++) 30731914882SAlex Richardson f (Af[i]); 30831914882SAlex Richardson } 30931914882SAlex Richardson 31031914882SAlex Richardson volatile double zero = 0; 31131914882SAlex Richardson 31231914882SAlex Richardson static void 31331914882SAlex Richardson run_latency (double f (double)) 31431914882SAlex Richardson { 31531914882SAlex Richardson double z = zero; 31631914882SAlex Richardson double prev = z; 31731914882SAlex Richardson for (int i = 0; i < N; i++) 31831914882SAlex Richardson prev = f (A[i] + prev * z); 31931914882SAlex Richardson } 32031914882SAlex Richardson 32131914882SAlex Richardson static void 32231914882SAlex Richardson runf_latency (float f (float)) 32331914882SAlex Richardson { 32431914882SAlex Richardson float z = (float)zero; 32531914882SAlex Richardson float prev = z; 32631914882SAlex Richardson for (int i = 0; i < N; i++) 32731914882SAlex Richardson prev = f (Af[i] + prev * z); 32831914882SAlex Richardson } 32931914882SAlex Richardson 33031914882SAlex Richardson static void 33131914882SAlex Richardson run_v_thruput (v_double f (v_double)) 33231914882SAlex Richardson { 33331914882SAlex Richardson for (int i = 0; i < N; i += v_double_len ()) 33431914882SAlex Richardson f (v_double_load (A+i)); 33531914882SAlex Richardson } 33631914882SAlex Richardson 33731914882SAlex Richardson static void 33831914882SAlex Richardson runf_v_thruput (v_float f (v_float)) 33931914882SAlex Richardson { 34031914882SAlex Richardson for (int i = 0; i < N; i += v_float_len ()) 34131914882SAlex Richardson f (v_float_load (Af+i)); 34231914882SAlex Richardson } 34331914882SAlex Richardson 34431914882SAlex Richardson static void 34531914882SAlex Richardson run_v_latency (v_double f (v_double)) 34631914882SAlex Richardson { 34731914882SAlex Richardson v_double z = v_double_dup (zero); 34831914882SAlex Richardson v_double prev = z; 34931914882SAlex Richardson for (int i = 0; i < N; i += v_double_len ()) 35031914882SAlex Richardson prev = f (v_double_load (A+i) + prev * z); 35131914882SAlex Richardson } 35231914882SAlex Richardson 35331914882SAlex Richardson static void 35431914882SAlex Richardson runf_v_latency (v_float f (v_float)) 35531914882SAlex Richardson { 35631914882SAlex Richardson v_float z = v_float_dup (zero); 35731914882SAlex Richardson v_float prev = z; 35831914882SAlex Richardson for (int i = 0; i < N; i += v_float_len ()) 35931914882SAlex Richardson prev = f (v_float_load (Af+i) + prev * z); 36031914882SAlex Richardson } 36131914882SAlex Richardson 36231914882SAlex Richardson #ifdef __vpcs 36331914882SAlex Richardson static void 36431914882SAlex Richardson run_vn_thruput (__vpcs v_double f (v_double)) 36531914882SAlex Richardson { 36631914882SAlex Richardson for (int i = 0; i < N; i += v_double_len ()) 36731914882SAlex Richardson f (v_double_load (A+i)); 36831914882SAlex Richardson } 36931914882SAlex Richardson 37031914882SAlex Richardson static void 37131914882SAlex Richardson runf_vn_thruput (__vpcs v_float f (v_float)) 37231914882SAlex Richardson { 37331914882SAlex Richardson for (int i = 0; i < N; i += v_float_len ()) 37431914882SAlex Richardson f (v_float_load (Af+i)); 37531914882SAlex Richardson } 37631914882SAlex Richardson 37731914882SAlex Richardson static void 37831914882SAlex Richardson run_vn_latency (__vpcs v_double f (v_double)) 37931914882SAlex Richardson { 38031914882SAlex Richardson v_double z = v_double_dup (zero); 38131914882SAlex Richardson v_double prev = z; 38231914882SAlex Richardson for (int i = 0; i < N; i += v_double_len ()) 38331914882SAlex Richardson prev = f (v_double_load (A+i) + prev * z); 38431914882SAlex Richardson } 38531914882SAlex Richardson 38631914882SAlex Richardson static void 38731914882SAlex Richardson runf_vn_latency (__vpcs v_float f (v_float)) 38831914882SAlex Richardson { 38931914882SAlex Richardson v_float z = v_float_dup (zero); 39031914882SAlex Richardson v_float prev = z; 39131914882SAlex Richardson for (int i = 0; i < N; i += v_float_len ()) 39231914882SAlex Richardson prev = f (v_float_load (Af+i) + prev * z); 39331914882SAlex Richardson } 39431914882SAlex Richardson #endif 39531914882SAlex Richardson 396*072a4ba8SAndrew Turner #if WANT_SVE_MATH 397*072a4ba8SAndrew Turner static void 398*072a4ba8SAndrew Turner run_sv_thruput (sv_double f (sv_double, sv_bool)) 399*072a4ba8SAndrew Turner { 400*072a4ba8SAndrew Turner for (int i = 0; i < N; i += sv_double_len ()) 401*072a4ba8SAndrew Turner f (sv_double_load (A+i), svptrue_b64 ()); 402*072a4ba8SAndrew Turner } 403*072a4ba8SAndrew Turner 404*072a4ba8SAndrew Turner static void 405*072a4ba8SAndrew Turner runf_sv_thruput (sv_float f (sv_float, sv_bool)) 406*072a4ba8SAndrew Turner { 407*072a4ba8SAndrew Turner for (int i = 0; i < N; i += sv_float_len ()) 408*072a4ba8SAndrew Turner f (sv_float_load (Af+i), svptrue_b32 ()); 409*072a4ba8SAndrew Turner } 410*072a4ba8SAndrew Turner 411*072a4ba8SAndrew Turner static void 412*072a4ba8SAndrew Turner run_sv_latency (sv_double f (sv_double, sv_bool)) 413*072a4ba8SAndrew Turner { 414*072a4ba8SAndrew Turner sv_double z = sv_double_dup (zero); 415*072a4ba8SAndrew Turner sv_double prev = z; 416*072a4ba8SAndrew Turner for (int i = 0; i < N; i += sv_double_len ()) 417*072a4ba8SAndrew Turner prev = f (svmad_f64_x (svptrue_b64 (), prev, z, sv_double_load (A+i)), svptrue_b64 ()); 418*072a4ba8SAndrew Turner } 419*072a4ba8SAndrew Turner 420*072a4ba8SAndrew Turner static void 421*072a4ba8SAndrew Turner runf_sv_latency (sv_float f (sv_float, sv_bool)) 422*072a4ba8SAndrew Turner { 423*072a4ba8SAndrew Turner sv_float z = sv_float_dup (zero); 424*072a4ba8SAndrew Turner sv_float prev = z; 425*072a4ba8SAndrew Turner for (int i = 0; i < N; i += sv_float_len ()) 426*072a4ba8SAndrew Turner prev = f (svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af+i)), svptrue_b32 ()); 427*072a4ba8SAndrew Turner } 428*072a4ba8SAndrew Turner #endif 429*072a4ba8SAndrew Turner 43031914882SAlex Richardson static uint64_t 43131914882SAlex Richardson tic (void) 43231914882SAlex Richardson { 43331914882SAlex Richardson struct timespec ts; 43431914882SAlex Richardson if (clock_gettime (CLOCK_REALTIME, &ts)) 43531914882SAlex Richardson abort (); 43631914882SAlex Richardson return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 43731914882SAlex Richardson } 43831914882SAlex Richardson 43931914882SAlex Richardson #define TIMEIT(run, f) do { \ 44031914882SAlex Richardson dt = -1; \ 44131914882SAlex Richardson run (f); /* Warm up. */ \ 44231914882SAlex Richardson for (int j = 0; j < measurecount; j++) \ 44331914882SAlex Richardson { \ 44431914882SAlex Richardson uint64_t t0 = tic (); \ 44531914882SAlex Richardson for (int i = 0; i < itercount; i++) \ 44631914882SAlex Richardson run (f); \ 44731914882SAlex Richardson uint64_t t1 = tic (); \ 44831914882SAlex Richardson if (t1 - t0 < dt) \ 44931914882SAlex Richardson dt = t1 - t0; \ 45031914882SAlex Richardson } \ 45131914882SAlex Richardson } while (0) 45231914882SAlex Richardson 45331914882SAlex Richardson static void 45431914882SAlex Richardson bench1 (const struct fun *f, int type, double lo, double hi) 45531914882SAlex Richardson { 45631914882SAlex Richardson uint64_t dt = 0; 45731914882SAlex Richardson uint64_t ns100; 45831914882SAlex Richardson const char *s = type == 't' ? "rthruput" : "latency"; 45931914882SAlex Richardson int vlen = 1; 46031914882SAlex Richardson 46131914882SAlex Richardson if (f->vec && f->prec == 'd') 46231914882SAlex Richardson vlen = v_double_len(); 46331914882SAlex Richardson else if (f->vec && f->prec == 'f') 46431914882SAlex Richardson vlen = v_float_len(); 46531914882SAlex Richardson 46631914882SAlex Richardson if (f->prec == 'd' && type == 't' && f->vec == 0) 46731914882SAlex Richardson TIMEIT (run_thruput, f->fun.d); 46831914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 0) 46931914882SAlex Richardson TIMEIT (run_latency, f->fun.d); 47031914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 0) 47131914882SAlex Richardson TIMEIT (runf_thruput, f->fun.f); 47231914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 0) 47331914882SAlex Richardson TIMEIT (runf_latency, f->fun.f); 47431914882SAlex Richardson else if (f->prec == 'd' && type == 't' && f->vec == 'v') 47531914882SAlex Richardson TIMEIT (run_v_thruput, f->fun.vd); 47631914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 'v') 47731914882SAlex Richardson TIMEIT (run_v_latency, f->fun.vd); 47831914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 'v') 47931914882SAlex Richardson TIMEIT (runf_v_thruput, f->fun.vf); 48031914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 'v') 48131914882SAlex Richardson TIMEIT (runf_v_latency, f->fun.vf); 48231914882SAlex Richardson #ifdef __vpcs 48331914882SAlex Richardson else if (f->prec == 'd' && type == 't' && f->vec == 'n') 48431914882SAlex Richardson TIMEIT (run_vn_thruput, f->fun.vnd); 48531914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 'n') 48631914882SAlex Richardson TIMEIT (run_vn_latency, f->fun.vnd); 48731914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 'n') 48831914882SAlex Richardson TIMEIT (runf_vn_thruput, f->fun.vnf); 48931914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 'n') 49031914882SAlex Richardson TIMEIT (runf_vn_latency, f->fun.vnf); 49131914882SAlex Richardson #endif 492*072a4ba8SAndrew Turner #if WANT_SVE_MATH 493*072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 't' && f->vec == 's') 494*072a4ba8SAndrew Turner TIMEIT (run_sv_thruput, f->fun.svd); 495*072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 'l' && f->vec == 's') 496*072a4ba8SAndrew Turner TIMEIT (run_sv_latency, f->fun.svd); 497*072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 't' && f->vec == 's') 498*072a4ba8SAndrew Turner TIMEIT (runf_sv_thruput, f->fun.svf); 499*072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 'l' && f->vec == 's') 500*072a4ba8SAndrew Turner TIMEIT (runf_sv_latency, f->fun.svf); 501*072a4ba8SAndrew Turner #endif 50231914882SAlex Richardson 50331914882SAlex Richardson if (type == 't') 50431914882SAlex Richardson { 50531914882SAlex Richardson ns100 = (100 * dt + itercount * N / 2) / (itercount * N); 50631914882SAlex Richardson printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s, 50731914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), 50831914882SAlex Richardson (unsigned long long) dt, lo, hi); 50931914882SAlex Richardson } 51031914882SAlex Richardson else if (type == 'l') 51131914882SAlex Richardson { 51231914882SAlex Richardson ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen); 51331914882SAlex Richardson printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s, 51431914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), 51531914882SAlex Richardson (unsigned long long) dt, lo, hi); 51631914882SAlex Richardson } 51731914882SAlex Richardson fflush (stdout); 51831914882SAlex Richardson } 51931914882SAlex Richardson 52031914882SAlex Richardson static void 52131914882SAlex Richardson bench (const struct fun *f, double lo, double hi, int type, int gen) 52231914882SAlex Richardson { 52331914882SAlex Richardson if (f->prec == 'd' && gen == 'r') 52431914882SAlex Richardson gen_rand (lo, hi); 52531914882SAlex Richardson else if (f->prec == 'd' && gen == 'l') 52631914882SAlex Richardson gen_linear (lo, hi); 52731914882SAlex Richardson else if (f->prec == 'd' && gen == 't') 52831914882SAlex Richardson gen_trace (0); 52931914882SAlex Richardson else if (f->prec == 'f' && gen == 'r') 53031914882SAlex Richardson genf_rand (lo, hi); 53131914882SAlex Richardson else if (f->prec == 'f' && gen == 'l') 53231914882SAlex Richardson genf_linear (lo, hi); 53331914882SAlex Richardson else if (f->prec == 'f' && gen == 't') 53431914882SAlex Richardson genf_trace (0); 53531914882SAlex Richardson 53631914882SAlex Richardson if (gen == 't') 53731914882SAlex Richardson hi = trace_size / N; 53831914882SAlex Richardson 53931914882SAlex Richardson if (type == 'b' || type == 't') 54031914882SAlex Richardson bench1 (f, 't', lo, hi); 54131914882SAlex Richardson 54231914882SAlex Richardson if (type == 'b' || type == 'l') 54331914882SAlex Richardson bench1 (f, 'l', lo, hi); 54431914882SAlex Richardson 54531914882SAlex Richardson for (int i = N; i < trace_size; i += N) 54631914882SAlex Richardson { 54731914882SAlex Richardson if (f->prec == 'd') 54831914882SAlex Richardson gen_trace (i); 54931914882SAlex Richardson else 55031914882SAlex Richardson genf_trace (i); 55131914882SAlex Richardson 55231914882SAlex Richardson lo = i / N; 55331914882SAlex Richardson if (type == 'b' || type == 't') 55431914882SAlex Richardson bench1 (f, 't', lo, hi); 55531914882SAlex Richardson 55631914882SAlex Richardson if (type == 'b' || type == 'l') 55731914882SAlex Richardson bench1 (f, 'l', lo, hi); 55831914882SAlex Richardson } 55931914882SAlex Richardson } 56031914882SAlex Richardson 56131914882SAlex Richardson static void 56231914882SAlex Richardson readtrace (const char *name) 56331914882SAlex Richardson { 56431914882SAlex Richardson int n = 0; 56531914882SAlex Richardson FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r"); 56631914882SAlex Richardson if (!f) 56731914882SAlex Richardson { 56831914882SAlex Richardson printf ("openning \"%s\" failed: %m\n", name); 56931914882SAlex Richardson exit (1); 57031914882SAlex Richardson } 57131914882SAlex Richardson for (;;) 57231914882SAlex Richardson { 57331914882SAlex Richardson if (n >= trace_size) 57431914882SAlex Richardson { 57531914882SAlex Richardson trace_size += N; 57631914882SAlex Richardson Trace = realloc (Trace, trace_size * sizeof (Trace[0])); 57731914882SAlex Richardson if (Trace == NULL) 57831914882SAlex Richardson { 57931914882SAlex Richardson printf ("out of memory\n"); 58031914882SAlex Richardson exit (1); 58131914882SAlex Richardson } 58231914882SAlex Richardson } 58331914882SAlex Richardson if (fscanf (f, "%lf", Trace + n) != 1) 58431914882SAlex Richardson break; 58531914882SAlex Richardson n++; 58631914882SAlex Richardson } 58731914882SAlex Richardson if (ferror (f) || n == 0) 58831914882SAlex Richardson { 58931914882SAlex Richardson printf ("reading \"%s\" failed: %m\n", name); 59031914882SAlex Richardson exit (1); 59131914882SAlex Richardson } 59231914882SAlex Richardson fclose (f); 59331914882SAlex Richardson if (n % N == 0) 59431914882SAlex Richardson trace_size = n; 59531914882SAlex Richardson for (int i = 0; n < trace_size; n++, i++) 59631914882SAlex Richardson Trace[n] = Trace[i]; 59731914882SAlex Richardson } 59831914882SAlex Richardson 59931914882SAlex Richardson static void 60031914882SAlex Richardson usage (void) 60131914882SAlex Richardson { 60231914882SAlex Richardson printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] " 60331914882SAlex Richardson "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func " 60431914882SAlex Richardson "[func2 ..]\n"); 60531914882SAlex Richardson printf ("func:\n"); 60631914882SAlex Richardson printf ("%7s [run all benchmarks]\n", "all"); 60731914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++) 60831914882SAlex Richardson printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi); 60931914882SAlex Richardson exit (1); 61031914882SAlex Richardson } 61131914882SAlex Richardson 61231914882SAlex Richardson int 61331914882SAlex Richardson main (int argc, char *argv[]) 61431914882SAlex Richardson { 61531914882SAlex Richardson int usergen = 0, gen = 'r', type = 'b', all = 0; 61631914882SAlex Richardson double lo = 0, hi = 0; 61731914882SAlex Richardson const char *tracefile = "-"; 61831914882SAlex Richardson 61931914882SAlex Richardson argv++; 62031914882SAlex Richardson argc--; 62131914882SAlex Richardson for (;;) 62231914882SAlex Richardson { 62331914882SAlex Richardson if (argc <= 0) 62431914882SAlex Richardson usage (); 62531914882SAlex Richardson if (argv[0][0] != '-') 62631914882SAlex Richardson break; 62731914882SAlex Richardson else if (argc >= 3 && strcmp (argv[0], "-i") == 0) 62831914882SAlex Richardson { 62931914882SAlex Richardson usergen = 1; 63031914882SAlex Richardson lo = strtod (argv[1], 0); 63131914882SAlex Richardson hi = strtod (argv[2], 0); 63231914882SAlex Richardson argv += 3; 63331914882SAlex Richardson argc -= 3; 63431914882SAlex Richardson } 63531914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-m") == 0) 63631914882SAlex Richardson { 63731914882SAlex Richardson measurecount = strtol (argv[1], 0, 0); 63831914882SAlex Richardson argv += 2; 63931914882SAlex Richardson argc -= 2; 64031914882SAlex Richardson } 64131914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-c") == 0) 64231914882SAlex Richardson { 64331914882SAlex Richardson itercount = strtol (argv[1], 0, 0); 64431914882SAlex Richardson argv += 2; 64531914882SAlex Richardson argc -= 2; 64631914882SAlex Richardson } 64731914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-g") == 0) 64831914882SAlex Richardson { 64931914882SAlex Richardson gen = argv[1][0]; 65031914882SAlex Richardson if (strchr ("rlt", gen) == 0) 65131914882SAlex Richardson usage (); 65231914882SAlex Richardson argv += 2; 65331914882SAlex Richardson argc -= 2; 65431914882SAlex Richardson } 65531914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-f") == 0) 65631914882SAlex Richardson { 65731914882SAlex Richardson gen = 't'; /* -f implies -g trace. */ 65831914882SAlex Richardson tracefile = argv[1]; 65931914882SAlex Richardson argv += 2; 66031914882SAlex Richardson argc -= 2; 66131914882SAlex Richardson } 66231914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-t") == 0) 66331914882SAlex Richardson { 66431914882SAlex Richardson type = argv[1][0]; 66531914882SAlex Richardson if (strchr ("ltb", type) == 0) 66631914882SAlex Richardson usage (); 66731914882SAlex Richardson argv += 2; 66831914882SAlex Richardson argc -= 2; 66931914882SAlex Richardson } 67031914882SAlex Richardson else 67131914882SAlex Richardson usage (); 67231914882SAlex Richardson } 67331914882SAlex Richardson if (gen == 't') 67431914882SAlex Richardson { 67531914882SAlex Richardson readtrace (tracefile); 67631914882SAlex Richardson lo = hi = 0; 67731914882SAlex Richardson usergen = 1; 67831914882SAlex Richardson } 67931914882SAlex Richardson while (argc > 0) 68031914882SAlex Richardson { 68131914882SAlex Richardson int found = 0; 68231914882SAlex Richardson all = strcmp (argv[0], "all") == 0; 68331914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++) 68431914882SAlex Richardson if (all || strcmp (argv[0], f->name) == 0) 68531914882SAlex Richardson { 68631914882SAlex Richardson found = 1; 68731914882SAlex Richardson if (!usergen) 68831914882SAlex Richardson { 68931914882SAlex Richardson lo = f->lo; 69031914882SAlex Richardson hi = f->hi; 69131914882SAlex Richardson } 69231914882SAlex Richardson bench (f, lo, hi, type, gen); 69331914882SAlex Richardson if (usergen && !all) 69431914882SAlex Richardson break; 69531914882SAlex Richardson } 69631914882SAlex Richardson if (!found) 69731914882SAlex Richardson printf ("unknown function: %s\n", argv[0]); 69831914882SAlex Richardson argv++; 69931914882SAlex Richardson argc--; 70031914882SAlex Richardson } 70131914882SAlex Richardson return 0; 70231914882SAlex Richardson } 703