xref: /freebsd-src/contrib/arm-optimized-routines/math/test/mathbench.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
131914882SAlex Richardson /*
231914882SAlex Richardson  * Microbenchmark for math functions.
331914882SAlex Richardson  *
4*072a4ba8SAndrew Turner  * Copyright (c) 2018-2022, Arm Limited.
5*072a4ba8SAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
631914882SAlex Richardson  */
731914882SAlex Richardson 
831914882SAlex Richardson #undef _GNU_SOURCE
931914882SAlex Richardson #define _GNU_SOURCE 1
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mathlib.h"
1731914882SAlex Richardson 
1831914882SAlex Richardson #ifndef WANT_VMATH
1931914882SAlex Richardson /* Enable the build of vector math code.  */
2031914882SAlex Richardson # define WANT_VMATH 1
2131914882SAlex Richardson #endif
2231914882SAlex Richardson 
2331914882SAlex Richardson /* Number of measurements, best result is reported.  */
2431914882SAlex Richardson #define MEASURE 60
2531914882SAlex Richardson /* Array size.  */
2631914882SAlex Richardson #define N 8000
2731914882SAlex Richardson /* Iterations over the array.  */
2831914882SAlex Richardson #define ITER 125
2931914882SAlex Richardson 
3031914882SAlex Richardson static double *Trace;
3131914882SAlex Richardson static size_t trace_size;
3231914882SAlex Richardson static double A[N];
3331914882SAlex Richardson static float Af[N];
3431914882SAlex Richardson static long measurecount = MEASURE;
3531914882SAlex Richardson static long itercount = ITER;
3631914882SAlex Richardson 
3731914882SAlex Richardson #if __aarch64__ && WANT_VMATH
3831914882SAlex Richardson typedef __f64x2_t v_double;
3931914882SAlex Richardson 
4031914882SAlex Richardson #define v_double_len() 2
4131914882SAlex Richardson 
4231914882SAlex Richardson static inline v_double
4331914882SAlex Richardson v_double_load (const double *p)
4431914882SAlex Richardson {
4531914882SAlex Richardson   return (v_double){p[0], p[1]};
4631914882SAlex Richardson }
4731914882SAlex Richardson 
4831914882SAlex Richardson static inline v_double
4931914882SAlex Richardson v_double_dup (double x)
5031914882SAlex Richardson {
5131914882SAlex Richardson   return (v_double){x, x};
5231914882SAlex Richardson }
5331914882SAlex Richardson 
5431914882SAlex Richardson typedef __f32x4_t v_float;
5531914882SAlex Richardson 
5631914882SAlex Richardson #define v_float_len() 4
5731914882SAlex Richardson 
5831914882SAlex Richardson static inline v_float
5931914882SAlex Richardson v_float_load (const float *p)
6031914882SAlex Richardson {
6131914882SAlex Richardson   return (v_float){p[0], p[1], p[2], p[3]};
6231914882SAlex Richardson }
6331914882SAlex Richardson 
6431914882SAlex Richardson static inline v_float
6531914882SAlex Richardson v_float_dup (float x)
6631914882SAlex Richardson {
6731914882SAlex Richardson   return (v_float){x, x, x, x};
6831914882SAlex Richardson }
69*072a4ba8SAndrew Turner #if WANT_SVE_MATH
70*072a4ba8SAndrew Turner #include <arm_sve.h>
71*072a4ba8SAndrew Turner typedef svbool_t sv_bool;
72*072a4ba8SAndrew Turner typedef svfloat64_t sv_double;
73*072a4ba8SAndrew Turner 
74*072a4ba8SAndrew Turner #define sv_double_len() svcntd()
75*072a4ba8SAndrew Turner 
76*072a4ba8SAndrew Turner static inline sv_double
77*072a4ba8SAndrew Turner sv_double_load (const double *p)
78*072a4ba8SAndrew Turner {
79*072a4ba8SAndrew Turner   svbool_t pg = svptrue_b64();
80*072a4ba8SAndrew Turner   return svld1(pg, p);
81*072a4ba8SAndrew Turner }
82*072a4ba8SAndrew Turner 
83*072a4ba8SAndrew Turner static inline sv_double
84*072a4ba8SAndrew Turner sv_double_dup (double x)
85*072a4ba8SAndrew Turner {
86*072a4ba8SAndrew Turner   return svdup_n_f64(x);
87*072a4ba8SAndrew Turner }
88*072a4ba8SAndrew Turner 
89*072a4ba8SAndrew Turner typedef svfloat32_t sv_float;
90*072a4ba8SAndrew Turner 
91*072a4ba8SAndrew Turner #define sv_float_len() svcntw()
92*072a4ba8SAndrew Turner 
93*072a4ba8SAndrew Turner static inline sv_float
94*072a4ba8SAndrew Turner sv_float_load (const float *p)
95*072a4ba8SAndrew Turner {
96*072a4ba8SAndrew Turner   svbool_t pg = svptrue_b32();
97*072a4ba8SAndrew Turner   return svld1(pg, p);
98*072a4ba8SAndrew Turner }
99*072a4ba8SAndrew Turner 
100*072a4ba8SAndrew Turner static inline sv_float
101*072a4ba8SAndrew Turner sv_float_dup (float x)
102*072a4ba8SAndrew Turner {
103*072a4ba8SAndrew Turner   return svdup_n_f32(x);
104*072a4ba8SAndrew Turner }
105*072a4ba8SAndrew Turner #endif
10631914882SAlex Richardson #else
/* Scalar stand-ins so the vector benchmark helpers still compile when the
   AArch64 vector types are not used: a "vector" is a single double or float.
   The *_len macros take no parameter, matching the AArch64 definitions and
   every call site, and each expansion is fully parenthesised.  */
typedef double v_double;
typedef float v_float;
#define v_double_len() 1
#define v_double_load(x) ((x)[0])
#define v_double_dup(x) (x)
#define v_float_len() 1
#define v_float_load(x) ((x)[0])
#define v_float_dup(x) (x)
11631914882SAlex Richardson #endif
11731914882SAlex Richardson 
/* Identity on double; listed in funtab under the name "dummy".  */
static double
dummy (double x)
{
  const double unchanged = x;
  return unchanged;
}
12331914882SAlex Richardson 
/* Identity on float; listed in funtab under the name "dummyf".  */
static float
dummyf (float x)
{
  const float unchanged = x;
  return unchanged;
}
12931914882SAlex Richardson #if WANT_VMATH
13031914882SAlex Richardson #if __aarch64__
13131914882SAlex Richardson static v_double
13231914882SAlex Richardson __v_dummy (v_double x)
13331914882SAlex Richardson {
13431914882SAlex Richardson   return x;
13531914882SAlex Richardson }
13631914882SAlex Richardson 
13731914882SAlex Richardson static v_float
13831914882SAlex Richardson __v_dummyf (v_float x)
13931914882SAlex Richardson {
14031914882SAlex Richardson   return x;
14131914882SAlex Richardson }
14231914882SAlex Richardson 
14331914882SAlex Richardson #ifdef __vpcs
14431914882SAlex Richardson __vpcs static v_double
14531914882SAlex Richardson __vn_dummy (v_double x)
14631914882SAlex Richardson {
14731914882SAlex Richardson   return x;
14831914882SAlex Richardson }
14931914882SAlex Richardson 
15031914882SAlex Richardson __vpcs static v_float
15131914882SAlex Richardson __vn_dummyf (v_float x)
15231914882SAlex Richardson {
15331914882SAlex Richardson   return x;
15431914882SAlex Richardson }
155*072a4ba8SAndrew Turner #endif
156*072a4ba8SAndrew Turner #if WANT_SVE_MATH
157*072a4ba8SAndrew Turner static sv_double
158*072a4ba8SAndrew Turner __sv_dummy (sv_double x, sv_bool pg)
15931914882SAlex Richardson {
160*072a4ba8SAndrew Turner   return x;
16131914882SAlex Richardson }
16231914882SAlex Richardson 
163*072a4ba8SAndrew Turner static sv_float
164*072a4ba8SAndrew Turner __sv_dummyf (sv_float x, sv_bool pg)
16531914882SAlex Richardson {
166*072a4ba8SAndrew Turner   return x;
16731914882SAlex Richardson }
16831914882SAlex Richardson 
169*072a4ba8SAndrew Turner #endif
170*072a4ba8SAndrew Turner #endif
17131914882SAlex Richardson #endif
17231914882SAlex Richardson 
173*072a4ba8SAndrew Turner #include "test/mathbench_wrappers.h"
17431914882SAlex Richardson 
17531914882SAlex Richardson static const struct fun
17631914882SAlex Richardson {
17731914882SAlex Richardson   const char *name;
17831914882SAlex Richardson   int prec;
17931914882SAlex Richardson   int vec;
18031914882SAlex Richardson   double lo;
18131914882SAlex Richardson   double hi;
18231914882SAlex Richardson   union
18331914882SAlex Richardson   {
18431914882SAlex Richardson     double (*d) (double);
18531914882SAlex Richardson     float (*f) (float);
18631914882SAlex Richardson     v_double (*vd) (v_double);
18731914882SAlex Richardson     v_float (*vf) (v_float);
18831914882SAlex Richardson #ifdef __vpcs
18931914882SAlex Richardson     __vpcs v_double (*vnd) (v_double);
19031914882SAlex Richardson     __vpcs v_float (*vnf) (v_float);
19131914882SAlex Richardson #endif
192*072a4ba8SAndrew Turner #if WANT_SVE_MATH
193*072a4ba8SAndrew Turner     sv_double (*svd) (sv_double, sv_bool);
194*072a4ba8SAndrew Turner     sv_float (*svf) (sv_float, sv_bool);
195*072a4ba8SAndrew Turner #endif
19631914882SAlex Richardson   } fun;
19731914882SAlex Richardson } funtab[] = {
19831914882SAlex Richardson #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
19931914882SAlex Richardson #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
20031914882SAlex Richardson #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
20131914882SAlex Richardson #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
20231914882SAlex Richardson #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
20331914882SAlex Richardson #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
204*072a4ba8SAndrew Turner #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}},
205*072a4ba8SAndrew Turner #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
20631914882SAlex Richardson D (dummy, 1.0, 2.0)
20731914882SAlex Richardson F (dummyf, 1.0, 2.0)
20831914882SAlex Richardson #if WANT_VMATH
20931914882SAlex Richardson #if __aarch64__
21031914882SAlex Richardson VD (__v_dummy, 1.0, 2.0)
21131914882SAlex Richardson VF (__v_dummyf, 1.0, 2.0)
21231914882SAlex Richardson #ifdef __vpcs
21331914882SAlex Richardson VND (__vn_dummy, 1.0, 2.0)
21431914882SAlex Richardson VNF (__vn_dummyf, 1.0, 2.0)
215*072a4ba8SAndrew Turner #endif
216*072a4ba8SAndrew Turner #if WANT_SVE_MATH
217*072a4ba8SAndrew Turner SVD (__sv_dummy, 1.0, 2.0)
218*072a4ba8SAndrew Turner SVF (__sv_dummyf, 1.0, 2.0)
21931914882SAlex Richardson #endif
22031914882SAlex Richardson #endif
22131914882SAlex Richardson #endif
222*072a4ba8SAndrew Turner #include "test/mathbench_funcs.h"
22331914882SAlex Richardson {0},
22431914882SAlex Richardson #undef F
22531914882SAlex Richardson #undef D
22631914882SAlex Richardson #undef VF
22731914882SAlex Richardson #undef VD
22831914882SAlex Richardson #undef VNF
22931914882SAlex Richardson #undef VND
230*072a4ba8SAndrew Turner #undef SVF
231*072a4ba8SAndrew Turner #undef SVD
23231914882SAlex Richardson };
23331914882SAlex Richardson 
23431914882SAlex Richardson static void
23531914882SAlex Richardson gen_linear (double lo, double hi)
23631914882SAlex Richardson {
23731914882SAlex Richardson   for (int i = 0; i < N; i++)
23831914882SAlex Richardson     A[i] = (lo * (N - i) + hi * i) / N;
23931914882SAlex Richardson }
24031914882SAlex Richardson 
24131914882SAlex Richardson static void
24231914882SAlex Richardson genf_linear (double lo, double hi)
24331914882SAlex Richardson {
24431914882SAlex Richardson   for (int i = 0; i < N; i++)
24531914882SAlex Richardson     Af[i] = (float)(lo * (N - i) + hi * i) / N;
24631914882SAlex Richardson }
24731914882SAlex Richardson 
/* Reinterpret the 64 bits of I as a double, without any numeric
   conversion.  */
static inline double
asdouble (uint64_t i)
{
  double f;
  memcpy (&f, &i, sizeof f);
  return f;
}
25831914882SAlex Richardson 
/* State of the linear congruential generator stepped by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double, scaled from the unit interval onto
   the range starting at LO and ending before HI.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double unit;

  seed = seed * 6364136223846793005ULL + 1;
  /* The top 52 bits of the state become the mantissa under exponent 0x3ff,
     giving a double in [1, 2); subtracting 1 maps it to [0, 1).  */
  bits = (seed >> 12) | (0x3ffULL << 52);
  unit = asdouble (bits) - 1.0;
  return lo + (hi - lo) * unit;
}
26731914882SAlex Richardson 
26831914882SAlex Richardson static void
26931914882SAlex Richardson gen_rand (double lo, double hi)
27031914882SAlex Richardson {
27131914882SAlex Richardson   for (int i = 0; i < N; i++)
27231914882SAlex Richardson     A[i] = frand (lo, hi);
27331914882SAlex Richardson }
27431914882SAlex Richardson 
27531914882SAlex Richardson static void
27631914882SAlex Richardson genf_rand (double lo, double hi)
27731914882SAlex Richardson {
27831914882SAlex Richardson   for (int i = 0; i < N; i++)
27931914882SAlex Richardson     Af[i] = (float)frand (lo, hi);
28031914882SAlex Richardson }
28131914882SAlex Richardson 
28231914882SAlex Richardson static void
28331914882SAlex Richardson gen_trace (int index)
28431914882SAlex Richardson {
28531914882SAlex Richardson   for (int i = 0; i < N; i++)
28631914882SAlex Richardson     A[i] = Trace[index + i];
28731914882SAlex Richardson }
28831914882SAlex Richardson 
28931914882SAlex Richardson static void
29031914882SAlex Richardson genf_trace (int index)
29131914882SAlex Richardson {
29231914882SAlex Richardson   for (int i = 0; i < N; i++)
29331914882SAlex Richardson     Af[i] = (float)Trace[index + i];
29431914882SAlex Richardson }
29531914882SAlex Richardson 
29631914882SAlex Richardson static void
29731914882SAlex Richardson run_thruput (double f (double))
29831914882SAlex Richardson {
29931914882SAlex Richardson   for (int i = 0; i < N; i++)
30031914882SAlex Richardson     f (A[i]);
30131914882SAlex Richardson }
30231914882SAlex Richardson 
30331914882SAlex Richardson static void
30431914882SAlex Richardson runf_thruput (float f (float))
30531914882SAlex Richardson {
30631914882SAlex Richardson   for (int i = 0; i < N; i++)
30731914882SAlex Richardson     f (Af[i]);
30831914882SAlex Richardson }
30931914882SAlex Richardson 
31031914882SAlex Richardson volatile double zero = 0;
31131914882SAlex Richardson 
31231914882SAlex Richardson static void
31331914882SAlex Richardson run_latency (double f (double))
31431914882SAlex Richardson {
31531914882SAlex Richardson   double z = zero;
31631914882SAlex Richardson   double prev = z;
31731914882SAlex Richardson   for (int i = 0; i < N; i++)
31831914882SAlex Richardson     prev = f (A[i] + prev * z);
31931914882SAlex Richardson }
32031914882SAlex Richardson 
32131914882SAlex Richardson static void
32231914882SAlex Richardson runf_latency (float f (float))
32331914882SAlex Richardson {
32431914882SAlex Richardson   float z = (float)zero;
32531914882SAlex Richardson   float prev = z;
32631914882SAlex Richardson   for (int i = 0; i < N; i++)
32731914882SAlex Richardson     prev = f (Af[i] + prev * z);
32831914882SAlex Richardson }
32931914882SAlex Richardson 
33031914882SAlex Richardson static void
33131914882SAlex Richardson run_v_thruput (v_double f (v_double))
33231914882SAlex Richardson {
33331914882SAlex Richardson   for (int i = 0; i < N; i += v_double_len ())
33431914882SAlex Richardson     f (v_double_load (A+i));
33531914882SAlex Richardson }
33631914882SAlex Richardson 
33731914882SAlex Richardson static void
33831914882SAlex Richardson runf_v_thruput (v_float f (v_float))
33931914882SAlex Richardson {
34031914882SAlex Richardson   for (int i = 0; i < N; i += v_float_len ())
34131914882SAlex Richardson     f (v_float_load (Af+i));
34231914882SAlex Richardson }
34331914882SAlex Richardson 
34431914882SAlex Richardson static void
34531914882SAlex Richardson run_v_latency (v_double f (v_double))
34631914882SAlex Richardson {
34731914882SAlex Richardson   v_double z = v_double_dup (zero);
34831914882SAlex Richardson   v_double prev = z;
34931914882SAlex Richardson   for (int i = 0; i < N; i += v_double_len ())
35031914882SAlex Richardson     prev = f (v_double_load (A+i) + prev * z);
35131914882SAlex Richardson }
35231914882SAlex Richardson 
35331914882SAlex Richardson static void
35431914882SAlex Richardson runf_v_latency (v_float f (v_float))
35531914882SAlex Richardson {
35631914882SAlex Richardson   v_float z = v_float_dup (zero);
35731914882SAlex Richardson   v_float prev = z;
35831914882SAlex Richardson   for (int i = 0; i < N; i += v_float_len ())
35931914882SAlex Richardson     prev = f (v_float_load (Af+i) + prev * z);
36031914882SAlex Richardson }
36131914882SAlex Richardson 
36231914882SAlex Richardson #ifdef __vpcs
36331914882SAlex Richardson static void
36431914882SAlex Richardson run_vn_thruput (__vpcs v_double f (v_double))
36531914882SAlex Richardson {
36631914882SAlex Richardson   for (int i = 0; i < N; i += v_double_len ())
36731914882SAlex Richardson     f (v_double_load (A+i));
36831914882SAlex Richardson }
36931914882SAlex Richardson 
37031914882SAlex Richardson static void
37131914882SAlex Richardson runf_vn_thruput (__vpcs v_float f (v_float))
37231914882SAlex Richardson {
37331914882SAlex Richardson   for (int i = 0; i < N; i += v_float_len ())
37431914882SAlex Richardson     f (v_float_load (Af+i));
37531914882SAlex Richardson }
37631914882SAlex Richardson 
37731914882SAlex Richardson static void
37831914882SAlex Richardson run_vn_latency (__vpcs v_double f (v_double))
37931914882SAlex Richardson {
38031914882SAlex Richardson   v_double z = v_double_dup (zero);
38131914882SAlex Richardson   v_double prev = z;
38231914882SAlex Richardson   for (int i = 0; i < N; i += v_double_len ())
38331914882SAlex Richardson     prev = f (v_double_load (A+i) + prev * z);
38431914882SAlex Richardson }
38531914882SAlex Richardson 
38631914882SAlex Richardson static void
38731914882SAlex Richardson runf_vn_latency (__vpcs v_float f (v_float))
38831914882SAlex Richardson {
38931914882SAlex Richardson   v_float z = v_float_dup (zero);
39031914882SAlex Richardson   v_float prev = z;
39131914882SAlex Richardson   for (int i = 0; i < N; i += v_float_len ())
39231914882SAlex Richardson     prev = f (v_float_load (Af+i) + prev * z);
39331914882SAlex Richardson }
39431914882SAlex Richardson #endif
39531914882SAlex Richardson 
396*072a4ba8SAndrew Turner #if WANT_SVE_MATH
397*072a4ba8SAndrew Turner static void
398*072a4ba8SAndrew Turner run_sv_thruput (sv_double f (sv_double, sv_bool))
399*072a4ba8SAndrew Turner {
400*072a4ba8SAndrew Turner   for (int i = 0; i < N; i += sv_double_len ())
401*072a4ba8SAndrew Turner     f (sv_double_load (A+i), svptrue_b64 ());
402*072a4ba8SAndrew Turner }
403*072a4ba8SAndrew Turner 
404*072a4ba8SAndrew Turner static void
405*072a4ba8SAndrew Turner runf_sv_thruput (sv_float f (sv_float, sv_bool))
406*072a4ba8SAndrew Turner {
407*072a4ba8SAndrew Turner   for (int i = 0; i < N; i += sv_float_len ())
408*072a4ba8SAndrew Turner     f (sv_float_load (Af+i), svptrue_b32 ());
409*072a4ba8SAndrew Turner }
410*072a4ba8SAndrew Turner 
411*072a4ba8SAndrew Turner static void
412*072a4ba8SAndrew Turner run_sv_latency (sv_double f (sv_double, sv_bool))
413*072a4ba8SAndrew Turner {
414*072a4ba8SAndrew Turner   sv_double z = sv_double_dup (zero);
415*072a4ba8SAndrew Turner   sv_double prev = z;
416*072a4ba8SAndrew Turner   for (int i = 0; i < N; i += sv_double_len ())
417*072a4ba8SAndrew Turner     prev = f (svmad_f64_x (svptrue_b64 (), prev, z, sv_double_load (A+i)), svptrue_b64 ());
418*072a4ba8SAndrew Turner }
419*072a4ba8SAndrew Turner 
420*072a4ba8SAndrew Turner static void
421*072a4ba8SAndrew Turner runf_sv_latency (sv_float f (sv_float, sv_bool))
422*072a4ba8SAndrew Turner {
423*072a4ba8SAndrew Turner   sv_float z = sv_float_dup (zero);
424*072a4ba8SAndrew Turner   sv_float prev = z;
425*072a4ba8SAndrew Turner   for (int i = 0; i < N; i += sv_float_len ())
426*072a4ba8SAndrew Turner     prev = f (svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af+i)), svptrue_b32 ());
427*072a4ba8SAndrew Turner }
428*072a4ba8SAndrew Turner #endif
429*072a4ba8SAndrew Turner 
/* Return the current time in nanoseconds as a 64-bit count; aborts if the
   clock cannot be read.

   The monotonic clock is used because the callers only ever subtract two
   readings: the wall clock (CLOCK_REALTIME) can be stepped by the system
   between them, which would corrupt the measured interval.  */
static uint64_t
tic (void)
{
  struct timespec ts;
  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
43831914882SAlex Richardson 
/* Time `run (f)': one untimed warm-up call, then measurecount samples, each
   covering itercount back-to-back calls.  The smallest sample, in
   nanoseconds, is stored in the variable `dt', which the expanding function
   must declare as uint64_t.  The counters are long to match measurecount
   and itercount, so large user-supplied counts cannot overflow them.  */
#define TIMEIT(run, f) do { \
  dt = UINT64_MAX; \
  run (f); /* Warm up.  */ \
  for (long j = 0; j < measurecount; j++) \
    { \
      uint64_t t0 = tic (); \
      for (long i = 0; i < itercount; i++) \
	run (f); \
      uint64_t t1 = tic (); \
      if (t1 - t0 < dt) \
	dt = t1 - t0; \
    } \
} while (0)
45231914882SAlex Richardson 
45331914882SAlex Richardson static void
45431914882SAlex Richardson bench1 (const struct fun *f, int type, double lo, double hi)
45531914882SAlex Richardson {
45631914882SAlex Richardson   uint64_t dt = 0;
45731914882SAlex Richardson   uint64_t ns100;
45831914882SAlex Richardson   const char *s = type == 't' ? "rthruput" : "latency";
45931914882SAlex Richardson   int vlen = 1;
46031914882SAlex Richardson 
46131914882SAlex Richardson   if (f->vec && f->prec == 'd')
46231914882SAlex Richardson     vlen = v_double_len();
46331914882SAlex Richardson   else if (f->vec && f->prec == 'f')
46431914882SAlex Richardson     vlen = v_float_len();
46531914882SAlex Richardson 
46631914882SAlex Richardson   if (f->prec == 'd' && type == 't' && f->vec == 0)
46731914882SAlex Richardson     TIMEIT (run_thruput, f->fun.d);
46831914882SAlex Richardson   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
46931914882SAlex Richardson     TIMEIT (run_latency, f->fun.d);
47031914882SAlex Richardson   else if (f->prec == 'f' && type == 't' && f->vec == 0)
47131914882SAlex Richardson     TIMEIT (runf_thruput, f->fun.f);
47231914882SAlex Richardson   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
47331914882SAlex Richardson     TIMEIT (runf_latency, f->fun.f);
47431914882SAlex Richardson   else if (f->prec == 'd' && type == 't' && f->vec == 'v')
47531914882SAlex Richardson     TIMEIT (run_v_thruput, f->fun.vd);
47631914882SAlex Richardson   else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
47731914882SAlex Richardson     TIMEIT (run_v_latency, f->fun.vd);
47831914882SAlex Richardson   else if (f->prec == 'f' && type == 't' && f->vec == 'v')
47931914882SAlex Richardson     TIMEIT (runf_v_thruput, f->fun.vf);
48031914882SAlex Richardson   else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
48131914882SAlex Richardson     TIMEIT (runf_v_latency, f->fun.vf);
48231914882SAlex Richardson #ifdef __vpcs
48331914882SAlex Richardson   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
48431914882SAlex Richardson     TIMEIT (run_vn_thruput, f->fun.vnd);
48531914882SAlex Richardson   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
48631914882SAlex Richardson     TIMEIT (run_vn_latency, f->fun.vnd);
48731914882SAlex Richardson   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
48831914882SAlex Richardson     TIMEIT (runf_vn_thruput, f->fun.vnf);
48931914882SAlex Richardson   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
49031914882SAlex Richardson     TIMEIT (runf_vn_latency, f->fun.vnf);
49131914882SAlex Richardson #endif
492*072a4ba8SAndrew Turner #if WANT_SVE_MATH
493*072a4ba8SAndrew Turner   else if (f->prec == 'd' && type == 't' && f->vec == 's')
494*072a4ba8SAndrew Turner     TIMEIT (run_sv_thruput, f->fun.svd);
495*072a4ba8SAndrew Turner   else if (f->prec == 'd' && type == 'l' && f->vec == 's')
496*072a4ba8SAndrew Turner     TIMEIT (run_sv_latency, f->fun.svd);
497*072a4ba8SAndrew Turner   else if (f->prec == 'f' && type == 't' && f->vec == 's')
498*072a4ba8SAndrew Turner     TIMEIT (runf_sv_thruput, f->fun.svf);
499*072a4ba8SAndrew Turner   else if (f->prec == 'f' && type == 'l' && f->vec == 's')
500*072a4ba8SAndrew Turner     TIMEIT (runf_sv_latency, f->fun.svf);
501*072a4ba8SAndrew Turner #endif
50231914882SAlex Richardson 
50331914882SAlex Richardson   if (type == 't')
50431914882SAlex Richardson     {
50531914882SAlex Richardson       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
50631914882SAlex Richardson       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
50731914882SAlex Richardson 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
50831914882SAlex Richardson 	      (unsigned long long) dt, lo, hi);
50931914882SAlex Richardson     }
51031914882SAlex Richardson   else if (type == 'l')
51131914882SAlex Richardson     {
51231914882SAlex Richardson       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
51331914882SAlex Richardson       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
51431914882SAlex Richardson 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
51531914882SAlex Richardson 	      (unsigned long long) dt, lo, hi);
51631914882SAlex Richardson     }
51731914882SAlex Richardson   fflush (stdout);
51831914882SAlex Richardson }
51931914882SAlex Richardson 
52031914882SAlex Richardson static void
52131914882SAlex Richardson bench (const struct fun *f, double lo, double hi, int type, int gen)
52231914882SAlex Richardson {
52331914882SAlex Richardson   if (f->prec == 'd' && gen == 'r')
52431914882SAlex Richardson     gen_rand (lo, hi);
52531914882SAlex Richardson   else if (f->prec == 'd' && gen == 'l')
52631914882SAlex Richardson     gen_linear (lo, hi);
52731914882SAlex Richardson   else if (f->prec == 'd' && gen == 't')
52831914882SAlex Richardson     gen_trace (0);
52931914882SAlex Richardson   else if (f->prec == 'f' && gen == 'r')
53031914882SAlex Richardson     genf_rand (lo, hi);
53131914882SAlex Richardson   else if (f->prec == 'f' && gen == 'l')
53231914882SAlex Richardson     genf_linear (lo, hi);
53331914882SAlex Richardson   else if (f->prec == 'f' && gen == 't')
53431914882SAlex Richardson     genf_trace (0);
53531914882SAlex Richardson 
53631914882SAlex Richardson   if (gen == 't')
53731914882SAlex Richardson     hi = trace_size / N;
53831914882SAlex Richardson 
53931914882SAlex Richardson   if (type == 'b' || type == 't')
54031914882SAlex Richardson     bench1 (f, 't', lo, hi);
54131914882SAlex Richardson 
54231914882SAlex Richardson   if (type == 'b' || type == 'l')
54331914882SAlex Richardson     bench1 (f, 'l', lo, hi);
54431914882SAlex Richardson 
54531914882SAlex Richardson   for (int i = N; i < trace_size; i += N)
54631914882SAlex Richardson     {
54731914882SAlex Richardson       if (f->prec == 'd')
54831914882SAlex Richardson 	gen_trace (i);
54931914882SAlex Richardson       else
55031914882SAlex Richardson 	genf_trace (i);
55131914882SAlex Richardson 
55231914882SAlex Richardson       lo = i / N;
55331914882SAlex Richardson       if (type == 'b' || type == 't')
55431914882SAlex Richardson 	bench1 (f, 't', lo, hi);
55531914882SAlex Richardson 
55631914882SAlex Richardson       if (type == 'b' || type == 'l')
55731914882SAlex Richardson 	bench1 (f, 'l', lo, hi);
55831914882SAlex Richardson     }
55931914882SAlex Richardson }
56031914882SAlex Richardson 
56131914882SAlex Richardson static void
56231914882SAlex Richardson readtrace (const char *name)
56331914882SAlex Richardson {
56431914882SAlex Richardson 	int n = 0;
56531914882SAlex Richardson 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
56631914882SAlex Richardson 	if (!f)
56731914882SAlex Richardson 	  {
56831914882SAlex Richardson 	    printf ("openning \"%s\" failed: %m\n", name);
56931914882SAlex Richardson 	    exit (1);
57031914882SAlex Richardson 	  }
57131914882SAlex Richardson 	for (;;)
57231914882SAlex Richardson 	  {
57331914882SAlex Richardson 	    if (n >= trace_size)
57431914882SAlex Richardson 	      {
57531914882SAlex Richardson 		trace_size += N;
57631914882SAlex Richardson 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
57731914882SAlex Richardson 		if (Trace == NULL)
57831914882SAlex Richardson 		  {
57931914882SAlex Richardson 		    printf ("out of memory\n");
58031914882SAlex Richardson 		    exit (1);
58131914882SAlex Richardson 		  }
58231914882SAlex Richardson 	      }
58331914882SAlex Richardson 	    if (fscanf (f, "%lf", Trace + n) != 1)
58431914882SAlex Richardson 	      break;
58531914882SAlex Richardson 	    n++;
58631914882SAlex Richardson 	  }
58731914882SAlex Richardson 	if (ferror (f) || n == 0)
58831914882SAlex Richardson 	  {
58931914882SAlex Richardson 	    printf ("reading \"%s\" failed: %m\n", name);
59031914882SAlex Richardson 	    exit (1);
59131914882SAlex Richardson 	  }
59231914882SAlex Richardson 	fclose (f);
59331914882SAlex Richardson 	if (n % N == 0)
59431914882SAlex Richardson 	  trace_size = n;
59531914882SAlex Richardson 	for (int i = 0; n < trace_size; n++, i++)
59631914882SAlex Richardson 	  Trace[n] = Trace[i];
59731914882SAlex Richardson }
59831914882SAlex Richardson 
59931914882SAlex Richardson static void
60031914882SAlex Richardson usage (void)
60131914882SAlex Richardson {
60231914882SAlex Richardson   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
60331914882SAlex Richardson 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
60431914882SAlex Richardson 	  "[func2 ..]\n");
60531914882SAlex Richardson   printf ("func:\n");
60631914882SAlex Richardson   printf ("%7s [run all benchmarks]\n", "all");
60731914882SAlex Richardson   for (const struct fun *f = funtab; f->name; f++)
60831914882SAlex Richardson     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
60931914882SAlex Richardson   exit (1);
61031914882SAlex Richardson }
61131914882SAlex Richardson 
61231914882SAlex Richardson int
61331914882SAlex Richardson main (int argc, char *argv[])
61431914882SAlex Richardson {
61531914882SAlex Richardson   int usergen = 0, gen = 'r', type = 'b', all = 0;
61631914882SAlex Richardson   double lo = 0, hi = 0;
61731914882SAlex Richardson   const char *tracefile = "-";
61831914882SAlex Richardson 
61931914882SAlex Richardson   argv++;
62031914882SAlex Richardson   argc--;
62131914882SAlex Richardson   for (;;)
62231914882SAlex Richardson     {
62331914882SAlex Richardson       if (argc <= 0)
62431914882SAlex Richardson 	usage ();
62531914882SAlex Richardson       if (argv[0][0] != '-')
62631914882SAlex Richardson 	break;
62731914882SAlex Richardson       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
62831914882SAlex Richardson 	{
62931914882SAlex Richardson 	  usergen = 1;
63031914882SAlex Richardson 	  lo = strtod (argv[1], 0);
63131914882SAlex Richardson 	  hi = strtod (argv[2], 0);
63231914882SAlex Richardson 	  argv += 3;
63331914882SAlex Richardson 	  argc -= 3;
63431914882SAlex Richardson 	}
63531914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
63631914882SAlex Richardson 	{
63731914882SAlex Richardson 	  measurecount = strtol (argv[1], 0, 0);
63831914882SAlex Richardson 	  argv += 2;
63931914882SAlex Richardson 	  argc -= 2;
64031914882SAlex Richardson 	}
64131914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
64231914882SAlex Richardson 	{
64331914882SAlex Richardson 	  itercount = strtol (argv[1], 0, 0);
64431914882SAlex Richardson 	  argv += 2;
64531914882SAlex Richardson 	  argc -= 2;
64631914882SAlex Richardson 	}
64731914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
64831914882SAlex Richardson 	{
64931914882SAlex Richardson 	  gen = argv[1][0];
65031914882SAlex Richardson 	  if (strchr ("rlt", gen) == 0)
65131914882SAlex Richardson 	    usage ();
65231914882SAlex Richardson 	  argv += 2;
65331914882SAlex Richardson 	  argc -= 2;
65431914882SAlex Richardson 	}
65531914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
65631914882SAlex Richardson 	{
65731914882SAlex Richardson 	  gen = 't';  /* -f implies -g trace.  */
65831914882SAlex Richardson 	  tracefile = argv[1];
65931914882SAlex Richardson 	  argv += 2;
66031914882SAlex Richardson 	  argc -= 2;
66131914882SAlex Richardson 	}
66231914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
66331914882SAlex Richardson 	{
66431914882SAlex Richardson 	  type = argv[1][0];
66531914882SAlex Richardson 	  if (strchr ("ltb", type) == 0)
66631914882SAlex Richardson 	    usage ();
66731914882SAlex Richardson 	  argv += 2;
66831914882SAlex Richardson 	  argc -= 2;
66931914882SAlex Richardson 	}
67031914882SAlex Richardson       else
67131914882SAlex Richardson 	usage ();
67231914882SAlex Richardson     }
67331914882SAlex Richardson   if (gen == 't')
67431914882SAlex Richardson     {
67531914882SAlex Richardson       readtrace (tracefile);
67631914882SAlex Richardson       lo = hi = 0;
67731914882SAlex Richardson       usergen = 1;
67831914882SAlex Richardson     }
67931914882SAlex Richardson   while (argc > 0)
68031914882SAlex Richardson     {
68131914882SAlex Richardson       int found = 0;
68231914882SAlex Richardson       all = strcmp (argv[0], "all") == 0;
68331914882SAlex Richardson       for (const struct fun *f = funtab; f->name; f++)
68431914882SAlex Richardson 	if (all || strcmp (argv[0], f->name) == 0)
68531914882SAlex Richardson 	  {
68631914882SAlex Richardson 	    found = 1;
68731914882SAlex Richardson 	    if (!usergen)
68831914882SAlex Richardson 	      {
68931914882SAlex Richardson 		lo = f->lo;
69031914882SAlex Richardson 		hi = f->hi;
69131914882SAlex Richardson 	      }
69231914882SAlex Richardson 	    bench (f, lo, hi, type, gen);
69331914882SAlex Richardson 	    if (usergen && !all)
69431914882SAlex Richardson 	      break;
69531914882SAlex Richardson 	  }
69631914882SAlex Richardson       if (!found)
69731914882SAlex Richardson 	printf ("unknown function: %s\n", argv[0]);
69831914882SAlex Richardson       argv++;
69931914882SAlex Richardson       argc--;
70031914882SAlex Richardson     }
70131914882SAlex Richardson   return 0;
70231914882SAlex Richardson }
703