xref: /freebsd-src/contrib/arm-optimized-routines/math/test/mathbench.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
131914882SAlex Richardson /*
231914882SAlex Richardson  * Microbenchmark for math functions.
331914882SAlex Richardson  *
4*f3087befSAndrew Turner  * Copyright (c) 2018-2024, Arm Limited.
5072a4ba8SAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
631914882SAlex Richardson  */
731914882SAlex Richardson 
8*f3087befSAndrew Turner #if WANT_SVE_TESTS
9*f3087befSAndrew Turner #  if __aarch64__ && __linux__
10*f3087befSAndrew Turner #    ifdef __clang__
11*f3087befSAndrew Turner #      pragma clang attribute push(__attribute__((target("sve"))),            \
12*f3087befSAndrew Turner 				   apply_to = any(function))
13*f3087befSAndrew Turner #    else
14*f3087befSAndrew Turner #      pragma GCC target("+sve")
15*f3087befSAndrew Turner #    endif
16*f3087befSAndrew Turner #  else
17*f3087befSAndrew Turner #    error "SVE not supported - please disable WANT_SVE_TESTS"
18*f3087befSAndrew Turner #  endif
19*f3087befSAndrew Turner #endif
20*f3087befSAndrew Turner 
2131914882SAlex Richardson #undef _GNU_SOURCE
2231914882SAlex Richardson #define _GNU_SOURCE 1
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mathlib.h"
3031914882SAlex Richardson 
3131914882SAlex Richardson /* Number of measurements, best result is reported.  */
3231914882SAlex Richardson #define MEASURE 60
3331914882SAlex Richardson /* Array size.  */
3431914882SAlex Richardson #define N 8000
3531914882SAlex Richardson /* Iterations over the array.  */
3631914882SAlex Richardson #define ITER 125
3731914882SAlex Richardson 
3831914882SAlex Richardson static double *Trace;
3931914882SAlex Richardson static size_t trace_size;
4031914882SAlex Richardson static double A[N];
4131914882SAlex Richardson static float Af[N];
4231914882SAlex Richardson static long measurecount = MEASURE;
4331914882SAlex Richardson static long itercount = ITER;
4431914882SAlex Richardson 
/* Identity on double.  Listed in funtab as the "dummy" entry; presumably a
   baseline that measures call overhead only.  */
static double
dummy (double x)
{
  const double unchanged = x;
  return unchanged;
}
5031914882SAlex Richardson 
/* Identity on float.  Listed in funtab as the "dummyf" entry; presumably a
   baseline that measures call overhead only.  */
static float
dummyf (float x)
{
  const float unchanged = x;
  return unchanged;
}
#if __aarch64__ && __linux__
/* Identity on a float64x2_t vector; the "__vn_dummy" entry of funtab.  */
__vpcs static float64x2_t
__vn_dummy (float64x2_t x)
{
  float64x2_t unchanged = x;
  return unchanged;
}

/* Identity on a float32x4_t vector; the "__vn_dummyf" entry of funtab.  */
__vpcs static float32x4_t
__vn_dummyf (float32x4_t x)
{
  float32x4_t unchanged = x;
  return unchanged;
}
#endif
#if WANT_SVE_TESTS
/* Identity on an svfloat64_t vector; the predicate PG is accepted but not
   used.  The "__sv_dummy" entry of funtab.  */
static svfloat64_t
__sv_dummy (svfloat64_t x, svbool_t pg)
{
  svfloat64_t unchanged = x;
  return unchanged;
}

/* Identity on an svfloat32_t vector; the predicate PG is accepted but not
   used.  The "__sv_dummyf" entry of funtab.  */
static svfloat32_t
__sv_dummyf (svfloat32_t x, svbool_t pg)
{
  svfloat32_t unchanged = x;
  return unchanged;
}

#endif
8331914882SAlex Richardson 
84072a4ba8SAndrew Turner #include "test/mathbench_wrappers.h"
8531914882SAlex Richardson 
8631914882SAlex Richardson static const struct fun
8731914882SAlex Richardson {
8831914882SAlex Richardson   const char *name;
8931914882SAlex Richardson   int prec;
9031914882SAlex Richardson   int vec;
9131914882SAlex Richardson   double lo;
9231914882SAlex Richardson   double hi;
9331914882SAlex Richardson   union
9431914882SAlex Richardson   {
9531914882SAlex Richardson     double (*d) (double);
9631914882SAlex Richardson     float (*f) (float);
97*f3087befSAndrew Turner #if __aarch64__ && __linux__
98*f3087befSAndrew Turner     __vpcs float64x2_t (*vnd) (float64x2_t);
99*f3087befSAndrew Turner     __vpcs float32x4_t (*vnf) (float32x4_t);
10031914882SAlex Richardson #endif
101*f3087befSAndrew Turner #if WANT_SVE_TESTS
102*f3087befSAndrew Turner     svfloat64_t (*svd) (svfloat64_t, svbool_t);
103*f3087befSAndrew Turner     svfloat32_t (*svf) (svfloat32_t, svbool_t);
104072a4ba8SAndrew Turner #endif
10531914882SAlex Richardson   } fun;
10631914882SAlex Richardson } funtab[] = {
107*f3087befSAndrew Turner // clang-format off
10831914882SAlex Richardson #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
10931914882SAlex Richardson #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
11031914882SAlex Richardson #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
11131914882SAlex Richardson #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
112072a4ba8SAndrew Turner #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}},
113072a4ba8SAndrew Turner #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
11431914882SAlex Richardson D (dummy, 1.0, 2.0)
11531914882SAlex Richardson F (dummyf, 1.0, 2.0)
116*f3087befSAndrew Turner #if  __aarch64__ && __linux__
11731914882SAlex Richardson VND (__vn_dummy, 1.0, 2.0)
11831914882SAlex Richardson VNF (__vn_dummyf, 1.0, 2.0)
119072a4ba8SAndrew Turner #endif
120*f3087befSAndrew Turner #if WANT_SVE_TESTS
121072a4ba8SAndrew Turner SVD (__sv_dummy, 1.0, 2.0)
122072a4ba8SAndrew Turner SVF (__sv_dummyf, 1.0, 2.0)
12331914882SAlex Richardson #endif
124072a4ba8SAndrew Turner #include "test/mathbench_funcs.h"
12531914882SAlex Richardson {0},
12631914882SAlex Richardson #undef F
12731914882SAlex Richardson #undef D
12831914882SAlex Richardson #undef VNF
12931914882SAlex Richardson #undef VND
130072a4ba8SAndrew Turner #undef SVF
131072a4ba8SAndrew Turner #undef SVD
132*f3087befSAndrew Turner   // clang-format on
13331914882SAlex Richardson };
13431914882SAlex Richardson 
13531914882SAlex Richardson static void
13631914882SAlex Richardson gen_linear (double lo, double hi)
13731914882SAlex Richardson {
13831914882SAlex Richardson   for (int i = 0; i < N; i++)
13931914882SAlex Richardson     A[i] = (lo * (N - i) + hi * i) / N;
14031914882SAlex Richardson }
14131914882SAlex Richardson 
14231914882SAlex Richardson static void
14331914882SAlex Richardson genf_linear (double lo, double hi)
14431914882SAlex Richardson {
14531914882SAlex Richardson   for (int i = 0; i < N; i++)
14631914882SAlex Richardson     Af[i] = (float)(lo * (N - i) + hi * i) / N;
14731914882SAlex Richardson }
14831914882SAlex Richardson 
/* Reinterpret the 64 bits of I as a double (bit copy, no numeric
   conversion).  */
static inline double
asdouble (uint64_t i)
{
  double f;
  memcpy (&f, &i, sizeof f);
  return f;
}
15931914882SAlex Richardson 
/* State of the pseudo-random generator used by frand; fixed initial value so
   runs are repeatable.  */
static uint64_t seed = 0x0123456789abcdef;

/* Advance the generator (multiply-and-add step on the 64-bit state) and
   return a value in [lo, hi).  The top 52 bits of the state are used as the
   fraction of a double with biased exponent 0x3ff, i.e. a number in [1, 2);
   subtracting 1 gives the scaling factor in [0, 1).  */
static double
frand (double lo, double hi)
{
  seed = seed * 6364136223846793005ULL + 1;
  const uint64_t fraction = seed >> 12;
  const uint64_t exponent_one = 0x3ffULL << 52;
  return lo + (hi - lo) * (asdouble (fraction | exponent_one) - 1.0);
}
16831914882SAlex Richardson 
16931914882SAlex Richardson static void
17031914882SAlex Richardson gen_rand (double lo, double hi)
17131914882SAlex Richardson {
17231914882SAlex Richardson   for (int i = 0; i < N; i++)
17331914882SAlex Richardson     A[i] = frand (lo, hi);
17431914882SAlex Richardson }
17531914882SAlex Richardson 
17631914882SAlex Richardson static void
17731914882SAlex Richardson genf_rand (double lo, double hi)
17831914882SAlex Richardson {
17931914882SAlex Richardson   for (int i = 0; i < N; i++)
18031914882SAlex Richardson     Af[i] = (float)frand (lo, hi);
18131914882SAlex Richardson }
18231914882SAlex Richardson 
18331914882SAlex Richardson static void
18431914882SAlex Richardson gen_trace (int index)
18531914882SAlex Richardson {
18631914882SAlex Richardson   for (int i = 0; i < N; i++)
18731914882SAlex Richardson     A[i] = Trace[index + i];
18831914882SAlex Richardson }
18931914882SAlex Richardson 
19031914882SAlex Richardson static void
19131914882SAlex Richardson genf_trace (int index)
19231914882SAlex Richardson {
19331914882SAlex Richardson   for (int i = 0; i < N; i++)
19431914882SAlex Richardson     Af[i] = (float)Trace[index + i];
19531914882SAlex Richardson }
19631914882SAlex Richardson 
19731914882SAlex Richardson static void
19831914882SAlex Richardson run_thruput (double f (double))
19931914882SAlex Richardson {
20031914882SAlex Richardson   for (int i = 0; i < N; i++)
20131914882SAlex Richardson     f (A[i]);
20231914882SAlex Richardson }
20331914882SAlex Richardson 
20431914882SAlex Richardson static void
20531914882SAlex Richardson runf_thruput (float f (float))
20631914882SAlex Richardson {
20731914882SAlex Richardson   for (int i = 0; i < N; i++)
20831914882SAlex Richardson     f (Af[i]);
20931914882SAlex Richardson }
21031914882SAlex Richardson 
21131914882SAlex Richardson volatile double zero = 0;
21231914882SAlex Richardson 
21331914882SAlex Richardson static void
21431914882SAlex Richardson run_latency (double f (double))
21531914882SAlex Richardson {
21631914882SAlex Richardson   double z = zero;
21731914882SAlex Richardson   double prev = z;
21831914882SAlex Richardson   for (int i = 0; i < N; i++)
21931914882SAlex Richardson     prev = f (A[i] + prev * z);
22031914882SAlex Richardson }
22131914882SAlex Richardson 
22231914882SAlex Richardson static void
22331914882SAlex Richardson runf_latency (float f (float))
22431914882SAlex Richardson {
22531914882SAlex Richardson   float z = (float)zero;
22631914882SAlex Richardson   float prev = z;
22731914882SAlex Richardson   for (int i = 0; i < N; i++)
22831914882SAlex Richardson     prev = f (Af[i] + prev * z);
22931914882SAlex Richardson }
23031914882SAlex Richardson 
#if  __aarch64__ && __linux__
/* Call F on A two doubles at a time; calls are independent.  */
static void
run_vn_thruput (__vpcs float64x2_t f (float64x2_t))
{
  int i = 0;
  while (i < N)
    {
      f (vld1q_f64 (A + i));
      i += 2;
    }
}

/* Call F on Af four floats at a time; calls are independent.  */
static void
runf_vn_thruput (__vpcs float32x4_t f (float32x4_t))
{
  int i = 0;
  while (i < N)
    {
      f (vld1q_f32 (Af + i));
      i += 4;
    }
}

/* Call F on A two doubles at a time, routing every argument through a
   bitwise select with the previous result.  The all-zero mask is read from a
   volatile, so the select always yields the freshly loaded input while still
   making each call depend on the one before it.  */
static void
run_vn_latency (__vpcs float64x2_t f (float64x2_t))
{
  volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 };
  uint64x2_t sel = vsel;
  float64x2_t prev = vdupq_n_f64 (0);
  int i = 0;
  while (i < N)
    {
      float64x2_t arg = vbslq_f64 (sel, prev, vld1q_f64 (A + i));
      prev = f (arg);
      i += 2;
    }
}

/* Single-precision counterpart of run_vn_latency, four floats per call.  */
static void
runf_vn_latency (__vpcs float32x4_t f (float32x4_t))
{
  volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 };
  uint32x4_t sel = vsel;
  float32x4_t prev = vdupq_n_f32 (0);
  int i = 0;
  while (i < N)
    {
      float32x4_t arg = vbslq_f32 (sel, prev, vld1q_f32 (Af + i));
      prev = f (arg);
      i += 4;
    }
}
#endif
26631914882SAlex Richardson 
#if WANT_SVE_TESTS
/* Call F on A one SVE vector of doubles (svcntd () elements) at a time under
   an all-true predicate; calls are independent.  */
static void
run_sv_thruput (svfloat64_t f (svfloat64_t, svbool_t))
{
  int i = 0;
  while (i < N)
    {
      f (svld1_f64 (svptrue_b64 (), A + i), svptrue_b64 ());
      i += svcntd ();
    }
}

/* Call F on Af one SVE vector of floats (svcntw () elements) at a time under
   an all-true predicate; calls are independent.  */
static void
runf_sv_thruput (svfloat32_t f (svfloat32_t, svbool_t))
{
  int i = 0;
  while (i < N)
    {
      f (svld1_f32 (svptrue_b32 (), Af + i), svptrue_b32 ());
      i += svcntw ();
    }
}

/* Call F on A one SVE vector at a time, routing every argument through a
   select with the previous result.  The all-true selector is read from a
   volatile, so the select always yields the freshly loaded input while still
   making each call depend on the one before it.  */
static void
run_sv_latency (svfloat64_t f (svfloat64_t, svbool_t))
{
  volatile svbool_t vsel = svptrue_b64 ();
  svbool_t sel = vsel;
  svfloat64_t prev = svdup_f64 (0);
  int i = 0;
  while (i < N)
    {
      prev = f (svsel_f64 (sel, svld1_f64 (svptrue_b64 (), A + i), prev),
                svptrue_b64 ());
      i += svcntd ();
    }
}

/* Single-precision counterpart of run_sv_latency.  */
static void
runf_sv_latency (svfloat32_t f (svfloat32_t, svbool_t))
{
  volatile svbool_t vsel = svptrue_b32 ();
  svbool_t sel = vsel;
  svfloat32_t prev = svdup_f32 (0);
  int i = 0;
  while (i < N)
    {
      prev = f (svsel_f32 (sel, svld1_f32 (svptrue_b32 (), Af + i), prev),
                svptrue_b32 ());
      i += svcntw ();
    }
}
#endif
304072a4ba8SAndrew Turner 
/* Return a timestamp in nanoseconds.  Only differences between two results
   are meaningful.

   Where available the monotonic clock is used, so that an adjustment of the
   wall clock during a measurement cannot distort the measured interval.
   MSVC has no clock_gettime and keeps using timespec_get.  The process is
   aborted if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;
#if defined(_MSC_VER)
  if (!timespec_get (&ts, TIME_UTC))
    abort ();
#elif defined(CLOCK_MONOTONIC)
  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
#else
  if (clock_gettime (CLOCK_REALTIME, &ts))
    abort ();
#endif
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
31731914882SAlex Richardson 
/* Time RUN (F) and leave the best result in the enclosing uint64_t `dt`.

   After one untimed warm-up call, `measurecount` measurements are taken;
   each one times `itercount` back-to-back calls of RUN (F) with tic, and the
   smallest elapsed time in nanoseconds is kept.  The loop counters are long
   to match the type of measurecount and itercount, so large user-supplied
   counts cannot overflow them.  */
#define TIMEIT(run, f) do { \
  dt = UINT64_MAX; \
  run (f); /* Warm up.  */ \
  for (long j = 0; j < measurecount; j++) \
    { \
      uint64_t t0 = tic (); \
      for (long i = 0; i < itercount; i++) \
        run (f); \
      uint64_t t1 = tic (); \
      if (t1 - t0 < dt) \
        dt = t1 - t0; \
    } \
} while (0)
33131914882SAlex Richardson 
33231914882SAlex Richardson static void
33331914882SAlex Richardson bench1 (const struct fun *f, int type, double lo, double hi)
33431914882SAlex Richardson {
33531914882SAlex Richardson   uint64_t dt = 0;
33631914882SAlex Richardson   uint64_t ns100;
33731914882SAlex Richardson   const char *s = type == 't' ? "rthruput" : "latency";
33831914882SAlex Richardson   int vlen = 1;
33931914882SAlex Richardson 
3405a02ffc3SAndrew Turner   if (f->vec == 'n')
341*f3087befSAndrew Turner     vlen = f->prec == 'd' ? 2 : 4;
342*f3087befSAndrew Turner #if WANT_SVE_TESTS
3435a02ffc3SAndrew Turner   else if (f->vec == 's')
344*f3087befSAndrew Turner     vlen = f->prec == 'd' ? svcntd () : svcntw ();
345*f3087befSAndrew Turner #endif
34631914882SAlex Richardson 
34731914882SAlex Richardson   if (f->prec == 'd' && type == 't' && f->vec == 0)
34831914882SAlex Richardson     TIMEIT (run_thruput, f->fun.d);
34931914882SAlex Richardson   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
35031914882SAlex Richardson     TIMEIT (run_latency, f->fun.d);
35131914882SAlex Richardson   else if (f->prec == 'f' && type == 't' && f->vec == 0)
35231914882SAlex Richardson     TIMEIT (runf_thruput, f->fun.f);
35331914882SAlex Richardson   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
35431914882SAlex Richardson     TIMEIT (runf_latency, f->fun.f);
355*f3087befSAndrew Turner #if __aarch64__ && __linux__
35631914882SAlex Richardson   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
35731914882SAlex Richardson     TIMEIT (run_vn_thruput, f->fun.vnd);
35831914882SAlex Richardson   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
35931914882SAlex Richardson     TIMEIT (run_vn_latency, f->fun.vnd);
36031914882SAlex Richardson   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
36131914882SAlex Richardson     TIMEIT (runf_vn_thruput, f->fun.vnf);
36231914882SAlex Richardson   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
36331914882SAlex Richardson     TIMEIT (runf_vn_latency, f->fun.vnf);
36431914882SAlex Richardson #endif
365*f3087befSAndrew Turner #if WANT_SVE_TESTS
366072a4ba8SAndrew Turner   else if (f->prec == 'd' && type == 't' && f->vec == 's')
367072a4ba8SAndrew Turner     TIMEIT (run_sv_thruput, f->fun.svd);
368072a4ba8SAndrew Turner   else if (f->prec == 'd' && type == 'l' && f->vec == 's')
369072a4ba8SAndrew Turner     TIMEIT (run_sv_latency, f->fun.svd);
370072a4ba8SAndrew Turner   else if (f->prec == 'f' && type == 't' && f->vec == 's')
371072a4ba8SAndrew Turner     TIMEIT (runf_sv_thruput, f->fun.svf);
372072a4ba8SAndrew Turner   else if (f->prec == 'f' && type == 'l' && f->vec == 's')
373072a4ba8SAndrew Turner     TIMEIT (runf_sv_latency, f->fun.svf);
374072a4ba8SAndrew Turner #endif
37531914882SAlex Richardson 
37631914882SAlex Richardson   if (type == 't')
37731914882SAlex Richardson     {
37831914882SAlex Richardson       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
3795a02ffc3SAndrew Turner       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g] vlen %d\n",
3805a02ffc3SAndrew Turner 	      f->name, s,
38131914882SAlex Richardson 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
3825a02ffc3SAndrew Turner 	      (unsigned long long) dt, lo, hi, vlen);
38331914882SAlex Richardson     }
38431914882SAlex Richardson   else if (type == 'l')
38531914882SAlex Richardson     {
38631914882SAlex Richardson       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
3875a02ffc3SAndrew Turner       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g] vlen %d\n",
3885a02ffc3SAndrew Turner 	      f->name, s,
38931914882SAlex Richardson 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
3905a02ffc3SAndrew Turner 	      (unsigned long long) dt, lo, hi, vlen);
39131914882SAlex Richardson     }
39231914882SAlex Richardson   fflush (stdout);
39331914882SAlex Richardson }
39431914882SAlex Richardson 
39531914882SAlex Richardson static void
39631914882SAlex Richardson bench (const struct fun *f, double lo, double hi, int type, int gen)
39731914882SAlex Richardson {
39831914882SAlex Richardson   if (f->prec == 'd' && gen == 'r')
39931914882SAlex Richardson     gen_rand (lo, hi);
40031914882SAlex Richardson   else if (f->prec == 'd' && gen == 'l')
40131914882SAlex Richardson     gen_linear (lo, hi);
40231914882SAlex Richardson   else if (f->prec == 'd' && gen == 't')
40331914882SAlex Richardson     gen_trace (0);
40431914882SAlex Richardson   else if (f->prec == 'f' && gen == 'r')
40531914882SAlex Richardson     genf_rand (lo, hi);
40631914882SAlex Richardson   else if (f->prec == 'f' && gen == 'l')
40731914882SAlex Richardson     genf_linear (lo, hi);
40831914882SAlex Richardson   else if (f->prec == 'f' && gen == 't')
40931914882SAlex Richardson     genf_trace (0);
41031914882SAlex Richardson 
41131914882SAlex Richardson   if (gen == 't')
41231914882SAlex Richardson     hi = trace_size / N;
41331914882SAlex Richardson 
41431914882SAlex Richardson   if (type == 'b' || type == 't')
41531914882SAlex Richardson     bench1 (f, 't', lo, hi);
41631914882SAlex Richardson 
41731914882SAlex Richardson   if (type == 'b' || type == 'l')
41831914882SAlex Richardson     bench1 (f, 'l', lo, hi);
41931914882SAlex Richardson 
42031914882SAlex Richardson   for (int i = N; i < trace_size; i += N)
42131914882SAlex Richardson     {
42231914882SAlex Richardson       if (f->prec == 'd')
42331914882SAlex Richardson 	gen_trace (i);
42431914882SAlex Richardson       else
42531914882SAlex Richardson 	genf_trace (i);
42631914882SAlex Richardson 
42731914882SAlex Richardson       lo = i / N;
42831914882SAlex Richardson       if (type == 'b' || type == 't')
42931914882SAlex Richardson 	bench1 (f, 't', lo, hi);
43031914882SAlex Richardson 
43131914882SAlex Richardson       if (type == 'b' || type == 'l')
43231914882SAlex Richardson 	bench1 (f, 'l', lo, hi);
43331914882SAlex Richardson     }
43431914882SAlex Richardson }
43531914882SAlex Richardson 
43631914882SAlex Richardson static void
43731914882SAlex Richardson readtrace (const char *name)
43831914882SAlex Richardson {
43931914882SAlex Richardson 	int n = 0;
44031914882SAlex Richardson 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
44131914882SAlex Richardson 	if (!f)
44231914882SAlex Richardson 	  {
44331914882SAlex Richardson 	    printf ("openning \"%s\" failed: %m\n", name);
44431914882SAlex Richardson 	    exit (1);
44531914882SAlex Richardson 	  }
44631914882SAlex Richardson 	for (;;)
44731914882SAlex Richardson 	  {
44831914882SAlex Richardson 	    if (n >= trace_size)
44931914882SAlex Richardson 	      {
45031914882SAlex Richardson 		trace_size += N;
45131914882SAlex Richardson 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
45231914882SAlex Richardson 		if (Trace == NULL)
45331914882SAlex Richardson 		  {
45431914882SAlex Richardson 		    printf ("out of memory\n");
45531914882SAlex Richardson 		    exit (1);
45631914882SAlex Richardson 		  }
45731914882SAlex Richardson 	      }
45831914882SAlex Richardson 	    if (fscanf (f, "%lf", Trace + n) != 1)
45931914882SAlex Richardson 	      break;
46031914882SAlex Richardson 	    n++;
46131914882SAlex Richardson 	  }
46231914882SAlex Richardson 	if (ferror (f) || n == 0)
46331914882SAlex Richardson 	  {
46431914882SAlex Richardson 	    printf ("reading \"%s\" failed: %m\n", name);
46531914882SAlex Richardson 	    exit (1);
46631914882SAlex Richardson 	  }
46731914882SAlex Richardson 	fclose (f);
46831914882SAlex Richardson 	if (n % N == 0)
46931914882SAlex Richardson 	  trace_size = n;
47031914882SAlex Richardson 	for (int i = 0; n < trace_size; n++, i++)
47131914882SAlex Richardson 	  Trace[n] = Trace[i];
47231914882SAlex Richardson }
47331914882SAlex Richardson 
47431914882SAlex Richardson static void
47531914882SAlex Richardson usage (void)
47631914882SAlex Richardson {
47731914882SAlex Richardson   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
47831914882SAlex Richardson 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
47931914882SAlex Richardson 	  "[func2 ..]\n");
48031914882SAlex Richardson   printf ("func:\n");
48131914882SAlex Richardson   printf ("%7s [run all benchmarks]\n", "all");
48231914882SAlex Richardson   for (const struct fun *f = funtab; f->name; f++)
48331914882SAlex Richardson     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
48431914882SAlex Richardson   exit (1);
48531914882SAlex Richardson }
48631914882SAlex Richardson 
48731914882SAlex Richardson int
48831914882SAlex Richardson main (int argc, char *argv[])
48931914882SAlex Richardson {
49031914882SAlex Richardson   int usergen = 0, gen = 'r', type = 'b', all = 0;
49131914882SAlex Richardson   double lo = 0, hi = 0;
49231914882SAlex Richardson   const char *tracefile = "-";
49331914882SAlex Richardson 
49431914882SAlex Richardson   argv++;
49531914882SAlex Richardson   argc--;
49631914882SAlex Richardson   for (;;)
49731914882SAlex Richardson     {
49831914882SAlex Richardson       if (argc <= 0)
49931914882SAlex Richardson 	usage ();
50031914882SAlex Richardson       if (argv[0][0] != '-')
50131914882SAlex Richardson 	break;
50231914882SAlex Richardson       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
50331914882SAlex Richardson 	{
50431914882SAlex Richardson 	  usergen = 1;
50531914882SAlex Richardson 	  lo = strtod (argv[1], 0);
50631914882SAlex Richardson 	  hi = strtod (argv[2], 0);
50731914882SAlex Richardson 	  argv += 3;
50831914882SAlex Richardson 	  argc -= 3;
50931914882SAlex Richardson 	}
51031914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
51131914882SAlex Richardson 	{
51231914882SAlex Richardson 	  measurecount = strtol (argv[1], 0, 0);
51331914882SAlex Richardson 	  argv += 2;
51431914882SAlex Richardson 	  argc -= 2;
51531914882SAlex Richardson 	}
51631914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
51731914882SAlex Richardson 	{
51831914882SAlex Richardson 	  itercount = strtol (argv[1], 0, 0);
51931914882SAlex Richardson 	  argv += 2;
52031914882SAlex Richardson 	  argc -= 2;
52131914882SAlex Richardson 	}
52231914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
52331914882SAlex Richardson 	{
52431914882SAlex Richardson 	  gen = argv[1][0];
52531914882SAlex Richardson 	  if (strchr ("rlt", gen) == 0)
52631914882SAlex Richardson 	    usage ();
52731914882SAlex Richardson 	  argv += 2;
52831914882SAlex Richardson 	  argc -= 2;
52931914882SAlex Richardson 	}
53031914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
53131914882SAlex Richardson 	{
53231914882SAlex Richardson 	  gen = 't';  /* -f implies -g trace.  */
53331914882SAlex Richardson 	  tracefile = argv[1];
53431914882SAlex Richardson 	  argv += 2;
53531914882SAlex Richardson 	  argc -= 2;
53631914882SAlex Richardson 	}
53731914882SAlex Richardson       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
53831914882SAlex Richardson 	{
53931914882SAlex Richardson 	  type = argv[1][0];
54031914882SAlex Richardson 	  if (strchr ("ltb", type) == 0)
54131914882SAlex Richardson 	    usage ();
54231914882SAlex Richardson 	  argv += 2;
54331914882SAlex Richardson 	  argc -= 2;
54431914882SAlex Richardson 	}
54531914882SAlex Richardson       else
54631914882SAlex Richardson 	usage ();
54731914882SAlex Richardson     }
54831914882SAlex Richardson   if (gen == 't')
54931914882SAlex Richardson     {
55031914882SAlex Richardson       readtrace (tracefile);
55131914882SAlex Richardson       lo = hi = 0;
55231914882SAlex Richardson       usergen = 1;
55331914882SAlex Richardson     }
55431914882SAlex Richardson   while (argc > 0)
55531914882SAlex Richardson     {
55631914882SAlex Richardson       int found = 0;
55731914882SAlex Richardson       all = strcmp (argv[0], "all") == 0;
55831914882SAlex Richardson       for (const struct fun *f = funtab; f->name; f++)
55931914882SAlex Richardson 	if (all || strcmp (argv[0], f->name) == 0)
56031914882SAlex Richardson 	  {
56131914882SAlex Richardson 	    found = 1;
56231914882SAlex Richardson 	    if (!usergen)
56331914882SAlex Richardson 	      {
56431914882SAlex Richardson 		lo = f->lo;
56531914882SAlex Richardson 		hi = f->hi;
56631914882SAlex Richardson 	      }
56731914882SAlex Richardson 	    bench (f, lo, hi, type, gen);
56831914882SAlex Richardson 	    if (usergen && !all)
56931914882SAlex Richardson 	      break;
57031914882SAlex Richardson 	  }
57131914882SAlex Richardson       if (!found)
57231914882SAlex Richardson 	printf ("unknown function: %s\n", argv[0]);
57331914882SAlex Richardson       argv++;
57431914882SAlex Richardson       argc--;
57531914882SAlex Richardson     }
57631914882SAlex Richardson   return 0;
57731914882SAlex Richardson }
578*f3087befSAndrew Turner 
579*f3087befSAndrew Turner #if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__)
580*f3087befSAndrew Turner #  pragma clang attribute pop
581*f3087befSAndrew Turner #endif
582