131914882SAlex Richardson /* 231914882SAlex Richardson * Microbenchmark for math functions. 331914882SAlex Richardson * 4*f3087befSAndrew Turner * Copyright (c) 2018-2024, Arm Limited. 5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 631914882SAlex Richardson */ 731914882SAlex Richardson 8*f3087befSAndrew Turner #if WANT_SVE_TESTS 9*f3087befSAndrew Turner # if __aarch64__ && __linux__ 10*f3087befSAndrew Turner # ifdef __clang__ 11*f3087befSAndrew Turner # pragma clang attribute push(__attribute__((target("sve"))), \ 12*f3087befSAndrew Turner apply_to = any(function)) 13*f3087befSAndrew Turner # else 14*f3087befSAndrew Turner # pragma GCC target("+sve") 15*f3087befSAndrew Turner # endif 16*f3087befSAndrew Turner # else 17*f3087befSAndrew Turner # error "SVE not supported - please disable WANT_SVE_TESTS" 18*f3087befSAndrew Turner # endif 19*f3087befSAndrew Turner #endif 20*f3087befSAndrew Turner 2131914882SAlex Richardson #undef _GNU_SOURCE 2231914882SAlex Richardson #define _GNU_SOURCE 1 2331914882SAlex Richardson #include <stdint.h> 2431914882SAlex Richardson #include <stdlib.h> 2531914882SAlex Richardson #include <stdio.h> 2631914882SAlex Richardson #include <string.h> 2731914882SAlex Richardson #include <time.h> 2831914882SAlex Richardson #include <math.h> 2931914882SAlex Richardson #include "mathlib.h" 3031914882SAlex Richardson 3131914882SAlex Richardson /* Number of measurements, best result is reported. */ 3231914882SAlex Richardson #define MEASURE 60 3331914882SAlex Richardson /* Array size. */ 3431914882SAlex Richardson #define N 8000 3531914882SAlex Richardson /* Iterations over the array. */ 3631914882SAlex Richardson #define ITER 125 3731914882SAlex Richardson 3831914882SAlex Richardson static double *Trace; 3931914882SAlex Richardson static size_t trace_size; 4031914882SAlex Richardson static double A[N]; 4131914882SAlex Richardson static float Af[N]; 4231914882SAlex Richardson static long measurecount = MEASURE; 4331914882SAlex Richardson static long itercount = ITER; 4431914882SAlex Richardson 4531914882SAlex Richardson static double 4631914882SAlex Richardson dummy (double x) 4731914882SAlex Richardson { 4831914882SAlex Richardson return x; 4931914882SAlex Richardson } 5031914882SAlex Richardson 5131914882SAlex Richardson static float 5231914882SAlex Richardson dummyf (float x) 5331914882SAlex Richardson { 5431914882SAlex Richardson return x; 5531914882SAlex Richardson } 56*f3087befSAndrew Turner #if __aarch64__ && __linux__ 57*f3087befSAndrew Turner __vpcs static float64x2_t 58*f3087befSAndrew Turner __vn_dummy (float64x2_t x) 5931914882SAlex Richardson { 6031914882SAlex Richardson return x; 6131914882SAlex Richardson } 6231914882SAlex Richardson 63*f3087befSAndrew Turner __vpcs static float32x4_t 64*f3087befSAndrew Turner __vn_dummyf (float32x4_t x) 6531914882SAlex Richardson { 6631914882SAlex Richardson return x; 6731914882SAlex Richardson } 68072a4ba8SAndrew Turner #endif 69*f3087befSAndrew Turner #if WANT_SVE_TESTS 70*f3087befSAndrew Turner static svfloat64_t 71*f3087befSAndrew Turner __sv_dummy (svfloat64_t x, svbool_t pg) 7231914882SAlex Richardson { 73072a4ba8SAndrew Turner return x; 7431914882SAlex Richardson } 7531914882SAlex Richardson 76*f3087befSAndrew Turner static svfloat32_t 77*f3087befSAndrew Turner __sv_dummyf (svfloat32_t x, svbool_t pg) 7831914882SAlex Richardson { 79072a4ba8SAndrew Turner return x; 8031914882SAlex Richardson } 8131914882SAlex Richardson 82072a4ba8SAndrew Turner #endif 8331914882SAlex Richardson 84072a4ba8SAndrew Turner #include "test/mathbench_wrappers.h" 8531914882SAlex Richardson 8631914882SAlex Richardson static const struct fun 8731914882SAlex Richardson { 8831914882SAlex Richardson const char *name; 8931914882SAlex Richardson int prec; 9031914882SAlex Richardson int vec; 9131914882SAlex Richardson double lo; 9231914882SAlex Richardson double hi; 9331914882SAlex Richardson union 9431914882SAlex Richardson { 9531914882SAlex Richardson double (*d) (double); 9631914882SAlex Richardson float (*f) (float); 97*f3087befSAndrew Turner #if __aarch64__ && __linux__ 98*f3087befSAndrew Turner __vpcs float64x2_t (*vnd) (float64x2_t); 99*f3087befSAndrew Turner __vpcs float32x4_t (*vnf) (float32x4_t); 10031914882SAlex Richardson #endif 101*f3087befSAndrew Turner #if WANT_SVE_TESTS 102*f3087befSAndrew Turner svfloat64_t (*svd) (svfloat64_t, svbool_t); 103*f3087befSAndrew Turner svfloat32_t (*svf) (svfloat32_t, svbool_t); 104072a4ba8SAndrew Turner #endif 10531914882SAlex Richardson } fun; 10631914882SAlex Richardson } funtab[] = { 107*f3087befSAndrew Turner // clang-format off 10831914882SAlex Richardson #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, 10931914882SAlex Richardson #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}}, 11031914882SAlex Richardson #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, 11131914882SAlex Richardson #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, 112072a4ba8SAndrew Turner #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, 113072a4ba8SAndrew Turner #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, 11431914882SAlex Richardson D (dummy, 1.0, 2.0) 11531914882SAlex Richardson F (dummyf, 1.0, 2.0) 116*f3087befSAndrew Turner #if __aarch64__ && __linux__ 11731914882SAlex Richardson VND (__vn_dummy, 1.0, 2.0) 11831914882SAlex Richardson VNF (__vn_dummyf, 1.0, 2.0) 119072a4ba8SAndrew Turner #endif 120*f3087befSAndrew Turner #if WANT_SVE_TESTS 121072a4ba8SAndrew Turner SVD (__sv_dummy, 1.0, 2.0) 122072a4ba8SAndrew Turner SVF (__sv_dummyf, 1.0, 2.0) 12331914882SAlex Richardson #endif 124072a4ba8SAndrew Turner #include "test/mathbench_funcs.h" 12531914882SAlex Richardson {0}, 12631914882SAlex Richardson #undef F 12731914882SAlex Richardson #undef D 12831914882SAlex Richardson #undef VNF 12931914882SAlex Richardson #undef VND 130072a4ba8SAndrew Turner #undef SVF 131072a4ba8SAndrew Turner #undef SVD 132*f3087befSAndrew Turner // clang-format on 13331914882SAlex Richardson }; 13431914882SAlex Richardson 13531914882SAlex Richardson static void 13631914882SAlex Richardson gen_linear (double lo, double hi) 13731914882SAlex Richardson { 13831914882SAlex Richardson for (int i = 0; i < N; i++) 13931914882SAlex Richardson A[i] = (lo * (N - i) + hi * i) / N; 14031914882SAlex Richardson } 14131914882SAlex Richardson 14231914882SAlex Richardson static void 14331914882SAlex Richardson genf_linear (double lo, double hi) 14431914882SAlex Richardson { 14531914882SAlex Richardson for (int i = 0; i < N; i++) 14631914882SAlex Richardson Af[i] = (float)(lo * (N - i) + hi * i) / N; 14731914882SAlex Richardson } 14831914882SAlex Richardson 14931914882SAlex Richardson static inline double 15031914882SAlex Richardson asdouble (uint64_t i) 15131914882SAlex Richardson { 15231914882SAlex Richardson union 15331914882SAlex Richardson { 15431914882SAlex Richardson uint64_t i; 15531914882SAlex Richardson double f; 15631914882SAlex Richardson } u = {i}; 15731914882SAlex Richardson return u.f; 15831914882SAlex Richardson } 15931914882SAlex Richardson 16031914882SAlex Richardson static uint64_t seed = 0x0123456789abcdef; 16131914882SAlex Richardson 16231914882SAlex Richardson static double 16331914882SAlex Richardson frand (double lo, double hi) 16431914882SAlex Richardson { 16531914882SAlex Richardson seed = 6364136223846793005ULL * seed + 1; 16631914882SAlex Richardson return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0); 16731914882SAlex Richardson } 16831914882SAlex Richardson 16931914882SAlex Richardson static void 17031914882SAlex Richardson gen_rand (double lo, double hi) 17131914882SAlex Richardson { 17231914882SAlex Richardson for (int i = 0; i < N; i++) 17331914882SAlex Richardson A[i] = frand (lo, hi); 17431914882SAlex Richardson } 17531914882SAlex Richardson 17631914882SAlex Richardson static void 17731914882SAlex Richardson genf_rand (double lo, double hi) 17831914882SAlex Richardson { 17931914882SAlex Richardson for (int i = 0; i < N; i++) 18031914882SAlex Richardson Af[i] = (float)frand (lo, hi); 18131914882SAlex Richardson } 18231914882SAlex Richardson 18331914882SAlex Richardson static void 18431914882SAlex Richardson gen_trace (int index) 18531914882SAlex Richardson { 18631914882SAlex Richardson for (int i = 0; i < N; i++) 18731914882SAlex Richardson A[i] = Trace[index + i]; 18831914882SAlex Richardson } 18931914882SAlex Richardson 19031914882SAlex Richardson static void 19131914882SAlex Richardson genf_trace (int index) 19231914882SAlex Richardson { 19331914882SAlex Richardson for (int i = 0; i < N; i++) 19431914882SAlex Richardson Af[i] = (float)Trace[index + i]; 19531914882SAlex Richardson } 19631914882SAlex Richardson 19731914882SAlex Richardson static void 19831914882SAlex Richardson run_thruput (double f (double)) 19931914882SAlex Richardson { 20031914882SAlex Richardson for (int i = 0; i < N; i++) 20131914882SAlex Richardson f (A[i]); 20231914882SAlex Richardson } 20331914882SAlex Richardson 20431914882SAlex Richardson static void 20531914882SAlex Richardson runf_thruput (float f (float)) 20631914882SAlex Richardson { 20731914882SAlex Richardson for (int i = 0; i < N; i++) 20831914882SAlex Richardson f (Af[i]); 20931914882SAlex Richardson } 21031914882SAlex Richardson 21131914882SAlex Richardson volatile double zero = 0; 21231914882SAlex Richardson 21331914882SAlex Richardson static void 21431914882SAlex Richardson run_latency (double f (double)) 21531914882SAlex Richardson { 21631914882SAlex Richardson double z = zero; 21731914882SAlex Richardson double prev = z; 21831914882SAlex Richardson for (int i = 0; i < N; i++) 21931914882SAlex Richardson prev = f (A[i] + prev * z); 22031914882SAlex Richardson } 22131914882SAlex Richardson 22231914882SAlex Richardson static void 22331914882SAlex Richardson runf_latency (float f (float)) 22431914882SAlex Richardson { 22531914882SAlex Richardson float z = (float)zero; 22631914882SAlex Richardson float prev = z; 22731914882SAlex Richardson for (int i = 0; i < N; i++) 22831914882SAlex Richardson prev = f (Af[i] + prev * z); 22931914882SAlex Richardson } 23031914882SAlex Richardson 231*f3087befSAndrew Turner #if __aarch64__ && __linux__ 23231914882SAlex Richardson static void 233*f3087befSAndrew Turner run_vn_thruput (__vpcs float64x2_t f (float64x2_t)) 23431914882SAlex Richardson { 235*f3087befSAndrew Turner for (int i = 0; i < N; i += 2) 236*f3087befSAndrew Turner f (vld1q_f64 (A + i)); 23731914882SAlex Richardson } 23831914882SAlex Richardson 23931914882SAlex Richardson static void 240*f3087befSAndrew Turner runf_vn_thruput (__vpcs float32x4_t f (float32x4_t)) 24131914882SAlex Richardson { 242*f3087befSAndrew Turner for (int i = 0; i < N; i += 4) 243*f3087befSAndrew Turner f (vld1q_f32 (Af + i)); 24431914882SAlex Richardson } 24531914882SAlex Richardson 24631914882SAlex Richardson static void 247*f3087befSAndrew Turner run_vn_latency (__vpcs float64x2_t f (float64x2_t)) 24831914882SAlex Richardson { 2495a02ffc3SAndrew Turner volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 }; 2505a02ffc3SAndrew Turner uint64x2_t sel = vsel; 251*f3087befSAndrew Turner float64x2_t prev = vdupq_n_f64 (0); 252*f3087befSAndrew Turner for (int i = 0; i < N; i += 2) 253*f3087befSAndrew Turner prev = f (vbslq_f64 (sel, prev, vld1q_f64 (A + i))); 25431914882SAlex Richardson } 25531914882SAlex Richardson 25631914882SAlex Richardson static void 257*f3087befSAndrew Turner runf_vn_latency (__vpcs float32x4_t f (float32x4_t)) 25831914882SAlex Richardson { 2595a02ffc3SAndrew Turner volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 }; 2605a02ffc3SAndrew Turner uint32x4_t sel = vsel; 261*f3087befSAndrew Turner float32x4_t prev = vdupq_n_f32 (0); 262*f3087befSAndrew Turner for (int i = 0; i < N; i += 4) 263*f3087befSAndrew Turner prev = f (vbslq_f32 (sel, prev, vld1q_f32 (Af + i))); 26431914882SAlex Richardson } 26531914882SAlex Richardson #endif 26631914882SAlex Richardson 267*f3087befSAndrew Turner #if WANT_SVE_TESTS 268072a4ba8SAndrew Turner static void 269*f3087befSAndrew Turner run_sv_thruput (svfloat64_t f (svfloat64_t, svbool_t)) 270072a4ba8SAndrew Turner { 271*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntd ()) 272*f3087befSAndrew Turner f (svld1_f64 (svptrue_b64 (), A + i), svptrue_b64 ()); 273072a4ba8SAndrew Turner } 274072a4ba8SAndrew Turner 275072a4ba8SAndrew Turner static void 276*f3087befSAndrew Turner runf_sv_thruput (svfloat32_t f (svfloat32_t, svbool_t)) 277072a4ba8SAndrew Turner { 278*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntw ()) 279*f3087befSAndrew Turner f (svld1_f32 (svptrue_b32 (), Af + i), svptrue_b32 ()); 280072a4ba8SAndrew Turner } 281072a4ba8SAndrew Turner 282072a4ba8SAndrew Turner static void 283*f3087befSAndrew Turner run_sv_latency (svfloat64_t f (svfloat64_t, svbool_t)) 284072a4ba8SAndrew Turner { 285*f3087befSAndrew Turner volatile svbool_t vsel = svptrue_b64 (); 286*f3087befSAndrew Turner svbool_t sel = vsel; 287*f3087befSAndrew Turner svfloat64_t prev = svdup_f64 (0); 288*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntd ()) 289*f3087befSAndrew Turner prev = f (svsel_f64 (sel, svld1_f64 (svptrue_b64 (), A + i), prev), 290*f3087befSAndrew Turner svptrue_b64 ()); 291072a4ba8SAndrew Turner } 292072a4ba8SAndrew Turner 293072a4ba8SAndrew Turner static void 294*f3087befSAndrew Turner runf_sv_latency (svfloat32_t f (svfloat32_t, svbool_t)) 295072a4ba8SAndrew Turner { 296*f3087befSAndrew Turner volatile svbool_t vsel = svptrue_b32 (); 297*f3087befSAndrew Turner svbool_t sel = vsel; 298*f3087befSAndrew Turner svfloat32_t prev = svdup_f32 (0); 299*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntw ()) 300*f3087befSAndrew Turner prev = f (svsel_f32 (sel, svld1_f32 (svptrue_b32 (), Af + i), prev), 301*f3087befSAndrew Turner svptrue_b32 ()); 302072a4ba8SAndrew Turner } 303072a4ba8SAndrew Turner #endif 304072a4ba8SAndrew Turner 30531914882SAlex Richardson static uint64_t 30631914882SAlex Richardson tic (void) 30731914882SAlex Richardson { 30831914882SAlex Richardson struct timespec ts; 309*f3087befSAndrew Turner #if defined(_MSC_VER) 310*f3087befSAndrew Turner if (!timespec_get (&ts, TIME_UTC)) 311*f3087befSAndrew Turner #else 31231914882SAlex Richardson if (clock_gettime (CLOCK_REALTIME, &ts)) 313*f3087befSAndrew Turner #endif 31431914882SAlex Richardson abort (); 31531914882SAlex Richardson return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 31631914882SAlex Richardson } 31731914882SAlex Richardson 31831914882SAlex Richardson #define TIMEIT(run, f) do { \ 31931914882SAlex Richardson dt = -1; \ 32031914882SAlex Richardson run (f); /* Warm up. */ \ 32131914882SAlex Richardson for (int j = 0; j < measurecount; j++) \ 32231914882SAlex Richardson { \ 32331914882SAlex Richardson uint64_t t0 = tic (); \ 32431914882SAlex Richardson for (int i = 0; i < itercount; i++) \ 32531914882SAlex Richardson run (f); \ 32631914882SAlex Richardson uint64_t t1 = tic (); \ 32731914882SAlex Richardson if (t1 - t0 < dt) \ 32831914882SAlex Richardson dt = t1 - t0; \ 32931914882SAlex Richardson } \ 33031914882SAlex Richardson } while (0) 33131914882SAlex Richardson 33231914882SAlex Richardson static void 33331914882SAlex Richardson bench1 (const struct fun *f, int type, double lo, double hi) 33431914882SAlex Richardson { 33531914882SAlex Richardson uint64_t dt = 0; 33631914882SAlex Richardson uint64_t ns100; 33731914882SAlex Richardson const char *s = type == 't' ? "rthruput" : "latency"; 33831914882SAlex Richardson int vlen = 1; 33931914882SAlex Richardson 3405a02ffc3SAndrew Turner if (f->vec == 'n') 341*f3087befSAndrew Turner vlen = f->prec == 'd' ? 2 : 4; 342*f3087befSAndrew Turner #if WANT_SVE_TESTS 3435a02ffc3SAndrew Turner else if (f->vec == 's') 344*f3087befSAndrew Turner vlen = f->prec == 'd' ? svcntd () : svcntw (); 345*f3087befSAndrew Turner #endif 34631914882SAlex Richardson 34731914882SAlex Richardson if (f->prec == 'd' && type == 't' && f->vec == 0) 34831914882SAlex Richardson TIMEIT (run_thruput, f->fun.d); 34931914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 0) 35031914882SAlex Richardson TIMEIT (run_latency, f->fun.d); 35131914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 0) 35231914882SAlex Richardson TIMEIT (runf_thruput, f->fun.f); 35331914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 0) 35431914882SAlex Richardson TIMEIT (runf_latency, f->fun.f); 355*f3087befSAndrew Turner #if __aarch64__ && __linux__ 35631914882SAlex Richardson else if (f->prec == 'd' && type == 't' && f->vec == 'n') 35731914882SAlex Richardson TIMEIT (run_vn_thruput, f->fun.vnd); 35831914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 'n') 35931914882SAlex Richardson TIMEIT (run_vn_latency, f->fun.vnd); 36031914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 'n') 36131914882SAlex Richardson TIMEIT (runf_vn_thruput, f->fun.vnf); 36231914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 'n') 36331914882SAlex Richardson TIMEIT (runf_vn_latency, f->fun.vnf); 36431914882SAlex Richardson #endif 365*f3087befSAndrew Turner #if WANT_SVE_TESTS 366072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 't' && f->vec == 's') 367072a4ba8SAndrew Turner TIMEIT (run_sv_thruput, f->fun.svd); 368072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 'l' && f->vec == 's') 369072a4ba8SAndrew Turner TIMEIT (run_sv_latency, f->fun.svd); 370072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 't' && f->vec == 's') 371072a4ba8SAndrew Turner TIMEIT (runf_sv_thruput, f->fun.svf); 372072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 'l' && f->vec == 's') 373072a4ba8SAndrew Turner TIMEIT (runf_sv_latency, f->fun.svf); 374072a4ba8SAndrew Turner #endif 37531914882SAlex Richardson 37631914882SAlex Richardson if (type == 't') 37731914882SAlex Richardson { 37831914882SAlex Richardson ns100 = (100 * dt + itercount * N / 2) / (itercount * N); 3795a02ffc3SAndrew Turner printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g] vlen %d\n", 3805a02ffc3SAndrew Turner f->name, s, 38131914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), 3825a02ffc3SAndrew Turner (unsigned long long) dt, lo, hi, vlen); 38331914882SAlex Richardson } 38431914882SAlex Richardson else if (type == 'l') 38531914882SAlex Richardson { 38631914882SAlex Richardson ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen); 3875a02ffc3SAndrew Turner printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g] vlen %d\n", 3885a02ffc3SAndrew Turner f->name, s, 38931914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), 3905a02ffc3SAndrew Turner (unsigned long long) dt, lo, hi, vlen); 39131914882SAlex Richardson } 39231914882SAlex Richardson fflush (stdout); 39331914882SAlex Richardson } 39431914882SAlex Richardson 39531914882SAlex Richardson static void 39631914882SAlex Richardson bench (const struct fun *f, double lo, double hi, int type, int gen) 39731914882SAlex Richardson { 39831914882SAlex Richardson if (f->prec == 'd' && gen == 'r') 39931914882SAlex Richardson gen_rand (lo, hi); 40031914882SAlex Richardson else if (f->prec == 'd' && gen == 'l') 40131914882SAlex Richardson gen_linear (lo, hi); 40231914882SAlex Richardson else if (f->prec == 'd' && gen == 't') 40331914882SAlex Richardson gen_trace (0); 40431914882SAlex Richardson else if (f->prec == 'f' && gen == 'r') 40531914882SAlex Richardson genf_rand (lo, hi); 40631914882SAlex Richardson else if (f->prec == 'f' && gen == 'l') 40731914882SAlex Richardson genf_linear (lo, hi); 40831914882SAlex Richardson else if (f->prec == 'f' && gen == 't') 40931914882SAlex Richardson genf_trace (0); 41031914882SAlex Richardson 41131914882SAlex Richardson if (gen == 't') 41231914882SAlex Richardson hi = trace_size / N; 41331914882SAlex Richardson 41431914882SAlex Richardson if (type == 'b' || type == 't') 41531914882SAlex Richardson bench1 (f, 't', lo, hi); 41631914882SAlex Richardson 41731914882SAlex Richardson if (type == 'b' || type == 'l') 41831914882SAlex Richardson bench1 (f, 'l', lo, hi); 41931914882SAlex Richardson 42031914882SAlex Richardson for (int i = N; i < trace_size; i += N) 42131914882SAlex Richardson { 42231914882SAlex Richardson if (f->prec == 'd') 42331914882SAlex Richardson gen_trace (i); 42431914882SAlex Richardson else 42531914882SAlex Richardson genf_trace (i); 42631914882SAlex Richardson 42731914882SAlex Richardson lo = i / N; 42831914882SAlex Richardson if (type == 'b' || type == 't') 42931914882SAlex Richardson bench1 (f, 't', lo, hi); 43031914882SAlex Richardson 43131914882SAlex Richardson if (type == 'b' || type == 'l') 43231914882SAlex Richardson bench1 (f, 'l', lo, hi); 43331914882SAlex Richardson } 43431914882SAlex Richardson } 43531914882SAlex Richardson 43631914882SAlex Richardson static void 43731914882SAlex Richardson readtrace (const char *name) 43831914882SAlex Richardson { 43931914882SAlex Richardson int n = 0; 44031914882SAlex Richardson FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r"); 44131914882SAlex Richardson if (!f) 44231914882SAlex Richardson { 44331914882SAlex Richardson printf ("openning \"%s\" failed: %m\n", name); 44431914882SAlex Richardson exit (1); 44531914882SAlex Richardson } 44631914882SAlex Richardson for (;;) 44731914882SAlex Richardson { 44831914882SAlex Richardson if (n >= trace_size) 44931914882SAlex Richardson { 45031914882SAlex Richardson trace_size += N; 45131914882SAlex Richardson Trace = realloc (Trace, trace_size * sizeof (Trace[0])); 45231914882SAlex Richardson if (Trace == NULL) 45331914882SAlex Richardson { 45431914882SAlex Richardson printf ("out of memory\n"); 45531914882SAlex Richardson exit (1); 45631914882SAlex Richardson } 45731914882SAlex Richardson } 45831914882SAlex Richardson if (fscanf (f, "%lf", Trace + n) != 1) 45931914882SAlex Richardson break; 46031914882SAlex Richardson n++; 46131914882SAlex Richardson } 46231914882SAlex Richardson if (ferror (f) || n == 0) 46331914882SAlex Richardson { 46431914882SAlex Richardson printf ("reading \"%s\" failed: %m\n", name); 46531914882SAlex Richardson exit (1); 46631914882SAlex Richardson } 46731914882SAlex Richardson fclose (f); 46831914882SAlex Richardson if (n % N == 0) 46931914882SAlex Richardson trace_size = n; 47031914882SAlex Richardson for (int i = 0; n < trace_size; n++, i++) 47131914882SAlex Richardson Trace[n] = Trace[i]; 47231914882SAlex Richardson } 47331914882SAlex Richardson 47431914882SAlex Richardson static void 47531914882SAlex Richardson usage (void) 47631914882SAlex Richardson { 47731914882SAlex Richardson printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] " 47831914882SAlex Richardson "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func " 47931914882SAlex Richardson "[func2 ..]\n"); 48031914882SAlex Richardson printf ("func:\n"); 48131914882SAlex Richardson printf ("%7s [run all benchmarks]\n", "all"); 48231914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++) 48331914882SAlex Richardson printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi); 48431914882SAlex Richardson exit (1); 48531914882SAlex Richardson } 48631914882SAlex Richardson 48731914882SAlex Richardson int 48831914882SAlex Richardson main (int argc, char *argv[]) 48931914882SAlex Richardson { 49031914882SAlex Richardson int usergen = 0, gen = 'r', type = 'b', all = 0; 49131914882SAlex Richardson double lo = 0, hi = 0; 49231914882SAlex Richardson const char *tracefile = "-"; 49331914882SAlex Richardson 49431914882SAlex Richardson argv++; 49531914882SAlex Richardson argc--; 49631914882SAlex Richardson for (;;) 49731914882SAlex Richardson { 49831914882SAlex Richardson if (argc <= 0) 49931914882SAlex Richardson usage (); 50031914882SAlex Richardson if (argv[0][0] != '-') 50131914882SAlex Richardson break; 50231914882SAlex Richardson else if (argc >= 3 && strcmp (argv[0], "-i") == 0) 50331914882SAlex Richardson { 50431914882SAlex Richardson usergen = 1; 50531914882SAlex Richardson lo = strtod (argv[1], 0); 50631914882SAlex Richardson hi = strtod (argv[2], 0); 50731914882SAlex Richardson argv += 3; 50831914882SAlex Richardson argc -= 3; 50931914882SAlex Richardson } 51031914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-m") == 0) 51131914882SAlex Richardson { 51231914882SAlex Richardson measurecount = strtol (argv[1], 0, 0); 51331914882SAlex Richardson argv += 2; 51431914882SAlex Richardson argc -= 2; 51531914882SAlex Richardson } 51631914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-c") == 0) 51731914882SAlex Richardson { 51831914882SAlex Richardson itercount = strtol (argv[1], 0, 0); 51931914882SAlex Richardson argv += 2; 52031914882SAlex Richardson argc -= 2; 52131914882SAlex Richardson } 52231914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-g") == 0) 52331914882SAlex Richardson { 52431914882SAlex Richardson gen = argv[1][0]; 52531914882SAlex Richardson if (strchr ("rlt", gen) == 0) 52631914882SAlex Richardson usage (); 52731914882SAlex Richardson argv += 2; 52831914882SAlex Richardson argc -= 2; 52931914882SAlex Richardson } 53031914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-f") == 0) 53131914882SAlex Richardson { 53231914882SAlex Richardson gen = 't'; /* -f implies -g trace. */ 53331914882SAlex Richardson tracefile = argv[1]; 53431914882SAlex Richardson argv += 2; 53531914882SAlex Richardson argc -= 2; 53631914882SAlex Richardson } 53731914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-t") == 0) 53831914882SAlex Richardson { 53931914882SAlex Richardson type = argv[1][0]; 54031914882SAlex Richardson if (strchr ("ltb", type) == 0) 54131914882SAlex Richardson usage (); 54231914882SAlex Richardson argv += 2; 54331914882SAlex Richardson argc -= 2; 54431914882SAlex Richardson } 54531914882SAlex Richardson else 54631914882SAlex Richardson usage (); 54731914882SAlex Richardson } 54831914882SAlex Richardson if (gen == 't') 54931914882SAlex Richardson { 55031914882SAlex Richardson readtrace (tracefile); 55131914882SAlex Richardson lo = hi = 0; 55231914882SAlex Richardson usergen = 1; 55331914882SAlex Richardson } 55431914882SAlex Richardson while (argc > 0) 55531914882SAlex Richardson { 55631914882SAlex Richardson int found = 0; 55731914882SAlex Richardson all = strcmp (argv[0], "all") == 0; 55831914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++) 55931914882SAlex Richardson if (all || strcmp (argv[0], f->name) == 0) 56031914882SAlex Richardson { 56131914882SAlex Richardson found = 1; 56231914882SAlex Richardson if (!usergen) 56331914882SAlex Richardson { 56431914882SAlex Richardson lo = f->lo; 56531914882SAlex Richardson hi = f->hi; 56631914882SAlex Richardson } 56731914882SAlex Richardson bench (f, lo, hi, type, gen); 56831914882SAlex Richardson if (usergen && !all) 56931914882SAlex Richardson break; 57031914882SAlex Richardson } 57131914882SAlex Richardson if (!found) 57231914882SAlex Richardson printf ("unknown function: %s\n", argv[0]); 57331914882SAlex Richardson argv++; 57431914882SAlex Richardson argc--; 57531914882SAlex Richardson } 57631914882SAlex Richardson return 0; 57731914882SAlex Richardson } 578*f3087befSAndrew Turner 579*f3087befSAndrew Turner #if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__) 580*f3087befSAndrew Turner # pragma clang attribute pop 581*f3087befSAndrew Turner #endif 582