/*
 * Microbenchmark for math functions.
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */
80928368fSKristof Beyls
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include "mathlib.h"
180928368fSKristof Beyls
/* Build the vector math benchmarks unless the build says otherwise.  */
#ifndef WANT_VMATH
# define WANT_VMATH 1
#endif

/* Default number of timed measurements; the best one is reported.  */
#define MEASURE 60
/* Number of elements in the input arrays.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Trace inputs filled in by readtrace, and their element count.  */
static double *Trace;
static size_t trace_size;

/* Input arrays for the double and single precision benchmarks.  */
static double A[N];
static float Af[N];

/* Run-time measurement and iteration counts, initialised to the defaults
   above (main overrides them from the -m and -c options).  */
static long measurecount = MEASURE;
static long itercount = ITER;
370928368fSKristof Beyls
#if __aarch64__ && WANT_VMATH
/* Vector types used by the vector benchmarks: two doubles or four floats
   per vector.  */
typedef __f64x2_t v_double;
typedef __f32x4_t v_float;

/* Number of lanes in each vector type.  */
#define v_double_len() 2
#define v_float_len() 4

/* Build a vector from the two consecutive doubles starting at P.  */
static inline v_double
v_double_load (const double *p)
{
  v_double lanes = {p[0], p[1]};
  return lanes;
}

/* Build a vector with X in both lanes.  */
static inline v_double
v_double_dup (double x)
{
  v_double lanes = {x, x};
  return lanes;
}

/* Build a vector from the four consecutive floats starting at P.  */
static inline v_float
v_float_load (const float *p)
{
  v_float lanes = {p[0], p[1], p[2], p[3]};
  return lanes;
}

/* Build a vector with X in all four lanes.  */
static inline v_float
v_float_dup (float x)
{
  v_float lanes = {x, x, x, x};
  return lanes;
}
#else
/* Scalar stand-ins so that the vector benchmark code still compiles: a
   "vector" is a single element.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_float_len(x) 1
#define v_double_load(x) (x)[0]
#define v_float_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_dup(x) (x)
#endif
810928368fSKristof Beyls
/* Identity function on double; listed in funtab as "dummy".  */
static double
dummy (double x)
{
  double same = x;
  return same;
}

/* Identity function on float; listed in funtab as "dummyf".  */
static float
dummyf (float x)
{
  float same = x;
  return same;
}
930928368fSKristof Beyls
#if WANT_VMATH
#if __aarch64__
/* Identity function with the double vector signature.  */
static v_double
__v_dummy (v_double x)
{
  v_double same = x;
  return same;
}

/* Identity function with the float vector signature.  */
static v_float
__v_dummyf (v_float x)
{
  v_float same = x;
  return same;
}

#ifdef __vpcs
/* Identity function on double vectors, declared with __vpcs.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  v_double same = x;
  return same;
}

/* Identity function on float vectors, declared with __vpcs.  */
__vpcs static v_float
__vn_dummyf (v_float x)
{
  v_float same = x;
  return same;
}

/* One-argument adaptor: __vn_powf with X as both base and exponent.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  v_float result = __vn_powf (x, x);
  return result;
}

/* One-argument adaptor: _ZGVnN4vv_powf with X as both base and exponent.  */
__vpcs static v_float
xy_Z_powf (v_float x)
{
  v_float result = _ZGVnN4vv_powf (x, x);
  return result;
}

/* One-argument adaptor: __vn_pow with X as both base and exponent.  */
__vpcs static v_double
xy__vn_pow (v_double x)
{
  v_double result = __vn_pow (x, x);
  return result;
}

/* One-argument adaptor: _ZGVnN2vv_pow with X as both base and exponent.  */
__vpcs static v_double
xy_Z_pow (v_double x)
{
  v_double result = _ZGVnN2vv_pow (x, x);
  return result;
}
#endif

/* One-argument adaptor: __v_powf with X as both base and exponent.  */
static v_float
xy__v_powf (v_float x)
{
  v_float result = __v_powf (x, x);
  return result;
}

/* One-argument adaptor: __v_pow with X as both base and exponent.  */
static v_double
xy__v_pow (v_double x)
{
  v_double result = __v_pow (x, x);
  return result;
}
#endif

/* One-argument adaptor: __s_powf with X as both base and exponent.  */
static float
xy__s_powf (float x)
{
  float result = __s_powf (x, x);
  return result;
}

/* One-argument adaptor: __s_pow with X as both base and exponent.  */
static double
xy__s_pow (double x)
{
  double result = __s_pow (x, x);
  return result;
}
#endif
1710928368fSKristof Beyls
1720928368fSKristof Beyls static double
xypow(double x)1730928368fSKristof Beyls xypow (double x)
1740928368fSKristof Beyls {
1750928368fSKristof Beyls return pow (x, x);
1760928368fSKristof Beyls }
1770928368fSKristof Beyls
1780928368fSKristof Beyls static float
xypowf(float x)1790928368fSKristof Beyls xypowf (float x)
1800928368fSKristof Beyls {
1810928368fSKristof Beyls return powf (x, x);
1820928368fSKristof Beyls }
1830928368fSKristof Beyls
1840928368fSKristof Beyls static double
xpow(double x)1850928368fSKristof Beyls xpow (double x)
1860928368fSKristof Beyls {
1870928368fSKristof Beyls return pow (x, 23.4);
1880928368fSKristof Beyls }
1890928368fSKristof Beyls
1900928368fSKristof Beyls static float
xpowf(float x)1910928368fSKristof Beyls xpowf (float x)
1920928368fSKristof Beyls {
1930928368fSKristof Beyls return powf (x, 23.4f);
1940928368fSKristof Beyls }
1950928368fSKristof Beyls
1960928368fSKristof Beyls static double
ypow(double x)1970928368fSKristof Beyls ypow (double x)
1980928368fSKristof Beyls {
1990928368fSKristof Beyls return pow (2.34, x);
2000928368fSKristof Beyls }
2010928368fSKristof Beyls
2020928368fSKristof Beyls static float
ypowf(float x)2030928368fSKristof Beyls ypowf (float x)
2040928368fSKristof Beyls {
2050928368fSKristof Beyls return powf (2.34f, x);
2060928368fSKristof Beyls }
2070928368fSKristof Beyls
2080928368fSKristof Beyls static float
sincosf_wrap(float x)2090928368fSKristof Beyls sincosf_wrap (float x)
2100928368fSKristof Beyls {
2110928368fSKristof Beyls float s, c;
2120928368fSKristof Beyls sincosf (x, &s, &c);
2130928368fSKristof Beyls return s + c;
2140928368fSKristof Beyls }
2150928368fSKristof Beyls
2160928368fSKristof Beyls static const struct fun
2170928368fSKristof Beyls {
2180928368fSKristof Beyls const char *name;
2190928368fSKristof Beyls int prec;
2200928368fSKristof Beyls int vec;
2210928368fSKristof Beyls double lo;
2220928368fSKristof Beyls double hi;
2230928368fSKristof Beyls union
2240928368fSKristof Beyls {
2250928368fSKristof Beyls double (*d) (double);
2260928368fSKristof Beyls float (*f) (float);
2270928368fSKristof Beyls v_double (*vd) (v_double);
2280928368fSKristof Beyls v_float (*vf) (v_float);
2290928368fSKristof Beyls #ifdef __vpcs
2300928368fSKristof Beyls __vpcs v_double (*vnd) (v_double);
2310928368fSKristof Beyls __vpcs v_float (*vnf) (v_float);
2320928368fSKristof Beyls #endif
2330928368fSKristof Beyls } fun;
2340928368fSKristof Beyls } funtab[] = {
2350928368fSKristof Beyls #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
2360928368fSKristof Beyls #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
2370928368fSKristof Beyls #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
2380928368fSKristof Beyls #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
2390928368fSKristof Beyls #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
2400928368fSKristof Beyls #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
2410928368fSKristof Beyls D (dummy, 1.0, 2.0)
2420928368fSKristof Beyls D (exp, -9.9, 9.9)
2430928368fSKristof Beyls D (exp, 0.5, 1.0)
2440928368fSKristof Beyls D (exp2, -9.9, 9.9)
2450928368fSKristof Beyls D (log, 0.01, 11.1)
2460928368fSKristof Beyls D (log, 0.999, 1.001)
2470928368fSKristof Beyls D (log2, 0.01, 11.1)
2480928368fSKristof Beyls D (log2, 0.999, 1.001)
2490928368fSKristof Beyls {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
2500928368fSKristof Beyls D (xpow, 0.01, 11.1)
2510928368fSKristof Beyls D (ypow, -9.9, 9.9)
2520928368fSKristof Beyls
2530928368fSKristof Beyls F (dummyf, 1.0, 2.0)
2540928368fSKristof Beyls F (expf, -9.9, 9.9)
2550928368fSKristof Beyls F (exp2f, -9.9, 9.9)
2560928368fSKristof Beyls F (logf, 0.01, 11.1)
2570928368fSKristof Beyls F (log2f, 0.01, 11.1)
2580928368fSKristof Beyls {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
2590928368fSKristof Beyls F (xpowf, 0.01, 11.1)
2600928368fSKristof Beyls F (ypowf, -9.9, 9.9)
2610928368fSKristof Beyls {"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
2620928368fSKristof Beyls {"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
2630928368fSKristof Beyls {"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
2640928368fSKristof Beyls {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
2650928368fSKristof Beyls {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
2660928368fSKristof Beyls {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
2670928368fSKristof Beyls F (sinf, 0.1, 0.7)
2680928368fSKristof Beyls F (sinf, 0.8, 3.1)
2690928368fSKristof Beyls F (sinf, -3.1, 3.1)
2700928368fSKristof Beyls F (sinf, 3.3, 33.3)
2710928368fSKristof Beyls F (sinf, 100, 1000)
2720928368fSKristof Beyls F (sinf, 1e6, 1e32)
2730928368fSKristof Beyls F (cosf, 0.1, 0.7)
2740928368fSKristof Beyls F (cosf, 0.8, 3.1)
2750928368fSKristof Beyls F (cosf, -3.1, 3.1)
2760928368fSKristof Beyls F (cosf, 3.3, 33.3)
2770928368fSKristof Beyls F (cosf, 100, 1000)
2780928368fSKristof Beyls F (cosf, 1e6, 1e32)
2790928368fSKristof Beyls #if WANT_VMATH
2800928368fSKristof Beyls D (__s_sin, -3.1, 3.1)
2810928368fSKristof Beyls D (__s_cos, -3.1, 3.1)
2820928368fSKristof Beyls D (__s_exp, -9.9, 9.9)
2830928368fSKristof Beyls D (__s_log, 0.01, 11.1)
2840928368fSKristof Beyls {"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
2850928368fSKristof Beyls F (__s_expf, -9.9, 9.9)
2860928368fSKristof Beyls F (__s_expf_1u, -9.9, 9.9)
2870928368fSKristof Beyls F (__s_exp2f, -9.9, 9.9)
2880928368fSKristof Beyls F (__s_exp2f_1u, -9.9, 9.9)
2890928368fSKristof Beyls F (__s_logf, 0.01, 11.1)
2900928368fSKristof Beyls {"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
2910928368fSKristof Beyls F (__s_sinf, -3.1, 3.1)
2920928368fSKristof Beyls F (__s_cosf, -3.1, 3.1)
2930928368fSKristof Beyls #if __aarch64__
2940928368fSKristof Beyls VD (__v_dummy, 1.0, 2.0)
2950928368fSKristof Beyls VD (__v_sin, -3.1, 3.1)
2960928368fSKristof Beyls VD (__v_cos, -3.1, 3.1)
2970928368fSKristof Beyls VD (__v_exp, -9.9, 9.9)
2980928368fSKristof Beyls VD (__v_log, 0.01, 11.1)
2990928368fSKristof Beyls {"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
3000928368fSKristof Beyls VF (__v_dummyf, 1.0, 2.0)
3010928368fSKristof Beyls VF (__v_expf, -9.9, 9.9)
3020928368fSKristof Beyls VF (__v_expf_1u, -9.9, 9.9)
3030928368fSKristof Beyls VF (__v_exp2f, -9.9, 9.9)
3040928368fSKristof Beyls VF (__v_exp2f_1u, -9.9, 9.9)
3050928368fSKristof Beyls VF (__v_logf, 0.01, 11.1)
3060928368fSKristof Beyls {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
3070928368fSKristof Beyls VF (__v_sinf, -3.1, 3.1)
3080928368fSKristof Beyls VF (__v_cosf, -3.1, 3.1)
3090928368fSKristof Beyls #ifdef __vpcs
3100928368fSKristof Beyls VND (__vn_dummy, 1.0, 2.0)
3110928368fSKristof Beyls VND (__vn_exp, -9.9, 9.9)
3120928368fSKristof Beyls VND (_ZGVnN2v_exp, -9.9, 9.9)
3130928368fSKristof Beyls VND (__vn_log, 0.01, 11.1)
3140928368fSKristof Beyls VND (_ZGVnN2v_log, 0.01, 11.1)
3150928368fSKristof Beyls {"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
3160928368fSKristof Beyls {"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
3170928368fSKristof Beyls VND (__vn_sin, -3.1, 3.1)
3180928368fSKristof Beyls VND (_ZGVnN2v_sin, -3.1, 3.1)
3190928368fSKristof Beyls VND (__vn_cos, -3.1, 3.1)
3200928368fSKristof Beyls VND (_ZGVnN2v_cos, -3.1, 3.1)
3210928368fSKristof Beyls VNF (__vn_dummyf, 1.0, 2.0)
3220928368fSKristof Beyls VNF (__vn_expf, -9.9, 9.9)
3230928368fSKristof Beyls VNF (_ZGVnN4v_expf, -9.9, 9.9)
3240928368fSKristof Beyls VNF (__vn_expf_1u, -9.9, 9.9)
3250928368fSKristof Beyls VNF (__vn_exp2f, -9.9, 9.9)
3260928368fSKristof Beyls VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
3270928368fSKristof Beyls VNF (__vn_exp2f_1u, -9.9, 9.9)
3280928368fSKristof Beyls VNF (__vn_logf, 0.01, 11.1)
3290928368fSKristof Beyls VNF (_ZGVnN4v_logf, 0.01, 11.1)
3300928368fSKristof Beyls {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
3310928368fSKristof Beyls {"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
3320928368fSKristof Beyls VNF (__vn_sinf, -3.1, 3.1)
3330928368fSKristof Beyls VNF (_ZGVnN4v_sinf, -3.1, 3.1)
3340928368fSKristof Beyls VNF (__vn_cosf, -3.1, 3.1)
3350928368fSKristof Beyls VNF (_ZGVnN4v_cosf, -3.1, 3.1)
3360928368fSKristof Beyls #endif
3370928368fSKristof Beyls #endif
3380928368fSKristof Beyls #endif
3390928368fSKristof Beyls {0},
3400928368fSKristof Beyls #undef F
3410928368fSKristof Beyls #undef D
3420928368fSKristof Beyls #undef VF
3430928368fSKristof Beyls #undef VD
3440928368fSKristof Beyls #undef VNF
3450928368fSKristof Beyls #undef VND
3460928368fSKristof Beyls };
3470928368fSKristof Beyls
3480928368fSKristof Beyls static void
gen_linear(double lo,double hi)3490928368fSKristof Beyls gen_linear (double lo, double hi)
3500928368fSKristof Beyls {
3510928368fSKristof Beyls for (int i = 0; i < N; i++)
3520928368fSKristof Beyls A[i] = (lo * (N - i) + hi * i) / N;
3530928368fSKristof Beyls }
3540928368fSKristof Beyls
3550928368fSKristof Beyls static void
genf_linear(double lo,double hi)3560928368fSKristof Beyls genf_linear (double lo, double hi)
3570928368fSKristof Beyls {
3580928368fSKristof Beyls for (int i = 0; i < N; i++)
3590928368fSKristof Beyls Af[i] = (float)(lo * (N - i) + hi * i) / N;
3600928368fSKristof Beyls }
3610928368fSKristof Beyls
/* Reinterpret the 64 bits of I as a double (no numeric conversion).  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
3720928368fSKristof Beyls
/* State of the pseudo-random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double in [LO, HI).  The state is advanced
   with a 64-bit linear congruential step; its top 52 bits become the
   mantissa of a double in [1, 2), which is then mapped onto the interval.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double unit;

  seed = 6364136223846793005ULL * seed + 1;
  bits = (seed >> 12) | (0x3ffULL << 52);
  unit = asdouble (bits) - 1.0;
  return lo + (hi - lo) * unit;
}
3810928368fSKristof Beyls
3820928368fSKristof Beyls static void
gen_rand(double lo,double hi)3830928368fSKristof Beyls gen_rand (double lo, double hi)
3840928368fSKristof Beyls {
3850928368fSKristof Beyls for (int i = 0; i < N; i++)
3860928368fSKristof Beyls A[i] = frand (lo, hi);
3870928368fSKristof Beyls }
3880928368fSKristof Beyls
3890928368fSKristof Beyls static void
genf_rand(double lo,double hi)3900928368fSKristof Beyls genf_rand (double lo, double hi)
3910928368fSKristof Beyls {
3920928368fSKristof Beyls for (int i = 0; i < N; i++)
3930928368fSKristof Beyls Af[i] = (float)frand (lo, hi);
3940928368fSKristof Beyls }
3950928368fSKristof Beyls
3960928368fSKristof Beyls static void
gen_trace(int index)3970928368fSKristof Beyls gen_trace (int index)
3980928368fSKristof Beyls {
3990928368fSKristof Beyls for (int i = 0; i < N; i++)
4000928368fSKristof Beyls A[i] = Trace[index + i];
4010928368fSKristof Beyls }
4020928368fSKristof Beyls
4030928368fSKristof Beyls static void
genf_trace(int index)4040928368fSKristof Beyls genf_trace (int index)
4050928368fSKristof Beyls {
4060928368fSKristof Beyls for (int i = 0; i < N; i++)
4070928368fSKristof Beyls Af[i] = (float)Trace[index + i];
4080928368fSKristof Beyls }
4090928368fSKristof Beyls
4100928368fSKristof Beyls static void
run_thruput(double f (double))4110928368fSKristof Beyls run_thruput (double f (double))
4120928368fSKristof Beyls {
4130928368fSKristof Beyls for (int i = 0; i < N; i++)
4140928368fSKristof Beyls f (A[i]);
4150928368fSKristof Beyls }
4160928368fSKristof Beyls
4170928368fSKristof Beyls static void
runf_thruput(float f (float))4180928368fSKristof Beyls runf_thruput (float f (float))
4190928368fSKristof Beyls {
4200928368fSKristof Beyls for (int i = 0; i < N; i++)
4210928368fSKristof Beyls f (Af[i]);
4220928368fSKristof Beyls }
4230928368fSKristof Beyls
4240928368fSKristof Beyls volatile double zero = 0;
4250928368fSKristof Beyls
4260928368fSKristof Beyls static void
run_latency(double f (double))4270928368fSKristof Beyls run_latency (double f (double))
4280928368fSKristof Beyls {
4290928368fSKristof Beyls double z = zero;
4300928368fSKristof Beyls double prev = z;
4310928368fSKristof Beyls for (int i = 0; i < N; i++)
4320928368fSKristof Beyls prev = f (A[i] + prev * z);
4330928368fSKristof Beyls }
4340928368fSKristof Beyls
4350928368fSKristof Beyls static void
runf_latency(float f (float))4360928368fSKristof Beyls runf_latency (float f (float))
4370928368fSKristof Beyls {
4380928368fSKristof Beyls float z = (float)zero;
4390928368fSKristof Beyls float prev = z;
4400928368fSKristof Beyls for (int i = 0; i < N; i++)
4410928368fSKristof Beyls prev = f (Af[i] + prev * z);
4420928368fSKristof Beyls }
4430928368fSKristof Beyls
4440928368fSKristof Beyls static void
run_v_thruput(v_double f (v_double))4450928368fSKristof Beyls run_v_thruput (v_double f (v_double))
4460928368fSKristof Beyls {
4470928368fSKristof Beyls for (int i = 0; i < N; i += v_double_len ())
4480928368fSKristof Beyls f (v_double_load (A+i));
4490928368fSKristof Beyls }
4500928368fSKristof Beyls
4510928368fSKristof Beyls static void
runf_v_thruput(v_float f (v_float))4520928368fSKristof Beyls runf_v_thruput (v_float f (v_float))
4530928368fSKristof Beyls {
4540928368fSKristof Beyls for (int i = 0; i < N; i += v_float_len ())
4550928368fSKristof Beyls f (v_float_load (Af+i));
4560928368fSKristof Beyls }
4570928368fSKristof Beyls
4580928368fSKristof Beyls static void
run_v_latency(v_double f (v_double))4590928368fSKristof Beyls run_v_latency (v_double f (v_double))
4600928368fSKristof Beyls {
4610928368fSKristof Beyls v_double z = v_double_dup (zero);
4620928368fSKristof Beyls v_double prev = z;
4630928368fSKristof Beyls for (int i = 0; i < N; i += v_double_len ())
4640928368fSKristof Beyls prev = f (v_double_load (A+i) + prev * z);
4650928368fSKristof Beyls }
4660928368fSKristof Beyls
4670928368fSKristof Beyls static void
runf_v_latency(v_float f (v_float))4680928368fSKristof Beyls runf_v_latency (v_float f (v_float))
4690928368fSKristof Beyls {
4700928368fSKristof Beyls v_float z = v_float_dup (zero);
4710928368fSKristof Beyls v_float prev = z;
4720928368fSKristof Beyls for (int i = 0; i < N; i += v_float_len ())
4730928368fSKristof Beyls prev = f (v_float_load (Af+i) + prev * z);
4740928368fSKristof Beyls }
4750928368fSKristof Beyls
#ifdef __vpcs
/* Throughput pass for double vector functions declared with __vpcs.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  int k = 0;

  while (k < N)
    {
      f (v_double_load (A + k));
      k += v_double_len ();
    }
}

/* Throughput pass for float vector functions declared with __vpcs.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  int k = 0;

  while (k < N)
    {
      f (v_float_load (Af + k));
      k += v_float_len ();
    }
}

/* Latency pass for double vector functions declared with __vpcs: each
   argument depends on the previous result through a multiplication by
   zero.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double last = z;
  int k = 0;

  while (k < N)
    {
      last = f (v_double_load (A + k) + last * z);
      k += v_double_len ();
    }
}

/* Latency pass for float vector functions declared with __vpcs: each
   argument depends on the previous result through a multiplication by
   zero.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float last = z;
  int k = 0;

  while (k < N)
    {
      last = f (v_float_load (Af + k) + last * z);
      k += v_float_len ();
    }
}
#endif
5090928368fSKristof Beyls
/* Return a timestamp in nanoseconds for measuring elapsed time.

   The monotonic clock is used rather than the wall clock: the wall clock
   can be stepped or slewed while a measurement is running, which would
   corrupt the difference between two timestamps.  The absolute value has
   no meaning; only differences between two calls do.  Aborts if the clock
   cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;
  if (clock_gettime (CLOCK_MONOTONIC, &ts) != 0)
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
5180928368fSKristof Beyls
/* Time RUN (F) and leave the best result in the caller's variable dt.
   After one untimed warm-up call, measurecount measurements are taken,
   each timing itercount back-to-back calls with tic; dt ends up as the
   smallest elapsed time in nanoseconds.  dt starts at -1 so that, with an
   unsigned dt, the first measurement always replaces it.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (int m = 0; m < measurecount; m++) \
    { \
      uint64_t start = tic (); \
      for (int k = 0; k < itercount; k++) \
        run (f); \
      uint64_t elapsed = tic () - start; \
      if (elapsed < dt) \
        dt = elapsed; \
    } \
} while (0)
5320928368fSKristof Beyls
5330928368fSKristof Beyls static void
bench1(const struct fun * f,int type,double lo,double hi)5340928368fSKristof Beyls bench1 (const struct fun *f, int type, double lo, double hi)
5350928368fSKristof Beyls {
5360928368fSKristof Beyls uint64_t dt = 0;
5370928368fSKristof Beyls uint64_t ns100;
5380928368fSKristof Beyls const char *s = type == 't' ? "rthruput" : "latency";
5390928368fSKristof Beyls int vlen = 1;
5400928368fSKristof Beyls
5410928368fSKristof Beyls if (f->vec && f->prec == 'd')
5420928368fSKristof Beyls vlen = v_double_len();
5430928368fSKristof Beyls else if (f->vec && f->prec == 'f')
5440928368fSKristof Beyls vlen = v_float_len();
5450928368fSKristof Beyls
5460928368fSKristof Beyls if (f->prec == 'd' && type == 't' && f->vec == 0)
5470928368fSKristof Beyls TIMEIT (run_thruput, f->fun.d);
5480928368fSKristof Beyls else if (f->prec == 'd' && type == 'l' && f->vec == 0)
5490928368fSKristof Beyls TIMEIT (run_latency, f->fun.d);
5500928368fSKristof Beyls else if (f->prec == 'f' && type == 't' && f->vec == 0)
5510928368fSKristof Beyls TIMEIT (runf_thruput, f->fun.f);
5520928368fSKristof Beyls else if (f->prec == 'f' && type == 'l' && f->vec == 0)
5530928368fSKristof Beyls TIMEIT (runf_latency, f->fun.f);
5540928368fSKristof Beyls else if (f->prec == 'd' && type == 't' && f->vec == 'v')
5550928368fSKristof Beyls TIMEIT (run_v_thruput, f->fun.vd);
5560928368fSKristof Beyls else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
5570928368fSKristof Beyls TIMEIT (run_v_latency, f->fun.vd);
5580928368fSKristof Beyls else if (f->prec == 'f' && type == 't' && f->vec == 'v')
5590928368fSKristof Beyls TIMEIT (runf_v_thruput, f->fun.vf);
5600928368fSKristof Beyls else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
5610928368fSKristof Beyls TIMEIT (runf_v_latency, f->fun.vf);
5620928368fSKristof Beyls #ifdef __vpcs
5630928368fSKristof Beyls else if (f->prec == 'd' && type == 't' && f->vec == 'n')
5640928368fSKristof Beyls TIMEIT (run_vn_thruput, f->fun.vnd);
5650928368fSKristof Beyls else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
5660928368fSKristof Beyls TIMEIT (run_vn_latency, f->fun.vnd);
5670928368fSKristof Beyls else if (f->prec == 'f' && type == 't' && f->vec == 'n')
5680928368fSKristof Beyls TIMEIT (runf_vn_thruput, f->fun.vnf);
5690928368fSKristof Beyls else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
5700928368fSKristof Beyls TIMEIT (runf_vn_latency, f->fun.vnf);
5710928368fSKristof Beyls #endif
5720928368fSKristof Beyls
5730928368fSKristof Beyls if (type == 't')
5740928368fSKristof Beyls {
5750928368fSKristof Beyls ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
5760928368fSKristof Beyls printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
5770928368fSKristof Beyls (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
5780928368fSKristof Beyls (unsigned long long) dt, lo, hi);
5790928368fSKristof Beyls }
5800928368fSKristof Beyls else if (type == 'l')
5810928368fSKristof Beyls {
5820928368fSKristof Beyls ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
5830928368fSKristof Beyls printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
5840928368fSKristof Beyls (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
5850928368fSKristof Beyls (unsigned long long) dt, lo, hi);
5860928368fSKristof Beyls }
5870928368fSKristof Beyls fflush (stdout);
5880928368fSKristof Beyls }
5890928368fSKristof Beyls
5900928368fSKristof Beyls static void
bench(const struct fun * f,double lo,double hi,int type,int gen)5910928368fSKristof Beyls bench (const struct fun *f, double lo, double hi, int type, int gen)
5920928368fSKristof Beyls {
5930928368fSKristof Beyls if (f->prec == 'd' && gen == 'r')
5940928368fSKristof Beyls gen_rand (lo, hi);
5950928368fSKristof Beyls else if (f->prec == 'd' && gen == 'l')
5960928368fSKristof Beyls gen_linear (lo, hi);
5970928368fSKristof Beyls else if (f->prec == 'd' && gen == 't')
5980928368fSKristof Beyls gen_trace (0);
5990928368fSKristof Beyls else if (f->prec == 'f' && gen == 'r')
6000928368fSKristof Beyls genf_rand (lo, hi);
6010928368fSKristof Beyls else if (f->prec == 'f' && gen == 'l')
6020928368fSKristof Beyls genf_linear (lo, hi);
6030928368fSKristof Beyls else if (f->prec == 'f' && gen == 't')
6040928368fSKristof Beyls genf_trace (0);
6050928368fSKristof Beyls
6060928368fSKristof Beyls if (gen == 't')
6070928368fSKristof Beyls hi = trace_size / N;
6080928368fSKristof Beyls
6090928368fSKristof Beyls if (type == 'b' || type == 't')
6100928368fSKristof Beyls bench1 (f, 't', lo, hi);
6110928368fSKristof Beyls
6120928368fSKristof Beyls if (type == 'b' || type == 'l')
6130928368fSKristof Beyls bench1 (f, 'l', lo, hi);
6140928368fSKristof Beyls
6150928368fSKristof Beyls for (int i = N; i < trace_size; i += N)
6160928368fSKristof Beyls {
6170928368fSKristof Beyls if (f->prec == 'd')
6180928368fSKristof Beyls gen_trace (i);
6190928368fSKristof Beyls else
6200928368fSKristof Beyls genf_trace (i);
6210928368fSKristof Beyls
6220928368fSKristof Beyls lo = i / N;
6230928368fSKristof Beyls if (type == 'b' || type == 't')
6240928368fSKristof Beyls bench1 (f, 't', lo, hi);
6250928368fSKristof Beyls
6260928368fSKristof Beyls if (type == 'b' || type == 'l')
6270928368fSKristof Beyls bench1 (f, 'l', lo, hi);
6280928368fSKristof Beyls }
6290928368fSKristof Beyls }
6300928368fSKristof Beyls
6310928368fSKristof Beyls static void
readtrace(const char * name)6320928368fSKristof Beyls readtrace (const char *name)
6330928368fSKristof Beyls {
6340928368fSKristof Beyls int n = 0;
6350928368fSKristof Beyls FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
6360928368fSKristof Beyls if (!f)
6370928368fSKristof Beyls {
638*0570de73SKazuaki Ishizaki printf ("opening \"%s\" failed: %m\n", name);
6390928368fSKristof Beyls exit (1);
6400928368fSKristof Beyls }
6410928368fSKristof Beyls for (;;)
6420928368fSKristof Beyls {
6430928368fSKristof Beyls if (n >= trace_size)
6440928368fSKristof Beyls {
6450928368fSKristof Beyls trace_size += N;
6460928368fSKristof Beyls Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
6470928368fSKristof Beyls if (Trace == NULL)
6480928368fSKristof Beyls {
6490928368fSKristof Beyls printf ("out of memory\n");
6500928368fSKristof Beyls exit (1);
6510928368fSKristof Beyls }
6520928368fSKristof Beyls }
6530928368fSKristof Beyls if (fscanf (f, "%lf", Trace + n) != 1)
6540928368fSKristof Beyls break;
6550928368fSKristof Beyls n++;
6560928368fSKristof Beyls }
6570928368fSKristof Beyls if (ferror (f) || n == 0)
6580928368fSKristof Beyls {
6590928368fSKristof Beyls printf ("reading \"%s\" failed: %m\n", name);
6600928368fSKristof Beyls exit (1);
6610928368fSKristof Beyls }
6620928368fSKristof Beyls fclose (f);
6630928368fSKristof Beyls if (n % N == 0)
6640928368fSKristof Beyls trace_size = n;
6650928368fSKristof Beyls for (int i = 0; n < trace_size; n++, i++)
6660928368fSKristof Beyls Trace[n] = Trace[i];
6670928368fSKristof Beyls }
6680928368fSKristof Beyls
6690928368fSKristof Beyls static void
usage(void)6700928368fSKristof Beyls usage (void)
6710928368fSKristof Beyls {
6720928368fSKristof Beyls printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
6730928368fSKristof Beyls "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
6740928368fSKristof Beyls "[func2 ..]\n");
6750928368fSKristof Beyls printf ("func:\n");
6760928368fSKristof Beyls printf ("%7s [run all benchmarks]\n", "all");
6770928368fSKristof Beyls for (const struct fun *f = funtab; f->name; f++)
6780928368fSKristof Beyls printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
6790928368fSKristof Beyls exit (1);
6800928368fSKristof Beyls }
6810928368fSKristof Beyls
6820928368fSKristof Beyls int
main(int argc,char * argv[])6830928368fSKristof Beyls main (int argc, char *argv[])
6840928368fSKristof Beyls {
6850928368fSKristof Beyls int usergen = 0, gen = 'r', type = 'b', all = 0;
6860928368fSKristof Beyls double lo = 0, hi = 0;
6870928368fSKristof Beyls const char *tracefile = "-";
6880928368fSKristof Beyls
6890928368fSKristof Beyls argv++;
6900928368fSKristof Beyls argc--;
6910928368fSKristof Beyls for (;;)
6920928368fSKristof Beyls {
6930928368fSKristof Beyls if (argc <= 0)
6940928368fSKristof Beyls usage ();
6950928368fSKristof Beyls if (argv[0][0] != '-')
6960928368fSKristof Beyls break;
6970928368fSKristof Beyls else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
6980928368fSKristof Beyls {
6990928368fSKristof Beyls usergen = 1;
7000928368fSKristof Beyls lo = strtod (argv[1], 0);
7010928368fSKristof Beyls hi = strtod (argv[2], 0);
7020928368fSKristof Beyls argv += 3;
7030928368fSKristof Beyls argc -= 3;
7040928368fSKristof Beyls }
7050928368fSKristof Beyls else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
7060928368fSKristof Beyls {
7070928368fSKristof Beyls measurecount = strtol (argv[1], 0, 0);
7080928368fSKristof Beyls argv += 2;
7090928368fSKristof Beyls argc -= 2;
7100928368fSKristof Beyls }
7110928368fSKristof Beyls else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
7120928368fSKristof Beyls {
7130928368fSKristof Beyls itercount = strtol (argv[1], 0, 0);
7140928368fSKristof Beyls argv += 2;
7150928368fSKristof Beyls argc -= 2;
7160928368fSKristof Beyls }
7170928368fSKristof Beyls else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
7180928368fSKristof Beyls {
7190928368fSKristof Beyls gen = argv[1][0];
7200928368fSKristof Beyls if (strchr ("rlt", gen) == 0)
7210928368fSKristof Beyls usage ();
7220928368fSKristof Beyls argv += 2;
7230928368fSKristof Beyls argc -= 2;
7240928368fSKristof Beyls }
7250928368fSKristof Beyls else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
7260928368fSKristof Beyls {
7270928368fSKristof Beyls gen = 't'; /* -f implies -g trace. */
7280928368fSKristof Beyls tracefile = argv[1];
7290928368fSKristof Beyls argv += 2;
7300928368fSKristof Beyls argc -= 2;
7310928368fSKristof Beyls }
7320928368fSKristof Beyls else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
7330928368fSKristof Beyls {
7340928368fSKristof Beyls type = argv[1][0];
7350928368fSKristof Beyls if (strchr ("ltb", type) == 0)
7360928368fSKristof Beyls usage ();
7370928368fSKristof Beyls argv += 2;
7380928368fSKristof Beyls argc -= 2;
7390928368fSKristof Beyls }
7400928368fSKristof Beyls else
7410928368fSKristof Beyls usage ();
7420928368fSKristof Beyls }
7430928368fSKristof Beyls if (gen == 't')
7440928368fSKristof Beyls {
7450928368fSKristof Beyls readtrace (tracefile);
7460928368fSKristof Beyls lo = hi = 0;
7470928368fSKristof Beyls usergen = 1;
7480928368fSKristof Beyls }
7490928368fSKristof Beyls while (argc > 0)
7500928368fSKristof Beyls {
7510928368fSKristof Beyls int found = 0;
7520928368fSKristof Beyls all = strcmp (argv[0], "all") == 0;
7530928368fSKristof Beyls for (const struct fun *f = funtab; f->name; f++)
7540928368fSKristof Beyls if (all || strcmp (argv[0], f->name) == 0)
7550928368fSKristof Beyls {
7560928368fSKristof Beyls found = 1;
7570928368fSKristof Beyls if (!usergen)
7580928368fSKristof Beyls {
7590928368fSKristof Beyls lo = f->lo;
7600928368fSKristof Beyls hi = f->hi;
7610928368fSKristof Beyls }
7620928368fSKristof Beyls bench (f, lo, hi, type, gen);
7630928368fSKristof Beyls if (usergen && !all)
7640928368fSKristof Beyls break;
7650928368fSKristof Beyls }
7660928368fSKristof Beyls if (!found)
7670928368fSKristof Beyls printf ("unknown function: %s\n", argv[0]);
7680928368fSKristof Beyls argv++;
7690928368fSKristof Beyls argc--;
7700928368fSKristof Beyls }
7710928368fSKristof Beyls return 0;
7720928368fSKristof Beyls }
773