xref: /llvm-project/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp (revision 4531f82c1ad905614c1df9359a77d48e6397fd97)
1 //===-- Performance test for nearest integer functions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/__support/FPUtil/FPBits.h"
10 #include "src/math/ceilf.h"
11 #include "src/math/ceilf16.h"
12 #include "src/math/floorf.h"
13 #include "src/math/floorf16.h"
14 #include "src/math/rintf.h"
15 #include "src/math/rintf16.h"
16 #include "src/math/roundevenf.h"
17 #include "src/math/roundevenf16.h"
18 #include "src/math/roundf.h"
19 #include "src/math/roundf16.h"
20 #include "src/math/truncf.h"
21 #include "src/math/truncf16.h"
22 #include "test/UnitTest/RoundingModeUtils.h"
23 #include "test/src/math/performance_testing/Timer.h"
24 
25 #include <fstream>
26 #include <math.h>
27 
28 using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
29 using LIBC_NAMESPACE::fputil::testing::RoundingMode;
30 
31 namespace LIBC_NAMESPACE::testing {
32 
33 template <typename T> class NearestIntegerPerf {
34   using FPBits = fputil::FPBits<T>;
35   using StorageType = typename FPBits::StorageType;
36 
37 public:
38   typedef T Func(T);
39 
40   static void run_perf_in_range(Func my_func, Func other_func,
41                                 StorageType starting_bit,
42                                 StorageType ending_bit, StorageType step,
43                                 size_t rounds, std::ofstream &log) {
44     auto runner = [=](Func func) {
45       [[maybe_unused]] volatile T result;
46       for (size_t i = 0; i < rounds; i++) {
47         for (StorageType bits = starting_bit; bits <= ending_bit;
48              bits += step) {
49           T x = FPBits(bits).get_val();
50           result = func(x);
51         }
52       }
53     };
54 
55     Timer timer;
56     timer.start();
57     runner(my_func);
58     timer.stop();
59 
60     size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
61     double my_average =
62         static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
63     log << "-- My function --\n";
64     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
65     log << "     Average runtime : " << my_average << " ns/op \n";
66     log << "     Ops per second  : "
67         << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
68 
69     timer.start();
70     runner(other_func);
71     timer.stop();
72 
73     double other_average =
74         static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
75     log << "-- Other function --\n";
76     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
77     log << "     Average runtime : " << other_average << " ns/op \n";
78     log << "     Ops per second  : "
79         << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
80 
81     log << "-- Average runtime ratio --\n";
82     log << "     Mine / Other's  : " << my_average / other_average << " \n";
83   }
84 
85   static void run_perf(Func my_func, Func other_func, size_t rounds,
86                        const char *log_file) {
87     std::ofstream log(log_file);
88     log << "Performance tests with inputs in normal integral range:\n";
89     run_perf_in_range(
90         my_func, other_func,
91         /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN),
92         /*ending_bit=*/
93         StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
94                     << FPBits::SIG_LEN),
95         /*step=*/StorageType(1 << FPBits::SIG_LEN),
96         rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
97     log << "\n Performance tests with inputs in low integral range:\n";
98     run_perf_in_range(
99         my_func, other_func,
100         /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
101         /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
102         /*step_bit=*/StorageType(1 << FPBits::SIG_LEN),
103         rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
104     log << "\n Performance tests with inputs in high integral range:\n";
105     run_perf_in_range(
106         my_func, other_func,
107         /*starting_bit=*/
108         StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN)
109                     << FPBits::SIG_LEN),
110         /*ending_bit=*/
111         StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
112         /*step=*/StorageType(1 << FPBits::SIG_LEN),
113         rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
114     log << "\n Performance tests with inputs in normal fractional range:\n";
115     run_perf_in_range(
116         my_func, other_func,
117         /*starting_bit=*/
118         StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
119         /*ending_bit=*/
120         StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
121         /*step=*/StorageType(1), rounds * 2, log);
122     log << "\n Performance tests with inputs in subnormal fractional range:\n";
123     run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
124                       /*ending_bit=*/StorageType(FPBits::SIG_MASK),
125                       /*step=*/StorageType(1), rounds, log);
126   }
127 };
128 
129 } // namespace LIBC_NAMESPACE::testing
130 
// Expands to a braced block that benchmarks `my_func` against `other_func`
// for type `T` via NearestIntegerPerf<T>::run_perf, logging to `filename`.
// Invocations are written without a trailing semicolon.
//
// run_perf is deliberately kept as two back-to-back calls with identical
// arguments. Each call opens `filename` with a default-mode std::ofstream, so
// the file ends up holding only the second call's report.
// NOTE(review): presumably the first call serves as a warm-up pass - confirm.
#define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename)         \
  {                                                                            \
    using PerfHarness = LIBC_NAMESPACE::testing::NearestIntegerPerf<T>;        \
    PerfHarness::run_perf(&my_func, &other_func, rounds, filename);            \
    PerfHarness::run_perf(&my_func, &other_func, rounds, filename);            \
  }
138 
// Base round counts passed as the `rounds` argument of NEAREST_INTEGER_PERF
// for the float16 and float benchmarks in main().
static constexpr size_t FLOAT16_ROUNDS{20'000};
static constexpr size_t FLOAT_ROUNDS{40};
141 
// LLVM libc might be the only libc implementation with support for float16 math
// functions currently. We can't compare our float16 functions against the
// system libc, so we compare them against this placeholder function.
//
// The placeholder returns its argument unchanged, so the "Other function"
// figures logged for the float16 benchmarks in main() describe a call to this
// identity function rather than a real implementation.
float16 placeholderf16(float16 x) { return x; }
146 
// The system libc might not provide the roundeven* C23 math functions either.
// This identity function stands in as the baseline for the roundevenf
// benchmark in main().
float placeholderf(float x) { return x; }
149 
150 int main() {
151   NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16,
152                        FLOAT16_ROUNDS, "ceilf16_perf.log")
153   NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16,
154                        FLOAT16_ROUNDS, "floorf16_perf.log")
155   NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16,
156                        FLOAT16_ROUNDS, "roundf16_perf.log")
157   NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16,
158                        FLOAT16_ROUNDS, "roundevenf16_perf.log")
159   NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16,
160                        FLOAT16_ROUNDS, "truncf16_perf.log")
161 
162   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS,
163                        "ceilf_perf.log")
164   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS,
165                        "floorf_perf.log")
166   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS,
167                        "roundf_perf.log")
168   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf,
169                        FLOAT_ROUNDS, "roundevenf_perf.log")
170   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
171                        "truncf_perf.log")
172 
173   if (ForceRoundingMode r(RoundingMode::Upward); r.success) {
174     NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
175                          FLOAT16_ROUNDS, "rintf16_upward_perf.log")
176     NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
177                          "rintf_upward_perf.log")
178   }
179   if (ForceRoundingMode r(RoundingMode::Downward); r.success) {
180     NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
181                          FLOAT16_ROUNDS, "rintf16_downward_perf.log")
182     NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
183                          "rintf_downward_perf.log")
184   }
185   if (ForceRoundingMode r(RoundingMode::TowardZero); r.success) {
186     NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
187                          FLOAT16_ROUNDS, "rintf16_towardzero_perf.log")
188     NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
189                          "rintf_towardzero_perf.log")
190   }
191   if (ForceRoundingMode r(RoundingMode::Nearest); r.success) {
192     NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
193                          FLOAT16_ROUNDS, "rintf16_nearest_perf.log")
194     NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
195                          "rintf_nearest_perf.log")
196   }
197 
198   return 0;
199 }
200