1 //===-- Performance test for nearest integer functions --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "src/__support/FPUtil/FPBits.h" 10 #include "src/math/ceilf.h" 11 #include "src/math/ceilf16.h" 12 #include "src/math/floorf.h" 13 #include "src/math/floorf16.h" 14 #include "src/math/rintf.h" 15 #include "src/math/rintf16.h" 16 #include "src/math/roundevenf.h" 17 #include "src/math/roundevenf16.h" 18 #include "src/math/roundf.h" 19 #include "src/math/roundf16.h" 20 #include "src/math/truncf.h" 21 #include "src/math/truncf16.h" 22 #include "test/UnitTest/RoundingModeUtils.h" 23 #include "test/src/math/performance_testing/Timer.h" 24 25 #include <fstream> 26 #include <math.h> 27 28 using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode; 29 using LIBC_NAMESPACE::fputil::testing::RoundingMode; 30 31 namespace LIBC_NAMESPACE::testing { 32 33 template <typename T> class NearestIntegerPerf { 34 using FPBits = fputil::FPBits<T>; 35 using StorageType = typename FPBits::StorageType; 36 37 public: 38 typedef T Func(T); 39 40 static void run_perf_in_range(Func my_func, Func other_func, 41 StorageType starting_bit, 42 StorageType ending_bit, StorageType step, 43 size_t rounds, std::ofstream &log) { 44 auto runner = [=](Func func) { 45 [[maybe_unused]] volatile T result; 46 for (size_t i = 0; i < rounds; i++) { 47 for (StorageType bits = starting_bit; bits <= ending_bit; 48 bits += step) { 49 T x = FPBits(bits).get_val(); 50 result = func(x); 51 } 52 } 53 }; 54 55 Timer timer; 56 timer.start(); 57 runner(my_func); 58 timer.stop(); 59 60 size_t number_of_runs = (ending_bit - starting_bit) / step + 1; 61 double my_average = 62 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds; 63 log << "-- My function --\n"; 64 log << " Total time : " << timer.nanoseconds() << " ns \n"; 65 log << " Average runtime : " << my_average << " ns/op \n"; 66 log << " Ops per second : " 67 << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n"; 68 69 timer.start(); 70 runner(other_func); 71 timer.stop(); 72 73 double other_average = 74 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds; 75 log << "-- Other function --\n"; 76 log << " Total time : " << timer.nanoseconds() << " ns \n"; 77 log << " Average runtime : " << other_average << " ns/op \n"; 78 log << " Ops per second : " 79 << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n"; 80 81 log << "-- Average runtime ratio --\n"; 82 log << " Mine / Other's : " << my_average / other_average << " \n"; 83 } 84 85 static void run_perf(Func my_func, Func other_func, size_t rounds, 86 const char *log_file) { 87 std::ofstream log(log_file); 88 log << "Performance tests with inputs in normal integral range:\n"; 89 run_perf_in_range( 90 my_func, other_func, 91 /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN), 92 /*ending_bit=*/ 93 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1) 94 << FPBits::SIG_LEN), 95 /*step=*/StorageType(1 << FPBits::SIG_LEN), 96 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log); 97 log << "\n Performance tests with inputs in low integral range:\n"; 98 run_perf_in_range( 99 my_func, other_func, 100 /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN), 101 /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN), 102 /*step_bit=*/StorageType(1 << FPBits::SIG_LEN), 103 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log); 104 log << "\n Performance tests with inputs in high integral range:\n"; 105 run_perf_in_range( 106 my_func, other_func, 107 /*starting_bit=*/ 108 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN) 109 << FPBits::SIG_LEN), 110 /*ending_bit=*/ 111 StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN), 112 /*step=*/StorageType(1 << FPBits::SIG_LEN), 113 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log); 114 log << "\n Performance tests with inputs in normal fractional range:\n"; 115 run_perf_in_range( 116 my_func, other_func, 117 /*starting_bit=*/ 118 StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1), 119 /*ending_bit=*/ 120 StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1), 121 /*step=*/StorageType(1), rounds * 2, log); 122 log << "\n Performance tests with inputs in subnormal fractional range:\n"; 123 run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1), 124 /*ending_bit=*/StorageType(FPBits::SIG_MASK), 125 /*step=*/StorageType(1), rounds, log); 126 } 127 }; 128 129 } // namespace LIBC_NAMESPACE::testing 130 131 #define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename) \ 132 { \ 133 LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \ 134 &my_func, &other_func, rounds, filename); \ 135 LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \ 136 &my_func, &other_func, rounds, filename); \ 137 } 138 139 static constexpr size_t FLOAT16_ROUNDS = 20'000; 140 static constexpr size_t FLOAT_ROUNDS = 40; 141 142 // LLVM libc might be the only libc implementation with support for float16 math 143 // functions currently. We can't compare our float16 functions against the 144 // system libc, so we compare them against this placeholder function. 145 float16 placeholderf16(float16 x) { return x; } 146 147 // The system libc might not provide the roundeven* C23 math functions either. 148 float placeholderf(float x) { return x; } 149 150 int main() { 151 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16, 152 FLOAT16_ROUNDS, "ceilf16_perf.log") 153 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16, 154 FLOAT16_ROUNDS, "floorf16_perf.log") 155 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16, 156 FLOAT16_ROUNDS, "roundf16_perf.log") 157 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16, 158 FLOAT16_ROUNDS, "roundevenf16_perf.log") 159 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16, 160 FLOAT16_ROUNDS, "truncf16_perf.log") 161 162 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS, 163 "ceilf_perf.log") 164 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS, 165 "floorf_perf.log") 166 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS, 167 "roundf_perf.log") 168 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf, 169 FLOAT_ROUNDS, "roundevenf_perf.log") 170 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS, 171 "truncf_perf.log") 172 173 if (ForceRoundingMode r(RoundingMode::Upward); r.success) { 174 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16, 175 FLOAT16_ROUNDS, "rintf16_upward_perf.log") 176 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS, 177 "rintf_upward_perf.log") 178 } 179 if (ForceRoundingMode r(RoundingMode::Downward); r.success) { 180 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16, 181 FLOAT16_ROUNDS, "rintf16_downward_perf.log") 182 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS, 183 "rintf_downward_perf.log") 184 } 185 if (ForceRoundingMode r(RoundingMode::TowardZero); r.success) { 186 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16, 187 FLOAT16_ROUNDS, "rintf16_towardzero_perf.log") 188 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS, 189 "rintf_towardzero_perf.log") 190 } 191 if (ForceRoundingMode r(RoundingMode::Nearest); r.success) { 192 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16, 193 FLOAT16_ROUNDS, "rintf16_nearest_perf.log") 194 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS, 195 "rintf_nearest_perf.log") 196 } 197 198 return 0; 199 } 200