xref: /llvm-project/third-party/benchmark/src/statistics.cc (revision a5b797172cc902db166e9a695716fb81405f86e4)
15dda2efdSMircea Trofin // Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
25dda2efdSMircea Trofin // Copyright 2017 Roman Lebedev. All rights reserved.
35dda2efdSMircea Trofin //
45dda2efdSMircea Trofin // Licensed under the Apache License, Version 2.0 (the "License");
55dda2efdSMircea Trofin // you may not use this file except in compliance with the License.
65dda2efdSMircea Trofin // You may obtain a copy of the License at
75dda2efdSMircea Trofin //
85dda2efdSMircea Trofin //     http://www.apache.org/licenses/LICENSE-2.0
95dda2efdSMircea Trofin //
105dda2efdSMircea Trofin // Unless required by applicable law or agreed to in writing, software
115dda2efdSMircea Trofin // distributed under the License is distributed on an "AS IS" BASIS,
125dda2efdSMircea Trofin // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
135dda2efdSMircea Trofin // See the License for the specific language governing permissions and
145dda2efdSMircea Trofin // limitations under the License.
155dda2efdSMircea Trofin 
16a290770fSMircea Trofin #include "statistics.h"
175dda2efdSMircea Trofin 
185dda2efdSMircea Trofin #include <algorithm>
195dda2efdSMircea Trofin #include <cmath>
205dda2efdSMircea Trofin #include <numeric>
215dda2efdSMircea Trofin #include <string>
225dda2efdSMircea Trofin #include <vector>
23a290770fSMircea Trofin 
24a290770fSMircea Trofin #include "benchmark/benchmark.h"
255dda2efdSMircea Trofin #include "check.h"
265dda2efdSMircea Trofin 
275dda2efdSMircea Trofin namespace benchmark {
285dda2efdSMircea Trofin 
// Adds up every sample in `v`, accumulating left to right starting from 0.0.
// An empty vector therefore sums to 0.0.
auto StatisticsSum = [](const std::vector<double>& v) -> double {
  double total = 0.0;
  for (const double sample : v) {
    total = total + sample;
  }
  return total;
};
325dda2efdSMircea Trofin 
// Arithmetic mean of the samples in `v`.
// Returns 0.0 for an empty vector so callers never divide by zero.
double StatisticsMean(const std::vector<double>& v) {
  const auto count = v.size();
  if (count == 0) return 0.0;

  // Sum left to right from 0.0, then scale by the reciprocal of the count.
  const double total = std::accumulate(v.begin(), v.end(), 0.0);
  const double reciprocal = 1.0 / static_cast<double>(count);
  return total * reciprocal;
}
375dda2efdSMircea Trofin 
StatisticsMedian(const std::vector<double> & v)385dda2efdSMircea Trofin double StatisticsMedian(const std::vector<double>& v) {
395dda2efdSMircea Trofin   if (v.size() < 3) return StatisticsMean(v);
405dda2efdSMircea Trofin   std::vector<double> copy(v);
415dda2efdSMircea Trofin 
425dda2efdSMircea Trofin   auto center = copy.begin() + v.size() / 2;
435dda2efdSMircea Trofin   std::nth_element(copy.begin(), center, copy.end());
445dda2efdSMircea Trofin 
45*a5b79717SMircea Trofin   // Did we have an odd number of samples?  If yes, then center is the median.
46*a5b79717SMircea Trofin   // If not, then we are looking for the average between center and the value
47*a5b79717SMircea Trofin   // before.  Instead of resorting, we just look for the max value before it,
48*a5b79717SMircea Trofin   // which is not necessarily the element immediately preceding `center` Since
49*a5b79717SMircea Trofin   // `copy` is only partially sorted by `nth_element`.
505dda2efdSMircea Trofin   if (v.size() % 2 == 1) return *center;
51*a5b79717SMircea Trofin   auto center2 = std::max_element(copy.begin(), center);
525dda2efdSMircea Trofin   return (*center + *center2) / 2.0;
535dda2efdSMircea Trofin }
545dda2efdSMircea Trofin 
// Sum of x * x over every sample x in `v`, accumulated left to right from 0.0.
// An empty vector yields 0.0.
auto SumSquares = [](const std::vector<double>& v) -> double {
  double total = 0.0;
  for (const double sample : v) {
    total = total + sample * sample;
  }
  return total;
};
595dda2efdSMircea Trofin 
// Squares a single value.
auto Sqr = [](const double dat) -> double {
  const double squared = dat * dat;
  return squared;
};
// Square root that maps negative inputs to 0.0 instead of producing NaN.
// Slightly negative values can appear from rounding in the callers' math.
// Non-negative inputs (and NaN) are passed straight to std::sqrt.
auto Sqrt = [](const double dat) -> double {
  return (dat < 0.0) ? 0.0 : std::sqrt(dat);
};
665dda2efdSMircea Trofin 
// Sample (Bessel-corrected, n - 1 denominator) standard deviation of `v`.
//
// Returns 0.0 for an empty vector and for a single sample, where the sample
// standard deviation is undefined.
//
// The variance is accumulated with Welford's single-pass update rather than
// as E[x^2] - mean^2. The latter subtracts two nearly equal large numbers
// whenever the spread is small compared to the magnitude of the samples
// (e.g. timings around 1e9 that differ by a few units), which destroys most
// or all significant digits of the result.
double StatisticsStdDev(const std::vector<double>& v) {
  if (v.size() < 2) return 0.0;

  double running_mean = 0.0;  // mean of the samples seen so far
  double sum_sq_dev = 0.0;    // sum of squared deviations from that mean
  double seen = 0.0;          // number of samples seen so far
  for (const double sample : v) {
    seen += 1.0;
    const double delta = sample - running_mean;
    running_mean += delta / seen;
    // Deviation from the old mean times deviation from the updated mean.
    sum_sq_dev += delta * (sample - running_mean);
  }

  return std::sqrt(sum_sq_dev / (seen - 1.0));
}
805dda2efdSMircea Trofin 
StatisticsCV(const std::vector<double> & v)81a290770fSMircea Trofin double StatisticsCV(const std::vector<double>& v) {
82a290770fSMircea Trofin   if (v.size() < 2) return 0.0;
83a290770fSMircea Trofin 
84a290770fSMircea Trofin   const auto stddev = StatisticsStdDev(v);
85a290770fSMircea Trofin   const auto mean = StatisticsMean(v);
86a290770fSMircea Trofin 
87*a5b79717SMircea Trofin   if (std::fpclassify(mean) == FP_ZERO) return 0.0;
88*a5b79717SMircea Trofin 
89a290770fSMircea Trofin   return stddev / mean;
90a290770fSMircea Trofin }
91a290770fSMircea Trofin 
ComputeStats(const std::vector<BenchmarkReporter::Run> & reports)925dda2efdSMircea Trofin std::vector<BenchmarkReporter::Run> ComputeStats(
935dda2efdSMircea Trofin     const std::vector<BenchmarkReporter::Run>& reports) {
945dda2efdSMircea Trofin   typedef BenchmarkReporter::Run Run;
955dda2efdSMircea Trofin   std::vector<Run> results;
965dda2efdSMircea Trofin 
97*a5b79717SMircea Trofin   auto error_count = std::count_if(reports.begin(), reports.end(),
98*a5b79717SMircea Trofin                                    [](Run const& run) { return run.skipped; });
995dda2efdSMircea Trofin 
1005dda2efdSMircea Trofin   if (reports.size() - error_count < 2) {
1015dda2efdSMircea Trofin     // We don't report aggregated data if there was a single run.
1025dda2efdSMircea Trofin     return results;
1035dda2efdSMircea Trofin   }
1045dda2efdSMircea Trofin 
1055dda2efdSMircea Trofin   // Accumulators.
1065dda2efdSMircea Trofin   std::vector<double> real_accumulated_time_stat;
1075dda2efdSMircea Trofin   std::vector<double> cpu_accumulated_time_stat;
1085dda2efdSMircea Trofin 
1095dda2efdSMircea Trofin   real_accumulated_time_stat.reserve(reports.size());
1105dda2efdSMircea Trofin   cpu_accumulated_time_stat.reserve(reports.size());
1115dda2efdSMircea Trofin 
1125dda2efdSMircea Trofin   // All repetitions should be run with the same number of iterations so we
1135dda2efdSMircea Trofin   // can take this information from the first benchmark.
1145dda2efdSMircea Trofin   const IterationCount run_iterations = reports.front().iterations;
1155dda2efdSMircea Trofin   // create stats for user counters
1165dda2efdSMircea Trofin   struct CounterStat {
1175dda2efdSMircea Trofin     Counter c;
1185dda2efdSMircea Trofin     std::vector<double> s;
1195dda2efdSMircea Trofin   };
1205dda2efdSMircea Trofin   std::map<std::string, CounterStat> counter_stats;
1215dda2efdSMircea Trofin   for (Run const& r : reports) {
1225dda2efdSMircea Trofin     for (auto const& cnt : r.counters) {
1235dda2efdSMircea Trofin       auto it = counter_stats.find(cnt.first);
1245dda2efdSMircea Trofin       if (it == counter_stats.end()) {
125*a5b79717SMircea Trofin         it = counter_stats
126*a5b79717SMircea Trofin                  .emplace(cnt.first,
127*a5b79717SMircea Trofin                           CounterStat{cnt.second, std::vector<double>{}})
128*a5b79717SMircea Trofin                  .first;
1295dda2efdSMircea Trofin         it->second.s.reserve(reports.size());
1305dda2efdSMircea Trofin       } else {
131*a5b79717SMircea Trofin         BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
1325dda2efdSMircea Trofin       }
1335dda2efdSMircea Trofin     }
1345dda2efdSMircea Trofin   }
1355dda2efdSMircea Trofin 
1365dda2efdSMircea Trofin   // Populate the accumulators.
1375dda2efdSMircea Trofin   for (Run const& run : reports) {
138a290770fSMircea Trofin     BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
139a290770fSMircea Trofin     BM_CHECK_EQ(run_iterations, run.iterations);
140*a5b79717SMircea Trofin     if (run.skipped) continue;
1415dda2efdSMircea Trofin     real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
1425dda2efdSMircea Trofin     cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
1435dda2efdSMircea Trofin     // user counters
1445dda2efdSMircea Trofin     for (auto const& cnt : run.counters) {
1455dda2efdSMircea Trofin       auto it = counter_stats.find(cnt.first);
146a290770fSMircea Trofin       BM_CHECK_NE(it, counter_stats.end());
1475dda2efdSMircea Trofin       it->second.s.emplace_back(cnt.second);
1485dda2efdSMircea Trofin     }
1495dda2efdSMircea Trofin   }
1505dda2efdSMircea Trofin 
1515dda2efdSMircea Trofin   // Only add label if it is same for all runs
1525dda2efdSMircea Trofin   std::string report_label = reports[0].report_label;
1535dda2efdSMircea Trofin   for (std::size_t i = 1; i < reports.size(); i++) {
1545dda2efdSMircea Trofin     if (reports[i].report_label != report_label) {
1555dda2efdSMircea Trofin       report_label = "";
1565dda2efdSMircea Trofin       break;
1575dda2efdSMircea Trofin     }
1585dda2efdSMircea Trofin   }
1595dda2efdSMircea Trofin 
1605dda2efdSMircea Trofin   const double iteration_rescale_factor =
1615dda2efdSMircea Trofin       double(reports.size()) / double(run_iterations);
1625dda2efdSMircea Trofin 
1635dda2efdSMircea Trofin   for (const auto& Stat : *reports[0].statistics) {
1645dda2efdSMircea Trofin     // Get the data from the accumulator to BenchmarkReporter::Run's.
1655dda2efdSMircea Trofin     Run data;
1665dda2efdSMircea Trofin     data.run_name = reports[0].run_name;
1675dda2efdSMircea Trofin     data.family_index = reports[0].family_index;
1685dda2efdSMircea Trofin     data.per_family_instance_index = reports[0].per_family_instance_index;
1695dda2efdSMircea Trofin     data.run_type = BenchmarkReporter::Run::RT_Aggregate;
1705dda2efdSMircea Trofin     data.threads = reports[0].threads;
1715dda2efdSMircea Trofin     data.repetitions = reports[0].repetitions;
1725dda2efdSMircea Trofin     data.repetition_index = Run::no_repetition_index;
1735dda2efdSMircea Trofin     data.aggregate_name = Stat.name_;
174a290770fSMircea Trofin     data.aggregate_unit = Stat.unit_;
1755dda2efdSMircea Trofin     data.report_label = report_label;
1765dda2efdSMircea Trofin 
1775dda2efdSMircea Trofin     // It is incorrect to say that an aggregate is computed over
1785dda2efdSMircea Trofin     // run's iterations, because those iterations already got averaged.
1795dda2efdSMircea Trofin     // Similarly, if there are N repetitions with 1 iterations each,
1805dda2efdSMircea Trofin     // an aggregate will be computed over N measurements, not 1.
1815dda2efdSMircea Trofin     // Thus it is best to simply use the count of separate reports.
1825dda2efdSMircea Trofin     data.iterations = reports.size();
1835dda2efdSMircea Trofin 
1845dda2efdSMircea Trofin     data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
1855dda2efdSMircea Trofin     data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
1865dda2efdSMircea Trofin 
187a290770fSMircea Trofin     if (data.aggregate_unit == StatisticUnit::kTime) {
1885dda2efdSMircea Trofin       // We will divide these times by data.iterations when reporting, but the
189a290770fSMircea Trofin       // data.iterations is not necessarily the scale of these measurements,
190a290770fSMircea Trofin       // because in each repetition, these timers are sum over all the iters.
1915dda2efdSMircea Trofin       // And if we want to say that the stats are over N repetitions and not
1925dda2efdSMircea Trofin       // M iterations, we need to multiply these by (N/M).
1935dda2efdSMircea Trofin       data.real_accumulated_time *= iteration_rescale_factor;
1945dda2efdSMircea Trofin       data.cpu_accumulated_time *= iteration_rescale_factor;
195a290770fSMircea Trofin     }
1965dda2efdSMircea Trofin 
1975dda2efdSMircea Trofin     data.time_unit = reports[0].time_unit;
1985dda2efdSMircea Trofin 
1995dda2efdSMircea Trofin     // user counters
2005dda2efdSMircea Trofin     for (auto const& kv : counter_stats) {
2015dda2efdSMircea Trofin       // Do *NOT* rescale the custom counters. They are already properly scaled.
2025dda2efdSMircea Trofin       const auto uc_stat = Stat.compute_(kv.second.s);
2035dda2efdSMircea Trofin       auto c = Counter(uc_stat, counter_stats[kv.first].c.flags,
2045dda2efdSMircea Trofin                        counter_stats[kv.first].c.oneK);
2055dda2efdSMircea Trofin       data.counters[kv.first] = c;
2065dda2efdSMircea Trofin     }
2075dda2efdSMircea Trofin 
2085dda2efdSMircea Trofin     results.push_back(data);
2095dda2efdSMircea Trofin   }
2105dda2efdSMircea Trofin 
2115dda2efdSMircea Trofin   return results;
2125dda2efdSMircea Trofin }
2135dda2efdSMircea Trofin 
2145dda2efdSMircea Trofin }  // end namespace benchmark
215