xref: /llvm-project/third-party/benchmark/test/perf_counters_gtest.cc (revision a5b797172cc902db166e9a695716fb81405f86e4)
1*a5b79717SMircea Trofin #include <random>
25dda2efdSMircea Trofin #include <thread>
35dda2efdSMircea Trofin 
45dda2efdSMircea Trofin #include "../src/perf_counters.h"
5*a5b79717SMircea Trofin #include "gmock/gmock.h"
65dda2efdSMircea Trofin #include "gtest/gtest.h"
75dda2efdSMircea Trofin 
#ifndef GTEST_SKIP
// Fallback used only when the googletest in use does not define GTEST_SKIP.
//
// MsgHandler exists solely so that the macro below can be written as a single
// `return` statement: assigning a stream to it yields `void`, which makes
// `return MsgHandler() = std::cout << "message";` legal inside a test body
// (a function returning void). The streamed message is printed to std::cout
// and the test returns immediately.
struct MsgHandler {
  // Accepts (and ignores) the stream left over from the `<<` chain.
  void operator=(std::ostream& /*message_stream*/) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
145dda2efdSMircea Trofin 
155dda2efdSMircea Trofin using benchmark::internal::PerfCounters;
16*a5b79717SMircea Trofin using benchmark::internal::PerfCountersMeasurement;
175dda2efdSMircea Trofin using benchmark::internal::PerfCounterValues;
18*a5b79717SMircea Trofin using ::testing::AllOf;
19*a5b79717SMircea Trofin using ::testing::Gt;
20*a5b79717SMircea Trofin using ::testing::Lt;
215dda2efdSMircea Trofin 
225dda2efdSMircea Trofin namespace {
// Event names handed to PerfCounters::Create() throughout these tests.
constexpr char kGenericPerfEvent1[] = "CYCLES";
constexpr char kGenericPerfEvent2[] = "INSTRUCTIONS";
255dda2efdSMircea Trofin 
// Initialize() must succeed exactly when perf counter support is compiled in.
TEST(PerfCountersTest, Init) {
  const bool initialized = PerfCounters::Initialize();
  EXPECT_EQ(initialized, PerfCounters::kSupported);
}
295dda2efdSMircea Trofin 
// Creating a PerfCounters object for one valid event yields one counter.
TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto single_event = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(single_event.num_counters(), 1);
}
375dda2efdSMircea Trofin 
TEST(PerfCountersTest,NegativeTest)385dda2efdSMircea Trofin TEST(PerfCountersTest, NegativeTest) {
395dda2efdSMircea Trofin   if (!PerfCounters::kSupported) {
405dda2efdSMircea Trofin     EXPECT_FALSE(PerfCounters::Initialize());
415dda2efdSMircea Trofin     return;
425dda2efdSMircea Trofin   }
435dda2efdSMircea Trofin   EXPECT_TRUE(PerfCounters::Initialize());
44*a5b79717SMircea Trofin   // Safety checks
45*a5b79717SMircea Trofin   // Create() will always create a valid object, even if passed no or
46*a5b79717SMircea Trofin   // wrong arguments as the new behavior is to warn and drop unsupported
47*a5b79717SMircea Trofin   // counters
48*a5b79717SMircea Trofin   EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
49*a5b79717SMircea Trofin   EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
50*a5b79717SMircea Trofin   EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
515dda2efdSMircea Trofin   {
52*a5b79717SMircea Trofin     // Try sneaking in a bad egg to see if it is filtered out. The
53*a5b79717SMircea Trofin     // number of counters has to be two, not zero
54*a5b79717SMircea Trofin     auto counter =
55*a5b79717SMircea Trofin         PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
56*a5b79717SMircea Trofin     EXPECT_EQ(counter.num_counters(), 2);
57*a5b79717SMircea Trofin     EXPECT_EQ(counter.names(), std::vector<std::string>(
58*a5b79717SMircea Trofin                                    {kGenericPerfEvent2, kGenericPerfEvent1}));
595dda2efdSMircea Trofin   }
605dda2efdSMircea Trofin   {
61*a5b79717SMircea Trofin     // Try sneaking in an outrageous counter, like a fat finger mistake
62*a5b79717SMircea Trofin     auto counter = PerfCounters::Create(
63*a5b79717SMircea Trofin         {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
64*a5b79717SMircea Trofin     EXPECT_EQ(counter.num_counters(), 2);
65*a5b79717SMircea Trofin     EXPECT_EQ(counter.names(), std::vector<std::string>(
66*a5b79717SMircea Trofin                                    {kGenericPerfEvent2, kGenericPerfEvent1}));
675dda2efdSMircea Trofin   }
68*a5b79717SMircea Trofin   {
69*a5b79717SMircea Trofin     // Finally try a golden input - it should like both of them
70*a5b79717SMircea Trofin     EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
71*a5b79717SMircea Trofin                   .num_counters(),
72*a5b79717SMircea Trofin               2);
73*a5b79717SMircea Trofin   }
74*a5b79717SMircea Trofin   {
75*a5b79717SMircea Trofin     // Add a bad apple in the end of the chain to check the edges
76*a5b79717SMircea Trofin     auto counter = PerfCounters::Create(
77*a5b79717SMircea Trofin         {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
78*a5b79717SMircea Trofin     EXPECT_EQ(counter.num_counters(), 2);
79*a5b79717SMircea Trofin     EXPECT_EQ(counter.names(), std::vector<std::string>(
80*a5b79717SMircea Trofin                                    {kGenericPerfEvent1, kGenericPerfEvent2}));
81*a5b79717SMircea Trofin   }
825dda2efdSMircea Trofin }
835dda2efdSMircea Trofin 
// Reads a single counter twice and expects a positive, increasing value.
TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto cycle_counter = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(cycle_counter.num_counters(), 1);

  // First reading.
  PerfCounterValues first_reading(1);
  EXPECT_TRUE(cycle_counter.Snapshot(&first_reading));
  EXPECT_GT(first_reading[0], 0);

  // Second reading: must be positive and strictly larger than the first.
  PerfCounterValues second_reading(1);
  EXPECT_TRUE(cycle_counter.Snapshot(&second_reading));
  EXPECT_GT(second_reading[0], 0);
  EXPECT_GT(second_reading[0], first_reading[0]);
}
995dda2efdSMircea Trofin 
// Reads two counters at once, twice, and expects every value to be positive.
TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto event_pair =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(event_pair.num_counters(), 2);

  // First reading of both counters.
  PerfCounterValues first_reading(2);
  EXPECT_TRUE(event_pair.Snapshot(&first_reading));
  EXPECT_GT(first_reading[0], 0);
  EXPECT_GT(first_reading[1], 0);

  // Second reading of both counters.
  PerfCounterValues second_reading(2);
  EXPECT_TRUE(event_pair.Snapshot(&second_reading));
  EXPECT_GT(second_reading[0], 0);
  EXPECT_GT(second_reading[1], 0);
}
1175dda2efdSMircea Trofin 
// Opens the same event twice and checks that both instances can be read.
TEST(PerfCountersTest, ReopenExistingCounters) {
  // Known to work on recent and old Intel hardware, Pixel 3 and Pixel 6.
  // Because of Pixel 6 no more than 2 HW counters may be assumed.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  std::vector<std::string> kMetrics({kGenericPerfEvent1});

  // Default-construct both slots first, then move a freshly created
  // PerfCounters into each of them.
  std::vector<PerfCounters> counters(2);
  for (size_t slot = 0; slot < counters.size(); ++slot) {
    counters[slot] = PerfCounters::Create(kMetrics);
  }

  PerfCounterValues values(1);
  EXPECT_TRUE(counters[0].Snapshot(&values));
  EXPECT_TRUE(counters[1].Snapshot(&values));
}
1345dda2efdSMircea Trofin 
TEST(PerfCountersTest,CreateExistingMeasurements)135*a5b79717SMircea Trofin TEST(PerfCountersTest, CreateExistingMeasurements) {
136*a5b79717SMircea Trofin   // The test works (i.e. causes read to fail) for the assumptions
137*a5b79717SMircea Trofin   // about hardware capabilities (i.e. small number (2) hardware
138*a5b79717SMircea Trofin   // counters) at this date,
139*a5b79717SMircea Trofin   // the same as previous test ReopenExistingCounters.
140*a5b79717SMircea Trofin   if (!PerfCounters::kSupported) {
141*a5b79717SMircea Trofin     GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
142*a5b79717SMircea Trofin   }
143*a5b79717SMircea Trofin   EXPECT_TRUE(PerfCounters::Initialize());
144*a5b79717SMircea Trofin 
145*a5b79717SMircea Trofin   // This means we will try 10 counters but we can only guarantee
146*a5b79717SMircea Trofin   // for sure at this time that only 3 will work. Perhaps in the future
147*a5b79717SMircea Trofin   // we could use libpfm to query for the hardware limits on this
148*a5b79717SMircea Trofin   // particular platform.
149*a5b79717SMircea Trofin   const int kMaxCounters = 10;
150*a5b79717SMircea Trofin   const int kMinValidCounters = 2;
151*a5b79717SMircea Trofin 
152*a5b79717SMircea Trofin   // Let's use a ubiquitous counter that is guaranteed to work
153*a5b79717SMircea Trofin   // on all platforms
154*a5b79717SMircea Trofin   const std::vector<std::string> kMetrics{"cycles"};
155*a5b79717SMircea Trofin 
156*a5b79717SMircea Trofin   // Cannot create a vector of actual objects because the
157*a5b79717SMircea Trofin   // copy constructor of PerfCounters is deleted - and so is
158*a5b79717SMircea Trofin   // implicitly deleted on PerfCountersMeasurement too
159*a5b79717SMircea Trofin   std::vector<std::unique_ptr<PerfCountersMeasurement>>
160*a5b79717SMircea Trofin       perf_counter_measurements;
161*a5b79717SMircea Trofin 
162*a5b79717SMircea Trofin   perf_counter_measurements.reserve(kMaxCounters);
163*a5b79717SMircea Trofin   for (int j = 0; j < kMaxCounters; ++j) {
164*a5b79717SMircea Trofin     perf_counter_measurements.emplace_back(
165*a5b79717SMircea Trofin         new PerfCountersMeasurement(kMetrics));
166*a5b79717SMircea Trofin   }
167*a5b79717SMircea Trofin 
168*a5b79717SMircea Trofin   std::vector<std::pair<std::string, double>> measurements;
169*a5b79717SMircea Trofin 
170*a5b79717SMircea Trofin   // Start all counters together to see if they hold
171*a5b79717SMircea Trofin   size_t max_counters = kMaxCounters;
172*a5b79717SMircea Trofin   for (size_t i = 0; i < kMaxCounters; ++i) {
173*a5b79717SMircea Trofin     auto& counter(*perf_counter_measurements[i]);
174*a5b79717SMircea Trofin     EXPECT_EQ(counter.num_counters(), 1);
175*a5b79717SMircea Trofin     if (!counter.Start()) {
176*a5b79717SMircea Trofin       max_counters = i;
177*a5b79717SMircea Trofin       break;
178*a5b79717SMircea Trofin     };
179*a5b79717SMircea Trofin   }
180*a5b79717SMircea Trofin 
181*a5b79717SMircea Trofin   ASSERT_GE(max_counters, kMinValidCounters);
182*a5b79717SMircea Trofin 
183*a5b79717SMircea Trofin   // Start all together
184*a5b79717SMircea Trofin   for (size_t i = 0; i < max_counters; ++i) {
185*a5b79717SMircea Trofin     auto& counter(*perf_counter_measurements[i]);
186*a5b79717SMircea Trofin     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
187*a5b79717SMircea Trofin   }
188*a5b79717SMircea Trofin 
189*a5b79717SMircea Trofin   // Start/stop individually
190*a5b79717SMircea Trofin   for (size_t i = 0; i < max_counters; ++i) {
191*a5b79717SMircea Trofin     auto& counter(*perf_counter_measurements[i]);
192*a5b79717SMircea Trofin     measurements.clear();
193*a5b79717SMircea Trofin     counter.Start();
194*a5b79717SMircea Trofin     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
195*a5b79717SMircea Trofin   }
196*a5b79717SMircea Trofin }
197*a5b79717SMircea Trofin 
198*a5b79717SMircea Trofin // We try to do some meaningful work here but the compiler
199*a5b79717SMircea Trofin // insists in optimizing away our loop so we had to add a
200*a5b79717SMircea Trofin // no-optimize macro. In case it fails, we added some entropy
201*a5b79717SMircea Trofin // to this pool as well.
202*a5b79717SMircea Trofin 
do_work()203*a5b79717SMircea Trofin BENCHMARK_DONT_OPTIMIZE size_t do_work() {
204*a5b79717SMircea Trofin   static std::mt19937 rd{std::random_device{}()};
205*a5b79717SMircea Trofin   static std::uniform_int_distribution<size_t> mrand(0, 10);
206*a5b79717SMircea Trofin   const size_t kNumLoops = 1000000;
207*a5b79717SMircea Trofin   size_t sum = 0;
208*a5b79717SMircea Trofin   for (size_t j = 0; j < kNumLoops; ++j) {
209*a5b79717SMircea Trofin     sum += mrand(rd);
210*a5b79717SMircea Trofin   }
211*a5b79717SMircea Trofin   benchmark::DoNotOptimize(sum);
212*a5b79717SMircea Trofin   return sum;
213*a5b79717SMircea Trofin }
214*a5b79717SMircea Trofin 
measure(size_t threadcount,PerfCounterValues * before,PerfCounterValues * after)215*a5b79717SMircea Trofin void measure(size_t threadcount, PerfCounterValues* before,
216*a5b79717SMircea Trofin              PerfCounterValues* after) {
217*a5b79717SMircea Trofin   BM_CHECK_NE(before, nullptr);
218*a5b79717SMircea Trofin   BM_CHECK_NE(after, nullptr);
2195dda2efdSMircea Trofin   std::vector<std::thread> threads(threadcount);
220a290770fSMircea Trofin   auto work = [&]() { BM_CHECK(do_work() > 1000); };
2215dda2efdSMircea Trofin 
2225dda2efdSMircea Trofin   // We need to first set up the counters, then start the threads, so the
223*a5b79717SMircea Trofin   // threads would inherit the counters. But later, we need to first destroy
224*a5b79717SMircea Trofin   // the thread pool (so all the work finishes), then measure the counters. So
225*a5b79717SMircea Trofin   // the scopes overlap, and we need to explicitly control the scope of the
2265dda2efdSMircea Trofin   // threadpool.
2275dda2efdSMircea Trofin   auto counters =
228*a5b79717SMircea Trofin       PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
2295dda2efdSMircea Trofin   for (auto& t : threads) t = std::thread(work);
230*a5b79717SMircea Trofin   counters.Snapshot(before);
2315dda2efdSMircea Trofin   for (auto& t : threads) t.join();
232*a5b79717SMircea Trofin   counters.Snapshot(after);
2335dda2efdSMircea Trofin }
2345dda2efdSMircea Trofin 
// Compares the counter deltas of a 2-thread run against a 4-thread run.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Note that this test works even when pinned (taskset) to a single CPU;
  // the threads then simply run sequentially.

  // Two threads: combined delta of each of the two counters that measure()
  // sets up (cycles and instructions).
  measure(2, &before, &after);
  const double first_delta_2_threads =
      static_cast<double>(after[0] - before[0]);
  const double second_delta_2_threads =
      static_cast<double>(after[1] - before[1]);

  // Four threads: same two deltas.
  measure(4, &before, &after);
  const double first_delta_4_threads =
      static_cast<double>(after[0] - before[0]);
  const double second_delta_4_threads =
      static_cast<double>(after[1] - before[1]);

  // Expecting the 4-thread run to cost more than the 2-thread run turned out
  // to be unreliable (at least on a beefy workstation with lots of cpus,
  // where 4 threads can even come out better than 2). So we only make sure
  // the two runs do not differ too much: neither may exceed the other by
  // 10x or more.
  EXPECT_THAT(first_delta_4_threads / first_delta_2_threads,
              AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(second_delta_4_threads / second_delta_2_threads,
              AllOf(Gt(0.1), Lt(10)));
}
268*a5b79717SMircea Trofin 
// Measures a small group of hardware events together in one measurement.
TEST(PerfCountersTest, HardwareLimits) {
  // Written under the same assumption as ReopenExistingCounters: only a
  // small number (3-4) of hardware counters is available at this date.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken from `perf list`, restricted to the HW events that were actually
  // reported by `sudo perf stat -a sleep 10`, intersected over several
  // platforms. HW events listed by the first command but not reported by the
  // second seem not to work. Sadly this means the grouping is not really
  // exercised here (groups can contain up to 6 members)...
  const std::vector<std::string> candidate_names{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // On the off-chance that some of these events are not supported, drop
  // them so the test still completes without failure, even though it may
  // then not really test the grouping on that platform.
  std::vector<std::string> supported_names;
  for (const std::string& candidate : candidate_names) {
    if (!PerfCounters::IsCounterSupported(candidate)) {
      continue;
    }
    supported_names.push_back(candidate);
  }

  PerfCountersMeasurement measurement(supported_names);
  std::vector<std::pair<std::string, double>> results;

  measurement.Start();
  EXPECT_TRUE(measurement.Stop(results));
}
306*a5b79717SMircea Trofin 
3075dda2efdSMircea Trofin }  // namespace
308