#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
75dda2efdSMircea Trofin
// Fallback for googletest versions that do not provide GTEST_SKIP().
//
// The macro expands to a `return` statement, so the enclosing (void) test body
// exits at the point of use. Because operator<< binds tighter than operator=,
// any text streamed after GTEST_SKIP() is first written to std::cout; the
// resulting stream is then handed to MsgHandler::operator=, which ignores it
// and yields void.
#ifndef GTEST_SKIP
struct MsgHandler {
  // Discards the stream; exists only so that `return MsgHandler() = <stream>`
  // is a well-formed void return expression.
  void operator=(std::ostream& /*unused*/) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
145dda2efdSMircea Trofin
155dda2efdSMircea Trofin using benchmark::internal::PerfCounters;
16*a5b79717SMircea Trofin using benchmark::internal::PerfCountersMeasurement;
175dda2efdSMircea Trofin using benchmark::internal::PerfCounterValues;
18*a5b79717SMircea Trofin using ::testing::AllOf;
19*a5b79717SMircea Trofin using ::testing::Gt;
20*a5b79717SMircea Trofin using ::testing::Lt;
215dda2efdSMircea Trofin
225dda2efdSMircea Trofin namespace {
// Counter names handed to PerfCounters::Create() by the tests in this file.
constexpr char kGenericPerfEvent1[] = "CYCLES";
constexpr char kGenericPerfEvent2[] = "INSTRUCTIONS";
255dda2efdSMircea Trofin
// Initialize() must report success exactly when PerfCounters::kSupported is
// true.
TEST(PerfCountersTest, Init) {
  const bool initialized = PerfCounters::Initialize();
  EXPECT_EQ(initialized, PerfCounters::kSupported);
}
295dda2efdSMircea Trofin
// Creating a PerfCounters object from a single known counter name must yield
// exactly one counter.
TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto single = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(single.num_counters(), 1);
}
375dda2efdSMircea Trofin
// Exercises Create() with missing, empty and invalid counter names. Create()
// is expected to always hand back a usable object: unsupported names are
// dropped (with a warning) while the valid ones are kept in their original
// order.
TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    // Without support, initialization itself has to fail.
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Creates a short-lived PerfCounters object and reports how many counters
  // it ended up with. The object is destroyed before the helper returns, so
  // the checks below never keep more than one set of counters open.
  const auto count_created = [](const std::vector<std::string>& names) {
    return PerfCounters::Create(names).num_counters();
  };

  // Safety checks: no names, an empty name, and a bogus name all result in
  // an object without counters.
  EXPECT_EQ(count_created({}), 0);
  EXPECT_EQ(count_created({""}), 0);
  EXPECT_EQ(count_created({"not a counter name"}), 0);

  const std::vector<std::string> event2_then_event1{kGenericPerfEvent2,
                                                    kGenericPerfEvent1};
  const std::vector<std::string> event1_then_event2{kGenericPerfEvent1,
                                                    kGenericPerfEvent2};

  {
    // An empty name hidden between two valid ones must be filtered out,
    // leaving two counters rather than zero.
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), event2_then_event1);
  }
  {
    // Same idea with an outrageous name, e.g. a fat-finger mistake.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), event2_then_event1);
  }

  // Golden input: both names are valid, so both must be accepted.
  EXPECT_EQ(count_created({kGenericPerfEvent1, kGenericPerfEvent2}), 2);

  {
    // A bad name at the very end of the list checks the edge of the chain.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), event1_then_event2);
  }
}
835dda2efdSMircea Trofin
// Takes two consecutive snapshots of a single counter and checks that both
// are positive and that the second one is larger than the first.
TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);

  PerfCounterValues first(1);
  EXPECT_TRUE(counters.Snapshot(&first));
  EXPECT_GT(first[0], 0);

  PerfCounterValues second(1);
  EXPECT_TRUE(counters.Snapshot(&second));
  EXPECT_GT(second[0], 0);

  // The counter keeps running between the two snapshots.
  EXPECT_GT(second[0], first[0]);
}
995dda2efdSMircea Trofin
// Takes two consecutive snapshots of a pair of counters and checks that every
// value read is positive.
TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);

  PerfCounterValues first(2);
  EXPECT_TRUE(counters.Snapshot(&first));
  for (size_t idx = 0; idx < 2; ++idx) {
    EXPECT_GT(first[idx], 0);
  }

  PerfCounterValues second(2);
  EXPECT_TRUE(counters.Snapshot(&second));
  for (size_t idx = 0; idx < 2; ++idx) {
    EXPECT_GT(second[idx], 0);
  }
}
1175dda2efdSMircea Trofin
// Opens the same counter twice and checks that both instances can be read.
//
// This works on recent and old Intel hardware, Pixel 3 and Pixel 6. Because
// of Pixel 6, no assumption beyond 2 hardware counters can be made.
TEST(PerfCountersTest, ReopenExistingCounters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  const std::vector<std::string> metric_names{kGenericPerfEvent1};

  // Default-construct both slots first, then move freshly created counters
  // into them one after the other.
  std::vector<PerfCounters> counters(2);
  for (size_t idx = 0; idx < counters.size(); ++idx) {
    counters[idx] = PerfCounters::Create(metric_names);
  }

  PerfCounterValues values(1);
  for (auto& opened : counters) {
    EXPECT_TRUE(opened.Snapshot(&values));
  }
}
1345dda2efdSMircea Trofin
// Creates many PerfCountersMeasurement objects for the same counter and checks
// that at least a minimum number of them can be started and stopped, both all
// at once and one at a time.
TEST(PerfCountersTest, CreateExistingMeasurements) {
  // The test relies on the same hardware assumption as the previous test,
  // ReopenExistingCounters: only a small number (2) of hardware counters can
  // be taken for granted at this date.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // We try 10 measurements, but at this time we can only guarantee that
  // kMinValidCounters of them will work. Perhaps in the future we could use
  // libpfm to query the hardware limits of this particular platform.
  //
  // Both limits are size_t so that every comparison against the size_t loop
  // indices and against max_counters below is unsigned-vs-unsigned.
  const size_t kMaxCounters = 10;
  const size_t kMinValidCounters = 2;

  // Let's use a ubiquitous counter that is guaranteed to work
  // on all platforms
  const std::vector<std::string> kMetrics{"cycles"};

  // Cannot create a vector of actual objects because the
  // copy constructor of PerfCounters is deleted - and so is
  // implicitly deleted on PerfCountersMeasurement too
  std::vector<std::unique_ptr<PerfCountersMeasurement>>
      perf_counter_measurements;

  perf_counter_measurements.reserve(kMaxCounters);
  for (size_t j = 0; j < kMaxCounters; ++j) {
    perf_counter_measurements.emplace_back(
        new PerfCountersMeasurement(kMetrics));
  }

  // Accumulates the results reported by Stop().
  std::vector<std::pair<std::string, double>> measurements;

  // Start all counters together to see if they hold. The index of the first
  // measurement that refuses to start becomes the effective limit.
  size_t max_counters = kMaxCounters;
  for (size_t i = 0; i < kMaxCounters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_EQ(counter.num_counters(), 1);
    if (!counter.Start()) {
      max_counters = i;
      break;
    }
  }

  ASSERT_GE(max_counters, kMinValidCounters);

  // Stop all together. Failures are only tolerated beyond the guaranteed
  // minimum.
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }

  // Start/stop individually
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    measurements.clear();
    // The result of Start() is deliberately not asserted here; the outcome is
    // judged through Stop() below.
    counter.Start();
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }
}
197*a5b79717SMircea Trofin
198*a5b79717SMircea Trofin // We try to do some meaningful work here but the compiler
199*a5b79717SMircea Trofin // insists in optimizing away our loop so we had to add a
200*a5b79717SMircea Trofin // no-optimize macro. In case it fails, we added some entropy
201*a5b79717SMircea Trofin // to this pool as well.
202*a5b79717SMircea Trofin
do_work()203*a5b79717SMircea Trofin BENCHMARK_DONT_OPTIMIZE size_t do_work() {
204*a5b79717SMircea Trofin static std::mt19937 rd{std::random_device{}()};
205*a5b79717SMircea Trofin static std::uniform_int_distribution<size_t> mrand(0, 10);
206*a5b79717SMircea Trofin const size_t kNumLoops = 1000000;
207*a5b79717SMircea Trofin size_t sum = 0;
208*a5b79717SMircea Trofin for (size_t j = 0; j < kNumLoops; ++j) {
209*a5b79717SMircea Trofin sum += mrand(rd);
210*a5b79717SMircea Trofin }
211*a5b79717SMircea Trofin benchmark::DoNotOptimize(sum);
212*a5b79717SMircea Trofin return sum;
213*a5b79717SMircea Trofin }
214*a5b79717SMircea Trofin
measure(size_t threadcount,PerfCounterValues * before,PerfCounterValues * after)215*a5b79717SMircea Trofin void measure(size_t threadcount, PerfCounterValues* before,
216*a5b79717SMircea Trofin PerfCounterValues* after) {
217*a5b79717SMircea Trofin BM_CHECK_NE(before, nullptr);
218*a5b79717SMircea Trofin BM_CHECK_NE(after, nullptr);
2195dda2efdSMircea Trofin std::vector<std::thread> threads(threadcount);
220a290770fSMircea Trofin auto work = [&]() { BM_CHECK(do_work() > 1000); };
2215dda2efdSMircea Trofin
2225dda2efdSMircea Trofin // We need to first set up the counters, then start the threads, so the
223*a5b79717SMircea Trofin // threads would inherit the counters. But later, we need to first destroy
224*a5b79717SMircea Trofin // the thread pool (so all the work finishes), then measure the counters. So
225*a5b79717SMircea Trofin // the scopes overlap, and we need to explicitly control the scope of the
2265dda2efdSMircea Trofin // threadpool.
2275dda2efdSMircea Trofin auto counters =
228*a5b79717SMircea Trofin PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
2295dda2efdSMircea Trofin for (auto& t : threads) t = std::thread(work);
230*a5b79717SMircea Trofin counters.Snapshot(before);
2315dda2efdSMircea Trofin for (auto& t : threads) t.join();
232*a5b79717SMircea Trofin counters.Snapshot(after);
2335dda2efdSMircea Trofin }
2345dda2efdSMircea Trofin
// Measures the same per-thread workload on two and on four threads and checks
// that the combined counter deltas stay within an order of magnitude of each
// other.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Note that this test works even when pinned (taskset) to a single CPU; in
  // that case the threads simply run sequentially.

  // Two threads: combined deltas of the first and second counter that
  // measure() opens.
  measure(2, &before, &after);
  const double first_delta_2t = static_cast<double>(after[0] - before[0]);
  const double second_delta_2t = static_cast<double>(after[1] - before[1]);

  // Four threads: same two deltas.
  measure(4, &before, &after);
  const double first_delta_4t = static_cast<double>(after[0] - before[0]);
  const double second_delta_4t = static_cast<double>(after[1] - before[1]);

  // Expecting four threads to be strictly "slower" than two does not hold (at
  // least on a beefy workstation with lots of CPUs): in some circumstances
  // the four-thread run even comes out better. So instead we only make sure
  // the two runs do not differ too much, i.e. neither is more than 10x the
  // other.
  EXPECT_THAT(first_delta_4t / first_delta_2t, AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(second_delta_4t / second_delta_2t, AllOf(Gt(0.1), Lt(10)));
}
268*a5b79717SMircea Trofin
// Starts and stops one measurement that covers several hardware events at
// once.
TEST(PerfCountersTest, HardwareLimits) {
  // Like ReopenExistingCounters, this test depends on the assumption that
  // only a small number (3-4) of hardware counters is available at this date.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken from `perf list`, restricted to the HW events that were actually
  // reported by `sudo perf stat -a sleep 10`, intersected over several
  // platforms. HW events listed by the first command but not reported by the
  // second seem not to work. Sadly this means the grouping (groups can
  // contain up to 6 members) is not really tested here.
  const std::vector<std::string> candidate_names{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // On the off-chance that some of these events are not supported, drop them
  // so the test completes without failure, even though it might then not
  // actually test the grouping on that platform.
  std::vector<std::string> supported_names;
  for (size_t idx = 0; idx < candidate_names.size(); ++idx) {
    const std::string& candidate = candidate_names[idx];
    if (!PerfCounters::IsCounterSupported(candidate)) {
      continue;
    }
    supported_names.push_back(candidate);
  }

  PerfCountersMeasurement counter(supported_names);
  std::vector<std::pair<std::string, double>> measurements;

  counter.Start();
  EXPECT_TRUE(counter.Stop(measurements));
}
306*a5b79717SMircea Trofin
3075dda2efdSMircea Trofin } // namespace
308