xref: /llvm-project/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp (revision faf675ce34ee1e2c6105e9a816f220412fd2f8d5)
1ee7caa75SVy Nguyen //===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
2ee7caa75SVy Nguyen //
3ee7caa75SVy Nguyen // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ee7caa75SVy Nguyen // See https://llvm.org/LICENSE.txt for license information.
5ee7caa75SVy Nguyen // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ee7caa75SVy Nguyen //
7ee7caa75SVy Nguyen //===----------------------------------------------------------------------===//
8ee7caa75SVy Nguyen 
9ee7caa75SVy Nguyen #include "X86Counter.h"
10ee7caa75SVy Nguyen 
11a35480f8SVy Nguyen #if defined(__linux__) && defined(HAVE_LIBPFM) &&                              \
12a35480f8SVy Nguyen     defined(LIBPFM_HAS_FIELD_CYCLES)
1304f8ffd9SClement Courbet 
14ee7caa75SVy Nguyen // FIXME: Use appropriate wrappers for poll.h and mman.h
15ee7caa75SVy Nguyen // to support Windows and remove this linux-only guard.
16a35480f8SVy Nguyen 
17ee7caa75SVy Nguyen #include "llvm/Support/Endian.h"
18ee7caa75SVy Nguyen #include "llvm/Support/Errc.h"
19ee7caa75SVy Nguyen 
20a35480f8SVy Nguyen #include <perfmon/perf_event.h>
21a35480f8SVy Nguyen #include <perfmon/pfmlib.h>
22a35480f8SVy Nguyen #include <perfmon/pfmlib_perf_event.h>
23ee7caa75SVy Nguyen 
24ee7caa75SVy Nguyen #include <atomic>
25cb3fd715SVy Nguyen #include <chrono>
26ee7caa75SVy Nguyen #include <cstddef>
27ee7caa75SVy Nguyen #include <cstdint>
28ee7caa75SVy Nguyen #include <limits>
29ee7caa75SVy Nguyen #include <memory>
30ee7caa75SVy Nguyen #include <vector>
31ee7caa75SVy Nguyen 
32ee7caa75SVy Nguyen #include <poll.h>
33ee7caa75SVy Nguyen #include <sys/mman.h>
34ee7caa75SVy Nguyen #include <unistd.h>
35ee7caa75SVy Nguyen 
36ee7caa75SVy Nguyen namespace llvm {
37ee7caa75SVy Nguyen namespace exegesis {
38ee7caa75SVy Nguyen 
// Number of branch records held by the LBR.
static constexpr int kLbrEntries = 16;

// The perf data area spans kBufferPages pages; its size in bytes depends on
// the page size reported by the system at start-up.
static constexpr size_t kBufferPages = 8;
static const size_t kDataBufferSize = getpagesize() * kBufferPages;

// The mapping begins with one page reserved for perf_event_mmap_page and the
// data area follows it, hence the extra page.
static const size_t kMappedBufferSize = getpagesize() * (1 + kBufferPages);
47941188e9SSimon Pilgrim 
// Blocks until the LBR perf event descriptor becomes readable or the timeout
// elapses. Returns the raw result of poll(): the number of ready descriptors,
// 0 on timeout, or -1 on error.
static int pollLbrPerfEvent(const int FileDescriptor) {
  constexpr nfds_t NumDescriptors = 1;
  constexpr int TimeoutMs = 10000;

  struct pollfd Request = {};
  Request.events = POLLIN;
  Request.fd = FileDescriptor;
  return poll(&Request, NumDescriptors, TimeoutMs);
}
56ee7caa75SVy Nguyen 
57ee7caa75SVy Nguyen // Copies the data-buffer into Buf, given the pointer to MMapped.
copyDataBuffer(void * MMappedBuffer,char * Buf,uint64_t Tail,size_t DataSize)58ee7caa75SVy Nguyen static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
59ee7caa75SVy Nguyen                            size_t DataSize) {
60ee7caa75SVy Nguyen   // First page is reserved for perf_event_mmap_page. Data buffer starts on
61ee7caa75SVy Nguyen   // the next page.
62ee7caa75SVy Nguyen   char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
63ee7caa75SVy Nguyen   // The LBR buffer is a cyclic buffer, we copy data to another buffer.
64ee7caa75SVy Nguyen   uint64_t Offset = Tail % kDataBufferSize;
65ee7caa75SVy Nguyen   size_t CopySize = kDataBufferSize - Offset;
66ee7caa75SVy Nguyen   memcpy(Buf, Start + Offset, CopySize);
67ee7caa75SVy Nguyen   if (CopySize >= DataSize)
68ee7caa75SVy Nguyen     return;
69ee7caa75SVy Nguyen 
70ee7caa75SVy Nguyen   memcpy(Buf + CopySize, Start, Offset);
71ee7caa75SVy Nguyen   return;
72ee7caa75SVy Nguyen }
73ee7caa75SVy Nguyen 
74ee7caa75SVy Nguyen // Parses the given data-buffer for stats and fill the CycleArray.
75ee7caa75SVy Nguyen // If data has been extracted successfully, also modifies the code to jump
76ee7caa75SVy Nguyen // out the benchmark loop.
parseDataBuffer(const char * DataBuf,size_t DataSize,const void * From,const void * To,SmallVector<int64_t,4> * CycleArray)77*faf675ceSAiden Grossman static Error parseDataBuffer(const char *DataBuf, size_t DataSize,
78ee7caa75SVy Nguyen                              const void *From, const void *To,
79*faf675ceSAiden Grossman                              SmallVector<int64_t, 4> *CycleArray) {
80ee7caa75SVy Nguyen   const char *DataPtr = DataBuf;
81ee7caa75SVy Nguyen   while (DataPtr < DataBuf + DataSize) {
82ee7caa75SVy Nguyen     struct perf_event_header Header;
83ee7caa75SVy Nguyen     memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
84ee7caa75SVy Nguyen     if (Header.type != PERF_RECORD_SAMPLE) {
85ee7caa75SVy Nguyen       // Ignores non-sample records.
86ee7caa75SVy Nguyen       DataPtr += Header.size;
87ee7caa75SVy Nguyen       continue;
88ee7caa75SVy Nguyen     }
89ee7caa75SVy Nguyen     DataPtr += sizeof(Header);
90*faf675ceSAiden Grossman     uint64_t Count = support::endian::read64(DataPtr, endianness::native);
91ee7caa75SVy Nguyen     DataPtr += sizeof(Count);
92ee7caa75SVy Nguyen 
93ee7caa75SVy Nguyen     struct perf_branch_entry Entry;
94ee7caa75SVy Nguyen     memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
95ee7caa75SVy Nguyen 
96ee7caa75SVy Nguyen     // Read the perf_branch_entry array.
97ee7caa75SVy Nguyen     for (uint64_t i = 0; i < Count; ++i) {
98ee7caa75SVy Nguyen       const uint64_t BlockStart = From == nullptr
99ee7caa75SVy Nguyen                                       ? std::numeric_limits<uint64_t>::min()
100ee7caa75SVy Nguyen                                       : reinterpret_cast<uint64_t>(From);
101ee7caa75SVy Nguyen       const uint64_t BlockEnd = To == nullptr
102ee7caa75SVy Nguyen                                     ? std::numeric_limits<uint64_t>::max()
103ee7caa75SVy Nguyen                                     : reinterpret_cast<uint64_t>(To);
104ee7caa75SVy Nguyen 
105ee7caa75SVy Nguyen       if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
106ee7caa75SVy Nguyen         CycleArray->push_back(Entry.cycles);
107ee7caa75SVy Nguyen 
108ee7caa75SVy Nguyen       if (i == Count - 1)
109ee7caa75SVy Nguyen         // We've reached the last entry.
110*faf675ceSAiden Grossman         return Error::success();
111ee7caa75SVy Nguyen 
112ee7caa75SVy Nguyen       // Advance to next entry
113ee7caa75SVy Nguyen       DataPtr += sizeof(Entry);
114ee7caa75SVy Nguyen       memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
115ee7caa75SVy Nguyen     }
116ee7caa75SVy Nguyen   }
117*faf675ceSAiden Grossman   return make_error<StringError>("Unable to parse databuffer.", errc::io_error);
118ee7caa75SVy Nguyen }
119ee7caa75SVy Nguyen 
X86LbrPerfEvent(unsigned SamplingPeriod)120ee7caa75SVy Nguyen X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
121ee7caa75SVy Nguyen   assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
122ee7caa75SVy Nguyen   EventString = "BR_INST_RETIRED.NEAR_TAKEN";
123ee7caa75SVy Nguyen   Attr = new perf_event_attr();
124ee7caa75SVy Nguyen   Attr->size = sizeof(*Attr);
125ee7caa75SVy Nguyen   Attr->type = PERF_TYPE_RAW;
126ee7caa75SVy Nguyen   // FIXME This is SKL's encoding. Not sure if it'll change.
127ee7caa75SVy Nguyen   Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
128ee7caa75SVy Nguyen   Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
129ee7caa75SVy Nguyen   // Don't need to specify "USER" because we've already excluded HV and Kernel.
130ee7caa75SVy Nguyen   Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
131ee7caa75SVy Nguyen   Attr->sample_period = SamplingPeriod;
132ee7caa75SVy Nguyen   Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
133ee7caa75SVy Nguyen   Attr->disabled = 1;
134ee7caa75SVy Nguyen   Attr->exclude_kernel = 1;
135ee7caa75SVy Nguyen   Attr->exclude_hv = 1;
136ee7caa75SVy Nguyen   Attr->read_format = PERF_FORMAT_GROUP;
137ee7caa75SVy Nguyen 
138ee7caa75SVy Nguyen   FullQualifiedEventString = EventString;
139ee7caa75SVy Nguyen }
140ee7caa75SVy Nguyen 
X86LbrCounter(pfm::PerfEvent && NewEvent)141ee7caa75SVy Nguyen X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
142f670112aSAiden Grossman     : CounterGroup(std::move(NewEvent), {}) {
143941188e9SSimon Pilgrim   MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
144a974303eSAiden Grossman                        MAP_SHARED, getFileDescriptor(), 0);
145ee7caa75SVy Nguyen   if (MMappedBuffer == MAP_FAILED)
146*faf675ceSAiden Grossman     errs() << "Failed to mmap buffer.";
147ee7caa75SVy Nguyen }
148ee7caa75SVy Nguyen 
~X86LbrCounter()149941188e9SSimon Pilgrim X86LbrCounter::~X86LbrCounter() {
150941188e9SSimon Pilgrim   if (0 != munmap(MMappedBuffer, kMappedBufferSize))
151*faf675ceSAiden Grossman     errs() << "Failed to munmap buffer.";
152941188e9SSimon Pilgrim }
153ee7caa75SVy Nguyen 
start()154ee7caa75SVy Nguyen void X86LbrCounter::start() {
155a974303eSAiden Grossman   ioctl(getFileDescriptor(), PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
156ee7caa75SVy Nguyen }
157ee7caa75SVy Nguyen 
checkLbrSupport()158*faf675ceSAiden Grossman Error X86LbrCounter::checkLbrSupport() {
159cb3fd715SVy Nguyen   // Do a sample read and check if the results contain non-zero values.
160cb3fd715SVy Nguyen 
161cb3fd715SVy Nguyen   X86LbrCounter counter(X86LbrPerfEvent(123));
162cb3fd715SVy Nguyen   counter.start();
163cb3fd715SVy Nguyen 
164cb3fd715SVy Nguyen   // Prevent the compiler from unrolling the loop and get rid of all the
165cb3fd715SVy Nguyen   // branches. We need at least 16 iterations.
166cb3fd715SVy Nguyen   int Sum = 0;
167cb3fd715SVy Nguyen   int V = 1;
168cb3fd715SVy Nguyen 
169cb3fd715SVy Nguyen   volatile int *P = &V;
170cb3fd715SVy Nguyen   auto TimeLimit =
171cb3fd715SVy Nguyen       std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
172cb3fd715SVy Nguyen 
173cb3fd715SVy Nguyen   for (int I = 0;
174cb3fd715SVy Nguyen        I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
175cb3fd715SVy Nguyen        ++I) {
176cb3fd715SVy Nguyen     Sum += *P;
177cb3fd715SVy Nguyen   }
178cb3fd715SVy Nguyen 
179cb3fd715SVy Nguyen   counter.stop();
18004f8ffd9SClement Courbet   (void)Sum;
181cb3fd715SVy Nguyen 
182cb3fd715SVy Nguyen   auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
183cb3fd715SVy Nguyen   if (ResultOrError)
184cb3fd715SVy Nguyen     if (!ResultOrError.get().empty())
185cb3fd715SVy Nguyen       // If there is at least one non-zero entry, then LBR is supported.
186cb3fd715SVy Nguyen       for (const int64_t &Value : ResultOrError.get())
187cb3fd715SVy Nguyen         if (Value != 0)
188cb3fd715SVy Nguyen           return Error::success();
189cb3fd715SVy Nguyen 
190*faf675ceSAiden Grossman   return make_error<StringError>(
191cb3fd715SVy Nguyen       "LBR format with cycles is not suppported on the host.",
192*faf675ceSAiden Grossman       errc::not_supported);
193cb3fd715SVy Nguyen }
194cb3fd715SVy Nguyen 
195*faf675ceSAiden Grossman Expected<SmallVector<int64_t, 4>>
readOrError(StringRef FunctionBytes) const196ee7caa75SVy Nguyen X86LbrCounter::readOrError(StringRef FunctionBytes) const {
197ee7caa75SVy Nguyen   // Disable the event before reading
198a974303eSAiden Grossman   ioctl(getFileDescriptor(), PERF_EVENT_IOC_DISABLE, 0);
199ee7caa75SVy Nguyen 
200ee7caa75SVy Nguyen   // Find the boundary of the function so that we could filter the LBRs
201ee7caa75SVy Nguyen   // to keep only the relevant records.
202ee7caa75SVy Nguyen   if (FunctionBytes.empty())
203*faf675ceSAiden Grossman     return make_error<StringError>("Empty function bytes",
204*faf675ceSAiden Grossman                                    errc::invalid_argument);
205ee7caa75SVy Nguyen   const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
206ee7caa75SVy Nguyen   const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
207ee7caa75SVy Nguyen                                                   FunctionBytes.size());
208cb3fd715SVy Nguyen   return doReadCounter(From, To);
209cb3fd715SVy Nguyen }
210cb3fd715SVy Nguyen 
211*faf675ceSAiden Grossman Expected<SmallVector<int64_t, 4>>
doReadCounter(const void * From,const void * To) const212cb3fd715SVy Nguyen X86LbrCounter::doReadCounter(const void *From, const void *To) const {
213cb3fd715SVy Nguyen   // The max number of time-outs/retries before we give up.
214cb3fd715SVy Nguyen   static constexpr int kMaxTimeouts = 160;
215cb3fd715SVy Nguyen 
216cb3fd715SVy Nguyen   // Parses the LBR buffer and fills CycleArray with the sequence of cycle
217cb3fd715SVy Nguyen   // counts from the buffer.
218*faf675ceSAiden Grossman   SmallVector<int64_t, 4> CycleArray;
219cb3fd715SVy Nguyen   auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
220cb3fd715SVy Nguyen   int NumTimeouts = 0;
221cb3fd715SVy Nguyen   int PollResult = 0;
222cb3fd715SVy Nguyen 
223ee7caa75SVy Nguyen   while (PollResult <= 0) {
224a974303eSAiden Grossman     PollResult = pollLbrPerfEvent(getFileDescriptor());
225ee7caa75SVy Nguyen     if (PollResult > 0)
226ee7caa75SVy Nguyen       break;
227ee7caa75SVy Nguyen     if (PollResult == -1)
228*faf675ceSAiden Grossman       return make_error<StringError>("Cannot poll LBR perf event.",
229*faf675ceSAiden Grossman                                      errc::io_error);
230ee7caa75SVy Nguyen     if (NumTimeouts++ >= kMaxTimeouts)
231*faf675ceSAiden Grossman       return make_error<StringError>(
232ee7caa75SVy Nguyen           "LBR polling still timed out after max number of attempts.",
233*faf675ceSAiden Grossman           errc::device_or_resource_busy);
234ee7caa75SVy Nguyen   }
235ee7caa75SVy Nguyen 
236ee7caa75SVy Nguyen   struct perf_event_mmap_page Page;
237ee7caa75SVy Nguyen   memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));
238ee7caa75SVy Nguyen 
239ee7caa75SVy Nguyen   const uint64_t DataTail = Page.data_tail;
240ee7caa75SVy Nguyen   const uint64_t DataHead = Page.data_head;
241ee7caa75SVy Nguyen   // We're supposed to use a barrier after reading data_head.
242ee7caa75SVy Nguyen   std::atomic_thread_fence(std::memory_order_acq_rel);
243ee7caa75SVy Nguyen   const size_t DataSize = DataHead - DataTail;
244ee7caa75SVy Nguyen   if (DataSize > kDataBufferSize)
245*faf675ceSAiden Grossman     return make_error<StringError>("DataSize larger than buffer size.",
246*faf675ceSAiden Grossman                                    errc::invalid_argument);
247ee7caa75SVy Nguyen 
248ee7caa75SVy Nguyen   copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
249*faf675ceSAiden Grossman   Error error = parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
250ee7caa75SVy Nguyen   if (!error)
251ee7caa75SVy Nguyen     return CycleArray;
252ee7caa75SVy Nguyen   return std::move(error);
253ee7caa75SVy Nguyen }
254ee7caa75SVy Nguyen 
255ee7caa75SVy Nguyen } // namespace exegesis
256ee7caa75SVy Nguyen } // namespace llvm
257ee7caa75SVy Nguyen 
258a35480f8SVy Nguyen #endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
259a35480f8SVy Nguyen        // defined(LIBPFM_HAS_FIELD_CYCLES)
260