1ee7caa75SVy Nguyen //===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
2ee7caa75SVy Nguyen //
3ee7caa75SVy Nguyen // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ee7caa75SVy Nguyen // See https://llvm.org/LICENSE.txt for license information.
5ee7caa75SVy Nguyen // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ee7caa75SVy Nguyen //
7ee7caa75SVy Nguyen //===----------------------------------------------------------------------===//
8ee7caa75SVy Nguyen
9ee7caa75SVy Nguyen #include "X86Counter.h"
10ee7caa75SVy Nguyen
11a35480f8SVy Nguyen #if defined(__linux__) && defined(HAVE_LIBPFM) && \
12a35480f8SVy Nguyen defined(LIBPFM_HAS_FIELD_CYCLES)
1304f8ffd9SClement Courbet
14ee7caa75SVy Nguyen // FIXME: Use appropriate wrappers for poll.h and mman.h
15ee7caa75SVy Nguyen // to support Windows and remove this linux-only guard.
16a35480f8SVy Nguyen
17ee7caa75SVy Nguyen #include "llvm/Support/Endian.h"
18ee7caa75SVy Nguyen #include "llvm/Support/Errc.h"
19ee7caa75SVy Nguyen
20a35480f8SVy Nguyen #include <perfmon/perf_event.h>
21a35480f8SVy Nguyen #include <perfmon/pfmlib.h>
22a35480f8SVy Nguyen #include <perfmon/pfmlib_perf_event.h>
23ee7caa75SVy Nguyen
24ee7caa75SVy Nguyen #include <atomic>
25cb3fd715SVy Nguyen #include <chrono>
26ee7caa75SVy Nguyen #include <cstddef>
27ee7caa75SVy Nguyen #include <cstdint>
28ee7caa75SVy Nguyen #include <limits>
29ee7caa75SVy Nguyen #include <memory>
30ee7caa75SVy Nguyen #include <vector>
31ee7caa75SVy Nguyen
32ee7caa75SVy Nguyen #include <poll.h>
33ee7caa75SVy Nguyen #include <sys/mman.h>
34ee7caa75SVy Nguyen #include <unistd.h>
35ee7caa75SVy Nguyen
36ee7caa75SVy Nguyen namespace llvm {
37ee7caa75SVy Nguyen namespace exegesis {
38ee7caa75SVy Nguyen
// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;

// Number of pages that make up the perf sample data area.
static constexpr size_t kBufferPages = 8;
// Size, in bytes, of the sample data area alone.
static const size_t kDataBufferSize = getpagesize() * kBufferPages;

// Size, in bytes, of the whole mapping. The data area is preceded by one page
// that is reserved for perf_event_mmap_page, hence the extra page.
static const size_t kMappedBufferSize = kDataBufferSize + getpagesize();
47941188e9SSimon Pilgrim
// Waits for the LBR perf event behind FileDescriptor to become readable.
// Gives up after a 10 second timeout. Returns the result of poll() unchanged:
// positive when the descriptor reported an event, 0 on timeout, -1 on error.
static int pollLbrPerfEvent(const int FileDescriptor) {
  constexpr nfds_t kNumFds = 1;
  constexpr int kTimeoutMs = 10000;

  // Value-initialization clears revents (and any padding) up front.
  struct pollfd Request = {};
  Request.fd = FileDescriptor;
  Request.events = POLLIN;
  return poll(&Request, kNumFds, kTimeoutMs);
}
56ee7caa75SVy Nguyen
57ee7caa75SVy Nguyen // Copies the data-buffer into Buf, given the pointer to MMapped.
copyDataBuffer(void * MMappedBuffer,char * Buf,uint64_t Tail,size_t DataSize)58ee7caa75SVy Nguyen static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
59ee7caa75SVy Nguyen size_t DataSize) {
60ee7caa75SVy Nguyen // First page is reserved for perf_event_mmap_page. Data buffer starts on
61ee7caa75SVy Nguyen // the next page.
62ee7caa75SVy Nguyen char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
63ee7caa75SVy Nguyen // The LBR buffer is a cyclic buffer, we copy data to another buffer.
64ee7caa75SVy Nguyen uint64_t Offset = Tail % kDataBufferSize;
65ee7caa75SVy Nguyen size_t CopySize = kDataBufferSize - Offset;
66ee7caa75SVy Nguyen memcpy(Buf, Start + Offset, CopySize);
67ee7caa75SVy Nguyen if (CopySize >= DataSize)
68ee7caa75SVy Nguyen return;
69ee7caa75SVy Nguyen
70ee7caa75SVy Nguyen memcpy(Buf + CopySize, Start, Offset);
71ee7caa75SVy Nguyen return;
72ee7caa75SVy Nguyen }
73ee7caa75SVy Nguyen
74ee7caa75SVy Nguyen // Parses the given data-buffer for stats and fill the CycleArray.
75ee7caa75SVy Nguyen // If data has been extracted successfully, also modifies the code to jump
76ee7caa75SVy Nguyen // out the benchmark loop.
parseDataBuffer(const char * DataBuf,size_t DataSize,const void * From,const void * To,SmallVector<int64_t,4> * CycleArray)77*faf675ceSAiden Grossman static Error parseDataBuffer(const char *DataBuf, size_t DataSize,
78ee7caa75SVy Nguyen const void *From, const void *To,
79*faf675ceSAiden Grossman SmallVector<int64_t, 4> *CycleArray) {
80ee7caa75SVy Nguyen const char *DataPtr = DataBuf;
81ee7caa75SVy Nguyen while (DataPtr < DataBuf + DataSize) {
82ee7caa75SVy Nguyen struct perf_event_header Header;
83ee7caa75SVy Nguyen memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
84ee7caa75SVy Nguyen if (Header.type != PERF_RECORD_SAMPLE) {
85ee7caa75SVy Nguyen // Ignores non-sample records.
86ee7caa75SVy Nguyen DataPtr += Header.size;
87ee7caa75SVy Nguyen continue;
88ee7caa75SVy Nguyen }
89ee7caa75SVy Nguyen DataPtr += sizeof(Header);
90*faf675ceSAiden Grossman uint64_t Count = support::endian::read64(DataPtr, endianness::native);
91ee7caa75SVy Nguyen DataPtr += sizeof(Count);
92ee7caa75SVy Nguyen
93ee7caa75SVy Nguyen struct perf_branch_entry Entry;
94ee7caa75SVy Nguyen memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
95ee7caa75SVy Nguyen
96ee7caa75SVy Nguyen // Read the perf_branch_entry array.
97ee7caa75SVy Nguyen for (uint64_t i = 0; i < Count; ++i) {
98ee7caa75SVy Nguyen const uint64_t BlockStart = From == nullptr
99ee7caa75SVy Nguyen ? std::numeric_limits<uint64_t>::min()
100ee7caa75SVy Nguyen : reinterpret_cast<uint64_t>(From);
101ee7caa75SVy Nguyen const uint64_t BlockEnd = To == nullptr
102ee7caa75SVy Nguyen ? std::numeric_limits<uint64_t>::max()
103ee7caa75SVy Nguyen : reinterpret_cast<uint64_t>(To);
104ee7caa75SVy Nguyen
105ee7caa75SVy Nguyen if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
106ee7caa75SVy Nguyen CycleArray->push_back(Entry.cycles);
107ee7caa75SVy Nguyen
108ee7caa75SVy Nguyen if (i == Count - 1)
109ee7caa75SVy Nguyen // We've reached the last entry.
110*faf675ceSAiden Grossman return Error::success();
111ee7caa75SVy Nguyen
112ee7caa75SVy Nguyen // Advance to next entry
113ee7caa75SVy Nguyen DataPtr += sizeof(Entry);
114ee7caa75SVy Nguyen memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
115ee7caa75SVy Nguyen }
116ee7caa75SVy Nguyen }
117*faf675ceSAiden Grossman return make_error<StringError>("Unable to parse databuffer.", errc::io_error);
118ee7caa75SVy Nguyen }
119ee7caa75SVy Nguyen
X86LbrPerfEvent(unsigned SamplingPeriod)120ee7caa75SVy Nguyen X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
121ee7caa75SVy Nguyen assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
122ee7caa75SVy Nguyen EventString = "BR_INST_RETIRED.NEAR_TAKEN";
123ee7caa75SVy Nguyen Attr = new perf_event_attr();
124ee7caa75SVy Nguyen Attr->size = sizeof(*Attr);
125ee7caa75SVy Nguyen Attr->type = PERF_TYPE_RAW;
126ee7caa75SVy Nguyen // FIXME This is SKL's encoding. Not sure if it'll change.
127ee7caa75SVy Nguyen Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
128ee7caa75SVy Nguyen Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
129ee7caa75SVy Nguyen // Don't need to specify "USER" because we've already excluded HV and Kernel.
130ee7caa75SVy Nguyen Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
131ee7caa75SVy Nguyen Attr->sample_period = SamplingPeriod;
132ee7caa75SVy Nguyen Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
133ee7caa75SVy Nguyen Attr->disabled = 1;
134ee7caa75SVy Nguyen Attr->exclude_kernel = 1;
135ee7caa75SVy Nguyen Attr->exclude_hv = 1;
136ee7caa75SVy Nguyen Attr->read_format = PERF_FORMAT_GROUP;
137ee7caa75SVy Nguyen
138ee7caa75SVy Nguyen FullQualifiedEventString = EventString;
139ee7caa75SVy Nguyen }
140ee7caa75SVy Nguyen
X86LbrCounter(pfm::PerfEvent && NewEvent)141ee7caa75SVy Nguyen X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
142f670112aSAiden Grossman : CounterGroup(std::move(NewEvent), {}) {
143941188e9SSimon Pilgrim MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
144a974303eSAiden Grossman MAP_SHARED, getFileDescriptor(), 0);
145ee7caa75SVy Nguyen if (MMappedBuffer == MAP_FAILED)
146*faf675ceSAiden Grossman errs() << "Failed to mmap buffer.";
147ee7caa75SVy Nguyen }
148ee7caa75SVy Nguyen
~X86LbrCounter()149941188e9SSimon Pilgrim X86LbrCounter::~X86LbrCounter() {
150941188e9SSimon Pilgrim if (0 != munmap(MMappedBuffer, kMappedBufferSize))
151*faf675ceSAiden Grossman errs() << "Failed to munmap buffer.";
152941188e9SSimon Pilgrim }
153ee7caa75SVy Nguyen
start()154ee7caa75SVy Nguyen void X86LbrCounter::start() {
155a974303eSAiden Grossman ioctl(getFileDescriptor(), PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
156ee7caa75SVy Nguyen }
157ee7caa75SVy Nguyen
checkLbrSupport()158*faf675ceSAiden Grossman Error X86LbrCounter::checkLbrSupport() {
159cb3fd715SVy Nguyen // Do a sample read and check if the results contain non-zero values.
160cb3fd715SVy Nguyen
161cb3fd715SVy Nguyen X86LbrCounter counter(X86LbrPerfEvent(123));
162cb3fd715SVy Nguyen counter.start();
163cb3fd715SVy Nguyen
164cb3fd715SVy Nguyen // Prevent the compiler from unrolling the loop and get rid of all the
165cb3fd715SVy Nguyen // branches. We need at least 16 iterations.
166cb3fd715SVy Nguyen int Sum = 0;
167cb3fd715SVy Nguyen int V = 1;
168cb3fd715SVy Nguyen
169cb3fd715SVy Nguyen volatile int *P = &V;
170cb3fd715SVy Nguyen auto TimeLimit =
171cb3fd715SVy Nguyen std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
172cb3fd715SVy Nguyen
173cb3fd715SVy Nguyen for (int I = 0;
174cb3fd715SVy Nguyen I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
175cb3fd715SVy Nguyen ++I) {
176cb3fd715SVy Nguyen Sum += *P;
177cb3fd715SVy Nguyen }
178cb3fd715SVy Nguyen
179cb3fd715SVy Nguyen counter.stop();
18004f8ffd9SClement Courbet (void)Sum;
181cb3fd715SVy Nguyen
182cb3fd715SVy Nguyen auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
183cb3fd715SVy Nguyen if (ResultOrError)
184cb3fd715SVy Nguyen if (!ResultOrError.get().empty())
185cb3fd715SVy Nguyen // If there is at least one non-zero entry, then LBR is supported.
186cb3fd715SVy Nguyen for (const int64_t &Value : ResultOrError.get())
187cb3fd715SVy Nguyen if (Value != 0)
188cb3fd715SVy Nguyen return Error::success();
189cb3fd715SVy Nguyen
190*faf675ceSAiden Grossman return make_error<StringError>(
191cb3fd715SVy Nguyen "LBR format with cycles is not suppported on the host.",
192*faf675ceSAiden Grossman errc::not_supported);
193cb3fd715SVy Nguyen }
194cb3fd715SVy Nguyen
195*faf675ceSAiden Grossman Expected<SmallVector<int64_t, 4>>
readOrError(StringRef FunctionBytes) const196ee7caa75SVy Nguyen X86LbrCounter::readOrError(StringRef FunctionBytes) const {
197ee7caa75SVy Nguyen // Disable the event before reading
198a974303eSAiden Grossman ioctl(getFileDescriptor(), PERF_EVENT_IOC_DISABLE, 0);
199ee7caa75SVy Nguyen
200ee7caa75SVy Nguyen // Find the boundary of the function so that we could filter the LBRs
201ee7caa75SVy Nguyen // to keep only the relevant records.
202ee7caa75SVy Nguyen if (FunctionBytes.empty())
203*faf675ceSAiden Grossman return make_error<StringError>("Empty function bytes",
204*faf675ceSAiden Grossman errc::invalid_argument);
205ee7caa75SVy Nguyen const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
206ee7caa75SVy Nguyen const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
207ee7caa75SVy Nguyen FunctionBytes.size());
208cb3fd715SVy Nguyen return doReadCounter(From, To);
209cb3fd715SVy Nguyen }
210cb3fd715SVy Nguyen
211*faf675ceSAiden Grossman Expected<SmallVector<int64_t, 4>>
doReadCounter(const void * From,const void * To) const212cb3fd715SVy Nguyen X86LbrCounter::doReadCounter(const void *From, const void *To) const {
213cb3fd715SVy Nguyen // The max number of time-outs/retries before we give up.
214cb3fd715SVy Nguyen static constexpr int kMaxTimeouts = 160;
215cb3fd715SVy Nguyen
216cb3fd715SVy Nguyen // Parses the LBR buffer and fills CycleArray with the sequence of cycle
217cb3fd715SVy Nguyen // counts from the buffer.
218*faf675ceSAiden Grossman SmallVector<int64_t, 4> CycleArray;
219cb3fd715SVy Nguyen auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
220cb3fd715SVy Nguyen int NumTimeouts = 0;
221cb3fd715SVy Nguyen int PollResult = 0;
222cb3fd715SVy Nguyen
223ee7caa75SVy Nguyen while (PollResult <= 0) {
224a974303eSAiden Grossman PollResult = pollLbrPerfEvent(getFileDescriptor());
225ee7caa75SVy Nguyen if (PollResult > 0)
226ee7caa75SVy Nguyen break;
227ee7caa75SVy Nguyen if (PollResult == -1)
228*faf675ceSAiden Grossman return make_error<StringError>("Cannot poll LBR perf event.",
229*faf675ceSAiden Grossman errc::io_error);
230ee7caa75SVy Nguyen if (NumTimeouts++ >= kMaxTimeouts)
231*faf675ceSAiden Grossman return make_error<StringError>(
232ee7caa75SVy Nguyen "LBR polling still timed out after max number of attempts.",
233*faf675ceSAiden Grossman errc::device_or_resource_busy);
234ee7caa75SVy Nguyen }
235ee7caa75SVy Nguyen
236ee7caa75SVy Nguyen struct perf_event_mmap_page Page;
237ee7caa75SVy Nguyen memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));
238ee7caa75SVy Nguyen
239ee7caa75SVy Nguyen const uint64_t DataTail = Page.data_tail;
240ee7caa75SVy Nguyen const uint64_t DataHead = Page.data_head;
241ee7caa75SVy Nguyen // We're supposed to use a barrier after reading data_head.
242ee7caa75SVy Nguyen std::atomic_thread_fence(std::memory_order_acq_rel);
243ee7caa75SVy Nguyen const size_t DataSize = DataHead - DataTail;
244ee7caa75SVy Nguyen if (DataSize > kDataBufferSize)
245*faf675ceSAiden Grossman return make_error<StringError>("DataSize larger than buffer size.",
246*faf675ceSAiden Grossman errc::invalid_argument);
247ee7caa75SVy Nguyen
248ee7caa75SVy Nguyen copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
249*faf675ceSAiden Grossman Error error = parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
250ee7caa75SVy Nguyen if (!error)
251ee7caa75SVy Nguyen return CycleArray;
252ee7caa75SVy Nguyen return std::move(error);
253ee7caa75SVy Nguyen }
254ee7caa75SVy Nguyen
255ee7caa75SVy Nguyen } // namespace exegesis
256ee7caa75SVy Nguyen } // namespace llvm
257ee7caa75SVy Nguyen
258a35480f8SVy Nguyen #endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
259a35480f8SVy Nguyen // defined(LIBPFM_HAS_FIELD_CYCLES)
260