xref: /llvm-project/llvm/lib/XRay/Profile.cpp (revision 13d04e766a7bccfd4fcf6574d508f2dc24b06b6e)
1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Defines the XRay Profile class representing the latency profile generated by
11 // XRay's profiling mode.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/XRay/Profile.h"
15 
16 #include "llvm/Support/DataExtractor.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/XRay/Trace.h"
20 #include <deque>
21 #include <memory>
22 
23 namespace llvm {
24 namespace xray {
25 
26 namespace {
27 
28 struct BlockHeader {
29   uint32_t Size;
30   uint32_t Number;
31   uint64_t Thread;
32 };
33 
34 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
35                                              uint32_t &Offset) {
36   BlockHeader H;
37   uint32_t CurrentOffset = Offset;
38   H.Size = Extractor.getU32(&Offset);
39   if (Offset == CurrentOffset)
40     return make_error<StringError>(
41         Twine("Error parsing block header size at offset '") +
42             Twine(CurrentOffset) + "'",
43         std::make_error_code(std::errc::invalid_argument));
44   CurrentOffset = Offset;
45   H.Number = Extractor.getU32(&Offset);
46   if (Offset == CurrentOffset)
47     return make_error<StringError>(
48         Twine("Error parsing block header number at offset '") +
49             Twine(CurrentOffset) + "'",
50         std::make_error_code(std::errc::invalid_argument));
51   CurrentOffset = Offset;
52   H.Thread = Extractor.getU64(&Offset);
53   if (Offset == CurrentOffset)
54     return make_error<StringError>(
55         Twine("Error parsing block header thread id at offset '") +
56             Twine(CurrentOffset) + "'",
57         std::make_error_code(std::errc::invalid_argument));
58   return H;
59 }
60 
61 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
62                                                        uint32_t &Offset) {
63   // We're reading a sequence of int32_t's until we find a 0.
64   std::vector<Profile::FuncID> Path;
65   auto CurrentOffset = Offset;
66   int32_t FuncId;
67   do {
68     FuncId = Extractor.getSigned(&Offset, 4);
69     if (CurrentOffset == Offset)
70       return make_error<StringError>(
71           Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
72           std::make_error_code(std::errc::invalid_argument));
73     CurrentOffset = Offset;
74     Path.push_back(FuncId);
75   } while (FuncId != 0);
76   return std::move(Path);
77 }
78 
79 static Expected<Profile::Data> readData(DataExtractor &Extractor,
80                                         uint32_t &Offset) {
81   // We expect a certain number of elements for Data:
82   //   - A 64-bit CallCount
83   //   - A 64-bit CumulativeLocalTime counter
84   Profile::Data D;
85   auto CurrentOffset = Offset;
86   D.CallCount = Extractor.getU64(&Offset);
87   if (CurrentOffset == Offset)
88     return make_error<StringError>(
89         Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
90             "'",
91         std::make_error_code(std::errc::invalid_argument));
92   CurrentOffset = Offset;
93   D.CumulativeLocalTime = Extractor.getU64(&Offset);
94   if (CurrentOffset == Offset)
95     return make_error<StringError>(
96         Twine("Error parsing cumulative local time at offset '") +
97             Twine(CurrentOffset) + "'",
98         std::make_error_code(std::errc::invalid_argument));
99   return D;
100 }
101 
102 } // namespace
103 
104 Error Profile::addBlock(Block &&B) {
105   if (B.PathData.empty())
106     return make_error<StringError>(
107         "Block may not have empty path data.",
108         std::make_error_code(std::errc::invalid_argument));
109 
110   Blocks.emplace_back(std::move(B));
111   return Error::success();
112 }
113 
114 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
115   auto It = PathIDMap.find(P);
116   if (It == PathIDMap.end())
117     return make_error<StringError>(
118         Twine("PathID not found: ") + Twine(P),
119         std::make_error_code(std::errc::invalid_argument));
120   std::vector<Profile::FuncID> Path;
121   for (auto Node = It->second; Node; Node = Node->Caller)
122     Path.push_back(Node->Func);
123   return std::move(Path);
124 }
125 
126 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
127   if (P.empty())
128     return 0;
129 
130   auto RootToLeafPath = reverse(P);
131 
132   // Find the root.
133   auto It = RootToLeafPath.begin();
134   auto PathRoot = *It++;
135   auto RootIt =
136       find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
137 
138   // If we've not seen this root before, remember it.
139   TrieNode *Node = nullptr;
140   if (RootIt == Roots.end()) {
141     NodeStorage.emplace_back();
142     Node = &NodeStorage.back();
143     Node->Func = PathRoot;
144     Roots.push_back(Node);
145   } else {
146     Node = *RootIt;
147   }
148 
149   // Now traverse the path, re-creating if necessary.
150   while (It != RootToLeafPath.end()) {
151     auto NodeFuncID = *It++;
152     auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
153       return N->Func == NodeFuncID;
154     });
155     if (CalleeIt == Node->Callees.end()) {
156       NodeStorage.emplace_back();
157       auto NewNode = &NodeStorage.back();
158       NewNode->Func = NodeFuncID;
159       NewNode->Caller = Node;
160       Node->Callees.push_back(NewNode);
161       Node = NewNode;
162     } else {
163       Node = *CalleeIt;
164     }
165   }
166 
167   // At this point, Node *must* be pointing at the leaf.
168   assert(Node->Func == P.front());
169   if (Node->ID == 0) {
170     Node->ID = NextID++;
171     PathIDMap.insert({Node->ID, Node});
172   }
173   return Node->ID;
174 }
175 
176 Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
177   Profile Merged;
178   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
179   using PathDataMapPtr = std::unique_ptr<PathDataMap>;
180   using PathDataVector = decltype(Profile::Block::PathData);
181   using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
182   ThreadProfileIndexMap ThreadProfileIndex;
183 
184   for (const auto &P : {std::ref(L), std::ref(R)})
185     for (const auto &Block : P.get()) {
186       ThreadProfileIndexMap::iterator It;
187       std::tie(It, std::ignore) = ThreadProfileIndex.insert(
188           {Block.Thread, PathDataMapPtr{new PathDataMap()}});
189       for (const auto &PathAndData : Block.PathData) {
190         auto &PathID = PathAndData.first;
191         auto &Data = PathAndData.second;
192         auto NewPathID =
193             Merged.internPath(cantFail(P.get().expandPath(PathID)));
194         PathDataMap::iterator PathDataIt;
195         bool Inserted;
196         std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
197         if (!Inserted) {
198           auto &ExistingData = PathDataIt->second;
199           ExistingData.CallCount += Data.CallCount;
200           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
201         }
202       }
203     }
204 
205   for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
206     PathDataVector PathAndData;
207     PathAndData.reserve(IndexedThreadBlock.second->size());
208     copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
209     cantFail(
210         Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
211   }
212   return Merged;
213 }
214 
215 Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
216   Profile Merged;
217   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
218   PathDataMap PathData;
219   using PathDataVector = decltype(Profile::Block::PathData);
220   for (const auto &P : {std::ref(L), std::ref(R)})
221     for (const auto &Block : P.get())
222       for (const auto &PathAndData : Block.PathData) {
223         auto &PathId = PathAndData.first;
224         auto &Data = PathAndData.second;
225         auto NewPathID =
226             Merged.internPath(cantFail(P.get().expandPath(PathId)));
227         PathDataMap::iterator PathDataIt;
228         bool Inserted;
229         std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
230         if (!Inserted) {
231           auto &ExistingData = PathDataIt->second;
232           ExistingData.CallCount += Data.CallCount;
233           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
234         }
235       }
236 
237   // In the end there's a single Block, for thread 0.
238   PathDataVector Block;
239   Block.reserve(PathData.size());
240   copy(PathData, std::back_inserter(Block));
241   cantFail(Merged.addBlock({0, std::move(Block)}));
242   return Merged;
243 }
244 
245 Expected<Profile> loadProfile(StringRef Filename) {
246   int Fd;
247   if (auto EC = sys::fs::openFileForRead(Filename, Fd))
248     return make_error<StringError>(
249         Twine("Cannot read profile from '") + Filename + "'", EC);
250 
251   uint64_t FileSize;
252   if (auto EC = sys::fs::file_size(Filename, FileSize))
253     return make_error<StringError>(
254         Twine("Cannot get filesize of '") + Filename + "'", EC);
255 
256   std::error_code EC;
257   sys::fs::mapped_file_region MappedFile(
258       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
259   if (EC)
260     return make_error<StringError>(
261         Twine("Cannot mmap profile '") + Filename + "'", EC);
262   StringRef Data(MappedFile.data(), MappedFile.size());
263 
264   Profile P;
265   uint32_t Offset = 0;
266   DataExtractor Extractor(Data, true, 8);
267 
268   // For each block we get from the file:
269   while (Offset != MappedFile.size()) {
270     auto HeaderOrError = readBlockHeader(Extractor, Offset);
271     if (!HeaderOrError)
272       return HeaderOrError.takeError();
273 
274     // TODO: Maybe store this header information for each block, even just for
275     // debugging?
276     const auto &Header = HeaderOrError.get();
277 
278     // Read in the path data.
279     auto PathOrError = readPath(Extractor, Offset);
280     if (!PathOrError)
281       return PathOrError.takeError();
282     const auto &Path = PathOrError.get();
283 
284     // For each path we encounter, we should intern it to get a PathID.
285     auto DataOrError = readData(Extractor, Offset);
286     if (!DataOrError)
287       return DataOrError.takeError();
288     auto &Data = DataOrError.get();
289 
290     if (auto E =
291             P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
292                                       {{P.internPath(Path), std::move(Data)}}}))
293       return std::move(E);
294   }
295 
296   return P;
297 }
298 
299 namespace {
300 
301 struct StackEntry {
302   uint64_t Timestamp;
303   Profile::FuncID FuncId;
304 };
305 
306 } // namespace
307 
308 Expected<Profile> profileFromTrace(const Trace &T) {
309   Profile P;
310 
311   // The implementation of the algorithm re-creates the execution of
312   // the functions based on the trace data. To do this, we set up a number of
313   // data structures to track the execution context of every thread in the
314   // Trace.
315   DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
316   DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
317       ThreadPathData;
318 
319   //  We then do a pass through the Trace to account data on a per-thread-basis.
320   for (const auto &E : T) {
321     auto &TSD = ThreadStacks[E.TId];
322     switch (E.Type) {
323     case RecordTypes::ENTER:
324     case RecordTypes::ENTER_ARG:
325 
326       // Push entries into the function call stack.
327       TSD.push_back({E.TSC, E.FuncId});
328       break;
329 
330     case RecordTypes::EXIT:
331     case RecordTypes::TAIL_EXIT:
332 
333       // Exits cause some accounting to happen, based on the state of the stack.
334       // For each function we pop off the stack, we take note of the path and
335       // record the cumulative state for this path. As we're doing this, we
336       // intern the path into the Profile.
337       while (!TSD.empty()) {
338         auto Top = TSD.back();
339         auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
340         SmallVector<Profile::FuncID, 16> Path;
341         transform(reverse(TSD), std::back_inserter(Path),
342                   std::mem_fn(&StackEntry::FuncId));
343         auto InternedPath = P.internPath(Path);
344         auto &TPD = ThreadPathData[E.TId][InternedPath];
345         ++TPD.CallCount;
346         TPD.CumulativeLocalTime += FunctionLocalTime;
347         TSD.pop_back();
348 
349         // If we've matched the corresponding entry event for this function,
350         // then we exit the loop.
351         if (Top.FuncId == E.FuncId)
352           break;
353 
354         // FIXME: Consider the intermediate times and the cumulative tree time
355         // as well.
356       }
357 
358       break;
359     }
360   }
361 
362   // Once we've gone through the Trace, we now create one Block per thread in
363   // the Profile.
364   for (const auto &ThreadPaths : ThreadPathData) {
365     const auto &TID = ThreadPaths.first;
366     const auto &PathsData = ThreadPaths.second;
367     if (auto E = P.addBlock({
368             TID,
369             std::vector<std::pair<Profile::PathID, Profile::Data>>(
370                 PathsData.begin(), PathsData.end()),
371         }))
372       return std::move(E);
373   }
374 
375   return P;
376 }
377 
378 } // namespace xray
379 } // namespace llvm
380