1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Defines the XRay Profile class representing the latency profile generated by 11 // XRay's profiling mode. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/XRay/Profile.h" 15 16 #include "llvm/Support/DataExtractor.h" 17 #include "llvm/Support/Error.h" 18 #include "llvm/Support/FileSystem.h" 19 #include "llvm/XRay/Trace.h" 20 #include <deque> 21 #include <memory> 22 23 namespace llvm { 24 namespace xray { 25 26 namespace { 27 28 struct BlockHeader { 29 uint32_t Size; 30 uint32_t Number; 31 uint64_t Thread; 32 }; 33 34 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, 35 uint32_t &Offset) { 36 BlockHeader H; 37 uint32_t CurrentOffset = Offset; 38 H.Size = Extractor.getU32(&Offset); 39 if (Offset == CurrentOffset) 40 return make_error<StringError>( 41 Twine("Error parsing block header size at offset '") + 42 Twine(CurrentOffset) + "'", 43 std::make_error_code(std::errc::invalid_argument)); 44 CurrentOffset = Offset; 45 H.Number = Extractor.getU32(&Offset); 46 if (Offset == CurrentOffset) 47 return make_error<StringError>( 48 Twine("Error parsing block header number at offset '") + 49 Twine(CurrentOffset) + "'", 50 std::make_error_code(std::errc::invalid_argument)); 51 CurrentOffset = Offset; 52 H.Thread = Extractor.getU64(&Offset); 53 if (Offset == CurrentOffset) 54 return make_error<StringError>( 55 Twine("Error parsing block header thread id at offset '") + 56 Twine(CurrentOffset) + "'", 57 std::make_error_code(std::errc::invalid_argument)); 58 return H; 59 } 60 61 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor, 62 uint32_t &Offset) { 63 // We're reading a sequence of int32_t's until we find a 0. 64 std::vector<Profile::FuncID> Path; 65 auto CurrentOffset = Offset; 66 int32_t FuncId; 67 do { 68 FuncId = Extractor.getSigned(&Offset, 4); 69 if (CurrentOffset == Offset) 70 return make_error<StringError>( 71 Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", 72 std::make_error_code(std::errc::invalid_argument)); 73 CurrentOffset = Offset; 74 Path.push_back(FuncId); 75 } while (FuncId != 0); 76 return std::move(Path); 77 } 78 79 static Expected<Profile::Data> readData(DataExtractor &Extractor, 80 uint32_t &Offset) { 81 // We expect a certain number of elements for Data: 82 // - A 64-bit CallCount 83 // - A 64-bit CumulativeLocalTime counter 84 Profile::Data D; 85 auto CurrentOffset = Offset; 86 D.CallCount = Extractor.getU64(&Offset); 87 if (CurrentOffset == Offset) 88 return make_error<StringError>( 89 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + 90 "'", 91 std::make_error_code(std::errc::invalid_argument)); 92 CurrentOffset = Offset; 93 D.CumulativeLocalTime = Extractor.getU64(&Offset); 94 if (CurrentOffset == Offset) 95 return make_error<StringError>( 96 Twine("Error parsing cumulative local time at offset '") + 97 Twine(CurrentOffset) + "'", 98 std::make_error_code(std::errc::invalid_argument)); 99 return D; 100 } 101 102 } // namespace 103 104 Error Profile::addBlock(Block &&B) { 105 if (B.PathData.empty()) 106 return make_error<StringError>( 107 "Block may not have empty path data.", 108 std::make_error_code(std::errc::invalid_argument)); 109 110 Blocks.emplace_back(std::move(B)); 111 return Error::success(); 112 } 113 114 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { 115 auto It = PathIDMap.find(P); 116 if (It == PathIDMap.end()) 117 return make_error<StringError>( 118 Twine("PathID not found: ") + Twine(P), 119 std::make_error_code(std::errc::invalid_argument)); 120 std::vector<Profile::FuncID> Path; 121 for (auto Node = It->second; Node; Node = Node->Caller) 122 Path.push_back(Node->Func); 123 return std::move(Path); 124 } 125 126 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { 127 if (P.empty()) 128 return 0; 129 130 auto RootToLeafPath = reverse(P); 131 132 // Find the root. 133 auto It = RootToLeafPath.begin(); 134 auto PathRoot = *It++; 135 auto RootIt = 136 find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); 137 138 // If we've not seen this root before, remember it. 139 TrieNode *Node = nullptr; 140 if (RootIt == Roots.end()) { 141 NodeStorage.emplace_back(); 142 Node = &NodeStorage.back(); 143 Node->Func = PathRoot; 144 Roots.push_back(Node); 145 } else { 146 Node = *RootIt; 147 } 148 149 // Now traverse the path, re-creating if necessary. 150 while (It != RootToLeafPath.end()) { 151 auto NodeFuncID = *It++; 152 auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { 153 return N->Func == NodeFuncID; 154 }); 155 if (CalleeIt == Node->Callees.end()) { 156 NodeStorage.emplace_back(); 157 auto NewNode = &NodeStorage.back(); 158 NewNode->Func = NodeFuncID; 159 NewNode->Caller = Node; 160 Node->Callees.push_back(NewNode); 161 Node = NewNode; 162 } else { 163 Node = *CalleeIt; 164 } 165 } 166 167 // At this point, Node *must* be pointing at the leaf. 168 assert(Node->Func == P.front()); 169 if (Node->ID == 0) { 170 Node->ID = NextID++; 171 PathIDMap.insert({Node->ID, Node}); 172 } 173 return Node->ID; 174 } 175 176 Profile mergeProfilesByThread(const Profile &L, const Profile &R) { 177 Profile Merged; 178 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 179 using PathDataMapPtr = std::unique_ptr<PathDataMap>; 180 using PathDataVector = decltype(Profile::Block::PathData); 181 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; 182 ThreadProfileIndexMap ThreadProfileIndex; 183 184 for (const auto &P : {std::ref(L), std::ref(R)}) 185 for (const auto &Block : P.get()) { 186 ThreadProfileIndexMap::iterator It; 187 std::tie(It, std::ignore) = ThreadProfileIndex.insert( 188 {Block.Thread, PathDataMapPtr{new PathDataMap()}}); 189 for (const auto &PathAndData : Block.PathData) { 190 auto &PathID = PathAndData.first; 191 auto &Data = PathAndData.second; 192 auto NewPathID = 193 Merged.internPath(cantFail(P.get().expandPath(PathID))); 194 PathDataMap::iterator PathDataIt; 195 bool Inserted; 196 std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); 197 if (!Inserted) { 198 auto &ExistingData = PathDataIt->second; 199 ExistingData.CallCount += Data.CallCount; 200 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 201 } 202 } 203 } 204 205 for (const auto &IndexedThreadBlock : ThreadProfileIndex) { 206 PathDataVector PathAndData; 207 PathAndData.reserve(IndexedThreadBlock.second->size()); 208 copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); 209 cantFail( 210 Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); 211 } 212 return Merged; 213 } 214 215 Profile mergeProfilesByStack(const Profile &L, const Profile &R) { 216 Profile Merged; 217 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 218 PathDataMap PathData; 219 using PathDataVector = decltype(Profile::Block::PathData); 220 for (const auto &P : {std::ref(L), std::ref(R)}) 221 for (const auto &Block : P.get()) 222 for (const auto &PathAndData : Block.PathData) { 223 auto &PathId = PathAndData.first; 224 auto &Data = PathAndData.second; 225 auto NewPathID = 226 Merged.internPath(cantFail(P.get().expandPath(PathId))); 227 PathDataMap::iterator PathDataIt; 228 bool Inserted; 229 std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); 230 if (!Inserted) { 231 auto &ExistingData = PathDataIt->second; 232 ExistingData.CallCount += Data.CallCount; 233 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 234 } 235 } 236 237 // In the end there's a single Block, for thread 0. 238 PathDataVector Block; 239 Block.reserve(PathData.size()); 240 copy(PathData, std::back_inserter(Block)); 241 cantFail(Merged.addBlock({0, std::move(Block)})); 242 return Merged; 243 } 244 245 Expected<Profile> loadProfile(StringRef Filename) { 246 int Fd; 247 if (auto EC = sys::fs::openFileForRead(Filename, Fd)) 248 return make_error<StringError>( 249 Twine("Cannot read profile from '") + Filename + "'", EC); 250 251 uint64_t FileSize; 252 if (auto EC = sys::fs::file_size(Filename, FileSize)) 253 return make_error<StringError>( 254 Twine("Cannot get filesize of '") + Filename + "'", EC); 255 256 std::error_code EC; 257 sys::fs::mapped_file_region MappedFile( 258 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); 259 if (EC) 260 return make_error<StringError>( 261 Twine("Cannot mmap profile '") + Filename + "'", EC); 262 StringRef Data(MappedFile.data(), MappedFile.size()); 263 264 Profile P; 265 uint32_t Offset = 0; 266 DataExtractor Extractor(Data, true, 8); 267 268 // For each block we get from the file: 269 while (Offset != MappedFile.size()) { 270 auto HeaderOrError = readBlockHeader(Extractor, Offset); 271 if (!HeaderOrError) 272 return HeaderOrError.takeError(); 273 274 // TODO: Maybe store this header information for each block, even just for 275 // debugging? 276 const auto &Header = HeaderOrError.get(); 277 278 // Read in the path data. 279 auto PathOrError = readPath(Extractor, Offset); 280 if (!PathOrError) 281 return PathOrError.takeError(); 282 const auto &Path = PathOrError.get(); 283 284 // For each path we encounter, we should intern it to get a PathID. 285 auto DataOrError = readData(Extractor, Offset); 286 if (!DataOrError) 287 return DataOrError.takeError(); 288 auto &Data = DataOrError.get(); 289 290 if (auto E = 291 P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, 292 {{P.internPath(Path), std::move(Data)}}})) 293 return std::move(E); 294 } 295 296 return P; 297 } 298 299 namespace { 300 301 struct StackEntry { 302 uint64_t Timestamp; 303 Profile::FuncID FuncId; 304 }; 305 306 } // namespace 307 308 Expected<Profile> profileFromTrace(const Trace &T) { 309 Profile P; 310 311 // The implementation of the algorithm re-creates the execution of 312 // the functions based on the trace data. To do this, we set up a number of 313 // data structures to track the execution context of every thread in the 314 // Trace. 315 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; 316 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> 317 ThreadPathData; 318 319 // We then do a pass through the Trace to account data on a per-thread-basis. 320 for (const auto &E : T) { 321 auto &TSD = ThreadStacks[E.TId]; 322 switch (E.Type) { 323 case RecordTypes::ENTER: 324 case RecordTypes::ENTER_ARG: 325 326 // Push entries into the function call stack. 327 TSD.push_back({E.TSC, E.FuncId}); 328 break; 329 330 case RecordTypes::EXIT: 331 case RecordTypes::TAIL_EXIT: 332 333 // Exits cause some accounting to happen, based on the state of the stack. 334 // For each function we pop off the stack, we take note of the path and 335 // record the cumulative state for this path. As we're doing this, we 336 // intern the path into the Profile. 337 while (!TSD.empty()) { 338 auto Top = TSD.back(); 339 auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); 340 SmallVector<Profile::FuncID, 16> Path; 341 transform(reverse(TSD), std::back_inserter(Path), 342 std::mem_fn(&StackEntry::FuncId)); 343 auto InternedPath = P.internPath(Path); 344 auto &TPD = ThreadPathData[E.TId][InternedPath]; 345 ++TPD.CallCount; 346 TPD.CumulativeLocalTime += FunctionLocalTime; 347 TSD.pop_back(); 348 349 // If we've matched the corresponding entry event for this function, 350 // then we exit the loop. 351 if (Top.FuncId == E.FuncId) 352 break; 353 354 // FIXME: Consider the intermediate times and the cumulative tree time 355 // as well. 356 } 357 358 break; 359 } 360 } 361 362 // Once we've gone through the Trace, we now create one Block per thread in 363 // the Profile. 364 for (const auto &ThreadPaths : ThreadPathData) { 365 const auto &TID = ThreadPaths.first; 366 const auto &PathsData = ThreadPaths.second; 367 if (auto E = P.addBlock({ 368 TID, 369 std::vector<std::pair<Profile::PathID, Profile::Data>>( 370 PathsData.begin(), PathsData.end()), 371 })) 372 return std::move(E); 373 } 374 375 return P; 376 } 377 378 } // namespace xray 379 } // namespace llvm 380