1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Defines the XRay Profile class representing the latency profile generated by 11 // XRay's profiling mode. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/XRay/Profile.h" 15 16 #include "llvm/Support/DataExtractor.h" 17 #include "llvm/Support/Error.h" 18 #include "llvm/Support/FileSystem.h" 19 #include "llvm/XRay/Trace.h" 20 #include <deque> 21 #include <memory> 22 23 namespace llvm { 24 namespace xray { 25 26 Profile::Profile(const Profile &O) { 27 // We need to re-create all the tries from the original (O), into the current 28 // Profile being initialized, through the Block instances we see. 29 for (const auto &Block : O) { 30 Blocks.push_back({Block.Thread, {}}); 31 auto &B = Blocks.back(); 32 for (const auto &PathData : Block.PathData) 33 B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))), 34 PathData.second}); 35 } 36 } 37 38 Profile &Profile::operator=(const Profile &O) { 39 Profile P = O; 40 *this = std::move(P); 41 return *this; 42 } 43 44 namespace { 45 46 struct BlockHeader { 47 uint32_t Size; 48 uint32_t Number; 49 uint64_t Thread; 50 }; 51 52 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, 53 uint32_t &Offset) { 54 BlockHeader H; 55 uint32_t CurrentOffset = Offset; 56 H.Size = Extractor.getU32(&Offset); 57 if (Offset == CurrentOffset) 58 return make_error<StringError>( 59 Twine("Error parsing block header size at offset '") + 60 Twine(CurrentOffset) + "'", 61 std::make_error_code(std::errc::invalid_argument)); 62 CurrentOffset = Offset; 63 H.Number = Extractor.getU32(&Offset); 64 if (Offset == CurrentOffset) 65 return make_error<StringError>( 66 Twine("Error parsing block header number at offset '") + 67 Twine(CurrentOffset) + "'", 68 std::make_error_code(std::errc::invalid_argument)); 69 CurrentOffset = Offset; 70 H.Thread = Extractor.getU64(&Offset); 71 if (Offset == CurrentOffset) 72 return make_error<StringError>( 73 Twine("Error parsing block header thread id at offset '") + 74 Twine(CurrentOffset) + "'", 75 std::make_error_code(std::errc::invalid_argument)); 76 return H; 77 } 78 79 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor, 80 uint32_t &Offset) { 81 // We're reading a sequence of int32_t's until we find a 0. 82 std::vector<Profile::FuncID> Path; 83 auto CurrentOffset = Offset; 84 int32_t FuncId; 85 do { 86 FuncId = Extractor.getSigned(&Offset, 4); 87 if (CurrentOffset == Offset) 88 return make_error<StringError>( 89 Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", 90 std::make_error_code(std::errc::invalid_argument)); 91 CurrentOffset = Offset; 92 Path.push_back(FuncId); 93 } while (FuncId != 0); 94 return std::move(Path); 95 } 96 97 static Expected<Profile::Data> readData(DataExtractor &Extractor, 98 uint32_t &Offset) { 99 // We expect a certain number of elements for Data: 100 // - A 64-bit CallCount 101 // - A 64-bit CumulativeLocalTime counter 102 Profile::Data D; 103 auto CurrentOffset = Offset; 104 D.CallCount = Extractor.getU64(&Offset); 105 if (CurrentOffset == Offset) 106 return make_error<StringError>( 107 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + 108 "'", 109 std::make_error_code(std::errc::invalid_argument)); 110 CurrentOffset = Offset; 111 D.CumulativeLocalTime = Extractor.getU64(&Offset); 112 if (CurrentOffset == Offset) 113 return make_error<StringError>( 114 Twine("Error parsing cumulative local time at offset '") + 115 Twine(CurrentOffset) + "'", 116 std::make_error_code(std::errc::invalid_argument)); 117 return D; 118 } 119 120 } // namespace 121 122 Error Profile::addBlock(Block &&B) { 123 if (B.PathData.empty()) 124 return make_error<StringError>( 125 "Block may not have empty path data.", 126 std::make_error_code(std::errc::invalid_argument)); 127 128 Blocks.emplace_back(std::move(B)); 129 return Error::success(); 130 } 131 132 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { 133 auto It = PathIDMap.find(P); 134 if (It == PathIDMap.end()) 135 return make_error<StringError>( 136 Twine("PathID not found: ") + Twine(P), 137 std::make_error_code(std::errc::invalid_argument)); 138 std::vector<Profile::FuncID> Path; 139 for (auto Node = It->second; Node; Node = Node->Caller) 140 Path.push_back(Node->Func); 141 return std::move(Path); 142 } 143 144 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { 145 if (P.empty()) 146 return 0; 147 148 auto RootToLeafPath = reverse(P); 149 150 // Find the root. 151 auto It = RootToLeafPath.begin(); 152 auto PathRoot = *It++; 153 auto RootIt = 154 find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); 155 156 // If we've not seen this root before, remember it. 157 TrieNode *Node = nullptr; 158 if (RootIt == Roots.end()) { 159 NodeStorage.emplace_back(); 160 Node = &NodeStorage.back(); 161 Node->Func = PathRoot; 162 Roots.push_back(Node); 163 } else { 164 Node = *RootIt; 165 } 166 167 // Now traverse the path, re-creating if necessary. 168 while (It != RootToLeafPath.end()) { 169 auto NodeFuncID = *It++; 170 auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { 171 return N->Func == NodeFuncID; 172 }); 173 if (CalleeIt == Node->Callees.end()) { 174 NodeStorage.emplace_back(); 175 auto NewNode = &NodeStorage.back(); 176 NewNode->Func = NodeFuncID; 177 NewNode->Caller = Node; 178 Node->Callees.push_back(NewNode); 179 Node = NewNode; 180 } else { 181 Node = *CalleeIt; 182 } 183 } 184 185 // At this point, Node *must* be pointing at the leaf. 186 assert(Node->Func == P.front()); 187 if (Node->ID == 0) { 188 Node->ID = NextID++; 189 PathIDMap.insert({Node->ID, Node}); 190 } 191 return Node->ID; 192 } 193 194 Profile mergeProfilesByThread(const Profile &L, const Profile &R) { 195 Profile Merged; 196 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 197 using PathDataMapPtr = std::unique_ptr<PathDataMap>; 198 using PathDataVector = decltype(Profile::Block::PathData); 199 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; 200 ThreadProfileIndexMap ThreadProfileIndex; 201 202 for (const auto &P : {std::ref(L), std::ref(R)}) 203 for (const auto &Block : P.get()) { 204 ThreadProfileIndexMap::iterator It; 205 std::tie(It, std::ignore) = ThreadProfileIndex.insert( 206 {Block.Thread, PathDataMapPtr{new PathDataMap()}}); 207 for (const auto &PathAndData : Block.PathData) { 208 auto &PathID = PathAndData.first; 209 auto &Data = PathAndData.second; 210 auto NewPathID = 211 Merged.internPath(cantFail(P.get().expandPath(PathID))); 212 PathDataMap::iterator PathDataIt; 213 bool Inserted; 214 std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); 215 if (!Inserted) { 216 auto &ExistingData = PathDataIt->second; 217 ExistingData.CallCount += Data.CallCount; 218 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 219 } 220 } 221 } 222 223 for (const auto &IndexedThreadBlock : ThreadProfileIndex) { 224 PathDataVector PathAndData; 225 PathAndData.reserve(IndexedThreadBlock.second->size()); 226 copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); 227 cantFail( 228 Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); 229 } 230 return Merged; 231 } 232 233 Profile mergeProfilesByStack(const Profile &L, const Profile &R) { 234 Profile Merged; 235 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 236 PathDataMap PathData; 237 using PathDataVector = decltype(Profile::Block::PathData); 238 for (const auto &P : {std::ref(L), std::ref(R)}) 239 for (const auto &Block : P.get()) 240 for (const auto &PathAndData : Block.PathData) { 241 auto &PathId = PathAndData.first; 242 auto &Data = PathAndData.second; 243 auto NewPathID = 244 Merged.internPath(cantFail(P.get().expandPath(PathId))); 245 PathDataMap::iterator PathDataIt; 246 bool Inserted; 247 std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); 248 if (!Inserted) { 249 auto &ExistingData = PathDataIt->second; 250 ExistingData.CallCount += Data.CallCount; 251 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 252 } 253 } 254 255 // In the end there's a single Block, for thread 0. 256 PathDataVector Block; 257 Block.reserve(PathData.size()); 258 copy(PathData, std::back_inserter(Block)); 259 cantFail(Merged.addBlock({0, std::move(Block)})); 260 return Merged; 261 } 262 263 Expected<Profile> loadProfile(StringRef Filename) { 264 int Fd; 265 if (auto EC = sys::fs::openFileForRead(Filename, Fd)) 266 return make_error<StringError>( 267 Twine("Cannot read profile from '") + Filename + "'", EC); 268 269 uint64_t FileSize; 270 if (auto EC = sys::fs::file_size(Filename, FileSize)) 271 return make_error<StringError>( 272 Twine("Cannot get filesize of '") + Filename + "'", EC); 273 274 std::error_code EC; 275 sys::fs::mapped_file_region MappedFile( 276 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); 277 if (EC) 278 return make_error<StringError>( 279 Twine("Cannot mmap profile '") + Filename + "'", EC); 280 StringRef Data(MappedFile.data(), MappedFile.size()); 281 282 Profile P; 283 uint32_t Offset = 0; 284 DataExtractor Extractor(Data, true, 8); 285 286 // For each block we get from the file: 287 while (Offset != MappedFile.size()) { 288 auto HeaderOrError = readBlockHeader(Extractor, Offset); 289 if (!HeaderOrError) 290 return HeaderOrError.takeError(); 291 292 // TODO: Maybe store this header information for each block, even just for 293 // debugging? 294 const auto &Header = HeaderOrError.get(); 295 296 // Read in the path data. 297 auto PathOrError = readPath(Extractor, Offset); 298 if (!PathOrError) 299 return PathOrError.takeError(); 300 const auto &Path = PathOrError.get(); 301 302 // For each path we encounter, we should intern it to get a PathID. 303 auto DataOrError = readData(Extractor, Offset); 304 if (!DataOrError) 305 return DataOrError.takeError(); 306 auto &Data = DataOrError.get(); 307 308 if (auto E = 309 P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, 310 {{P.internPath(Path), std::move(Data)}}})) 311 return std::move(E); 312 } 313 314 return P; 315 } 316 317 namespace { 318 319 struct StackEntry { 320 uint64_t Timestamp; 321 Profile::FuncID FuncId; 322 }; 323 324 } // namespace 325 326 Expected<Profile> profileFromTrace(const Trace &T) { 327 Profile P; 328 329 // The implementation of the algorithm re-creates the execution of 330 // the functions based on the trace data. To do this, we set up a number of 331 // data structures to track the execution context of every thread in the 332 // Trace. 333 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; 334 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> 335 ThreadPathData; 336 337 // We then do a pass through the Trace to account data on a per-thread-basis. 338 for (const auto &E : T) { 339 auto &TSD = ThreadStacks[E.TId]; 340 switch (E.Type) { 341 case RecordTypes::ENTER: 342 case RecordTypes::ENTER_ARG: 343 344 // Push entries into the function call stack. 345 TSD.push_back({E.TSC, E.FuncId}); 346 break; 347 348 case RecordTypes::EXIT: 349 case RecordTypes::TAIL_EXIT: 350 351 // Exits cause some accounting to happen, based on the state of the stack. 352 // For each function we pop off the stack, we take note of the path and 353 // record the cumulative state for this path. As we're doing this, we 354 // intern the path into the Profile. 355 while (!TSD.empty()) { 356 auto Top = TSD.back(); 357 auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); 358 SmallVector<Profile::FuncID, 16> Path; 359 transform(reverse(TSD), std::back_inserter(Path), 360 std::mem_fn(&StackEntry::FuncId)); 361 auto InternedPath = P.internPath(Path); 362 auto &TPD = ThreadPathData[E.TId][InternedPath]; 363 ++TPD.CallCount; 364 TPD.CumulativeLocalTime += FunctionLocalTime; 365 TSD.pop_back(); 366 367 // If we've matched the corresponding entry event for this function, 368 // then we exit the loop. 369 if (Top.FuncId == E.FuncId) 370 break; 371 372 // FIXME: Consider the intermediate times and the cumulative tree time 373 // as well. 374 } 375 376 break; 377 } 378 } 379 380 // Once we've gone through the Trace, we now create one Block per thread in 381 // the Profile. 382 for (const auto &ThreadPaths : ThreadPathData) { 383 const auto &TID = ThreadPaths.first; 384 const auto &PathsData = ThreadPaths.second; 385 if (auto E = P.addBlock({ 386 TID, 387 std::vector<std::pair<Profile::PathID, Profile::Data>>( 388 PathsData.begin(), PathsData.end()), 389 })) 390 return std::move(E); 391 } 392 393 return P; 394 } 395 396 } // namespace xray 397 } // namespace llvm 398