1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // XRay log reader implementation. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/XRay/Trace.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/Support/DataExtractor.h" 16 #include "llvm/Support/Error.h" 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/XRay/FileHeaderReader.h" 19 #include "llvm/XRay/YAMLXRayRecord.h" 20 21 using namespace llvm; 22 using namespace llvm::xray; 23 using llvm::yaml::Input; 24 25 namespace { 26 using XRayRecordStorage = 27 std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type; 28 29 // This is the number of bytes in the "body" of a MetadataRecord in FDR Mode. 30 // This already excludes the first byte, which indicates the type of metadata 31 // record it is. 32 constexpr auto kFDRMetadataBodySize = 15; 33 34 Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, 35 XRayFileHeader &FileHeader, 36 std::vector<XRayRecord> &Records) { 37 if (Data.size() < 32) 38 return make_error<StringError>( 39 "Not enough bytes for an XRay log.", 40 std::make_error_code(std::errc::invalid_argument)); 41 42 if (Data.size() - 32 == 0 || Data.size() % 32 != 0) 43 return make_error<StringError>( 44 "Invalid-sized XRay data.", 45 std::make_error_code(std::errc::invalid_argument)); 46 47 DataExtractor Reader(Data, IsLittleEndian, 8); 48 uint32_t OffsetPtr = 0; 49 auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr); 50 if (!FileHeaderOrError) 51 return FileHeaderOrError.takeError(); 52 FileHeader = std::move(FileHeaderOrError.get()); 53 54 // Each record after the header will be 32 bytes, in the following format: 55 // 56 // (2) uint16 : record type 57 // (1) uint8 : cpu id 58 // (1) uint8 : type 59 // (4) sint32 : function id 60 // (8) uint64 : tsc 61 // (4) uint32 : thread id 62 // (4) uint32 : process id 63 // (8) - : padding 64 while (Reader.isValidOffset(OffsetPtr)) { 65 if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32)) 66 return createStringError( 67 std::make_error_code(std::errc::executable_format_error), 68 "Not enough bytes to read a full record at offset %d.", OffsetPtr); 69 auto PreReadOffset = OffsetPtr; 70 auto RecordType = Reader.getU16(&OffsetPtr); 71 if (OffsetPtr == PreReadOffset) 72 return createStringError( 73 std::make_error_code(std::errc::executable_format_error), 74 "Failed reading record type at offset %d.", OffsetPtr); 75 76 switch (RecordType) { 77 case 0: { // Normal records. 78 Records.emplace_back(); 79 auto &Record = Records.back(); 80 Record.RecordType = RecordType; 81 82 PreReadOffset = OffsetPtr; 83 Record.CPU = Reader.getU8(&OffsetPtr); 84 if (OffsetPtr == PreReadOffset) 85 return createStringError( 86 std::make_error_code(std::errc::executable_format_error), 87 "Failed reading CPU field at offset %d.", OffsetPtr); 88 89 PreReadOffset = OffsetPtr; 90 auto Type = Reader.getU8(&OffsetPtr); 91 if (OffsetPtr == PreReadOffset) 92 return createStringError( 93 std::make_error_code(std::errc::executable_format_error), 94 "Failed reading record type field at offset %d.", OffsetPtr); 95 96 switch (Type) { 97 case 0: 98 Record.Type = RecordTypes::ENTER; 99 break; 100 case 1: 101 Record.Type = RecordTypes::EXIT; 102 break; 103 case 2: 104 Record.Type = RecordTypes::TAIL_EXIT; 105 break; 106 case 3: 107 Record.Type = RecordTypes::ENTER_ARG; 108 break; 109 default: 110 return createStringError( 111 std::make_error_code(std::errc::executable_format_error), 112 "Unknown record type '%d' at offset %d.", Type, OffsetPtr); 113 } 114 115 PreReadOffset = OffsetPtr; 116 Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t)); 117 if (OffsetPtr == PreReadOffset) 118 return createStringError( 119 std::make_error_code(std::errc::executable_format_error), 120 "Failed reading function id field at offset %d.", OffsetPtr); 121 122 PreReadOffset = OffsetPtr; 123 Record.TSC = Reader.getU64(&OffsetPtr); 124 if (OffsetPtr == PreReadOffset) 125 return createStringError( 126 std::make_error_code(std::errc::executable_format_error), 127 "Failed reading TSC field at offset %d.", OffsetPtr); 128 129 PreReadOffset = OffsetPtr; 130 Record.TId = Reader.getU32(&OffsetPtr); 131 if (OffsetPtr == PreReadOffset) 132 return createStringError( 133 std::make_error_code(std::errc::executable_format_error), 134 "Failed reading thread id field at offset %d.", OffsetPtr); 135 136 PreReadOffset = OffsetPtr; 137 Record.PId = Reader.getU32(&OffsetPtr); 138 if (OffsetPtr == PreReadOffset) 139 return createStringError( 140 std::make_error_code(std::errc::executable_format_error), 141 "Failed reading process id at offset %d.", OffsetPtr); 142 143 break; 144 } 145 case 1: { // Arg payload record. 146 auto &Record = Records.back(); 147 148 // We skip the next two bytes of the record, because we don't need the 149 // type and the CPU record for arg payloads. 150 OffsetPtr += 2; 151 PreReadOffset = OffsetPtr; 152 int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t)); 153 if (OffsetPtr == PreReadOffset) 154 return createStringError( 155 std::make_error_code(std::errc::executable_format_error), 156 "Failed reading function id field at offset %d.", OffsetPtr); 157 158 PreReadOffset = OffsetPtr; 159 auto TId = Reader.getU32(&OffsetPtr); 160 if (OffsetPtr == PreReadOffset) 161 return createStringError( 162 std::make_error_code(std::errc::executable_format_error), 163 "Failed reading thread id field at offset %d.", OffsetPtr); 164 165 PreReadOffset = OffsetPtr; 166 auto PId = Reader.getU32(&OffsetPtr); 167 if (OffsetPtr == PreReadOffset) 168 return createStringError( 169 std::make_error_code(std::errc::executable_format_error), 170 "Failed reading process id field at offset %d.", OffsetPtr); 171 172 // Make a check for versions above 3 for the Pid field 173 if (Record.FuncId != FuncId || Record.TId != TId || 174 (FileHeader.Version >= 3 ? Record.PId != PId : false)) 175 return createStringError( 176 std::make_error_code(std::errc::executable_format_error), 177 "Corrupted log, found arg payload following non-matching " 178 "function+thread record. Record for function %d != %d at offset " 179 "%d", 180 Record.FuncId, FuncId, OffsetPtr); 181 182 PreReadOffset = OffsetPtr; 183 auto Arg = Reader.getU64(&OffsetPtr); 184 if (OffsetPtr == PreReadOffset) 185 return createStringError( 186 std::make_error_code(std::errc::executable_format_error), 187 "Failed reading argument payload at offset %d.", OffsetPtr); 188 189 Record.CallArgs.push_back(Arg); 190 break; 191 } 192 default: 193 return createStringError( 194 std::make_error_code(std::errc::executable_format_error), 195 "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr); 196 } 197 // Advance the offset pointer enough bytes to align to 32-byte records for 198 // basic mode logs. 199 OffsetPtr += 8; 200 } 201 return Error::success(); 202 } 203 204 /// When reading from a Flight Data Recorder mode log, metadata records are 205 /// sparse compared to packed function records, so we must maintain state as we 206 /// read through the sequence of entries. This allows the reader to denormalize 207 /// the CPUId and Thread Id onto each Function Record and transform delta 208 /// encoded TSC values into absolute encodings on each record. 209 struct FDRState { 210 uint16_t CPUId; 211 int32_t ThreadId; 212 int32_t ProcessId; 213 uint64_t BaseTSC; 214 215 /// Encode some of the state transitions for the FDR log reader as explicit 216 /// checks. These are expectations for the next Record in the stream. 217 enum class Token { 218 NEW_BUFFER_RECORD_OR_EOF, 219 WALLCLOCK_RECORD, 220 NEW_CPU_ID_RECORD, 221 FUNCTION_SEQUENCE, 222 SCAN_TO_END_OF_THREAD_BUF, 223 CUSTOM_EVENT_DATA, 224 CALL_ARGUMENT, 225 BUFFER_EXTENTS, 226 PID_RECORD, 227 }; 228 Token Expects; 229 230 // Each threads buffer may have trailing garbage to scan over, so we track our 231 // progress. 232 uint64_t CurrentBufferSize; 233 uint64_t CurrentBufferConsumed; 234 }; 235 236 const char *fdrStateToTwine(const FDRState::Token &state) { 237 switch (state) { 238 case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF: 239 return "NEW_BUFFER_RECORD_OR_EOF"; 240 case FDRState::Token::WALLCLOCK_RECORD: 241 return "WALLCLOCK_RECORD"; 242 case FDRState::Token::NEW_CPU_ID_RECORD: 243 return "NEW_CPU_ID_RECORD"; 244 case FDRState::Token::FUNCTION_SEQUENCE: 245 return "FUNCTION_SEQUENCE"; 246 case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF: 247 return "SCAN_TO_END_OF_THREAD_BUF"; 248 case FDRState::Token::CUSTOM_EVENT_DATA: 249 return "CUSTOM_EVENT_DATA"; 250 case FDRState::Token::CALL_ARGUMENT: 251 return "CALL_ARGUMENT"; 252 case FDRState::Token::BUFFER_EXTENTS: 253 return "BUFFER_EXTENTS"; 254 case FDRState::Token::PID_RECORD: 255 return "PID_RECORD"; 256 } 257 return "UNKNOWN"; 258 } 259 260 /// State transition when a NewBufferRecord is encountered. 261 Error processFDRNewBufferRecord(FDRState &State, DataExtractor &RecordExtractor, 262 uint32_t &OffsetPtr) { 263 if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF) 264 return createStringError( 265 std::make_error_code(std::errc::executable_format_error), 266 "Malformed log: Read New Buffer record kind out of sequence; expected: " 267 "%s at offset %d.", 268 fdrStateToTwine(State.Expects), OffsetPtr); 269 270 auto PreReadOffset = OffsetPtr; 271 State.ThreadId = RecordExtractor.getSigned(&OffsetPtr, 4); 272 if (OffsetPtr == PreReadOffset) 273 return createStringError( 274 std::make_error_code(std::errc::executable_format_error), 275 "Failed reading the thread id at offset %d.", OffsetPtr); 276 State.Expects = FDRState::Token::WALLCLOCK_RECORD; 277 278 // Advance the offset pointer by enough bytes representing the remaining 279 // padding in a metadata record. 280 OffsetPtr += kFDRMetadataBodySize - 4; 281 assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize); 282 return Error::success(); 283 } 284 285 /// State transition when an EndOfBufferRecord is encountered. 286 Error processFDREndOfBufferRecord(FDRState &State, uint32_t &OffsetPtr) { 287 if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF) 288 return createStringError( 289 std::make_error_code(std::errc::executable_format_error), 290 "Malformed log: Received EOB message without current buffer; expected: " 291 "%s at offset %d.", 292 fdrStateToTwine(State.Expects), OffsetPtr); 293 294 State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF; 295 296 // Advance the offset pointer by enough bytes representing the remaining 297 // padding in a metadata record. 298 OffsetPtr += kFDRMetadataBodySize; 299 return Error::success(); 300 } 301 302 /// State transition when a NewCPUIdRecord is encountered. 303 Error processFDRNewCPUIdRecord(FDRState &State, DataExtractor &RecordExtractor, 304 uint32_t &OffsetPtr) { 305 if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE && 306 State.Expects != FDRState::Token::NEW_CPU_ID_RECORD) 307 return make_error<StringError>( 308 Twine("Malformed log. Read NewCPUId record kind out of sequence; " 309 "expected: ") + 310 fdrStateToTwine(State.Expects), 311 std::make_error_code(std::errc::executable_format_error)); 312 auto BeginOffset = OffsetPtr; 313 auto PreReadOffset = OffsetPtr; 314 State.CPUId = RecordExtractor.getU16(&OffsetPtr); 315 if (OffsetPtr == PreReadOffset) 316 return createStringError( 317 std::make_error_code(std::errc::executable_format_error), 318 "Failed reading the CPU field at offset %d.", OffsetPtr); 319 320 PreReadOffset = OffsetPtr; 321 State.BaseTSC = RecordExtractor.getU64(&OffsetPtr); 322 if (OffsetPtr == PreReadOffset) 323 return createStringError( 324 std::make_error_code(std::errc::executable_format_error), 325 "Failed reading the base TSC field at offset %d.", OffsetPtr); 326 327 State.Expects = FDRState::Token::FUNCTION_SEQUENCE; 328 329 // Advance the offset pointer by a few bytes, to account for the padding in 330 // CPU ID metadata records that we've already advanced through. 331 OffsetPtr += kFDRMetadataBodySize - (OffsetPtr - BeginOffset); 332 assert(OffsetPtr - BeginOffset == kFDRMetadataBodySize); 333 return Error::success(); 334 } 335 336 /// State transition when a TSCWrapRecord (overflow detection) is encountered. 337 Error processFDRTSCWrapRecord(FDRState &State, DataExtractor &RecordExtractor, 338 uint32_t &OffsetPtr) { 339 if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE) 340 return make_error<StringError>( 341 Twine("Malformed log. Read TSCWrap record kind out of sequence; " 342 "expecting: ") + 343 fdrStateToTwine(State.Expects), 344 std::make_error_code(std::errc::executable_format_error)); 345 auto PreReadOffset = OffsetPtr; 346 State.BaseTSC = RecordExtractor.getU64(&OffsetPtr); 347 if (OffsetPtr == PreReadOffset) 348 return createStringError( 349 std::make_error_code(std::errc::executable_format_error), 350 "Failed reading the base TSC field at offset %d.", OffsetPtr); 351 352 // Advance the offset pointer by a few more bytes, accounting for the padding 353 // in the metadata record after reading the base TSC. 354 OffsetPtr += kFDRMetadataBodySize - 8; 355 assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize); 356 return Error::success(); 357 } 358 359 /// State transition when a WallTimeMarkerRecord is encountered. 360 Error processFDRWallTimeRecord(FDRState &State, DataExtractor &RecordExtractor, 361 uint32_t &OffsetPtr) { 362 if (State.Expects != FDRState::Token::WALLCLOCK_RECORD) 363 return make_error<StringError>( 364 Twine("Malformed log. Read Wallclock record kind out of sequence; " 365 "expecting: ") + 366 fdrStateToTwine(State.Expects), 367 std::make_error_code(std::errc::executable_format_error)); 368 369 // Read in the data from the walltime record. 370 auto PreReadOffset = OffsetPtr; 371 auto WallTime = RecordExtractor.getU64(&OffsetPtr); 372 if (OffsetPtr == PreReadOffset) 373 return createStringError( 374 std::make_error_code(std::errc::executable_format_error), 375 "Failed reading the walltime record at offset %d.", OffsetPtr); 376 377 // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation 378 // purposes. For now, we're ignoring these records. 379 (void)WallTime; 380 State.Expects = FDRState::Token::NEW_CPU_ID_RECORD; 381 382 // Advance the offset pointer by a few more bytes, accounting for the padding 383 // in the metadata record after reading in the walltime data. 384 OffsetPtr += kFDRMetadataBodySize - 8; 385 assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize); 386 return Error::success(); 387 } 388 389 /// State transition when a PidRecord is encountered. 390 Error processFDRPidRecord(FDRState &State, DataExtractor &RecordExtractor, 391 uint32_t &OffsetPtr) { 392 if (State.Expects != FDRState::Token::PID_RECORD) 393 return make_error<StringError>( 394 Twine("Malformed log. Read Pid record kind out of sequence; " 395 "expected: ") + 396 fdrStateToTwine(State.Expects), 397 std::make_error_code(std::errc::executable_format_error)); 398 auto PreReadOffset = OffsetPtr; 399 State.ProcessId = RecordExtractor.getSigned(&OffsetPtr, 4); 400 if (OffsetPtr == PreReadOffset) 401 return createStringError( 402 std::make_error_code(std::errc::executable_format_error), 403 "Failed reading the process ID at offset %d.", OffsetPtr); 404 State.Expects = FDRState::Token::NEW_CPU_ID_RECORD; 405 406 // Advance the offset pointer by a few more bytes, accounting for the padding 407 // in the metadata record after reading in the PID. 408 OffsetPtr += kFDRMetadataBodySize - 4; 409 assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize); 410 return Error::success(); 411 } 412 413 /// State transition when a CustomEventMarker is encountered. 414 Error processCustomEventMarker(FDRState &State, DataExtractor &RecordExtractor, 415 uint32_t &OffsetPtr) { 416 // We can encounter a CustomEventMarker anywhere in the log, so we can handle 417 // it regardless of the expectation. However, we do set the expectation to 418 // read a set number of fixed bytes, as described in the metadata. 419 auto BeginOffset = OffsetPtr; 420 auto PreReadOffset = OffsetPtr; 421 uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr); 422 if (OffsetPtr == PreReadOffset) 423 return createStringError( 424 std::make_error_code(std::errc::executable_format_error), 425 "Failed reading a custom event marker at offset %d.", OffsetPtr); 426 427 PreReadOffset = OffsetPtr; 428 uint64_t TSC = RecordExtractor.getU64(&OffsetPtr); 429 if (OffsetPtr == PreReadOffset) 430 return createStringError( 431 std::make_error_code(std::errc::executable_format_error), 432 "Failed reading the TSC at offset %d.", OffsetPtr); 433 434 // FIXME: Actually represent the record through the API. For now we only 435 // skip through the data. 436 (void)TSC; 437 // Advance the offset ptr by the size of the data associated with the custom 438 // event, as well as the padding associated with the remainder of the metadata 439 // record. 440 OffsetPtr += (kFDRMetadataBodySize - (OffsetPtr - BeginOffset)) + DataSize; 441 if (!RecordExtractor.isValidOffset(OffsetPtr)) 442 return createStringError( 443 std::make_error_code(std::errc::executable_format_error), 444 "Reading custom event data moves past addressable trace data (starting " 445 "at offset %d, advancing to offset %d).", 446 BeginOffset, OffsetPtr); 447 return Error::success(); 448 } 449 450 /// State transition when an BufferExtents record is encountered. 451 Error processBufferExtents(FDRState &State, DataExtractor &RecordExtractor, 452 uint32_t &OffsetPtr) { 453 if (State.Expects != FDRState::Token::BUFFER_EXTENTS) 454 return make_error<StringError>( 455 Twine("Malformed log. Buffer Extents unexpected; expected: ") + 456 fdrStateToTwine(State.Expects), 457 std::make_error_code(std::errc::executable_format_error)); 458 459 auto PreReadOffset = OffsetPtr; 460 State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr); 461 if (OffsetPtr == PreReadOffset) 462 return createStringError( 463 std::make_error_code(std::errc::executable_format_error), 464 "Failed to read current buffer size at offset %d.", OffsetPtr); 465 466 State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF; 467 468 // Advance the offset pointer by enough bytes accounting for the padding in a 469 // metadata record, after we read in the buffer extents. 470 OffsetPtr += kFDRMetadataBodySize - 8; 471 return Error::success(); 472 } 473 474 /// State transition when a CallArgumentRecord is encountered. 475 Error processFDRCallArgumentRecord(FDRState &State, 476 DataExtractor &RecordExtractor, 477 std::vector<XRayRecord> &Records, 478 uint32_t &OffsetPtr) { 479 auto &Enter = Records.back(); 480 if (Enter.Type != RecordTypes::ENTER && Enter.Type != RecordTypes::ENTER_ARG) 481 return make_error<StringError>( 482 "CallArgument needs to be right after a function entry", 483 std::make_error_code(std::errc::executable_format_error)); 484 485 auto PreReadOffset = OffsetPtr; 486 auto Arg = RecordExtractor.getU64(&OffsetPtr); 487 if (OffsetPtr == PreReadOffset) 488 return createStringError( 489 std::make_error_code(std::errc::executable_format_error), 490 "Failed to read argument record at offset %d.", OffsetPtr); 491 492 Enter.Type = RecordTypes::ENTER_ARG; 493 Enter.CallArgs.emplace_back(Arg); 494 495 // Advance the offset pointer by enough bytes accounting for the padding in a 496 // metadata record, after reading the payload. 497 OffsetPtr += kFDRMetadataBodySize - 8; 498 return Error::success(); 499 } 500 501 /// Advances the state machine for reading the FDR record type by reading one 502 /// Metadata Record and updating the State appropriately based on the kind of 503 /// record encountered. The RecordKind is encoded in the first byte of the 504 /// Record, which the caller should pass in because they have already read it 505 /// to determine that this is a metadata record as opposed to a function record. 506 /// 507 /// Beginning with Version 2 of the FDR log, we do not depend on the size of the 508 /// buffer, but rather use the extents to determine how far to read in the log 509 /// for this particular buffer. 510 /// 511 /// In Version 3, FDR log now includes a pid metadata record after 512 /// WallTimeMarker 513 Error processFDRMetadataRecord(FDRState &State, DataExtractor &RecordExtractor, 514 uint32_t &OffsetPtr, 515 std::vector<XRayRecord> &Records, 516 uint16_t Version, uint8_t FirstByte) { 517 // The remaining 7 bits of the first byte are the RecordKind enum for each 518 // Metadata Record. 519 switch (FirstByte >> 1) { 520 case 0: // NewBuffer 521 if (auto E = processFDRNewBufferRecord(State, RecordExtractor, OffsetPtr)) 522 return E; 523 break; 524 case 1: // EndOfBuffer 525 if (Version >= 2) 526 return make_error<StringError>( 527 "Since Version 2 of FDR logging, we no longer support EOB records.", 528 std::make_error_code(std::errc::executable_format_error)); 529 if (auto E = processFDREndOfBufferRecord(State, OffsetPtr)) 530 return E; 531 break; 532 case 2: // NewCPUId 533 if (auto E = processFDRNewCPUIdRecord(State, RecordExtractor, OffsetPtr)) 534 return E; 535 break; 536 case 3: // TSCWrap 537 if (auto E = processFDRTSCWrapRecord(State, RecordExtractor, OffsetPtr)) 538 return E; 539 break; 540 case 4: // WallTimeMarker 541 if (auto E = processFDRWallTimeRecord(State, RecordExtractor, OffsetPtr)) 542 return E; 543 // In Version 3 and and above, a PidRecord is expected after WallTimeRecord 544 if (Version >= 3) 545 State.Expects = FDRState::Token::PID_RECORD; 546 break; 547 case 5: // CustomEventMarker 548 if (auto E = processCustomEventMarker(State, RecordExtractor, OffsetPtr)) 549 return E; 550 break; 551 case 6: // CallArgument 552 if (auto E = processFDRCallArgumentRecord(State, RecordExtractor, Records, 553 OffsetPtr)) 554 return E; 555 break; 556 case 7: // BufferExtents 557 if (auto E = processBufferExtents(State, RecordExtractor, OffsetPtr)) 558 return E; 559 break; 560 case 9: // Pid 561 if (auto E = processFDRPidRecord(State, RecordExtractor, OffsetPtr)) 562 return E; 563 break; 564 default: 565 return createStringError( 566 std::make_error_code(std::errc::executable_format_error), 567 "Illegal metadata record type: '%d' at offset %d.", FirstByte >> 1, 568 OffsetPtr); 569 } 570 return Error::success(); 571 } 572 573 /// Reads a function record from an FDR format log, appending a new XRayRecord 574 /// to the vector being populated and updating the State with a new value 575 /// reference value to interpret TSC deltas. 576 /// 577 /// The XRayRecord constructed includes information from the function record 578 /// processed here as well as Thread ID and CPU ID formerly extracted into 579 /// State. 580 Error processFDRFunctionRecord(FDRState &State, DataExtractor &RecordExtractor, 581 uint32_t &OffsetPtr, uint8_t FirstByte, 582 std::vector<XRayRecord> &Records) { 583 switch (State.Expects) { 584 case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF: 585 return make_error<StringError>( 586 "Malformed log. Received Function Record before new buffer setup.", 587 std::make_error_code(std::errc::executable_format_error)); 588 case FDRState::Token::WALLCLOCK_RECORD: 589 return make_error<StringError>( 590 "Malformed log. Received Function Record when expecting wallclock.", 591 std::make_error_code(std::errc::executable_format_error)); 592 case FDRState::Token::PID_RECORD: 593 return make_error<StringError>( 594 "Malformed log. Received Function Record when expecting pid.", 595 std::make_error_code(std::errc::executable_format_error)); 596 case FDRState::Token::NEW_CPU_ID_RECORD: 597 return make_error<StringError>( 598 "Malformed log. Received Function Record before first CPU record.", 599 std::make_error_code(std::errc::executable_format_error)); 600 default: 601 Records.emplace_back(); 602 auto &Record = Records.back(); 603 Record.RecordType = 0; // Record is type NORMAL. 604 // Back up one byte to re-read the first byte, which is important for 605 // computing the function id for a record. 606 --OffsetPtr; 607 608 auto PreReadOffset = OffsetPtr; 609 uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr); 610 if (OffsetPtr == PreReadOffset) 611 return createStringError( 612 std::make_error_code(std::errc::executable_format_error), 613 "Failed reading truncated function id field at offset %d.", 614 OffsetPtr); 615 616 FirstByte = FuncIdBitField & 0xffu; 617 // Strip off record type bit and use the next three bits. 618 auto T = (FirstByte >> 1) & 0x07; 619 switch (T) { 620 case static_cast<decltype(T)>(RecordTypes::ENTER): 621 Record.Type = RecordTypes::ENTER; 622 break; 623 case static_cast<decltype(T)>(RecordTypes::EXIT): 624 Record.Type = RecordTypes::EXIT; 625 break; 626 case static_cast<decltype(T)>(RecordTypes::TAIL_EXIT): 627 Record.Type = RecordTypes::TAIL_EXIT; 628 break; 629 case static_cast<decltype(T)>(RecordTypes::ENTER_ARG): 630 Record.Type = RecordTypes::ENTER_ARG; 631 State.Expects = FDRState::Token::CALL_ARGUMENT; 632 break; 633 default: 634 return createStringError( 635 std::make_error_code(std::errc::executable_format_error), 636 "Illegal function record type '%d' at offset %d.", T, OffsetPtr); 637 } 638 Record.CPU = State.CPUId; 639 Record.TId = State.ThreadId; 640 Record.PId = State.ProcessId; 641 642 // Despite function Id being a signed int on XRayRecord, 643 // when it is written to an FDR format, the top bits are truncated, 644 // so it is effectively an unsigned value. When we shift off the 645 // top four bits, we want the shift to be logical, so we read as 646 // uint32_t. 647 Record.FuncId = FuncIdBitField >> 4; 648 649 // FunctionRecords have a 32 bit delta from the previous absolute TSC 650 // or TSC delta. If this would overflow, we should read a TSCWrap record 651 // with an absolute TSC reading. 652 PreReadOffset = OffsetPtr; 653 uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr); 654 if (OffsetPtr == PreReadOffset) 655 return createStringError( 656 std::make_error_code(std::errc::executable_format_error), 657 "Failed reading TSC delta at offset %d.", OffsetPtr); 658 659 State.BaseTSC = NewTSC; 660 Record.TSC = NewTSC; 661 } 662 return Error::success(); 663 } 664 665 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is 666 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such 667 /// a log consists of the familiar 32 bit XRayHeader, followed by sequences of 668 /// of interspersed 16 byte Metadata Records and 8 byte Function Records. 669 /// 670 /// The following is an attempt to document the grammar of the format, which is 671 /// parsed by this function for little-endian machines. Since the format makes 672 /// use of BitFields, when we support big-endian architectures, we will need to 673 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also 674 /// the bit twiddling logic, which is consistent with the little-endian 675 /// convention that BitFields within a struct will first be packed into the 676 /// least significant bits the address they belong to. 677 /// 678 /// We expect a format complying with the grammar in the following pseudo-EBNF 679 /// in Version 1 of the FDR log. 680 /// 681 /// FDRLog: XRayFileHeader ThreadBuffer* 682 /// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata. 683 /// Includes BufferSize 684 /// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB 685 /// BufSize: 8 byte unsigned integer indicating how large the buffer is. 686 /// NewBuffer: 16 byte metadata record with Thread Id. 687 /// WallClockTime: 16 byte metadata record with human readable time. 688 /// Pid: 16 byte metadata record with Pid 689 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading. 690 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize. 691 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord 692 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading. 693 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta. 694 /// 695 /// In Version 2, we make the following changes: 696 /// 697 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId 698 /// FunctionSequence 699 /// BufferExtents: 16 byte metdata record describing how many usable bytes are 700 /// in the buffer. This is measured from the start of the buffer 701 /// and must always be at least 48 (bytes). 702 /// 703 /// In Version 3, we make the following changes: 704 /// 705 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId 706 /// FunctionSequence 707 /// EOB: *deprecated* 708 Error loadFDRLog(StringRef Data, bool IsLittleEndian, 709 XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) { 710 711 if (Data.size() < 32) 712 return make_error<StringError>( 713 "Not enough bytes for an XRay log.", 714 std::make_error_code(std::errc::invalid_argument)); 715 716 DataExtractor Reader(Data, IsLittleEndian, 8); 717 uint32_t OffsetPtr = 0; 718 auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr); 719 if (!FileHeaderOrError) 720 return FileHeaderOrError.takeError(); 721 FileHeader = std::move(FileHeaderOrError.get()); 722 723 uint64_t BufferSize = 0; 724 { 725 StringRef ExtraDataRef(FileHeader.FreeFormData, 16); 726 DataExtractor ExtraDataExtractor(ExtraDataRef, IsLittleEndian, 8); 727 uint32_t ExtraDataOffset = 0; 728 BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset); 729 } 730 731 FDRState::Token InitialExpectation; 732 switch (FileHeader.Version) { 733 case 1: 734 InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF; 735 break; 736 case 2: 737 case 3: 738 InitialExpectation = FDRState::Token::BUFFER_EXTENTS; 739 break; 740 default: 741 return make_error<StringError>( 742 Twine("Unsupported version '") + Twine(FileHeader.Version) + "'", 743 std::make_error_code(std::errc::executable_format_error)); 744 } 745 FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0}; 746 747 // RecordSize will tell the loop how far to seek ahead based on the record 748 // type that we have just read. 749 while (Reader.isValidOffset(OffsetPtr)) { 750 auto BeginOffset = OffsetPtr; 751 if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) { 752 OffsetPtr += State.CurrentBufferSize - State.CurrentBufferConsumed; 753 State.CurrentBufferConsumed = 0; 754 State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF; 755 continue; 756 } 757 auto PreReadOffset = OffsetPtr; 758 uint8_t BitField = Reader.getU8(&OffsetPtr); 759 if (OffsetPtr == PreReadOffset) 760 return createStringError( 761 std::make_error_code(std::errc::executable_format_error), 762 "Failed reading first byte of record at offset %d.", OffsetPtr); 763 bool isMetadataRecord = BitField & 0x01uL; 764 bool isBufferExtents = 765 (BitField >> 1) == 7; // BufferExtents record kind == 7 766 if (isMetadataRecord) { 767 if (auto E = processFDRMetadataRecord(State, Reader, OffsetPtr, Records, 768 FileHeader.Version, BitField)) 769 return E; 770 } else { // Process Function Record 771 if (auto E = processFDRFunctionRecord(State, Reader, OffsetPtr, BitField, 772 Records)) 773 return E; 774 } 775 776 // The BufferExtents record is technically not part of the buffer, so we 777 // don't count the size of that record against the buffer's actual size. 778 if (!isBufferExtents) 779 State.CurrentBufferConsumed += OffsetPtr - BeginOffset; 780 781 assert(State.CurrentBufferConsumed <= State.CurrentBufferSize); 782 783 if ((FileHeader.Version == 2 || FileHeader.Version == 3) && 784 State.CurrentBufferSize == State.CurrentBufferConsumed) { 785 // In Version 2 of the log, we don't need to scan to the end of the thread 786 // buffer if we've already consumed all the bytes we need to. 787 State.Expects = FDRState::Token::BUFFER_EXTENTS; 788 State.CurrentBufferSize = BufferSize; 789 State.CurrentBufferConsumed = 0; 790 } 791 } 792 793 // Having iterated over everything we've been given, we've either consumed 794 // everything and ended up in the end state, or were told to skip the rest. 795 bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF && 796 State.CurrentBufferSize == State.CurrentBufferConsumed; 797 if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF && 798 State.Expects != FDRState::Token::BUFFER_EXTENTS) && 799 !Finished) 800 return make_error<StringError>( 801 Twine("Encountered EOF with unexpected state expectation ") + 802 fdrStateToTwine(State.Expects) + 803 ". Remaining expected bytes in thread buffer total " + 804 Twine(State.CurrentBufferSize - State.CurrentBufferConsumed), 805 std::make_error_code(std::errc::executable_format_error)); 806 807 return Error::success(); 808 } 809 810 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, 811 std::vector<XRayRecord> &Records) { 812 YAMLXRayTrace Trace; 813 Input In(Data); 814 In >> Trace; 815 if (In.error()) 816 return make_error<StringError>("Failed loading YAML Data.", In.error()); 817 818 FileHeader.Version = Trace.Header.Version; 819 FileHeader.Type = Trace.Header.Type; 820 FileHeader.ConstantTSC = Trace.Header.ConstantTSC; 821 FileHeader.NonstopTSC = Trace.Header.NonstopTSC; 822 FileHeader.CycleFrequency = Trace.Header.CycleFrequency; 823 824 if (FileHeader.Version != 1) 825 return make_error<StringError>( 826 Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version), 827 std::make_error_code(std::errc::invalid_argument)); 828 829 Records.clear(); 830 std::transform(Trace.Records.begin(), Trace.Records.end(), 831 std::back_inserter(Records), [&](const YAMLXRayRecord &R) { 832 return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId, 833 R.TSC, R.TId, R.PId, R.CallArgs}; 834 }); 835 return Error::success(); 836 } 837 } // namespace 838 839 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) { 840 int Fd; 841 if (auto EC = sys::fs::openFileForRead(Filename, Fd)) { 842 return make_error<StringError>( 843 Twine("Cannot read log from '") + Filename + "'", EC); 844 } 845 846 uint64_t FileSize; 847 if (auto EC = sys::fs::file_size(Filename, FileSize)) { 848 return make_error<StringError>( 849 Twine("Cannot read log from '") + Filename + "'", EC); 850 } 851 if (FileSize < 4) { 852 return make_error<StringError>( 853 Twine("File '") + Filename + "' too small for XRay.", 854 std::make_error_code(std::errc::executable_format_error)); 855 } 856 857 // Map the opened file into memory and use a StringRef to access it later. 858 std::error_code EC; 859 sys::fs::mapped_file_region MappedFile( 860 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); 861 if (EC) { 862 return make_error<StringError>( 863 Twine("Cannot read log from '") + Filename + "'", EC); 864 } 865 auto Data = StringRef(MappedFile.data(), MappedFile.size()); 866 867 // TODO: Lift the endianness and implementation selection here. 868 DataExtractor LittleEndianDE(Data, true, 8); 869 auto TraceOrError = loadTrace(LittleEndianDE, Sort); 870 if (!TraceOrError) { 871 DataExtractor BigEndianDE(Data, false, 8); 872 TraceOrError = loadTrace(BigEndianDE, Sort); 873 } 874 return TraceOrError; 875 } 876 877 Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) { 878 // Attempt to detect the file type using file magic. We have a slight bias 879 // towards the binary format, and we do this by making sure that the first 4 880 // bytes of the binary file is some combination of the following byte 881 // patterns: (observe the code loading them assumes they're little endian) 882 // 883 // 0x01 0x00 0x00 0x00 - version 1, "naive" format 884 // 0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format 885 // 0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format 886 // 887 // YAML files don't typically have those first four bytes as valid text so we 888 // try loading assuming YAML if we don't find these bytes. 889 // 890 // Only if we can't load either the binary or the YAML format will we yield an 891 // error. 892 DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8); 893 uint32_t OffsetPtr = 0; 894 uint16_t Version = HeaderExtractor.getU16(&OffsetPtr); 895 uint16_t Type = HeaderExtractor.getU16(&OffsetPtr); 896 897 enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 }; 898 899 Trace T; 900 switch (Type) { 901 case NAIVE_FORMAT: 902 if (Version == 1 || Version == 2 || Version == 3) { 903 if (auto E = loadNaiveFormatLog(DE.getData(), DE.isLittleEndian(), 904 T.FileHeader, T.Records)) 905 return std::move(E); 906 } else { 907 return make_error<StringError>( 908 Twine("Unsupported version for Basic/Naive Mode logging: ") + 909 Twine(Version), 910 std::make_error_code(std::errc::executable_format_error)); 911 } 912 break; 913 case FLIGHT_DATA_RECORDER_FORMAT: 914 if (Version == 1 || Version == 2 || Version == 3) { 915 if (auto E = loadFDRLog(DE.getData(), DE.isLittleEndian(), T.FileHeader, 916 T.Records)) 917 return std::move(E); 918 } else { 919 return make_error<StringError>( 920 Twine("Unsupported version for FDR Mode logging: ") + Twine(Version), 921 std::make_error_code(std::errc::executable_format_error)); 922 } 923 break; 924 default: 925 if (auto E = loadYAMLLog(DE.getData(), T.FileHeader, T.Records)) 926 return std::move(E); 927 } 928 929 if (Sort) 930 std::stable_sort(T.Records.begin(), T.Records.end(), 931 [&](const XRayRecord &L, const XRayRecord &R) { 932 return L.TSC < R.TSC; 933 }); 934 935 return std::move(T); 936 } 937