xref: /llvm-project/llvm/lib/XRay/Trace.cpp (revision 4cae04873bcdb5eb629e4e71b36a0a87fe79083c)
1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // XRay log reader implementation.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Trace.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Support/DataExtractor.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/XRay/FileHeaderReader.h"
19 #include "llvm/XRay/YAMLXRayRecord.h"
20 
21 using namespace llvm;
22 using namespace llvm::xray;
23 using llvm::yaml::Input;
24 
25 namespace {
// Plain storage with the size and alignment of one XRayRecord; the type holds
// no constructed object by itself.
using XRayRecordStorage =
    std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
28 
// Size, in bytes, of the body of an FDR mode MetadataRecord. The leading byte
// that identifies the kind of metadata record is not counted here.
constexpr int kFDRMetadataBodySize = 15;
33 
34 Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
35                          XRayFileHeader &FileHeader,
36                          std::vector<XRayRecord> &Records) {
37   if (Data.size() < 32)
38     return make_error<StringError>(
39         "Not enough bytes for an XRay log.",
40         std::make_error_code(std::errc::invalid_argument));
41 
42   if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
43     return make_error<StringError>(
44         "Invalid-sized XRay data.",
45         std::make_error_code(std::errc::invalid_argument));
46 
47   DataExtractor Reader(Data, IsLittleEndian, 8);
48   uint32_t OffsetPtr = 0;
49   auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
50   if (!FileHeaderOrError)
51     return FileHeaderOrError.takeError();
52   FileHeader = std::move(FileHeaderOrError.get());
53 
54   // Each record after the header will be 32 bytes, in the following format:
55   //
56   //   (2)   uint16 : record type
57   //   (1)   uint8  : cpu id
58   //   (1)   uint8  : type
59   //   (4)   sint32 : function id
60   //   (8)   uint64 : tsc
61   //   (4)   uint32 : thread id
62   //   (4)   uint32 : process id
63   //   (8)   -      : padding
64   while (Reader.isValidOffset(OffsetPtr)) {
65     if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
66       return createStringError(
67           std::make_error_code(std::errc::executable_format_error),
68           "Not enough bytes to read a full record at offset %d.", OffsetPtr);
69     auto PreReadOffset = OffsetPtr;
70     auto RecordType = Reader.getU16(&OffsetPtr);
71     if (OffsetPtr == PreReadOffset)
72       return createStringError(
73           std::make_error_code(std::errc::executable_format_error),
74           "Failed reading record type at offset %d.", OffsetPtr);
75 
76     switch (RecordType) {
77     case 0: { // Normal records.
78       Records.emplace_back();
79       auto &Record = Records.back();
80       Record.RecordType = RecordType;
81 
82       PreReadOffset = OffsetPtr;
83       Record.CPU = Reader.getU8(&OffsetPtr);
84       if (OffsetPtr == PreReadOffset)
85         return createStringError(
86             std::make_error_code(std::errc::executable_format_error),
87             "Failed reading CPU field at offset %d.", OffsetPtr);
88 
89       PreReadOffset = OffsetPtr;
90       auto Type = Reader.getU8(&OffsetPtr);
91       if (OffsetPtr == PreReadOffset)
92         return createStringError(
93             std::make_error_code(std::errc::executable_format_error),
94             "Failed reading record type field at offset %d.", OffsetPtr);
95 
96       switch (Type) {
97       case 0:
98         Record.Type = RecordTypes::ENTER;
99         break;
100       case 1:
101         Record.Type = RecordTypes::EXIT;
102         break;
103       case 2:
104         Record.Type = RecordTypes::TAIL_EXIT;
105         break;
106       case 3:
107         Record.Type = RecordTypes::ENTER_ARG;
108         break;
109       default:
110         return createStringError(
111             std::make_error_code(std::errc::executable_format_error),
112             "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
113       }
114 
115       PreReadOffset = OffsetPtr;
116       Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
117       if (OffsetPtr == PreReadOffset)
118         return createStringError(
119             std::make_error_code(std::errc::executable_format_error),
120             "Failed reading function id field at offset %d.", OffsetPtr);
121 
122       PreReadOffset = OffsetPtr;
123       Record.TSC = Reader.getU64(&OffsetPtr);
124       if (OffsetPtr == PreReadOffset)
125         return createStringError(
126             std::make_error_code(std::errc::executable_format_error),
127             "Failed reading TSC field at offset %d.", OffsetPtr);
128 
129       PreReadOffset = OffsetPtr;
130       Record.TId = Reader.getU32(&OffsetPtr);
131       if (OffsetPtr == PreReadOffset)
132         return createStringError(
133             std::make_error_code(std::errc::executable_format_error),
134             "Failed reading thread id field at offset %d.", OffsetPtr);
135 
136       PreReadOffset = OffsetPtr;
137       Record.PId = Reader.getU32(&OffsetPtr);
138       if (OffsetPtr == PreReadOffset)
139         return createStringError(
140             std::make_error_code(std::errc::executable_format_error),
141             "Failed reading process id at offset %d.", OffsetPtr);
142 
143       break;
144     }
145     case 1: { // Arg payload record.
146       auto &Record = Records.back();
147 
148       // We skip the next two bytes of the record, because we don't need the
149       // type and the CPU record for arg payloads.
150       OffsetPtr += 2;
151       PreReadOffset = OffsetPtr;
152       int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
153       if (OffsetPtr == PreReadOffset)
154         return createStringError(
155             std::make_error_code(std::errc::executable_format_error),
156             "Failed reading function id field at offset %d.", OffsetPtr);
157 
158       PreReadOffset = OffsetPtr;
159       auto TId = Reader.getU32(&OffsetPtr);
160       if (OffsetPtr == PreReadOffset)
161         return createStringError(
162             std::make_error_code(std::errc::executable_format_error),
163             "Failed reading thread id field at offset %d.", OffsetPtr);
164 
165       PreReadOffset = OffsetPtr;
166       auto PId = Reader.getU32(&OffsetPtr);
167       if (OffsetPtr == PreReadOffset)
168         return createStringError(
169             std::make_error_code(std::errc::executable_format_error),
170             "Failed reading process id field at offset %d.", OffsetPtr);
171 
172       // Make a check for versions above 3 for the Pid field
173       if (Record.FuncId != FuncId || Record.TId != TId ||
174           (FileHeader.Version >= 3 ? Record.PId != PId : false))
175         return createStringError(
176             std::make_error_code(std::errc::executable_format_error),
177             "Corrupted log, found arg payload following non-matching "
178             "function+thread record. Record for function %d != %d at offset "
179             "%d",
180             Record.FuncId, FuncId, OffsetPtr);
181 
182       PreReadOffset = OffsetPtr;
183       auto Arg = Reader.getU64(&OffsetPtr);
184       if (OffsetPtr == PreReadOffset)
185         return createStringError(
186             std::make_error_code(std::errc::executable_format_error),
187             "Failed reading argument payload at offset %d.", OffsetPtr);
188 
189       Record.CallArgs.push_back(Arg);
190       break;
191     }
192     default:
193       return createStringError(
194           std::make_error_code(std::errc::executable_format_error),
195           "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
196     }
197     // Advance the offset pointer enough bytes to align to 32-byte records for
198     // basic mode logs.
199     OffsetPtr += 8;
200   }
201   return Error::success();
202 }
203 
204 /// When reading from a Flight Data Recorder mode log, metadata records are
205 /// sparse compared to packed function records, so we must maintain state as we
206 /// read through the sequence of entries. This allows the reader to denormalize
207 /// the CPUId and Thread Id onto each Function Record and transform delta
208 /// encoded TSC values into absolute encodings on each record.
struct FDRState {
  // CPU id taken from the most recent NewCPUId record; copied onto every
  // function record that follows.
  uint16_t CPUId;
  // Thread id taken from the most recent NewBuffer record.
  int32_t ThreadId;
  // Process id taken from the most recent Pid record.
  int32_t ProcessId;
  // Absolute TSC that the next function record's 32-bit delta is added to. It
  // is reset by NewCPUId and TSCWrap records and moved forward by every
  // function record.
  uint64_t BaseTSC;

  /// Encode some of the state transitions for the FDR log reader as explicit
  /// checks. These are expectations for the next Record in the stream.
  enum class Token {
    NEW_BUFFER_RECORD_OR_EOF,
    WALLCLOCK_RECORD,
    NEW_CPU_ID_RECORD,
    FUNCTION_SEQUENCE,
    SCAN_TO_END_OF_THREAD_BUF,
    CUSTOM_EVENT_DATA,
    CALL_ARGUMENT,
    BUFFER_EXTENTS,
    PID_RECORD,
  };
  // What the reader expects to see next; checked and updated by the
  // per-record handlers.
  Token Expects;

  // Each thread's buffer may have trailing garbage to scan over, so we track
  // our progress. CurrentBufferSize is overwritten by BufferExtents records.
  uint64_t CurrentBufferSize;
  uint64_t CurrentBufferConsumed;
};
235 
236 const char *fdrStateToTwine(const FDRState::Token &state) {
237   switch (state) {
238   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
239     return "NEW_BUFFER_RECORD_OR_EOF";
240   case FDRState::Token::WALLCLOCK_RECORD:
241     return "WALLCLOCK_RECORD";
242   case FDRState::Token::NEW_CPU_ID_RECORD:
243     return "NEW_CPU_ID_RECORD";
244   case FDRState::Token::FUNCTION_SEQUENCE:
245     return "FUNCTION_SEQUENCE";
246   case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
247     return "SCAN_TO_END_OF_THREAD_BUF";
248   case FDRState::Token::CUSTOM_EVENT_DATA:
249     return "CUSTOM_EVENT_DATA";
250   case FDRState::Token::CALL_ARGUMENT:
251     return "CALL_ARGUMENT";
252   case FDRState::Token::BUFFER_EXTENTS:
253     return "BUFFER_EXTENTS";
254   case FDRState::Token::PID_RECORD:
255     return "PID_RECORD";
256   }
257   return "UNKNOWN";
258 }
259 
260 /// State transition when a NewBufferRecord is encountered.
261 Error processFDRNewBufferRecord(FDRState &State, DataExtractor &RecordExtractor,
262                                 uint32_t &OffsetPtr) {
263   if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
264     return createStringError(
265         std::make_error_code(std::errc::executable_format_error),
266         "Malformed log: Read New Buffer record kind out of sequence; expected: "
267         "%s at offset %d.",
268         fdrStateToTwine(State.Expects), OffsetPtr);
269 
270   auto PreReadOffset = OffsetPtr;
271   State.ThreadId = RecordExtractor.getSigned(&OffsetPtr, 4);
272   if (OffsetPtr == PreReadOffset)
273     return createStringError(
274         std::make_error_code(std::errc::executable_format_error),
275         "Failed reading the thread id at offset %d.", OffsetPtr);
276   State.Expects = FDRState::Token::WALLCLOCK_RECORD;
277 
278   // Advance the offset pointer by enough bytes representing the remaining
279   // padding in a metadata record.
280   OffsetPtr += kFDRMetadataBodySize - 4;
281   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
282   return Error::success();
283 }
284 
285 /// State transition when an EndOfBufferRecord is encountered.
286 Error processFDREndOfBufferRecord(FDRState &State, uint32_t &OffsetPtr) {
287   if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
288     return createStringError(
289         std::make_error_code(std::errc::executable_format_error),
290         "Malformed log: Received EOB message without current buffer; expected: "
291         "%s at offset %d.",
292         fdrStateToTwine(State.Expects), OffsetPtr);
293 
294   State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
295 
296   // Advance the offset pointer by enough bytes representing the remaining
297   // padding in a metadata record.
298   OffsetPtr += kFDRMetadataBodySize;
299   return Error::success();
300 }
301 
302 /// State transition when a NewCPUIdRecord is encountered.
303 Error processFDRNewCPUIdRecord(FDRState &State, DataExtractor &RecordExtractor,
304                                uint32_t &OffsetPtr) {
305   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
306       State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
307     return make_error<StringError>(
308         Twine("Malformed log. Read NewCPUId record kind out of sequence; "
309               "expected: ") +
310             fdrStateToTwine(State.Expects),
311         std::make_error_code(std::errc::executable_format_error));
312   auto BeginOffset = OffsetPtr;
313   auto PreReadOffset = OffsetPtr;
314   State.CPUId = RecordExtractor.getU16(&OffsetPtr);
315   if (OffsetPtr == PreReadOffset)
316     return createStringError(
317         std::make_error_code(std::errc::executable_format_error),
318         "Failed reading the CPU field at offset %d.", OffsetPtr);
319 
320   PreReadOffset = OffsetPtr;
321   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
322   if (OffsetPtr == PreReadOffset)
323     return createStringError(
324         std::make_error_code(std::errc::executable_format_error),
325         "Failed reading the base TSC field at offset %d.", OffsetPtr);
326 
327   State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
328 
329   // Advance the offset pointer by a few bytes, to account for the padding in
330   // CPU ID metadata records that we've already advanced through.
331   OffsetPtr += kFDRMetadataBodySize - (OffsetPtr - BeginOffset);
332   assert(OffsetPtr - BeginOffset == kFDRMetadataBodySize);
333   return Error::success();
334 }
335 
336 /// State transition when a TSCWrapRecord (overflow detection) is encountered.
337 Error processFDRTSCWrapRecord(FDRState &State, DataExtractor &RecordExtractor,
338                               uint32_t &OffsetPtr) {
339   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
340     return make_error<StringError>(
341         Twine("Malformed log. Read TSCWrap record kind out of sequence; "
342               "expecting: ") +
343             fdrStateToTwine(State.Expects),
344         std::make_error_code(std::errc::executable_format_error));
345   auto PreReadOffset = OffsetPtr;
346   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
347   if (OffsetPtr == PreReadOffset)
348     return createStringError(
349         std::make_error_code(std::errc::executable_format_error),
350         "Failed reading the base TSC field at offset %d.", OffsetPtr);
351 
352   // Advance the offset pointer by a few more bytes, accounting for the padding
353   // in the metadata record after reading the base TSC.
354   OffsetPtr += kFDRMetadataBodySize - 8;
355   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
356   return Error::success();
357 }
358 
359 /// State transition when a WallTimeMarkerRecord is encountered.
360 Error processFDRWallTimeRecord(FDRState &State, DataExtractor &RecordExtractor,
361                                uint32_t &OffsetPtr) {
362   if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
363     return make_error<StringError>(
364         Twine("Malformed log. Read Wallclock record kind out of sequence; "
365               "expecting: ") +
366             fdrStateToTwine(State.Expects),
367         std::make_error_code(std::errc::executable_format_error));
368 
369   // Read in the data from the walltime record.
370   auto PreReadOffset = OffsetPtr;
371   auto WallTime = RecordExtractor.getU64(&OffsetPtr);
372   if (OffsetPtr == PreReadOffset)
373     return createStringError(
374         std::make_error_code(std::errc::executable_format_error),
375         "Failed reading the walltime record at offset %d.", OffsetPtr);
376 
377   // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation
378   // purposes. For now, we're ignoring these records.
379   (void)WallTime;
380   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
381 
382   // Advance the offset pointer by a few more bytes, accounting for the padding
383   // in the metadata record after reading in the walltime data.
384   OffsetPtr += kFDRMetadataBodySize - 8;
385   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
386   return Error::success();
387 }
388 
389 /// State transition when a PidRecord is encountered.
390 Error processFDRPidRecord(FDRState &State, DataExtractor &RecordExtractor,
391                           uint32_t &OffsetPtr) {
392   if (State.Expects != FDRState::Token::PID_RECORD)
393     return make_error<StringError>(
394         Twine("Malformed log. Read Pid record kind out of sequence; "
395               "expected: ") +
396             fdrStateToTwine(State.Expects),
397         std::make_error_code(std::errc::executable_format_error));
398   auto PreReadOffset = OffsetPtr;
399   State.ProcessId = RecordExtractor.getSigned(&OffsetPtr, 4);
400   if (OffsetPtr == PreReadOffset)
401     return createStringError(
402         std::make_error_code(std::errc::executable_format_error),
403         "Failed reading the process ID at offset %d.", OffsetPtr);
404   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
405 
406   // Advance the offset pointer by a few more bytes, accounting for the padding
407   // in the metadata record after reading in the PID.
408   OffsetPtr += kFDRMetadataBodySize - 4;
409   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
410   return Error::success();
411 }
412 
413 /// State transition when a CustomEventMarker is encountered.
414 Error processCustomEventMarker(FDRState &State, DataExtractor &RecordExtractor,
415                                uint32_t &OffsetPtr) {
416   // We can encounter a CustomEventMarker anywhere in the log, so we can handle
417   // it regardless of the expectation. However, we do set the expectation to
418   // read a set number of fixed bytes, as described in the metadata.
419   auto BeginOffset = OffsetPtr;
420   auto PreReadOffset = OffsetPtr;
421   uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr);
422   if (OffsetPtr == PreReadOffset)
423     return createStringError(
424         std::make_error_code(std::errc::executable_format_error),
425         "Failed reading a custom event marker at offset %d.", OffsetPtr);
426 
427   PreReadOffset = OffsetPtr;
428   uint64_t TSC = RecordExtractor.getU64(&OffsetPtr);
429   if (OffsetPtr == PreReadOffset)
430     return createStringError(
431         std::make_error_code(std::errc::executable_format_error),
432         "Failed reading the TSC at offset %d.", OffsetPtr);
433 
434   // FIXME: Actually represent the record through the API. For now we only
435   // skip through the data.
436   (void)TSC;
437   // Advance the offset ptr by the size of the data associated with the custom
438   // event, as well as the padding associated with the remainder of the metadata
439   // record.
440   OffsetPtr += (kFDRMetadataBodySize - (OffsetPtr - BeginOffset)) + DataSize;
441   if (!RecordExtractor.isValidOffset(OffsetPtr))
442     return createStringError(
443         std::make_error_code(std::errc::executable_format_error),
444         "Reading custom event data moves past addressable trace data (starting "
445         "at offset %d, advancing to offset %d).",
446         BeginOffset, OffsetPtr);
447   return Error::success();
448 }
449 
450 /// State transition when an BufferExtents record is encountered.
451 Error processBufferExtents(FDRState &State, DataExtractor &RecordExtractor,
452                            uint32_t &OffsetPtr) {
453   if (State.Expects != FDRState::Token::BUFFER_EXTENTS)
454     return make_error<StringError>(
455         Twine("Malformed log. Buffer Extents unexpected; expected: ") +
456             fdrStateToTwine(State.Expects),
457         std::make_error_code(std::errc::executable_format_error));
458 
459   auto PreReadOffset = OffsetPtr;
460   State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr);
461   if (OffsetPtr == PreReadOffset)
462     return createStringError(
463         std::make_error_code(std::errc::executable_format_error),
464         "Failed to read current buffer size at offset %d.", OffsetPtr);
465 
466   State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
467 
468   // Advance the offset pointer by enough bytes accounting for the padding in a
469   // metadata record, after we read in the buffer extents.
470   OffsetPtr += kFDRMetadataBodySize - 8;
471   return Error::success();
472 }
473 
474 /// State transition when a CallArgumentRecord is encountered.
475 Error processFDRCallArgumentRecord(FDRState &State,
476                                    DataExtractor &RecordExtractor,
477                                    std::vector<XRayRecord> &Records,
478                                    uint32_t &OffsetPtr) {
479   auto &Enter = Records.back();
480   if (Enter.Type != RecordTypes::ENTER && Enter.Type != RecordTypes::ENTER_ARG)
481     return make_error<StringError>(
482         "CallArgument needs to be right after a function entry",
483         std::make_error_code(std::errc::executable_format_error));
484 
485   auto PreReadOffset = OffsetPtr;
486   auto Arg = RecordExtractor.getU64(&OffsetPtr);
487   if (OffsetPtr == PreReadOffset)
488     return createStringError(
489         std::make_error_code(std::errc::executable_format_error),
490         "Failed to read argument record at offset %d.", OffsetPtr);
491 
492   Enter.Type = RecordTypes::ENTER_ARG;
493   Enter.CallArgs.emplace_back(Arg);
494 
495   // Advance the offset pointer by enough bytes accounting for the padding in a
496   // metadata record, after reading the payload.
497   OffsetPtr += kFDRMetadataBodySize - 8;
498   return Error::success();
499 }
500 
501 /// Advances the state machine for reading the FDR record type by reading one
502 /// Metadata Record and updating the State appropriately based on the kind of
503 /// record encountered. The RecordKind is encoded in the first byte of the
504 /// Record, which the caller should pass in because they have already read it
505 /// to determine that this is a metadata record as opposed to a function record.
506 ///
507 /// Beginning with Version 2 of the FDR log, we do not depend on the size of the
508 /// buffer, but rather use the extents to determine how far to read in the log
509 /// for this particular buffer.
510 ///
511 /// In Version 3, FDR log now includes a pid metadata record after
512 /// WallTimeMarker
513 Error processFDRMetadataRecord(FDRState &State, DataExtractor &RecordExtractor,
514                                uint32_t &OffsetPtr,
515                                std::vector<XRayRecord> &Records,
516                                uint16_t Version, uint8_t FirstByte) {
517   // The remaining 7 bits of the first byte are the RecordKind enum for each
518   // Metadata Record.
519   switch (FirstByte >> 1) {
520   case 0: // NewBuffer
521     if (auto E = processFDRNewBufferRecord(State, RecordExtractor, OffsetPtr))
522       return E;
523     break;
524   case 1: // EndOfBuffer
525     if (Version >= 2)
526       return make_error<StringError>(
527           "Since Version 2 of FDR logging, we no longer support EOB records.",
528           std::make_error_code(std::errc::executable_format_error));
529     if (auto E = processFDREndOfBufferRecord(State, OffsetPtr))
530       return E;
531     break;
532   case 2: // NewCPUId
533     if (auto E = processFDRNewCPUIdRecord(State, RecordExtractor, OffsetPtr))
534       return E;
535     break;
536   case 3: // TSCWrap
537     if (auto E = processFDRTSCWrapRecord(State, RecordExtractor, OffsetPtr))
538       return E;
539     break;
540   case 4: // WallTimeMarker
541     if (auto E = processFDRWallTimeRecord(State, RecordExtractor, OffsetPtr))
542       return E;
543     // In Version 3 and and above, a PidRecord is expected after WallTimeRecord
544     if (Version >= 3)
545       State.Expects = FDRState::Token::PID_RECORD;
546     break;
547   case 5: // CustomEventMarker
548     if (auto E = processCustomEventMarker(State, RecordExtractor, OffsetPtr))
549       return E;
550     break;
551   case 6: // CallArgument
552     if (auto E = processFDRCallArgumentRecord(State, RecordExtractor, Records,
553                                               OffsetPtr))
554       return E;
555     break;
556   case 7: // BufferExtents
557     if (auto E = processBufferExtents(State, RecordExtractor, OffsetPtr))
558       return E;
559     break;
560   case 9: // Pid
561     if (auto E = processFDRPidRecord(State, RecordExtractor, OffsetPtr))
562       return E;
563     break;
564   default:
565     return createStringError(
566         std::make_error_code(std::errc::executable_format_error),
567         "Illegal metadata record type: '%d' at offset %d.", FirstByte >> 1,
568         OffsetPtr);
569   }
570   return Error::success();
571 }
572 
573 /// Reads a function record from an FDR format log, appending a new XRayRecord
574 /// to the vector being populated and updating the State with a new value
575 /// reference value to interpret TSC deltas.
576 ///
577 /// The XRayRecord constructed includes information from the function record
578 /// processed here as well as Thread ID and CPU ID formerly extracted into
579 /// State.
580 Error processFDRFunctionRecord(FDRState &State, DataExtractor &RecordExtractor,
581                                uint32_t &OffsetPtr, uint8_t FirstByte,
582                                std::vector<XRayRecord> &Records) {
583   switch (State.Expects) {
584   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
585     return make_error<StringError>(
586         "Malformed log. Received Function Record before new buffer setup.",
587         std::make_error_code(std::errc::executable_format_error));
588   case FDRState::Token::WALLCLOCK_RECORD:
589     return make_error<StringError>(
590         "Malformed log. Received Function Record when expecting wallclock.",
591         std::make_error_code(std::errc::executable_format_error));
592   case FDRState::Token::PID_RECORD:
593     return make_error<StringError>(
594         "Malformed log. Received Function Record when expecting pid.",
595         std::make_error_code(std::errc::executable_format_error));
596   case FDRState::Token::NEW_CPU_ID_RECORD:
597     return make_error<StringError>(
598         "Malformed log. Received Function Record before first CPU record.",
599         std::make_error_code(std::errc::executable_format_error));
600   default:
601     Records.emplace_back();
602     auto &Record = Records.back();
603     Record.RecordType = 0; // Record is type NORMAL.
604     // Back up one byte to re-read the first byte, which is important for
605     // computing the function id for a record.
606     --OffsetPtr;
607 
608     auto PreReadOffset = OffsetPtr;
609     uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
610     if (OffsetPtr == PreReadOffset)
611       return createStringError(
612           std::make_error_code(std::errc::executable_format_error),
613           "Failed reading truncated function id field at offset %d.",
614           OffsetPtr);
615 
616     FirstByte = FuncIdBitField & 0xffu;
617     // Strip off record type bit and use the next three bits.
618     auto T = (FirstByte >> 1) & 0x07;
619     switch (T) {
620     case static_cast<decltype(T)>(RecordTypes::ENTER):
621       Record.Type = RecordTypes::ENTER;
622       break;
623     case static_cast<decltype(T)>(RecordTypes::EXIT):
624       Record.Type = RecordTypes::EXIT;
625       break;
626     case static_cast<decltype(T)>(RecordTypes::TAIL_EXIT):
627       Record.Type = RecordTypes::TAIL_EXIT;
628       break;
629     case static_cast<decltype(T)>(RecordTypes::ENTER_ARG):
630       Record.Type = RecordTypes::ENTER_ARG;
631       State.Expects = FDRState::Token::CALL_ARGUMENT;
632       break;
633     default:
634       return createStringError(
635           std::make_error_code(std::errc::executable_format_error),
636           "Illegal function record type '%d' at offset %d.", T, OffsetPtr);
637     }
638     Record.CPU = State.CPUId;
639     Record.TId = State.ThreadId;
640     Record.PId = State.ProcessId;
641 
642     // Despite function Id being a signed int on XRayRecord,
643     // when it is written to an FDR format, the top bits are truncated,
644     // so it is effectively an unsigned value. When we shift off the
645     // top four bits, we want the shift to be logical, so we read as
646     // uint32_t.
647     Record.FuncId = FuncIdBitField >> 4;
648 
649     // FunctionRecords have a 32 bit delta from the previous absolute TSC
650     // or TSC delta. If this would overflow, we should read a TSCWrap record
651     // with an absolute TSC reading.
652     PreReadOffset = OffsetPtr;
653     uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
654     if (OffsetPtr == PreReadOffset)
655       return createStringError(
656           std::make_error_code(std::errc::executable_format_error),
657           "Failed reading TSC delta at offset %d.", OffsetPtr);
658 
659     State.BaseTSC = NewTSC;
660     Record.TSC = NewTSC;
661   }
662   return Error::success();
663 }
664 
665 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
666 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
/// a log consists of the familiar 32 byte XRayFileHeader, followed by sequences
/// of interspersed 16 byte Metadata Records and 8 byte Function Records.
669 ///
670 /// The following is an attempt to document the grammar of the format, which is
671 /// parsed by this function for little-endian machines. Since the format makes
672 /// use of BitFields, when we support big-endian architectures, we will need to
673 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also
674 /// the bit twiddling logic, which is consistent with the little-endian
675 /// convention that BitFields within a struct will first be packed into the
676 /// least significant bits the address they belong to.
677 ///
678 /// We expect a format complying with the grammar in the following pseudo-EBNF
679 /// in Version 1 of the FDR log.
680 ///
681 /// FDRLog: XRayFileHeader ThreadBuffer*
682 /// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
683 ///     Includes BufferSize
684 /// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB
685 /// BufSize: 8 byte unsigned integer indicating how large the buffer is.
686 /// NewBuffer: 16 byte metadata record with Thread Id.
687 /// WallClockTime: 16 byte metadata record with human readable time.
688 /// Pid: 16 byte metadata record with Pid
689 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
690 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
691 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord
692 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
693 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
694 ///
695 /// In Version 2, we make the following changes:
696 ///
697 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId
698 ///               FunctionSequence
/// BufferExtents: 16 byte metadata record describing how many usable bytes are
700 ///                in the buffer. This is measured from the start of the buffer
701 ///                and must always be at least 48 (bytes).
702 ///
703 /// In Version 3, we make the following changes:
704 ///
705 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
706 ///               FunctionSequence
707 /// EOB: *deprecated*
708 Error loadFDRLog(StringRef Data, bool IsLittleEndian,
709                  XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
710 
711   if (Data.size() < 32)
712     return make_error<StringError>(
713         "Not enough bytes for an XRay log.",
714         std::make_error_code(std::errc::invalid_argument));
715 
716   DataExtractor Reader(Data, IsLittleEndian, 8);
717   uint32_t OffsetPtr = 0;
718   auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
719   if (!FileHeaderOrError)
720     return FileHeaderOrError.takeError();
721   FileHeader = std::move(FileHeaderOrError.get());
722 
723   uint64_t BufferSize = 0;
724   {
725     StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
726     DataExtractor ExtraDataExtractor(ExtraDataRef, IsLittleEndian, 8);
727     uint32_t ExtraDataOffset = 0;
728     BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
729   }
730 
731   FDRState::Token InitialExpectation;
732   switch (FileHeader.Version) {
733   case 1:
734     InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
735     break;
736   case 2:
737   case 3:
738     InitialExpectation = FDRState::Token::BUFFER_EXTENTS;
739     break;
740   default:
741     return make_error<StringError>(
742         Twine("Unsupported version '") + Twine(FileHeader.Version) + "'",
743         std::make_error_code(std::errc::executable_format_error));
744   }
745   FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0};
746 
747   // RecordSize will tell the loop how far to seek ahead based on the record
748   // type that we have just read.
749   while (Reader.isValidOffset(OffsetPtr)) {
750     auto BeginOffset = OffsetPtr;
751     if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
752       OffsetPtr += State.CurrentBufferSize - State.CurrentBufferConsumed;
753       State.CurrentBufferConsumed = 0;
754       State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
755       continue;
756     }
757     auto PreReadOffset = OffsetPtr;
758     uint8_t BitField = Reader.getU8(&OffsetPtr);
759     if (OffsetPtr == PreReadOffset)
760       return createStringError(
761           std::make_error_code(std::errc::executable_format_error),
762           "Failed reading first byte of record at offset %d.", OffsetPtr);
763     bool isMetadataRecord = BitField & 0x01uL;
764     bool isBufferExtents =
765         (BitField >> 1) == 7; // BufferExtents record kind == 7
766     if (isMetadataRecord) {
767       if (auto E = processFDRMetadataRecord(State, Reader, OffsetPtr, Records,
768                                             FileHeader.Version, BitField))
769         return E;
770     } else { // Process Function Record
771       if (auto E = processFDRFunctionRecord(State, Reader, OffsetPtr, BitField,
772                                             Records))
773         return E;
774     }
775 
776     // The BufferExtents record is technically not part of the buffer, so we
777     // don't count the size of that record against the buffer's actual size.
778     if (!isBufferExtents)
779       State.CurrentBufferConsumed += OffsetPtr - BeginOffset;
780 
781     assert(State.CurrentBufferConsumed <= State.CurrentBufferSize);
782 
783     if ((FileHeader.Version == 2 || FileHeader.Version == 3) &&
784         State.CurrentBufferSize == State.CurrentBufferConsumed) {
785       // In Version 2 of the log, we don't need to scan to the end of the thread
786       // buffer if we've already consumed all the bytes we need to.
787       State.Expects = FDRState::Token::BUFFER_EXTENTS;
788       State.CurrentBufferSize = BufferSize;
789       State.CurrentBufferConsumed = 0;
790     }
791   }
792 
793   // Having iterated over everything we've been given, we've either consumed
794   // everything and ended up in the end state, or were told to skip the rest.
795   bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
796                   State.CurrentBufferSize == State.CurrentBufferConsumed;
797   if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
798        State.Expects != FDRState::Token::BUFFER_EXTENTS) &&
799       !Finished)
800     return make_error<StringError>(
801         Twine("Encountered EOF with unexpected state expectation ") +
802             fdrStateToTwine(State.Expects) +
803             ". Remaining expected bytes in thread buffer total " +
804             Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
805         std::make_error_code(std::errc::executable_format_error));
806 
807   return Error::success();
808 }
809 
810 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
811                   std::vector<XRayRecord> &Records) {
812   YAMLXRayTrace Trace;
813   Input In(Data);
814   In >> Trace;
815   if (In.error())
816     return make_error<StringError>("Failed loading YAML Data.", In.error());
817 
818   FileHeader.Version = Trace.Header.Version;
819   FileHeader.Type = Trace.Header.Type;
820   FileHeader.ConstantTSC = Trace.Header.ConstantTSC;
821   FileHeader.NonstopTSC = Trace.Header.NonstopTSC;
822   FileHeader.CycleFrequency = Trace.Header.CycleFrequency;
823 
824   if (FileHeader.Version != 1)
825     return make_error<StringError>(
826         Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
827         std::make_error_code(std::errc::invalid_argument));
828 
829   Records.clear();
830   std::transform(Trace.Records.begin(), Trace.Records.end(),
831                  std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
832                    return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId,
833                                      R.TSC,        R.TId, R.PId,  R.CallArgs};
834                  });
835   return Error::success();
836 }
837 } // namespace
838 
839 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
840   int Fd;
841   if (auto EC = sys::fs::openFileForRead(Filename, Fd)) {
842     return make_error<StringError>(
843         Twine("Cannot read log from '") + Filename + "'", EC);
844   }
845 
846   uint64_t FileSize;
847   if (auto EC = sys::fs::file_size(Filename, FileSize)) {
848     return make_error<StringError>(
849         Twine("Cannot read log from '") + Filename + "'", EC);
850   }
851   if (FileSize < 4) {
852     return make_error<StringError>(
853         Twine("File '") + Filename + "' too small for XRay.",
854         std::make_error_code(std::errc::executable_format_error));
855   }
856 
857   // Map the opened file into memory and use a StringRef to access it later.
858   std::error_code EC;
859   sys::fs::mapped_file_region MappedFile(
860       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
861   if (EC) {
862     return make_error<StringError>(
863         Twine("Cannot read log from '") + Filename + "'", EC);
864   }
865   auto Data = StringRef(MappedFile.data(), MappedFile.size());
866 
867   // TODO: Lift the endianness and implementation selection here.
868   DataExtractor LittleEndianDE(Data, true, 8);
869   auto TraceOrError = loadTrace(LittleEndianDE, Sort);
870   if (!TraceOrError) {
871     DataExtractor BigEndianDE(Data, false, 8);
872     TraceOrError = loadTrace(BigEndianDE, Sort);
873   }
874   return TraceOrError;
875 }
876 
877 Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
878   // Attempt to detect the file type using file magic. We have a slight bias
879   // towards the binary format, and we do this by making sure that the first 4
880   // bytes of the binary file is some combination of the following byte
881   // patterns: (observe the code loading them assumes they're little endian)
882   //
883   //   0x01 0x00 0x00 0x00 - version 1, "naive" format
884   //   0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format
885   //   0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format
886   //
887   // YAML files don't typically have those first four bytes as valid text so we
888   // try loading assuming YAML if we don't find these bytes.
889   //
890   // Only if we can't load either the binary or the YAML format will we yield an
891   // error.
892   DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8);
893   uint32_t OffsetPtr = 0;
894   uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
895   uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
896 
897   enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 };
898 
899   Trace T;
900   switch (Type) {
901   case NAIVE_FORMAT:
902     if (Version == 1 || Version == 2 || Version == 3) {
903       if (auto E = loadNaiveFormatLog(DE.getData(), DE.isLittleEndian(),
904                                       T.FileHeader, T.Records))
905         return std::move(E);
906     } else {
907       return make_error<StringError>(
908           Twine("Unsupported version for Basic/Naive Mode logging: ") +
909               Twine(Version),
910           std::make_error_code(std::errc::executable_format_error));
911     }
912     break;
913   case FLIGHT_DATA_RECORDER_FORMAT:
914     if (Version == 1 || Version == 2 || Version == 3) {
915       if (auto E = loadFDRLog(DE.getData(), DE.isLittleEndian(), T.FileHeader,
916                               T.Records))
917         return std::move(E);
918     } else {
919       return make_error<StringError>(
920           Twine("Unsupported version for FDR Mode logging: ") + Twine(Version),
921           std::make_error_code(std::errc::executable_format_error));
922     }
923     break;
924   default:
925     if (auto E = loadYAMLLog(DE.getData(), T.FileHeader, T.Records))
926       return std::move(E);
927   }
928 
929   if (Sort)
930     std::stable_sort(T.Records.begin(), T.Records.end(),
931                      [&](const XRayRecord &L, const XRayRecord &R) {
932                        return L.TSC < R.TSC;
933                      });
934 
935   return std::move(T);
936 }
937