18bcb0991SDimitry Andric //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric // 78bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 88bcb0991SDimitry Andric 98bcb0991SDimitry Andric #include "llvm/ObjectYAML/MinidumpYAML.h" 108bcb0991SDimitry Andric #include "llvm/ObjectYAML/yaml2obj.h" 118bcb0991SDimitry Andric #include "llvm/Support/ConvertUTF.h" 128bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 13*bdd1243dSDimitry Andric #include <optional> 148bcb0991SDimitry Andric 158bcb0991SDimitry Andric using namespace llvm; 168bcb0991SDimitry Andric using namespace llvm::minidump; 178bcb0991SDimitry Andric using namespace llvm::MinidumpYAML; 188bcb0991SDimitry Andric 198bcb0991SDimitry Andric namespace { 208bcb0991SDimitry Andric /// A helper class to manage the placement of various structures into the final 218bcb0991SDimitry Andric /// minidump binary. Space for objects can be allocated via various allocate*** 228bcb0991SDimitry Andric /// methods, while the final minidump file is written by calling the writeTo 238bcb0991SDimitry Andric /// method. The plain versions of allocation functions take a reference to the 248bcb0991SDimitry Andric /// data which is to be written (and hence the data must be available until 258bcb0991SDimitry Andric /// writeTo is called), while the "New" versions allocate the data in an 268bcb0991SDimitry Andric /// allocator-managed buffer, which is available until the allocator object is 278bcb0991SDimitry Andric /// destroyed. For both kinds of functions, it is possible to modify the 288bcb0991SDimitry Andric /// data for which the space has been "allocated" until the final writeTo call. 298bcb0991SDimitry Andric /// This is useful for "linking" the allocated structures via their offsets. 308bcb0991SDimitry Andric class BlobAllocator { 318bcb0991SDimitry Andric public: 328bcb0991SDimitry Andric size_t tell() const { return NextOffset; } 338bcb0991SDimitry Andric 348bcb0991SDimitry Andric size_t allocateCallback(size_t Size, 358bcb0991SDimitry Andric std::function<void(raw_ostream &)> Callback) { 368bcb0991SDimitry Andric size_t Offset = NextOffset; 378bcb0991SDimitry Andric NextOffset += Size; 388bcb0991SDimitry Andric Callbacks.push_back(std::move(Callback)); 398bcb0991SDimitry Andric return Offset; 408bcb0991SDimitry Andric } 418bcb0991SDimitry Andric 428bcb0991SDimitry Andric size_t allocateBytes(ArrayRef<uint8_t> Data) { 438bcb0991SDimitry Andric return allocateCallback( 448bcb0991SDimitry Andric Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); 458bcb0991SDimitry Andric } 468bcb0991SDimitry Andric 478bcb0991SDimitry Andric size_t allocateBytes(yaml::BinaryRef Data) { 488bcb0991SDimitry Andric return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { 498bcb0991SDimitry Andric Data.writeAsBinary(OS); 508bcb0991SDimitry Andric }); 518bcb0991SDimitry Andric } 528bcb0991SDimitry Andric 538bcb0991SDimitry Andric template <typename T> size_t allocateArray(ArrayRef<T> Data) { 548bcb0991SDimitry Andric return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()), 558bcb0991SDimitry Andric sizeof(T) * Data.size()}); 568bcb0991SDimitry Andric } 578bcb0991SDimitry Andric 588bcb0991SDimitry Andric template <typename T, typename RangeType> 598bcb0991SDimitry Andric std::pair<size_t, MutableArrayRef<T>> 608bcb0991SDimitry Andric allocateNewArray(const iterator_range<RangeType> &Range); 618bcb0991SDimitry Andric 628bcb0991SDimitry Andric template <typename T> size_t allocateObject(const T &Data) { 63*bdd1243dSDimitry Andric return allocateArray(ArrayRef(Data)); 648bcb0991SDimitry Andric } 658bcb0991SDimitry Andric 668bcb0991SDimitry Andric template <typename T, typename... Types> 678bcb0991SDimitry Andric std::pair<size_t, T *> allocateNewObject(Types &&... Args) { 688bcb0991SDimitry Andric T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...); 698bcb0991SDimitry Andric return {allocateObject(*Object), Object}; 708bcb0991SDimitry Andric } 718bcb0991SDimitry Andric 728bcb0991SDimitry Andric size_t allocateString(StringRef Str); 738bcb0991SDimitry Andric 748bcb0991SDimitry Andric void writeTo(raw_ostream &OS) const; 758bcb0991SDimitry Andric 768bcb0991SDimitry Andric private: 778bcb0991SDimitry Andric size_t NextOffset = 0; 788bcb0991SDimitry Andric 798bcb0991SDimitry Andric BumpPtrAllocator Temporaries; 808bcb0991SDimitry Andric std::vector<std::function<void(raw_ostream &)>> Callbacks; 818bcb0991SDimitry Andric }; 828bcb0991SDimitry Andric } // namespace 838bcb0991SDimitry Andric 848bcb0991SDimitry Andric template <typename T, typename RangeType> 858bcb0991SDimitry Andric std::pair<size_t, MutableArrayRef<T>> 868bcb0991SDimitry Andric BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) { 878bcb0991SDimitry Andric size_t Num = std::distance(Range.begin(), Range.end()); 888bcb0991SDimitry Andric MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num); 898bcb0991SDimitry Andric std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); 908bcb0991SDimitry Andric return {allocateArray(Array), Array}; 918bcb0991SDimitry Andric } 928bcb0991SDimitry Andric 938bcb0991SDimitry Andric size_t BlobAllocator::allocateString(StringRef Str) { 948bcb0991SDimitry Andric SmallVector<UTF16, 32> WStr; 958bcb0991SDimitry Andric bool OK = convertUTF8ToUTF16String(Str, WStr); 968bcb0991SDimitry Andric assert(OK && "Invalid UTF8 in Str?"); 978bcb0991SDimitry Andric (void)OK; 988bcb0991SDimitry Andric 998bcb0991SDimitry Andric // The utf16 string is null-terminated, but the terminator is not counted in 1008bcb0991SDimitry Andric // the string size. 1018bcb0991SDimitry Andric WStr.push_back(0); 1028bcb0991SDimitry Andric size_t Result = 1038bcb0991SDimitry Andric allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first; 1048bcb0991SDimitry Andric allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end())); 1058bcb0991SDimitry Andric return Result; 1068bcb0991SDimitry Andric } 1078bcb0991SDimitry Andric 1088bcb0991SDimitry Andric void BlobAllocator::writeTo(raw_ostream &OS) const { 1098bcb0991SDimitry Andric size_t BeginOffset = OS.tell(); 1108bcb0991SDimitry Andric for (const auto &Callback : Callbacks) 1118bcb0991SDimitry Andric Callback(OS); 1128bcb0991SDimitry Andric assert(OS.tell() == BeginOffset + NextOffset && 1138bcb0991SDimitry Andric "Callbacks wrote an unexpected number of bytes."); 1148bcb0991SDimitry Andric (void)BeginOffset; 1158bcb0991SDimitry Andric } 1168bcb0991SDimitry Andric 1178bcb0991SDimitry Andric static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { 1188bcb0991SDimitry Andric return {support::ulittle32_t(Data.binary_size()), 1198bcb0991SDimitry Andric support::ulittle32_t(File.allocateBytes(Data))}; 1208bcb0991SDimitry Andric } 1218bcb0991SDimitry Andric 1228bcb0991SDimitry Andric static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { 1238bcb0991SDimitry Andric File.allocateObject(S.MDExceptionStream); 1248bcb0991SDimitry Andric 1258bcb0991SDimitry Andric size_t DataEnd = File.tell(); 1268bcb0991SDimitry Andric 1278bcb0991SDimitry Andric // Lay out the thread context data, (which is not a part of the stream). 1288bcb0991SDimitry Andric // TODO: This usually (always?) matches the thread context of the 1298bcb0991SDimitry Andric // corresponding thread, and may overlap memory regions as well. We could 1308bcb0991SDimitry Andric // add a level of indirection to the MinidumpYAML format (like an array of 1318bcb0991SDimitry Andric // Blobs that the LocationDescriptors index into) to be able to distinguish 1328bcb0991SDimitry Andric // the cases where location descriptions overlap vs happen to reference 1338bcb0991SDimitry Andric // identical data. 1348bcb0991SDimitry Andric S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext); 1358bcb0991SDimitry Andric 1368bcb0991SDimitry Andric return DataEnd; 1378bcb0991SDimitry Andric } 1388bcb0991SDimitry Andric 1398bcb0991SDimitry Andric static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { 1408bcb0991SDimitry Andric Range.Entry.Memory = layout(File, Range.Content); 1418bcb0991SDimitry Andric } 1428bcb0991SDimitry Andric 1438bcb0991SDimitry Andric static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { 1448bcb0991SDimitry Andric M.Entry.ModuleNameRVA = File.allocateString(M.Name); 1458bcb0991SDimitry Andric 1468bcb0991SDimitry Andric M.Entry.CvRecord = layout(File, M.CvRecord); 1478bcb0991SDimitry Andric M.Entry.MiscRecord = layout(File, M.MiscRecord); 1488bcb0991SDimitry Andric } 1498bcb0991SDimitry Andric 1508bcb0991SDimitry Andric static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { 1518bcb0991SDimitry Andric T.Entry.Stack.Memory = layout(File, T.Stack); 1528bcb0991SDimitry Andric T.Entry.Context = layout(File, T.Context); 1538bcb0991SDimitry Andric } 1548bcb0991SDimitry Andric 1558bcb0991SDimitry Andric template <typename EntryT> 1568bcb0991SDimitry Andric static size_t layout(BlobAllocator &File, 1578bcb0991SDimitry Andric MinidumpYAML::detail::ListStream<EntryT> &S) { 1588bcb0991SDimitry Andric 1598bcb0991SDimitry Andric File.allocateNewObject<support::ulittle32_t>(S.Entries.size()); 1608bcb0991SDimitry Andric for (auto &E : S.Entries) 1618bcb0991SDimitry Andric File.allocateObject(E.Entry); 1628bcb0991SDimitry Andric 1638bcb0991SDimitry Andric size_t DataEnd = File.tell(); 1648bcb0991SDimitry Andric 1658bcb0991SDimitry Andric // Lay out the auxiliary data, (which is not a part of the stream). 1668bcb0991SDimitry Andric DataEnd = File.tell(); 1678bcb0991SDimitry Andric for (auto &E : S.Entries) 1688bcb0991SDimitry Andric layout(File, E); 1698bcb0991SDimitry Andric 1708bcb0991SDimitry Andric return DataEnd; 1718bcb0991SDimitry Andric } 1728bcb0991SDimitry Andric 1738bcb0991SDimitry Andric static Directory layout(BlobAllocator &File, Stream &S) { 1748bcb0991SDimitry Andric Directory Result; 1758bcb0991SDimitry Andric Result.Type = S.Type; 1768bcb0991SDimitry Andric Result.Location.RVA = File.tell(); 177*bdd1243dSDimitry Andric std::optional<size_t> DataEnd; 1788bcb0991SDimitry Andric switch (S.Kind) { 1798bcb0991SDimitry Andric case Stream::StreamKind::Exception: 1808bcb0991SDimitry Andric DataEnd = layout(File, cast<MinidumpYAML::ExceptionStream>(S)); 1818bcb0991SDimitry Andric break; 1828bcb0991SDimitry Andric case Stream::StreamKind::MemoryInfoList: { 1838bcb0991SDimitry Andric MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(S); 1848bcb0991SDimitry Andric File.allocateNewObject<minidump::MemoryInfoListHeader>( 1858bcb0991SDimitry Andric sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo), 1868bcb0991SDimitry Andric InfoList.Infos.size()); 187*bdd1243dSDimitry Andric File.allocateArray(ArrayRef(InfoList.Infos)); 1888bcb0991SDimitry Andric break; 1898bcb0991SDimitry Andric } 1908bcb0991SDimitry Andric case Stream::StreamKind::MemoryList: 1918bcb0991SDimitry Andric DataEnd = layout(File, cast<MemoryListStream>(S)); 1928bcb0991SDimitry Andric break; 1938bcb0991SDimitry Andric case Stream::StreamKind::ModuleList: 1948bcb0991SDimitry Andric DataEnd = layout(File, cast<ModuleListStream>(S)); 1958bcb0991SDimitry Andric break; 1968bcb0991SDimitry Andric case Stream::StreamKind::RawContent: { 1978bcb0991SDimitry Andric RawContentStream &Raw = cast<RawContentStream>(S); 1988bcb0991SDimitry Andric File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { 1998bcb0991SDimitry Andric Raw.Content.writeAsBinary(OS); 2008bcb0991SDimitry Andric assert(Raw.Content.binary_size() <= Raw.Size); 2018bcb0991SDimitry Andric OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); 2028bcb0991SDimitry Andric }); 2038bcb0991SDimitry Andric break; 2048bcb0991SDimitry Andric } 2058bcb0991SDimitry Andric case Stream::StreamKind::SystemInfo: { 2068bcb0991SDimitry Andric SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S); 2078bcb0991SDimitry Andric File.allocateObject(SystemInfo.Info); 2088bcb0991SDimitry Andric // The CSD string is not a part of the stream. 2098bcb0991SDimitry Andric DataEnd = File.tell(); 2108bcb0991SDimitry Andric SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); 2118bcb0991SDimitry Andric break; 2128bcb0991SDimitry Andric } 2138bcb0991SDimitry Andric case Stream::StreamKind::TextContent: 2148bcb0991SDimitry Andric File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text)); 2158bcb0991SDimitry Andric break; 2168bcb0991SDimitry Andric case Stream::StreamKind::ThreadList: 2178bcb0991SDimitry Andric DataEnd = layout(File, cast<ThreadListStream>(S)); 2188bcb0991SDimitry Andric break; 2198bcb0991SDimitry Andric } 2208bcb0991SDimitry Andric // If DataEnd is not set, we assume everything we generated is a part of the 2218bcb0991SDimitry Andric // stream. 2228bcb0991SDimitry Andric Result.Location.DataSize = 22381ad6265SDimitry Andric DataEnd.value_or(File.tell()) - Result.Location.RVA; 2248bcb0991SDimitry Andric return Result; 2258bcb0991SDimitry Andric } 2268bcb0991SDimitry Andric 2278bcb0991SDimitry Andric namespace llvm { 2288bcb0991SDimitry Andric namespace yaml { 2298bcb0991SDimitry Andric 2308bcb0991SDimitry Andric bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, 2318bcb0991SDimitry Andric ErrorHandler /*EH*/) { 2328bcb0991SDimitry Andric BlobAllocator File; 2338bcb0991SDimitry Andric File.allocateObject(Obj.Header); 2348bcb0991SDimitry Andric 2358bcb0991SDimitry Andric std::vector<Directory> StreamDirectory(Obj.Streams.size()); 236*bdd1243dSDimitry Andric Obj.Header.StreamDirectoryRVA = File.allocateArray(ArrayRef(StreamDirectory)); 2378bcb0991SDimitry Andric Obj.Header.NumberOfStreams = StreamDirectory.size(); 2388bcb0991SDimitry Andric 2398bcb0991SDimitry Andric for (auto &Stream : enumerate(Obj.Streams)) 2408bcb0991SDimitry Andric StreamDirectory[Stream.index()] = layout(File, *Stream.value()); 2418bcb0991SDimitry Andric 2428bcb0991SDimitry Andric File.writeTo(Out); 2438bcb0991SDimitry Andric return true; 2448bcb0991SDimitry Andric } 2458bcb0991SDimitry Andric 2468bcb0991SDimitry Andric } // namespace yaml 2478bcb0991SDimitry Andric } // namespace llvm 248