1 //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ObjectYAML/MinidumpYAML.h" 10 #include "llvm/ObjectYAML/yaml2obj.h" 11 #include "llvm/Support/ConvertUTF.h" 12 #include "llvm/Support/raw_ostream.h" 13 #include <optional> 14 15 using namespace llvm; 16 using namespace llvm::minidump; 17 using namespace llvm::MinidumpYAML; 18 19 namespace { 20 /// A helper class to manage the placement of various structures into the final 21 /// minidump binary. Space for objects can be allocated via various allocate*** 22 /// methods, while the final minidump file is written by calling the writeTo 23 /// method. The plain versions of allocation functions take a reference to the 24 /// data which is to be written (and hence the data must be available until 25 /// writeTo is called), while the "New" versions allocate the data in an 26 /// allocator-managed buffer, which is available until the allocator object is 27 /// destroyed. For both kinds of functions, it is possible to modify the 28 /// data for which the space has been "allocated" until the final writeTo call. 29 /// This is useful for "linking" the allocated structures via their offsets. 30 class BlobAllocator { 31 public: 32 size_t tell() const { return NextOffset; } 33 34 size_t allocateCallback(size_t Size, 35 std::function<void(raw_ostream &)> Callback) { 36 size_t Offset = NextOffset; 37 NextOffset += Size; 38 Callbacks.push_back(std::move(Callback)); 39 return Offset; 40 } 41 42 size_t allocateBytes(ArrayRef<uint8_t> Data) { 43 return allocateCallback( 44 Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); 45 } 46 47 size_t allocateBytes(yaml::BinaryRef Data) { 48 return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { 49 Data.writeAsBinary(OS); 50 }); 51 } 52 53 template <typename T> size_t allocateArray(ArrayRef<T> Data) { 54 return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()), 55 sizeof(T) * Data.size()}); 56 } 57 58 template <typename T, typename RangeType> 59 std::pair<size_t, MutableArrayRef<T>> 60 allocateNewArray(const iterator_range<RangeType> &Range); 61 62 template <typename T> size_t allocateObject(const T &Data) { 63 return allocateArray(ArrayRef(Data)); 64 } 65 66 template <typename T, typename... Types> 67 std::pair<size_t, T *> allocateNewObject(Types &&... Args) { 68 T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...); 69 return {allocateObject(*Object), Object}; 70 } 71 72 size_t allocateString(StringRef Str); 73 74 void writeTo(raw_ostream &OS) const; 75 76 private: 77 size_t NextOffset = 0; 78 79 BumpPtrAllocator Temporaries; 80 std::vector<std::function<void(raw_ostream &)>> Callbacks; 81 }; 82 } // namespace 83 84 template <typename T, typename RangeType> 85 std::pair<size_t, MutableArrayRef<T>> 86 BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) { 87 size_t Num = std::distance(Range.begin(), Range.end()); 88 MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num); 89 std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); 90 return {allocateArray(Array), Array}; 91 } 92 93 size_t BlobAllocator::allocateString(StringRef Str) { 94 SmallVector<UTF16, 32> WStr; 95 bool OK = convertUTF8ToUTF16String(Str, WStr); 96 assert(OK && "Invalid UTF8 in Str?"); 97 (void)OK; 98 99 // The utf16 string is null-terminated, but the terminator is not counted in 100 // the string size. 101 WStr.push_back(0); 102 size_t Result = 103 allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first; 104 allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end())); 105 return Result; 106 } 107 108 void BlobAllocator::writeTo(raw_ostream &OS) const { 109 size_t BeginOffset = OS.tell(); 110 for (const auto &Callback : Callbacks) 111 Callback(OS); 112 assert(OS.tell() == BeginOffset + NextOffset && 113 "Callbacks wrote an unexpected number of bytes."); 114 (void)BeginOffset; 115 } 116 117 static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { 118 return {support::ulittle32_t(Data.binary_size()), 119 support::ulittle32_t(File.allocateBytes(Data))}; 120 } 121 122 static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { 123 File.allocateObject(S.MDExceptionStream); 124 125 size_t DataEnd = File.tell(); 126 127 // Lay out the thread context data, (which is not a part of the stream). 128 // TODO: This usually (always?) matches the thread context of the 129 // corresponding thread, and may overlap memory regions as well. We could 130 // add a level of indirection to the MinidumpYAML format (like an array of 131 // Blobs that the LocationDescriptors index into) to be able to distinguish 132 // the cases where location descriptions overlap vs happen to reference 133 // identical data. 134 S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext); 135 136 return DataEnd; 137 } 138 139 static size_t layout(BlobAllocator &File, MinidumpYAML::Memory64ListStream &S) { 140 size_t BaseRVA = File.tell() + sizeof(minidump::Memory64ListHeader); 141 BaseRVA += S.Entries.size() * sizeof(minidump::MemoryDescriptor_64); 142 S.Header.BaseRVA = BaseRVA; 143 S.Header.NumberOfMemoryRanges = S.Entries.size(); 144 File.allocateObject(S.Header); 145 for (auto &E : S.Entries) 146 File.allocateObject(E.Entry); 147 148 // Save the new offset for the stream size. 149 size_t DataEnd = File.tell(); 150 for (auto &E : S.Entries) { 151 File.allocateBytes(E.Content); 152 if (E.Entry.DataSize > E.Content.binary_size()) { 153 size_t Padding = E.Entry.DataSize - E.Content.binary_size(); 154 File.allocateCallback(Padding, [Padding](raw_ostream &OS) { 155 OS << std::string(Padding, '\0'); 156 }); 157 } 158 } 159 160 return DataEnd; 161 } 162 163 static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { 164 Range.Entry.Memory = layout(File, Range.Content); 165 } 166 167 static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { 168 M.Entry.ModuleNameRVA = File.allocateString(M.Name); 169 170 M.Entry.CvRecord = layout(File, M.CvRecord); 171 M.Entry.MiscRecord = layout(File, M.MiscRecord); 172 } 173 174 static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { 175 T.Entry.Stack.Memory = layout(File, T.Stack); 176 T.Entry.Context = layout(File, T.Context); 177 } 178 179 template <typename EntryT> 180 static size_t layout(BlobAllocator &File, 181 MinidumpYAML::detail::ListStream<EntryT> &S) { 182 183 File.allocateNewObject<support::ulittle32_t>(S.Entries.size()); 184 for (auto &E : S.Entries) 185 File.allocateObject(E.Entry); 186 187 size_t DataEnd = File.tell(); 188 189 // Lay out the auxiliary data, (which is not a part of the stream). 190 DataEnd = File.tell(); 191 for (auto &E : S.Entries) 192 layout(File, E); 193 194 return DataEnd; 195 } 196 197 static Directory layout(BlobAllocator &File, Stream &S) { 198 Directory Result; 199 Result.Type = S.Type; 200 Result.Location.RVA = File.tell(); 201 std::optional<size_t> DataEnd; 202 switch (S.Kind) { 203 case Stream::StreamKind::Exception: 204 DataEnd = layout(File, cast<MinidumpYAML::ExceptionStream>(S)); 205 break; 206 case Stream::StreamKind::MemoryInfoList: { 207 MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(S); 208 File.allocateNewObject<minidump::MemoryInfoListHeader>( 209 sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo), 210 InfoList.Infos.size()); 211 File.allocateArray(ArrayRef(InfoList.Infos)); 212 break; 213 } 214 case Stream::StreamKind::MemoryList: 215 DataEnd = layout(File, cast<MemoryListStream>(S)); 216 break; 217 case Stream::StreamKind::Memory64List: 218 DataEnd = layout(File, cast<Memory64ListStream>(S)); 219 break; 220 case Stream::StreamKind::ModuleList: 221 DataEnd = layout(File, cast<ModuleListStream>(S)); 222 break; 223 case Stream::StreamKind::RawContent: { 224 RawContentStream &Raw = cast<RawContentStream>(S); 225 File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { 226 Raw.Content.writeAsBinary(OS); 227 assert(Raw.Content.binary_size() <= Raw.Size); 228 OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); 229 }); 230 break; 231 } 232 case Stream::StreamKind::SystemInfo: { 233 SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S); 234 File.allocateObject(SystemInfo.Info); 235 // The CSD string is not a part of the stream. 236 DataEnd = File.tell(); 237 SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); 238 break; 239 } 240 case Stream::StreamKind::TextContent: 241 File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text)); 242 break; 243 case Stream::StreamKind::ThreadList: 244 DataEnd = layout(File, cast<ThreadListStream>(S)); 245 break; 246 } 247 // If DataEnd is not set, we assume everything we generated is a part of the 248 // stream. 249 Result.Location.DataSize = 250 DataEnd.value_or(File.tell()) - Result.Location.RVA; 251 return Result; 252 } 253 254 namespace llvm { 255 namespace yaml { 256 257 bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, 258 ErrorHandler /*EH*/) { 259 BlobAllocator File; 260 File.allocateObject(Obj.Header); 261 262 std::vector<Directory> StreamDirectory(Obj.Streams.size()); 263 Obj.Header.StreamDirectoryRVA = File.allocateArray(ArrayRef(StreamDirectory)); 264 Obj.Header.NumberOfStreams = StreamDirectory.size(); 265 266 for (const auto &[Index, Stream] : enumerate(Obj.Streams)) 267 StreamDirectory[Index] = layout(File, *Stream); 268 269 File.writeTo(Out); 270 return true; 271 } 272 273 } // namespace yaml 274 } // namespace llvm 275