xref: /freebsd-src/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/Log.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/BinaryFormat/Magic.h"
23 #include "llvm/BinaryFormat/Wasm.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Format.h"
26 
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::wasm;
30 
31 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
32 
33 static const uint32_t kWasmHeaderSize =
34     sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
35 
36 /// Checks whether the data buffer starts with a valid Wasm module header.
37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
38   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
39     return false;
40 
41   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
42       llvm::file_magic::wasm_object)
43     return false;
44 
45   uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
46 
47   uint32_t version = llvm::support::endian::read32le(Ptr);
48   return version == llvm::wasm::WasmVersion;
49 }
50 
51 static llvm::Optional<ConstString>
52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
53   // A Wasm string is encoded as a vector of UTF-8 codes.
54   // Vectors are encoded with their u32 length followed by the element
55   // sequence.
56   uint64_t len = data.getULEB128(c);
57   if (!c) {
58     consumeError(c.takeError());
59     return llvm::None;
60   }
61 
62   if (len >= (uint64_t(1) << 32)) {
63     return llvm::None;
64   }
65 
66   llvm::SmallVector<uint8_t, 32> str_storage;
67   data.getU8(c, str_storage, len);
68   if (!c) {
69     consumeError(c.takeError());
70     return llvm::None;
71   }
72 
73   llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
74   return ConstString(str);
75 }
76 
77 char ObjectFileWasm::ID;
78 
79 void ObjectFileWasm::Initialize() {
80   PluginManager::RegisterPlugin(GetPluginNameStatic(),
81                                 GetPluginDescriptionStatic(), CreateInstance,
82                                 CreateMemoryInstance, GetModuleSpecifications);
83 }
84 
85 void ObjectFileWasm::Terminate() {
86   PluginManager::UnregisterPlugin(CreateInstance);
87 }
88 
89 ObjectFile *
90 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
91                                offset_t data_offset, const FileSpec *file,
92                                offset_t file_offset, offset_t length) {
93   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
94 
95   if (!data_sp) {
96     data_sp = MapFileData(*file, length, file_offset);
97     if (!data_sp) {
98       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
99                 file->GetPath().c_str());
100       return nullptr;
101     }
102     data_offset = 0;
103   }
104 
105   assert(data_sp);
106   if (!ValidateModuleHeader(data_sp)) {
107     LLDB_LOGF(log,
108               "Failed to create ObjectFileWasm instance: invalid Wasm header");
109     return nullptr;
110   }
111 
112   // Update the data to contain the entire file if it doesn't contain it
113   // already.
114   if (data_sp->GetByteSize() < length) {
115     data_sp = MapFileData(*file, length, file_offset);
116     if (!data_sp) {
117       LLDB_LOGF(log,
118                 "Failed to create ObjectFileWasm instance: cannot read file %s",
119                 file->GetPath().c_str());
120       return nullptr;
121     }
122     data_offset = 0;
123   }
124 
125   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
126       module_sp, data_sp, data_offset, file, file_offset, length));
127   ArchSpec spec = objfile_up->GetArchitecture();
128   if (spec && objfile_up->SetModulesArchitecture(spec)) {
129     LLDB_LOGF(log,
130               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
131               static_cast<void *>(objfile_up.get()),
132               static_cast<void *>(objfile_up->GetModule().get()),
133               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
134               file ? file->GetPath().c_str() : "<NULL>");
135     return objfile_up.release();
136   }
137 
138   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
139   return nullptr;
140 }
141 
142 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
143                                                  DataBufferSP &data_sp,
144                                                  const ProcessSP &process_sp,
145                                                  addr_t header_addr) {
146   if (!ValidateModuleHeader(data_sp))
147     return nullptr;
148 
149   std::unique_ptr<ObjectFileWasm> objfile_up(
150       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
151   ArchSpec spec = objfile_up->GetArchitecture();
152   if (spec && objfile_up->SetModulesArchitecture(spec))
153     return objfile_up.release();
154   return nullptr;
155 }
156 
157 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
158   // Buffer sufficient to read a section header and find the pointer to the next
159   // section.
160   const uint32_t kBufferSize = 1024;
161   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
162 
163   llvm::DataExtractor data = section_header_data.GetAsLLVM();
164   llvm::DataExtractor::Cursor c(0);
165 
166   // Each section consists of:
167   // - a one-byte section id,
168   // - the u32 size of the contents, in bytes,
169   // - the actual contents.
170   uint8_t section_id = data.getU8(c);
171   uint64_t payload_len = data.getULEB128(c);
172   if (!c)
173     return !llvm::errorToBool(c.takeError());
174 
175   if (payload_len >= (uint64_t(1) << 32))
176     return false;
177 
178   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
179     // Custom sections have the id 0. Their contents consist of a name
180     // identifying the custom section, followed by an uninterpreted sequence
181     // of bytes.
182     lldb::offset_t prev_offset = c.tell();
183     llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
184     if (!sect_name)
185       return false;
186 
187     if (payload_len < c.tell() - prev_offset)
188       return false;
189 
190     uint32_t section_length = payload_len - (c.tell() - prev_offset);
191     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
192                                         section_id, *sect_name});
193     *offset_ptr += (c.tell() + section_length);
194   } else if (section_id <= llvm::wasm::WASM_SEC_TAG) {
195     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
196                                         static_cast<uint32_t>(payload_len),
197                                         section_id, ConstString()});
198     *offset_ptr += (c.tell() + payload_len);
199   } else {
200     // Invalid section id.
201     return false;
202   }
203   return true;
204 }
205 
206 bool ObjectFileWasm::DecodeSections() {
207   lldb::offset_t offset = kWasmHeaderSize;
208   if (IsInMemory()) {
209     offset += m_memory_addr;
210   }
211 
212   while (DecodeNextSection(&offset))
213     ;
214   return true;
215 }
216 
217 size_t ObjectFileWasm::GetModuleSpecifications(
218     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
219     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
220   if (!ValidateModuleHeader(data_sp)) {
221     return 0;
222   }
223 
224   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
225   specs.Append(spec);
226   return 1;
227 }
228 
229 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp,
230                                offset_t data_offset, const FileSpec *file,
231                                offset_t offset, offset_t length)
232     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
233       m_arch("wasm32-unknown-unknown-wasm") {
234   m_data.SetAddressByteSize(4);
235 }
236 
237 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
238                                lldb::DataBufferSP &header_data_sp,
239                                const lldb::ProcessSP &process_sp,
240                                lldb::addr_t header_addr)
241     : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
242       m_arch("wasm32-unknown-unknown-wasm") {}
243 
244 bool ObjectFileWasm::ParseHeader() {
245   // We already parsed the header during initialization.
246   return true;
247 }
248 
249 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
250 
251 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
252   if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
253     return llvm::StringSwitch<SectionType>(Name)
254         .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
255         .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
256         .Case("addr", eSectionTypeDWARFDebugAddr)
257         .Case("aranges", eSectionTypeDWARFDebugAranges)
258         .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
259         .Case("frame", eSectionTypeDWARFDebugFrame)
260         .Case("info", eSectionTypeDWARFDebugInfo)
261         .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
262         .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
263         .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
264         .Case("loc", eSectionTypeDWARFDebugLoc)
265         .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
266         .Case("loclists", eSectionTypeDWARFDebugLocLists)
267         .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
268         .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
269         .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
270         .Case("names", eSectionTypeDWARFDebugNames)
271         .Case("pubnames", eSectionTypeDWARFDebugPubNames)
272         .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
273         .Case("ranges", eSectionTypeDWARFDebugRanges)
274         .Case("rnglists", eSectionTypeDWARFDebugRngLists)
275         .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
276         .Case("str", eSectionTypeDWARFDebugStr)
277         .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
278         .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
279         .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
280         .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
281         .Case("types", eSectionTypeDWARFDebugTypes)
282         .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
283         .Default(eSectionTypeOther);
284   }
285   return eSectionTypeOther;
286 }
287 
288 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
289   if (m_sections_up)
290     return;
291 
292   m_sections_up = std::make_unique<SectionList>();
293 
294   if (m_sect_infos.empty()) {
295     DecodeSections();
296   }
297 
298   for (const section_info &sect_info : m_sect_infos) {
299     SectionType section_type = eSectionTypeOther;
300     ConstString section_name;
301     offset_t file_offset = sect_info.offset & 0xffffffff;
302     addr_t vm_addr = file_offset;
303     size_t vm_size = sect_info.size;
304 
305     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
306       section_type = eSectionTypeCode;
307       section_name = ConstString("code");
308 
309       // A code address in DWARF for WebAssembly is the offset of an
310       // instruction relative within the Code section of the WebAssembly file.
311       // For this reason Section::GetFileAddress() must return zero for the
312       // Code section.
313       vm_addr = 0;
314     } else {
315       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
316       if (section_type == eSectionTypeOther)
317         continue;
318       section_name = sect_info.name;
319       if (!IsInMemory()) {
320         vm_size = 0;
321         vm_addr = 0;
322       }
323     }
324 
325     SectionSP section_sp(
326         new Section(GetModule(), // Module to which this section belongs.
327                     this,        // ObjectFile to which this section belongs and
328                                  // should read section data from.
329                     section_type,   // Section ID.
330                     section_name,   // Section name.
331                     section_type,   // Section type.
332                     vm_addr,        // VM address.
333                     vm_size,        // VM size in bytes of this section.
334                     file_offset,    // Offset of this section in the file.
335                     sect_info.size, // Size of the section as found in the file.
336                     0,              // Alignment of the section
337                     0,              // Flags for this section.
338                     1));            // Number of host bytes per target byte
339     m_sections_up->AddSection(section_sp);
340     unified_section_list.AddSection(section_sp);
341   }
342 }
343 
344 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
345                                     bool value_is_offset) {
346   /// In WebAssembly, linear memory is disjointed from code space. The VM can
347   /// load multiple instances of a module, which logically share the same code.
348   /// We represent a wasm32 code address with 64-bits, like:
349   /// 63            32 31             0
350   /// +---------------+---------------+
351   /// +   module_id   |     offset    |
352   /// +---------------+---------------+
353   /// where the lower 32 bits represent a module offset (relative to the module
354   /// start not to the beginning of the code section) and the higher 32 bits
355   /// uniquely identify the module in the WebAssembly VM.
356   /// In other words, we assume that each WebAssembly module is loaded by the
357   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
358   /// 0x0000000400000000 for module_id == 4.
359   /// These 64-bit addresses will be used to request code ranges for a specific
360   /// module from the WebAssembly engine.
361 
362   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
363          m_memory_addr == load_address);
364 
365   ModuleSP module_sp = GetModule();
366   if (!module_sp)
367     return false;
368 
369   DecodeSections();
370 
371   size_t num_loaded_sections = 0;
372   SectionList *section_list = GetSectionList();
373   if (!section_list)
374     return false;
375 
376   const size_t num_sections = section_list->GetSize();
377   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
378     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
379     if (target.SetSectionLoadAddress(
380             section_sp, load_address | section_sp->GetFileOffset())) {
381       ++num_loaded_sections;
382     }
383   }
384 
385   return num_loaded_sections > 0;
386 }
387 
388 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
389   DataExtractor data;
390   if (m_file) {
391     if (offset < GetByteSize()) {
392       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
393       auto buffer_sp = MapFileData(m_file, size, offset);
394       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
395     }
396   } else {
397     ProcessSP process_sp(m_process_wp.lock());
398     if (process_sp) {
399       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
400       Status readmem_error;
401       size_t bytes_read = process_sp->ReadMemory(
402           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
403       if (bytes_read > 0) {
404         DataBufferSP buffer_sp(data_up.release());
405         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
406       }
407     }
408   }
409 
410   data.SetByteOrder(GetByteOrder());
411   return data;
412 }
413 
414 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
415   static ConstString g_sect_name_external_debug_info("external_debug_info");
416 
417   for (const section_info &sect_info : m_sect_infos) {
418     if (g_sect_name_external_debug_info == sect_info.name) {
419       const uint32_t kBufferSize = 1024;
420       DataExtractor section_header_data =
421           ReadImageData(sect_info.offset, kBufferSize);
422       llvm::DataExtractor data = section_header_data.GetAsLLVM();
423       llvm::DataExtractor::Cursor c(0);
424       llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
425       if (symbols_url)
426         return FileSpec(symbols_url->GetStringRef());
427     }
428   }
429   return llvm::None;
430 }
431 
432 void ObjectFileWasm::Dump(Stream *s) {
433   ModuleSP module_sp(GetModule());
434   if (!module_sp)
435     return;
436 
437   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
438 
439   llvm::raw_ostream &ostream = s->AsRawOstream();
440   ostream << static_cast<void *>(this) << ": ";
441   s->Indent();
442   ostream << "ObjectFileWasm, file = '";
443   m_file.Dump(ostream);
444   ostream << "', arch = ";
445   ostream << GetArchitecture().GetArchitectureName() << "\n";
446 
447   SectionList *sections = GetSectionList();
448   if (sections) {
449     sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
450                    UINT32_MAX);
451   }
452   ostream << "\n";
453   DumpSectionHeaders(ostream);
454   ostream << "\n";
455 }
456 
457 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
458                                        const section_info_t &sh) {
459   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
460           << llvm::format_hex(sh.offset, 10) << " "
461           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
462           << "\n";
463 }
464 
465 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
466   ostream << "Section Headers\n";
467   ostream << "IDX  name             addr       size       id\n";
468   ostream << "==== ---------------- ---------- ---------- ------\n";
469 
470   uint32_t idx = 0;
471   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
472        ++pos, ++idx) {
473     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
474     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
475   }
476 }
477