xref: /llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (revision 3ec28da6d6430a00b46780555a87acd43fcab790)
1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/Log.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/BinaryFormat/Magic.h"
23 #include "llvm/BinaryFormat/Wasm.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Format.h"
26 
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::wasm;
30 
/// Size in bytes of the fixed module header: the Wasm magic bytes followed by
/// the Wasm version field.
static const uint32_t kWasmHeaderSize =
    sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
33 
34 /// Checks whether the data buffer starts with a valid Wasm module header.
35 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
36   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
37     return false;
38 
39   if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
40       llvm::file_magic::wasm_object)
41     return false;
42 
43   uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
44 
45   uint32_t version = llvm::support::endian::read32le(Ptr);
46   return version == llvm::wasm::WasmVersion;
47 }
48 
49 static llvm::Optional<ConstString>
50 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
51   // A Wasm string is encoded as a vector of UTF-8 codes.
52   // Vectors are encoded with their u32 length followed by the element
53   // sequence.
54   uint64_t len = data.getULEB128(c);
55   if (!c) {
56     consumeError(c.takeError());
57     return llvm::None;
58   }
59 
60   if (len >= (uint64_t(1) << 32)) {
61     return llvm::None;
62   }
63 
64   llvm::SmallVector<uint8_t, 32> str_storage;
65   data.getU8(c, str_storage, len);
66   if (!c) {
67     consumeError(c.takeError());
68     return llvm::None;
69   }
70 
71   llvm::StringRef str = toStringRef(makeArrayRef(str_storage));
72   return ConstString(str);
73 }
74 
// Out-of-line definition of the static `ID` member of ObjectFileWasm.
// NOTE(review): presumably the type tag used for LLVM-style RTTI -- confirm
// against the class declaration in ObjectFileWasm.h.
char ObjectFileWasm::ID;
76 
/// Registers this plugin with the PluginManager, passing its static name and
/// description together with the callbacks used to create an instance from a
/// file, create an instance from process memory, and list module
/// specifications.
void ObjectFileWasm::Initialize() {
  PluginManager::RegisterPlugin(GetPluginNameStatic(),
                                GetPluginDescriptionStatic(), CreateInstance,
                                CreateMemoryInstance, GetModuleSpecifications);
}
82 
/// Unregisters this plugin from the PluginManager; the registration is
/// identified by the CreateInstance callback.
void ObjectFileWasm::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
86 
/// Returns the plugin name, "wasm".
ConstString ObjectFileWasm::GetPluginNameStatic() {
  // Function-local static: the ConstString is built once and then reused.
  static ConstString g_name("wasm");
  return g_name;
}
91 
92 ObjectFile *
93 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
94                                offset_t data_offset, const FileSpec *file,
95                                offset_t file_offset, offset_t length) {
96   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
97 
98   if (!data_sp) {
99     data_sp = MapFileData(*file, length, file_offset);
100     if (!data_sp) {
101       LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
102                 file->GetPath().c_str());
103       return nullptr;
104     }
105     data_offset = 0;
106   }
107 
108   assert(data_sp);
109   if (!ValidateModuleHeader(data_sp)) {
110     LLDB_LOGF(log,
111               "Failed to create ObjectFileWasm instance: invalid Wasm header");
112     return nullptr;
113   }
114 
115   // Update the data to contain the entire file if it doesn't contain it
116   // already.
117   if (data_sp->GetByteSize() < length) {
118     data_sp = MapFileData(*file, length, file_offset);
119     if (!data_sp) {
120       LLDB_LOGF(log,
121                 "Failed to create ObjectFileWasm instance: cannot read file %s",
122                 file->GetPath().c_str());
123       return nullptr;
124     }
125     data_offset = 0;
126   }
127 
128   std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
129       module_sp, data_sp, data_offset, file, file_offset, length));
130   ArchSpec spec = objfile_up->GetArchitecture();
131   if (spec && objfile_up->SetModulesArchitecture(spec)) {
132     LLDB_LOGF(log,
133               "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
134               static_cast<void *>(objfile_up.get()),
135               static_cast<void *>(objfile_up->GetModule().get()),
136               objfile_up->GetModule()->GetSpecificationDescription().c_str(),
137               file ? file->GetPath().c_str() : "<NULL>");
138     return objfile_up.release();
139   }
140 
141   LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
142   return nullptr;
143 }
144 
145 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
146                                                  DataBufferSP &data_sp,
147                                                  const ProcessSP &process_sp,
148                                                  addr_t header_addr) {
149   if (!ValidateModuleHeader(data_sp))
150     return nullptr;
151 
152   std::unique_ptr<ObjectFileWasm> objfile_up(
153       new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
154   ArchSpec spec = objfile_up->GetArchitecture();
155   if (spec && objfile_up->SetModulesArchitecture(spec))
156     return objfile_up.release();
157   return nullptr;
158 }
159 
160 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
161   // Buffer sufficient to read a section header and find the pointer to the next
162   // section.
163   const uint32_t kBufferSize = 1024;
164   DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
165 
166   llvm::DataExtractor data = section_header_data.GetAsLLVM();
167   llvm::DataExtractor::Cursor c(0);
168 
169   // Each section consists of:
170   // - a one-byte section id,
171   // - the u32 size of the contents, in bytes,
172   // - the actual contents.
173   uint8_t section_id = data.getU8(c);
174   uint64_t payload_len = data.getULEB128(c);
175   if (!c)
176     return !llvm::errorToBool(c.takeError());
177 
178   if (payload_len >= (uint64_t(1) << 32))
179     return false;
180 
181   if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
182     // Custom sections have the id 0. Their contents consist of a name
183     // identifying the custom section, followed by an uninterpreted sequence
184     // of bytes.
185     lldb::offset_t prev_offset = c.tell();
186     llvm::Optional<ConstString> sect_name = GetWasmString(data, c);
187     if (!sect_name)
188       return false;
189 
190     if (payload_len < c.tell() - prev_offset)
191       return false;
192 
193     uint32_t section_length = payload_len - (c.tell() - prev_offset);
194     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
195                                         section_id, *sect_name});
196     *offset_ptr += (c.tell() + section_length);
197   } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) {
198     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
199                                         static_cast<uint32_t>(payload_len),
200                                         section_id, ConstString()});
201     *offset_ptr += (c.tell() + payload_len);
202   } else {
203     // Invalid section id.
204     return false;
205   }
206   return true;
207 }
208 
209 bool ObjectFileWasm::DecodeSections() {
210   lldb::offset_t offset = kWasmHeaderSize;
211   if (IsInMemory()) {
212     offset += m_memory_addr;
213   }
214 
215   while (DecodeNextSection(&offset))
216     ;
217   return true;
218 }
219 
220 size_t ObjectFileWasm::GetModuleSpecifications(
221     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
222     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
223   if (!ValidateModuleHeader(data_sp)) {
224     return 0;
225   }
226 
227   ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
228   specs.Append(spec);
229   return 1;
230 }
231 
/// Constructs an object file backed by file data.
///
/// All arguments are forwarded to the ObjectFile base class; the architecture
/// is fixed to "wasm32-unknown-unknown-wasm".
ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp,
                               offset_t data_offset, const FileSpec *file,
                               offset_t offset, offset_t length)
    : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
      m_arch("wasm32-unknown-unknown-wasm") {
  // wasm32 addresses are 4 bytes wide.
  m_data.SetAddressByteSize(4);
}
239 
/// Constructs an object file backed by the memory of a process.
///
/// The module, process, header address and header data are forwarded to the
/// ObjectFile base class; the architecture is fixed to
/// "wasm32-unknown-unknown-wasm".
ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
                               lldb::DataBufferSP &header_data_sp,
                               const lldb::ProcessSP &process_sp,
                               lldb::addr_t header_addr)
    : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
      m_arch("wasm32-unknown-unknown-wasm") {}
246 
/// Does no work and always reports success.
bool ObjectFileWasm::ParseHeader() {
  // We already parsed the header during initialization.
  return true;
}
251 
/// Always returns nullptr: this function never provides a symbol table.
Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
253 
254 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
255   if (m_sections_up)
256     return;
257 
258   m_sections_up = std::make_unique<SectionList>();
259 
260   if (m_sect_infos.empty()) {
261     DecodeSections();
262   }
263 
264   for (const section_info &sect_info : m_sect_infos) {
265     SectionType section_type = eSectionTypeOther;
266     ConstString section_name;
267     offset_t file_offset = sect_info.offset & 0xffffffff;
268     addr_t vm_addr = file_offset;
269     size_t vm_size = sect_info.size;
270 
271     if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
272       section_type = eSectionTypeCode;
273       section_name = ConstString("code");
274 
275       // A code address in DWARF for WebAssembly is the offset of an
276       // instruction relative within the Code section of the WebAssembly file.
277       // For this reason Section::GetFileAddress() must return zero for the
278       // Code section.
279       vm_addr = 0;
280     } else {
281       section_type =
282           llvm::StringSwitch<SectionType>(sect_info.name.GetStringRef())
283               .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev)
284               .Case(".debug_addr", eSectionTypeDWARFDebugAddr)
285               .Case(".debug_aranges", eSectionTypeDWARFDebugAranges)
286               .Case(".debug_cu_index", eSectionTypeDWARFDebugCuIndex)
287               .Case(".debug_frame", eSectionTypeDWARFDebugFrame)
288               .Case(".debug_info", eSectionTypeDWARFDebugInfo)
289               .Case(".debug_line", eSectionTypeDWARFDebugLine)
290               .Case(".debug_line_str", eSectionTypeDWARFDebugLineStr)
291               .Case(".debug_loc", eSectionTypeDWARFDebugLoc)
292               .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists)
293               .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo)
294               .Case(".debug_macro", eSectionTypeDWARFDebugMacro)
295               .Case(".debug_names", eSectionTypeDWARFDebugNames)
296               .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames)
297               .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes)
298               .Case(".debug_ranges", eSectionTypeDWARFDebugRanges)
299               .Case(".debug_rnglists", eSectionTypeDWARFDebugRngLists)
300               .Case(".debug_str", eSectionTypeDWARFDebugStr)
301               .Case(".debug_str_offsets", eSectionTypeDWARFDebugStrOffsets)
302               .Case(".debug_types", eSectionTypeDWARFDebugTypes)
303               .Default(eSectionTypeOther);
304       if (section_type == eSectionTypeOther)
305         continue;
306       section_name = sect_info.name;
307       if (!IsInMemory()) {
308         vm_size = 0;
309         vm_addr = 0;
310       }
311     }
312 
313     SectionSP section_sp(
314         new Section(GetModule(), // Module to which this section belongs.
315                     this,        // ObjectFile to which this section belongs and
316                                  // should read section data from.
317                     section_type,   // Section ID.
318                     section_name,   // Section name.
319                     section_type,   // Section type.
320                     vm_addr,        // VM address.
321                     vm_size,        // VM size in bytes of this section.
322                     file_offset,    // Offset of this section in the file.
323                     sect_info.size, // Size of the section as found in the file.
324                     0,              // Alignment of the section
325                     0,              // Flags for this section.
326                     1));            // Number of host bytes per target byte
327     m_sections_up->AddSection(section_sp);
328     unified_section_list.AddSection(section_sp);
329   }
330 }
331 
332 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
333                                     bool value_is_offset) {
334   /// In WebAssembly, linear memory is disjointed from code space. The VM can
335   /// load multiple instances of a module, which logically share the same code.
336   /// We represent a wasm32 code address with 64-bits, like:
337   /// 63            32 31             0
338   /// +---------------+---------------+
339   /// +   module_id   |     offset    |
340   /// +---------------+---------------+
341   /// where the lower 32 bits represent a module offset (relative to the module
342   /// start not to the beginning of the code section) and the higher 32 bits
343   /// uniquely identify the module in the WebAssembly VM.
344   /// In other words, we assume that each WebAssembly module is loaded by the
345   /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
346   /// 0x0000000400000000 for module_id == 4.
347   /// These 64-bit addresses will be used to request code ranges for a specific
348   /// module from the WebAssembly engine.
349 
350   assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
351          m_memory_addr == load_address);
352 
353   ModuleSP module_sp = GetModule();
354   if (!module_sp)
355     return false;
356 
357   DecodeSections();
358 
359   size_t num_loaded_sections = 0;
360   SectionList *section_list = GetSectionList();
361   if (!section_list)
362     return false;
363 
364   const size_t num_sections = section_list->GetSize();
365   for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
366     SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
367     if (target.SetSectionLoadAddress(
368             section_sp, load_address | section_sp->GetFileOffset())) {
369       ++num_loaded_sections;
370     }
371   }
372 
373   return num_loaded_sections > 0;
374 }
375 
376 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
377   DataExtractor data;
378   if (m_file) {
379     if (offset < GetByteSize()) {
380       size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
381       auto buffer_sp = MapFileData(m_file, size, offset);
382       return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
383     }
384   } else {
385     ProcessSP process_sp(m_process_wp.lock());
386     if (process_sp) {
387       auto data_up = std::make_unique<DataBufferHeap>(size, 0);
388       Status readmem_error;
389       size_t bytes_read = process_sp->ReadMemory(
390           offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
391       if (bytes_read > 0) {
392         DataBufferSP buffer_sp(data_up.release());
393         data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
394       }
395     }
396   }
397 
398   data.SetByteOrder(GetByteOrder());
399   return data;
400 }
401 
402 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
403   static ConstString g_sect_name_external_debug_info("external_debug_info");
404 
405   for (const section_info &sect_info : m_sect_infos) {
406     if (g_sect_name_external_debug_info == sect_info.name) {
407       const uint32_t kBufferSize = 1024;
408       DataExtractor section_header_data =
409           ReadImageData(sect_info.offset, kBufferSize);
410       llvm::DataExtractor data = section_header_data.GetAsLLVM();
411       llvm::DataExtractor::Cursor c(0);
412       llvm::Optional<ConstString> symbols_url = GetWasmString(data, c);
413       if (symbols_url)
414         return FileSpec(symbols_url->GetStringRef());
415     }
416   }
417   return llvm::None;
418 }
419 
420 void ObjectFileWasm::Dump(Stream *s) {
421   ModuleSP module_sp(GetModule());
422   if (!module_sp)
423     return;
424 
425   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
426 
427   llvm::raw_ostream &ostream = s->AsRawOstream();
428   ostream << static_cast<void *>(this) << ": ";
429   s->Indent();
430   ostream << "ObjectFileWasm, file = '";
431   m_file.Dump(ostream);
432   ostream << "', arch = ";
433   ostream << GetArchitecture().GetArchitectureName() << "\n";
434 
435   SectionList *sections = GetSectionList();
436   if (sections) {
437     sections->Dump(s, nullptr, true, UINT32_MAX);
438   }
439   ostream << "\n";
440   DumpSectionHeaders(ostream);
441   ostream << "\n";
442 }
443 
444 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
445                                        const section_info_t &sh) {
446   ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
447           << llvm::format_hex(sh.offset, 10) << " "
448           << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
449           << "\n";
450 }
451 
452 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
453   ostream << "Section Headers\n";
454   ostream << "IDX  name             addr       size       id\n";
455   ostream << "==== ---------------- ---------- ---------- ------\n";
456 
457   uint32_t idx = 0;
458   for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
459        ++pos, ++idx) {
460     ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
461     ObjectFileWasm::DumpSectionHeader(ostream, *pos);
462   }
463 }
464