1 //===-- ObjectFileWasm.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjectFileWasm.h" 10 #include "lldb/Core/Module.h" 11 #include "lldb/Core/ModuleSpec.h" 12 #include "lldb/Core/PluginManager.h" 13 #include "lldb/Core/Section.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/SectionLoadList.h" 16 #include "lldb/Target/Target.h" 17 #include "lldb/Utility/DataBufferHeap.h" 18 #include "lldb/Utility/Log.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/BinaryFormat/Magic.h" 23 #include "llvm/BinaryFormat/Wasm.h" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/Format.h" 26 27 using namespace lldb; 28 using namespace lldb_private; 29 using namespace lldb_private::wasm; 30 31 LLDB_PLUGIN_DEFINE(ObjectFileWasm) 32 33 static const uint32_t kWasmHeaderSize = 34 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); 35 36 /// Checks whether the data buffer starts with a valid Wasm module header. 37 static bool ValidateModuleHeader(const DataBufferSP &data_sp) { 38 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) 39 return false; 40 41 if (llvm::identify_magic(toStringRef(data_sp->GetData())) != 42 llvm::file_magic::wasm_object) 43 return false; 44 45 uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); 46 47 uint32_t version = llvm::support::endian::read32le(Ptr); 48 return version == llvm::wasm::WasmVersion; 49 } 50 51 static llvm::Optional<ConstString> 52 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { 53 // A Wasm string is encoded as a vector of UTF-8 codes. 54 // Vectors are encoded with their u32 length followed by the element 55 // sequence. 56 uint64_t len = data.getULEB128(c); 57 if (!c) { 58 consumeError(c.takeError()); 59 return llvm::None; 60 } 61 62 if (len >= (uint64_t(1) << 32)) { 63 return llvm::None; 64 } 65 66 llvm::SmallVector<uint8_t, 32> str_storage; 67 data.getU8(c, str_storage, len); 68 if (!c) { 69 consumeError(c.takeError()); 70 return llvm::None; 71 } 72 73 llvm::StringRef str = toStringRef(makeArrayRef(str_storage)); 74 return ConstString(str); 75 } 76 77 char ObjectFileWasm::ID; 78 79 void ObjectFileWasm::Initialize() { 80 PluginManager::RegisterPlugin(GetPluginNameStatic(), 81 GetPluginDescriptionStatic(), CreateInstance, 82 CreateMemoryInstance, GetModuleSpecifications); 83 } 84 85 void ObjectFileWasm::Terminate() { 86 PluginManager::UnregisterPlugin(CreateInstance); 87 } 88 89 ObjectFile * 90 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp, 91 offset_t data_offset, const FileSpec *file, 92 offset_t file_offset, offset_t length) { 93 Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); 94 95 if (!data_sp) { 96 data_sp = MapFileData(*file, length, file_offset); 97 if (!data_sp) { 98 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", 99 file->GetPath().c_str()); 100 return nullptr; 101 } 102 data_offset = 0; 103 } 104 105 assert(data_sp); 106 if (!ValidateModuleHeader(data_sp)) { 107 LLDB_LOGF(log, 108 "Failed to create ObjectFileWasm instance: invalid Wasm header"); 109 return nullptr; 110 } 111 112 // Update the data to contain the entire file if it doesn't contain it 113 // already. 114 if (data_sp->GetByteSize() < length) { 115 data_sp = MapFileData(*file, length, file_offset); 116 if (!data_sp) { 117 LLDB_LOGF(log, 118 "Failed to create ObjectFileWasm instance: cannot read file %s", 119 file->GetPath().c_str()); 120 return nullptr; 121 } 122 data_offset = 0; 123 } 124 125 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( 126 module_sp, data_sp, data_offset, file, file_offset, length)); 127 ArchSpec spec = objfile_up->GetArchitecture(); 128 if (spec && objfile_up->SetModulesArchitecture(spec)) { 129 LLDB_LOGF(log, 130 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", 131 static_cast<void *>(objfile_up.get()), 132 static_cast<void *>(objfile_up->GetModule().get()), 133 objfile_up->GetModule()->GetSpecificationDescription().c_str(), 134 file ? file->GetPath().c_str() : "<NULL>"); 135 return objfile_up.release(); 136 } 137 138 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); 139 return nullptr; 140 } 141 142 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, 143 DataBufferSP &data_sp, 144 const ProcessSP &process_sp, 145 addr_t header_addr) { 146 if (!ValidateModuleHeader(data_sp)) 147 return nullptr; 148 149 std::unique_ptr<ObjectFileWasm> objfile_up( 150 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); 151 ArchSpec spec = objfile_up->GetArchitecture(); 152 if (spec && objfile_up->SetModulesArchitecture(spec)) 153 return objfile_up.release(); 154 return nullptr; 155 } 156 157 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { 158 // Buffer sufficient to read a section header and find the pointer to the next 159 // section. 160 const uint32_t kBufferSize = 1024; 161 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); 162 163 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 164 llvm::DataExtractor::Cursor c(0); 165 166 // Each section consists of: 167 // - a one-byte section id, 168 // - the u32 size of the contents, in bytes, 169 // - the actual contents. 170 uint8_t section_id = data.getU8(c); 171 uint64_t payload_len = data.getULEB128(c); 172 if (!c) 173 return !llvm::errorToBool(c.takeError()); 174 175 if (payload_len >= (uint64_t(1) << 32)) 176 return false; 177 178 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { 179 // Custom sections have the id 0. Their contents consist of a name 180 // identifying the custom section, followed by an uninterpreted sequence 181 // of bytes. 182 lldb::offset_t prev_offset = c.tell(); 183 llvm::Optional<ConstString> sect_name = GetWasmString(data, c); 184 if (!sect_name) 185 return false; 186 187 if (payload_len < c.tell() - prev_offset) 188 return false; 189 190 uint32_t section_length = payload_len - (c.tell() - prev_offset); 191 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, 192 section_id, *sect_name}); 193 *offset_ptr += (c.tell() + section_length); 194 } else if (section_id <= llvm::wasm::WASM_SEC_TAG) { 195 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), 196 static_cast<uint32_t>(payload_len), 197 section_id, ConstString()}); 198 *offset_ptr += (c.tell() + payload_len); 199 } else { 200 // Invalid section id. 201 return false; 202 } 203 return true; 204 } 205 206 bool ObjectFileWasm::DecodeSections() { 207 lldb::offset_t offset = kWasmHeaderSize; 208 if (IsInMemory()) { 209 offset += m_memory_addr; 210 } 211 212 while (DecodeNextSection(&offset)) 213 ; 214 return true; 215 } 216 217 size_t ObjectFileWasm::GetModuleSpecifications( 218 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, 219 offset_t file_offset, offset_t length, ModuleSpecList &specs) { 220 if (!ValidateModuleHeader(data_sp)) { 221 return 0; 222 } 223 224 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); 225 specs.Append(spec); 226 return 1; 227 } 228 229 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp, 230 offset_t data_offset, const FileSpec *file, 231 offset_t offset, offset_t length) 232 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), 233 m_arch("wasm32-unknown-unknown-wasm") { 234 m_data.SetAddressByteSize(4); 235 } 236 237 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, 238 lldb::DataBufferSP &header_data_sp, 239 const lldb::ProcessSP &process_sp, 240 lldb::addr_t header_addr) 241 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 242 m_arch("wasm32-unknown-unknown-wasm") {} 243 244 bool ObjectFileWasm::ParseHeader() { 245 // We already parsed the header during initialization. 246 return true; 247 } 248 249 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; } 250 251 static SectionType GetSectionTypeFromName(llvm::StringRef Name) { 252 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { 253 return llvm::StringSwitch<SectionType>(Name) 254 .Case("abbrev", eSectionTypeDWARFDebugAbbrev) 255 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo) 256 .Case("addr", eSectionTypeDWARFDebugAddr) 257 .Case("aranges", eSectionTypeDWARFDebugAranges) 258 .Case("cu_index", eSectionTypeDWARFDebugCuIndex) 259 .Case("frame", eSectionTypeDWARFDebugFrame) 260 .Case("info", eSectionTypeDWARFDebugInfo) 261 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo) 262 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine) 263 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr) 264 .Case("loc", eSectionTypeDWARFDebugLoc) 265 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo) 266 .Case("loclists", eSectionTypeDWARFDebugLocLists) 267 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo) 268 .Case("macinfo", eSectionTypeDWARFDebugMacInfo) 269 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro) 270 .Case("names", eSectionTypeDWARFDebugNames) 271 .Case("pubnames", eSectionTypeDWARFDebugPubNames) 272 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) 273 .Case("ranges", eSectionTypeDWARFDebugRanges) 274 .Case("rnglists", eSectionTypeDWARFDebugRngLists) 275 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) 276 .Case("str", eSectionTypeDWARFDebugStr) 277 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) 278 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) 279 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo) 280 .Case("tu_index", eSectionTypeDWARFDebugTuIndex) 281 .Case("types", eSectionTypeDWARFDebugTypes) 282 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo) 283 .Default(eSectionTypeOther); 284 } 285 return eSectionTypeOther; 286 } 287 288 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { 289 if (m_sections_up) 290 return; 291 292 m_sections_up = std::make_unique<SectionList>(); 293 294 if (m_sect_infos.empty()) { 295 DecodeSections(); 296 } 297 298 for (const section_info §_info : m_sect_infos) { 299 SectionType section_type = eSectionTypeOther; 300 ConstString section_name; 301 offset_t file_offset = sect_info.offset & 0xffffffff; 302 addr_t vm_addr = file_offset; 303 size_t vm_size = sect_info.size; 304 305 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { 306 section_type = eSectionTypeCode; 307 section_name = ConstString("code"); 308 309 // A code address in DWARF for WebAssembly is the offset of an 310 // instruction relative within the Code section of the WebAssembly file. 311 // For this reason Section::GetFileAddress() must return zero for the 312 // Code section. 313 vm_addr = 0; 314 } else { 315 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); 316 if (section_type == eSectionTypeOther) 317 continue; 318 section_name = sect_info.name; 319 if (!IsInMemory()) { 320 vm_size = 0; 321 vm_addr = 0; 322 } 323 } 324 325 SectionSP section_sp( 326 new Section(GetModule(), // Module to which this section belongs. 327 this, // ObjectFile to which this section belongs and 328 // should read section data from. 329 section_type, // Section ID. 330 section_name, // Section name. 331 section_type, // Section type. 332 vm_addr, // VM address. 333 vm_size, // VM size in bytes of this section. 334 file_offset, // Offset of this section in the file. 335 sect_info.size, // Size of the section as found in the file. 336 0, // Alignment of the section 337 0, // Flags for this section. 338 1)); // Number of host bytes per target byte 339 m_sections_up->AddSection(section_sp); 340 unified_section_list.AddSection(section_sp); 341 } 342 } 343 344 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, 345 bool value_is_offset) { 346 /// In WebAssembly, linear memory is disjointed from code space. The VM can 347 /// load multiple instances of a module, which logically share the same code. 348 /// We represent a wasm32 code address with 64-bits, like: 349 /// 63 32 31 0 350 /// +---------------+---------------+ 351 /// + module_id | offset | 352 /// +---------------+---------------+ 353 /// where the lower 32 bits represent a module offset (relative to the module 354 /// start not to the beginning of the code section) and the higher 32 bits 355 /// uniquely identify the module in the WebAssembly VM. 356 /// In other words, we assume that each WebAssembly module is loaded by the 357 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like 358 /// 0x0000000400000000 for module_id == 4. 359 /// These 64-bit addresses will be used to request code ranges for a specific 360 /// module from the WebAssembly engine. 361 362 assert(m_memory_addr == LLDB_INVALID_ADDRESS || 363 m_memory_addr == load_address); 364 365 ModuleSP module_sp = GetModule(); 366 if (!module_sp) 367 return false; 368 369 DecodeSections(); 370 371 size_t num_loaded_sections = 0; 372 SectionList *section_list = GetSectionList(); 373 if (!section_list) 374 return false; 375 376 const size_t num_sections = section_list->GetSize(); 377 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 378 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 379 if (target.SetSectionLoadAddress( 380 section_sp, load_address | section_sp->GetFileOffset())) { 381 ++num_loaded_sections; 382 } 383 } 384 385 return num_loaded_sections > 0; 386 } 387 388 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { 389 DataExtractor data; 390 if (m_file) { 391 if (offset < GetByteSize()) { 392 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset); 393 auto buffer_sp = MapFileData(m_file, size, offset); 394 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); 395 } 396 } else { 397 ProcessSP process_sp(m_process_wp.lock()); 398 if (process_sp) { 399 auto data_up = std::make_unique<DataBufferHeap>(size, 0); 400 Status readmem_error; 401 size_t bytes_read = process_sp->ReadMemory( 402 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error); 403 if (bytes_read > 0) { 404 DataBufferSP buffer_sp(data_up.release()); 405 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); 406 } 407 } 408 } 409 410 data.SetByteOrder(GetByteOrder()); 411 return data; 412 } 413 414 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { 415 static ConstString g_sect_name_external_debug_info("external_debug_info"); 416 417 for (const section_info §_info : m_sect_infos) { 418 if (g_sect_name_external_debug_info == sect_info.name) { 419 const uint32_t kBufferSize = 1024; 420 DataExtractor section_header_data = 421 ReadImageData(sect_info.offset, kBufferSize); 422 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 423 llvm::DataExtractor::Cursor c(0); 424 llvm::Optional<ConstString> symbols_url = GetWasmString(data, c); 425 if (symbols_url) 426 return FileSpec(symbols_url->GetStringRef()); 427 } 428 } 429 return llvm::None; 430 } 431 432 void ObjectFileWasm::Dump(Stream *s) { 433 ModuleSP module_sp(GetModule()); 434 if (!module_sp) 435 return; 436 437 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 438 439 llvm::raw_ostream &ostream = s->AsRawOstream(); 440 ostream << static_cast<void *>(this) << ": "; 441 s->Indent(); 442 ostream << "ObjectFileWasm, file = '"; 443 m_file.Dump(ostream); 444 ostream << "', arch = "; 445 ostream << GetArchitecture().GetArchitectureName() << "\n"; 446 447 SectionList *sections = GetSectionList(); 448 if (sections) { 449 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 450 UINT32_MAX); 451 } 452 ostream << "\n"; 453 DumpSectionHeaders(ostream); 454 ostream << "\n"; 455 } 456 457 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, 458 const section_info_t &sh) { 459 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " 460 << llvm::format_hex(sh.offset, 10) << " " 461 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) 462 << "\n"; 463 } 464 465 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { 466 ostream << "Section Headers\n"; 467 ostream << "IDX name addr size id\n"; 468 ostream << "==== ---------------- ---------- ---------- ------\n"; 469 470 uint32_t idx = 0; 471 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); 472 ++pos, ++idx) { 473 ostream << "[" << llvm::format_decimal(idx, 2) << "] "; 474 ObjectFileWasm::DumpSectionHeader(ostream, *pos); 475 } 476 } 477