1 //===-- ObjectFileWasm.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjectFileWasm.h" 10 #include "lldb/Core/Module.h" 11 #include "lldb/Core/ModuleSpec.h" 12 #include "lldb/Core/PluginManager.h" 13 #include "lldb/Core/Section.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/SectionLoadList.h" 16 #include "lldb/Target/Target.h" 17 #include "lldb/Utility/DataBufferHeap.h" 18 #include "lldb/Utility/Log.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/BinaryFormat/Magic.h" 23 #include "llvm/BinaryFormat/Wasm.h" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/Format.h" 26 27 using namespace lldb; 28 using namespace lldb_private; 29 using namespace lldb_private::wasm; 30 31 static const uint32_t kWasmHeaderSize = 32 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); 33 34 /// Checks whether the data buffer starts with a valid Wasm module header. 35 static bool ValidateModuleHeader(const DataBufferSP &data_sp) { 36 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) 37 return false; 38 39 if (llvm::identify_magic(toStringRef(data_sp->GetData())) != 40 llvm::file_magic::wasm_object) 41 return false; 42 43 uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); 44 45 uint32_t version = llvm::support::endian::read32le(Ptr); 46 return version == llvm::wasm::WasmVersion; 47 } 48 49 static llvm::Optional<ConstString> 50 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { 51 // A Wasm string is encoded as a vector of UTF-8 codes. 52 // Vectors are encoded with their u32 length followed by the element 53 // sequence. 54 uint64_t len = data.getULEB128(c); 55 if (!c) { 56 consumeError(c.takeError()); 57 return llvm::None; 58 } 59 60 if (len >= (uint64_t(1) << 32)) { 61 return llvm::None; 62 } 63 64 llvm::SmallVector<uint8_t, 32> str_storage; 65 data.getU8(c, str_storage, len); 66 if (!c) { 67 consumeError(c.takeError()); 68 return llvm::None; 69 } 70 71 llvm::StringRef str = toStringRef(makeArrayRef(str_storage)); 72 return ConstString(str); 73 } 74 75 char ObjectFileWasm::ID; 76 77 void ObjectFileWasm::Initialize() { 78 PluginManager::RegisterPlugin(GetPluginNameStatic(), 79 GetPluginDescriptionStatic(), CreateInstance, 80 CreateMemoryInstance, GetModuleSpecifications); 81 } 82 83 void ObjectFileWasm::Terminate() { 84 PluginManager::UnregisterPlugin(CreateInstance); 85 } 86 87 ConstString ObjectFileWasm::GetPluginNameStatic() { 88 static ConstString g_name("wasm"); 89 return g_name; 90 } 91 92 ObjectFile * 93 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp, 94 offset_t data_offset, const FileSpec *file, 95 offset_t file_offset, offset_t length) { 96 Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); 97 98 if (!data_sp) { 99 data_sp = MapFileData(*file, length, file_offset); 100 if (!data_sp) { 101 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", 102 file->GetPath().c_str()); 103 return nullptr; 104 } 105 data_offset = 0; 106 } 107 108 assert(data_sp); 109 if (!ValidateModuleHeader(data_sp)) { 110 LLDB_LOGF(log, 111 "Failed to create ObjectFileWasm instance: invalid Wasm header"); 112 return nullptr; 113 } 114 115 // Update the data to contain the entire file if it doesn't contain it 116 // already. 117 if (data_sp->GetByteSize() < length) { 118 data_sp = MapFileData(*file, length, file_offset); 119 if (!data_sp) { 120 LLDB_LOGF(log, 121 "Failed to create ObjectFileWasm instance: cannot read file %s", 122 file->GetPath().c_str()); 123 return nullptr; 124 } 125 data_offset = 0; 126 } 127 128 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( 129 module_sp, data_sp, data_offset, file, file_offset, length)); 130 ArchSpec spec = objfile_up->GetArchitecture(); 131 if (spec && objfile_up->SetModulesArchitecture(spec)) { 132 LLDB_LOGF(log, 133 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", 134 static_cast<void *>(objfile_up.get()), 135 static_cast<void *>(objfile_up->GetModule().get()), 136 objfile_up->GetModule()->GetSpecificationDescription().c_str(), 137 file ? file->GetPath().c_str() : "<NULL>"); 138 return objfile_up.release(); 139 } 140 141 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); 142 return nullptr; 143 } 144 145 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, 146 DataBufferSP &data_sp, 147 const ProcessSP &process_sp, 148 addr_t header_addr) { 149 if (!ValidateModuleHeader(data_sp)) 150 return nullptr; 151 152 std::unique_ptr<ObjectFileWasm> objfile_up( 153 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); 154 ArchSpec spec = objfile_up->GetArchitecture(); 155 if (spec && objfile_up->SetModulesArchitecture(spec)) 156 return objfile_up.release(); 157 return nullptr; 158 } 159 160 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { 161 // Buffer sufficient to read a section header and find the pointer to the next 162 // section. 163 const uint32_t kBufferSize = 1024; 164 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); 165 166 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 167 llvm::DataExtractor::Cursor c(0); 168 169 // Each section consists of: 170 // - a one-byte section id, 171 // - the u32 size of the contents, in bytes, 172 // - the actual contents. 173 uint8_t section_id = data.getU8(c); 174 uint64_t payload_len = data.getULEB128(c); 175 if (!c) 176 return !llvm::errorToBool(c.takeError()); 177 178 if (payload_len >= (uint64_t(1) << 32)) 179 return false; 180 181 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { 182 // Custom sections have the id 0. Their contents consist of a name 183 // identifying the custom section, followed by an uninterpreted sequence 184 // of bytes. 185 lldb::offset_t prev_offset = c.tell(); 186 llvm::Optional<ConstString> sect_name = GetWasmString(data, c); 187 if (!sect_name) 188 return false; 189 190 if (payload_len < c.tell() - prev_offset) 191 return false; 192 193 uint32_t section_length = payload_len - (c.tell() - prev_offset); 194 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, 195 section_id, *sect_name}); 196 *offset_ptr += (c.tell() + section_length); 197 } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) { 198 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), 199 static_cast<uint32_t>(payload_len), 200 section_id, ConstString()}); 201 *offset_ptr += (c.tell() + payload_len); 202 } else { 203 // Invalid section id. 204 return false; 205 } 206 return true; 207 } 208 209 bool ObjectFileWasm::DecodeSections() { 210 lldb::offset_t offset = kWasmHeaderSize; 211 if (IsInMemory()) { 212 offset += m_memory_addr; 213 } 214 215 while (DecodeNextSection(&offset)) 216 ; 217 return true; 218 } 219 220 size_t ObjectFileWasm::GetModuleSpecifications( 221 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, 222 offset_t file_offset, offset_t length, ModuleSpecList &specs) { 223 if (!ValidateModuleHeader(data_sp)) { 224 return 0; 225 } 226 227 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); 228 specs.Append(spec); 229 return 1; 230 } 231 232 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp, 233 offset_t data_offset, const FileSpec *file, 234 offset_t offset, offset_t length) 235 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), 236 m_arch("wasm32-unknown-unknown-wasm"), m_code_section_offset(0) { 237 m_data.SetAddressByteSize(4); 238 } 239 240 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, 241 lldb::DataBufferSP &header_data_sp, 242 const lldb::ProcessSP &process_sp, 243 lldb::addr_t header_addr) 244 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 245 m_arch("wasm32-unknown-unknown-wasm"), m_code_section_offset(0) {} 246 247 bool ObjectFileWasm::ParseHeader() { 248 // We already parsed the header during initialization. 249 return true; 250 } 251 252 Symtab *ObjectFileWasm::GetSymtab() { return nullptr; } 253 254 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { 255 if (m_sections_up) 256 return; 257 258 m_sections_up = std::make_unique<SectionList>(); 259 260 if (m_sect_infos.empty()) { 261 DecodeSections(); 262 } 263 264 for (const section_info §_info : m_sect_infos) { 265 SectionType section_type = eSectionTypeOther; 266 ConstString section_name; 267 offset_t file_offset = 0; 268 addr_t vm_addr = 0; 269 size_t vm_size = 0; 270 271 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { 272 section_type = eSectionTypeCode; 273 section_name = ConstString("code"); 274 m_code_section_offset = sect_info.offset & 0xffffffff; 275 vm_size = sect_info.size; 276 } else { 277 section_type = 278 llvm::StringSwitch<SectionType>(sect_info.name.GetStringRef()) 279 .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) 280 .Case(".debug_addr", eSectionTypeDWARFDebugAddr) 281 .Case(".debug_aranges", eSectionTypeDWARFDebugAranges) 282 .Case(".debug_cu_index", eSectionTypeDWARFDebugCuIndex) 283 .Case(".debug_frame", eSectionTypeDWARFDebugFrame) 284 .Case(".debug_info", eSectionTypeDWARFDebugInfo) 285 .Case(".debug_line", eSectionTypeDWARFDebugLine) 286 .Case(".debug_line_str", eSectionTypeDWARFDebugLineStr) 287 .Case(".debug_loc", eSectionTypeDWARFDebugLoc) 288 .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists) 289 .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo) 290 .Case(".debug_macro", eSectionTypeDWARFDebugMacro) 291 .Case(".debug_names", eSectionTypeDWARFDebugNames) 292 .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) 293 .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) 294 .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) 295 .Case(".debug_rnglists", eSectionTypeDWARFDebugRngLists) 296 .Case(".debug_str", eSectionTypeDWARFDebugStr) 297 .Case(".debug_str_offsets", eSectionTypeDWARFDebugStrOffsets) 298 .Case(".debug_types", eSectionTypeDWARFDebugTypes) 299 .Default(eSectionTypeOther); 300 if (section_type == eSectionTypeOther) 301 continue; 302 section_name = sect_info.name; 303 file_offset = sect_info.offset & 0xffffffff; 304 if (IsInMemory()) { 305 vm_addr = sect_info.offset & 0xffffffff; 306 vm_size = sect_info.size; 307 } 308 } 309 310 SectionSP section_sp( 311 new Section(GetModule(), // Module to which this section belongs. 312 this, // ObjectFile to which this section belongs and 313 // should read section data from. 314 section_type, // Section ID. 315 section_name, // Section name. 316 section_type, // Section type. 317 vm_addr, // VM address. 318 vm_size, // VM size in bytes of this section. 319 file_offset, // Offset of this section in the file. 320 sect_info.size, // Size of the section as found in the file. 321 0, // Alignment of the section 322 0, // Flags for this section. 323 1)); // Number of host bytes per target byte 324 m_sections_up->AddSection(section_sp); 325 unified_section_list.AddSection(section_sp); 326 } 327 } 328 329 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, 330 bool value_is_offset) { 331 /// In WebAssembly, linear memory is disjointed from code space. The VM can 332 /// load multiple instances of a module, which logically share the same code. 333 /// We represent a wasm32 code address with 64-bits, like: 334 /// 63 32 31 0 335 /// +---------------+---------------+ 336 /// + module_id | offset | 337 /// +---------------+---------------+ 338 /// where the lower 32 bits represent a module offset (relative to the module 339 /// start not to the beginning of the code section) and the higher 32 bits 340 /// uniquely identify the module in the WebAssembly VM. 341 /// In other words, we assume that each WebAssembly module is loaded by the 342 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like 343 /// 0x0000000400000000 for module_id == 4. 344 /// These 64-bit addresses will be used to request code ranges for a specific 345 /// module from the WebAssembly engine. 346 ModuleSP module_sp = GetModule(); 347 if (!module_sp) 348 return false; 349 350 DecodeSections(); 351 352 size_t num_loaded_sections = 0; 353 SectionList *section_list = GetSectionList(); 354 if (!section_list) 355 return false; 356 357 const size_t num_sections = section_list->GetSize(); 358 size_t sect_idx = 0; 359 360 for (sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 361 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 362 if (target.GetSectionLoadList().SetSectionLoadAddress( 363 section_sp, load_address | section_sp->GetFileAddress())) { 364 ++num_loaded_sections; 365 } 366 } 367 368 return num_loaded_sections > 0; 369 } 370 371 DataExtractor ObjectFileWasm::ReadImageData(uint64_t offset, size_t size) { 372 DataExtractor data; 373 if (m_file) { 374 if (offset < GetByteSize()) { 375 size = std::min(size, (size_t) (GetByteSize() - offset)); 376 auto buffer_sp = MapFileData(m_file, size, offset); 377 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); 378 } 379 } else { 380 ProcessSP process_sp(m_process_wp.lock()); 381 if (process_sp) { 382 auto data_up = std::make_unique<DataBufferHeap>(size, 0); 383 Status readmem_error; 384 size_t bytes_read = process_sp->ReadMemory( 385 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error); 386 if (bytes_read > 0) { 387 DataBufferSP buffer_sp(data_up.release()); 388 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); 389 } 390 } 391 } 392 393 data.SetByteOrder(GetByteOrder()); 394 return data; 395 } 396 397 llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { 398 static ConstString g_sect_name_external_debug_info("external_debug_info"); 399 400 for (const section_info §_info : m_sect_infos) { 401 if (g_sect_name_external_debug_info == sect_info.name) { 402 const uint32_t kBufferSize = 1024; 403 DataExtractor section_header_data = 404 ReadImageData(sect_info.offset, kBufferSize); 405 llvm::DataExtractor data = section_header_data.GetAsLLVM(); 406 llvm::DataExtractor::Cursor c(0); 407 llvm::Optional<ConstString> symbols_url = GetWasmString(data, c); 408 if (symbols_url) 409 return FileSpec(symbols_url->GetStringRef()); 410 } 411 } 412 return llvm::None; 413 } 414 415 void ObjectFileWasm::Dump(Stream *s) { 416 ModuleSP module_sp(GetModule()); 417 if (!module_sp) 418 return; 419 420 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 421 422 llvm::raw_ostream &ostream = s->AsRawOstream(); 423 ostream << static_cast<void *>(this) << ": "; 424 s->Indent(); 425 ostream << "ObjectFileWasm, file = '"; 426 m_file.Dump(ostream); 427 ostream << "', arch = "; 428 ostream << GetArchitecture().GetArchitectureName() << "\n"; 429 430 SectionList *sections = GetSectionList(); 431 if (sections) { 432 sections->Dump(s, nullptr, true, UINT32_MAX); 433 } 434 ostream << "\n"; 435 DumpSectionHeaders(ostream); 436 ostream << "\n"; 437 } 438 439 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, 440 const section_info_t &sh) { 441 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " 442 << llvm::format_hex(sh.offset, 10) << " " 443 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) 444 << "\n"; 445 } 446 447 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { 448 ostream << "Section Headers\n"; 449 ostream << "IDX name addr size id\n"; 450 ostream << "==== ---------------- ---------- ---------- ------\n"; 451 452 uint32_t idx = 0; 453 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); 454 ++pos, ++idx) { 455 ostream << "[" << llvm::format_decimal(idx, 2) << "] "; 456 ObjectFileWasm::DumpSectionHeader(ostream, *pos); 457 } 458 } 459