1 //===-- ObjectFileBreakpad.cpp -------------------------------- -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h" 11 #include "lldb/Core/ModuleSpec.h" 12 #include "lldb/Core/PluginManager.h" 13 #include "lldb/Core/Section.h" 14 #include "lldb/Utility/DataBuffer.h" 15 #include "llvm/ADT/StringExtras.h" 16 17 using namespace lldb; 18 using namespace lldb_private; 19 using namespace lldb_private::breakpad; 20 21 namespace { 22 struct Header { 23 ArchSpec arch; 24 UUID uuid; 25 static llvm::Optional<Header> parse(llvm::StringRef text); 26 }; 27 28 enum class Token { Unknown, Module, Info, File, Func, Public, Stack }; 29 } // namespace 30 31 static Token toToken(llvm::StringRef str) { 32 return llvm::StringSwitch<Token>(str) 33 .Case("MODULE", Token::Module) 34 .Case("INFO", Token::Info) 35 .Case("FILE", Token::File) 36 .Case("FUNC", Token::Func) 37 .Case("PUBLIC", Token::Public) 38 .Case("STACK", Token::Stack) 39 .Default(Token::Unknown); 40 } 41 42 static llvm::StringRef toString(Token t) { 43 switch (t) { 44 case Token::Unknown: 45 return ""; 46 case Token::Module: 47 return "MODULE"; 48 case Token::Info: 49 return "INFO"; 50 case Token::File: 51 return "FILE"; 52 case Token::Func: 53 return "FUNC"; 54 case Token::Public: 55 return "PUBLIC"; 56 case Token::Stack: 57 return "STACK"; 58 } 59 llvm_unreachable("Unknown token!"); 60 } 61 62 static llvm::Triple::OSType toOS(llvm::StringRef str) { 63 using llvm::Triple; 64 return llvm::StringSwitch<Triple::OSType>(str) 65 .Case("Linux", Triple::Linux) 66 .Case("mac", Triple::MacOSX) 67 .Case("windows", Triple::Win32) 68 .Default(Triple::UnknownOS); 69 } 70 71 static llvm::Triple::ArchType toArch(llvm::StringRef str) { 72 using llvm::Triple; 73 return llvm::StringSwitch<Triple::ArchType>(str) 74 .Case("arm", Triple::arm) 75 .Case("arm64", Triple::aarch64) 76 .Case("mips", Triple::mips) 77 .Case("ppc", Triple::ppc) 78 .Case("ppc64", Triple::ppc64) 79 .Case("s390", Triple::systemz) 80 .Case("sparc", Triple::sparc) 81 .Case("sparcv9", Triple::sparcv9) 82 .Case("x86", Triple::x86) 83 .Case("x86_64", Triple::x86_64) 84 .Default(Triple::UnknownArch); 85 } 86 87 static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) { 88 llvm::StringRef result = str.take_front(n); 89 str = str.drop_front(n); 90 return result; 91 } 92 93 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 94 struct uuid_data { 95 llvm::support::ulittle32_t uuid1; 96 llvm::support::ulittle16_t uuid2[2]; 97 uint8_t uuid3[8]; 98 llvm::support::ulittle32_t age; 99 } data; 100 static_assert(sizeof(data) == 20, ""); 101 // The textual module id encoding should be between 33 and 40 bytes long, 102 // depending on the size of the age field, which is of variable length. 103 // The first three chunks of the id are encoded in big endian, so we need to 104 // byte-swap those. 105 if (str.size() < 33 || str.size() > 40) 106 return UUID(); 107 uint32_t t; 108 if (to_integer(consume_front(str, 8), t, 16)) 109 data.uuid1 = t; 110 else 111 return UUID(); 112 for (int i = 0; i < 2; ++i) { 113 if (to_integer(consume_front(str, 4), t, 16)) 114 data.uuid2[i] = t; 115 else 116 return UUID(); 117 } 118 for (int i = 0; i < 8; ++i) { 119 if (!to_integer(consume_front(str, 2), data.uuid3[i], 16)) 120 return UUID(); 121 } 122 if (to_integer(str, t, 16)) 123 data.age = t; 124 else 125 return UUID(); 126 127 // On non-windows, the age field should always be zero, so we don't include to 128 // match the native uuid format of these platforms. 129 return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16); 130 } 131 132 llvm::Optional<Header> Header::parse(llvm::StringRef text) { 133 // A valid module should start with something like: 134 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 135 // optionally followed by 136 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 137 llvm::StringRef token, line; 138 std::tie(line, text) = text.split('\n'); 139 std::tie(token, line) = getToken(line); 140 if (toToken(token) != Token::Module) 141 return llvm::None; 142 143 std::tie(token, line) = getToken(line); 144 llvm::Triple triple; 145 triple.setOS(toOS(token)); 146 if (triple.getOS() == llvm::Triple::UnknownOS) 147 return llvm::None; 148 149 std::tie(token, line) = getToken(line); 150 triple.setArch(toArch(token)); 151 if (triple.getArch() == llvm::Triple::UnknownArch) 152 return llvm::None; 153 154 llvm::StringRef module_id; 155 std::tie(module_id, line) = getToken(line); 156 157 std::tie(line, text) = text.split('\n'); 158 std::tie(token, line) = getToken(line); 159 if (token == "INFO") { 160 std::tie(token, line) = getToken(line); 161 if (token != "CODE_ID") 162 return llvm::None; 163 164 std::tie(token, line) = getToken(line); 165 // If we don't have any text following the code id (e.g. on linux), we 166 // should use the module id as UUID. Otherwise, we revert back to the module 167 // id. 168 if (line.trim().empty()) { 169 UUID uuid; 170 if (uuid.SetFromStringRef(token, token.size() / 2) != token.size()) 171 return llvm::None; 172 173 return Header{ArchSpec(triple), uuid}; 174 } 175 } 176 177 // We reach here if we don't have a INFO CODE_ID section, or we chose not to 178 // use it. In either case, we need to properly decode the module id, whose 179 // fields are encoded in big-endian. 180 UUID uuid = parseModuleId(triple.getOS(), module_id); 181 if (!uuid) 182 return llvm::None; 183 184 return Header{ArchSpec(triple), uuid}; 185 } 186 187 void ObjectFileBreakpad::Initialize() { 188 PluginManager::RegisterPlugin(GetPluginNameStatic(), 189 GetPluginDescriptionStatic(), CreateInstance, 190 CreateMemoryInstance, GetModuleSpecifications); 191 } 192 193 void ObjectFileBreakpad::Terminate() { 194 PluginManager::UnregisterPlugin(CreateInstance); 195 } 196 197 ConstString ObjectFileBreakpad::GetPluginNameStatic() { 198 static ConstString g_name("breakpad"); 199 return g_name; 200 } 201 202 ObjectFile *ObjectFileBreakpad::CreateInstance( 203 const ModuleSP &module_sp, DataBufferSP &data_sp, offset_t data_offset, 204 const FileSpec *file, offset_t file_offset, offset_t length) { 205 if (!data_sp) { 206 data_sp = MapFileData(*file, length, file_offset); 207 if (!data_sp) 208 return nullptr; 209 data_offset = 0; 210 } 211 auto text = toStringRef(data_sp->GetData()); 212 llvm::Optional<Header> header = Header::parse(text); 213 if (!header) 214 return nullptr; 215 216 // Update the data to contain the entire file if it doesn't already 217 if (data_sp->GetByteSize() < length) { 218 data_sp = MapFileData(*file, length, file_offset); 219 if (!data_sp) 220 return nullptr; 221 data_offset = 0; 222 } 223 224 return new ObjectFileBreakpad(module_sp, data_sp, data_offset, file, 225 file_offset, length, std::move(header->arch), 226 std::move(header->uuid)); 227 } 228 229 ObjectFile *ObjectFileBreakpad::CreateMemoryInstance( 230 const ModuleSP &module_sp, DataBufferSP &data_sp, 231 const ProcessSP &process_sp, addr_t header_addr) { 232 return nullptr; 233 } 234 235 size_t ObjectFileBreakpad::GetModuleSpecifications( 236 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, 237 offset_t file_offset, offset_t length, ModuleSpecList &specs) { 238 auto text = toStringRef(data_sp->GetData()); 239 llvm::Optional<Header> header = Header::parse(text); 240 if (!header) 241 return 0; 242 ModuleSpec spec(file, std::move(header->arch)); 243 spec.GetUUID() = std::move(header->uuid); 244 specs.Append(spec); 245 return 1; 246 } 247 248 ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp, 249 DataBufferSP &data_sp, 250 offset_t data_offset, 251 const FileSpec *file, offset_t offset, 252 offset_t length, ArchSpec arch, 253 UUID uuid) 254 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), 255 m_arch(std::move(arch)), m_uuid(std::move(uuid)) {} 256 257 bool ObjectFileBreakpad::ParseHeader() { 258 // We already parsed the header during initialization. 259 return true; 260 } 261 262 Symtab *ObjectFileBreakpad::GetSymtab() { 263 // TODO 264 return nullptr; 265 } 266 267 bool ObjectFileBreakpad::GetUUID(UUID *uuid) { 268 *uuid = m_uuid; 269 return true; 270 } 271 272 void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) { 273 if (m_sections_ap) 274 return; 275 m_sections_ap = llvm::make_unique<SectionList>(); 276 277 Token current_section = Token::Unknown; 278 offset_t section_start; 279 llvm::StringRef text = toStringRef(m_data.GetData()); 280 uint32_t next_section_id = 1; 281 auto maybe_add_section = [&](const uint8_t *end_ptr) { 282 if (current_section == Token::Unknown) 283 return; // We have been called before parsing the first line. 284 285 offset_t end_offset = end_ptr - m_data.GetDataStart(); 286 auto section_sp = std::make_shared<Section>( 287 GetModule(), this, next_section_id++, 288 ConstString(toString(current_section)), eSectionTypeOther, 289 /*file_vm_addr*/ 0, /*vm_size*/ 0, section_start, 290 end_offset - section_start, /*log2align*/ 0, /*flags*/ 0); 291 m_sections_ap->AddSection(section_sp); 292 unified_section_list.AddSection(section_sp); 293 }; 294 while (!text.empty()) { 295 llvm::StringRef line; 296 std::tie(line, text) = text.split('\n'); 297 298 Token token = toToken(getToken(line).first); 299 if (token == Token::Unknown) { 300 // We assume this is a line record, which logically belongs to the Func 301 // section. Errors will be handled when parsing the Func section. 302 token = Token::Func; 303 } 304 if (token == current_section) 305 continue; 306 307 // Changing sections, finish off the previous one, if there was any. 308 maybe_add_section(line.bytes_begin()); 309 // And start a new one. 310 current_section = token; 311 section_start = line.bytes_begin() - m_data.GetDataStart(); 312 } 313 // Finally, add the last section. 314 maybe_add_section(m_data.GetDataEnd()); 315 } 316