1 //===-- ObjectFileMachO.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/ScopeExit.h" 10 #include "llvm/ADT/StringRef.h" 11 12 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h" 13 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h" 14 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h" 15 #include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h" 16 #include "lldb/Core/Debugger.h" 17 #include "lldb/Core/Module.h" 18 #include "lldb/Core/ModuleSpec.h" 19 #include "lldb/Core/PluginManager.h" 20 #include "lldb/Core/Progress.h" 21 #include "lldb/Core/Section.h" 22 #include "lldb/Host/Host.h" 23 #include "lldb/Symbol/DWARFCallFrameInfo.h" 24 #include "lldb/Symbol/ObjectFile.h" 25 #include "lldb/Target/DynamicLoader.h" 26 #include "lldb/Target/MemoryRegionInfo.h" 27 #include "lldb/Target/Platform.h" 28 #include "lldb/Target/Process.h" 29 #include "lldb/Target/SectionLoadList.h" 30 #include "lldb/Target/Target.h" 31 #include "lldb/Target/Thread.h" 32 #include "lldb/Target/ThreadList.h" 33 #include "lldb/Utility/ArchSpec.h" 34 #include "lldb/Utility/DataBuffer.h" 35 #include "lldb/Utility/FileSpec.h" 36 #include "lldb/Utility/FileSpecList.h" 37 #include "lldb/Utility/LLDBLog.h" 38 #include "lldb/Utility/Log.h" 39 #include "lldb/Utility/RangeMap.h" 40 #include "lldb/Utility/RegisterValue.h" 41 #include "lldb/Utility/Status.h" 42 #include "lldb/Utility/StreamString.h" 43 #include "lldb/Utility/Timer.h" 44 #include "lldb/Utility/UUID.h" 45 46 #include "lldb/Host/SafeMachO.h" 47 48 #include "llvm/ADT/DenseSet.h" 49 #include "llvm/Support/FormatVariadic.h" 50 #include "llvm/Support/MemoryBuffer.h" 51 52 
#include "ObjectFileMachO.h"

#if defined(__APPLE__)
#include <TargetConditionals.h>
// GetLLDBSharedCacheUUID() needs to call dlsym()
#include <dlfcn.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <lldb/Host/SafeMachO.h>
#endif

#ifndef __APPLE__
#include "lldb/Utility/AppleUuidCompatibility.h"
#else
#include <uuid/uuid.h>
#endif

#include <bitset>
#include <memory>
#include <optional>

// Unfortunately the signpost header pulls in the system MachO header, too.
#ifdef CPU_TYPE_ARM
#undef CPU_TYPE_ARM
#endif
#ifdef CPU_TYPE_ARM64
#undef CPU_TYPE_ARM64
#endif
#ifdef CPU_TYPE_ARM64_32
#undef CPU_TYPE_ARM64_32
#endif
#ifdef CPU_TYPE_I386
#undef CPU_TYPE_I386
#endif
#ifdef CPU_TYPE_X86_64
#undef CPU_TYPE_X86_64
#endif
#ifdef MH_DYLINKER
#undef MH_DYLINKER
#endif
#ifdef MH_OBJECT
#undef MH_OBJECT
#endif
#ifdef LC_VERSION_MIN_MACOSX
#undef LC_VERSION_MIN_MACOSX
#endif
#ifdef LC_VERSION_MIN_IPHONEOS
#undef LC_VERSION_MIN_IPHONEOS
#endif
#ifdef LC_VERSION_MIN_TVOS
#undef LC_VERSION_MIN_TVOS
#endif
#ifdef LC_VERSION_MIN_WATCHOS
#undef LC_VERSION_MIN_WATCHOS
#endif
#ifdef LC_BUILD_VERSION
#undef LC_BUILD_VERSION
#endif
#ifdef PLATFORM_MACOS
#undef PLATFORM_MACOS
#endif
#ifdef PLATFORM_MACCATALYST
#undef PLATFORM_MACCATALYST
#endif
#ifdef PLATFORM_IOS
#undef PLATFORM_IOS
#endif
#ifdef PLATFORM_IOSSIMULATOR
#undef PLATFORM_IOSSIMULATOR
#endif
#ifdef PLATFORM_TVOS
#undef PLATFORM_TVOS
#endif
#ifdef PLATFORM_TVOSSIMULATOR
#undef PLATFORM_TVOSSIMULATOR
#endif
#ifdef PLATFORM_WATCHOS
#undef PLATFORM_WATCHOS
#endif
#ifdef PLATFORM_WATCHOSSIMULATOR
#undef PLATFORM_WATCHOSSIMULATOR
#endif

#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
using namespace lldb;
using namespace lldb_private;
using namespace llvm::MachO;

static constexpr llvm::StringLiteral g_loader_path = "@loader_path";
static constexpr llvm::StringLiteral g_executable_path = "@executable_path";

LLDB_PLUGIN_DEFINE(ObjectFileMachO)

/// Append exactly \a reg_byte_size bytes for one register to \a data.
///
/// The register is looked up by \a name and, if that fails, by \a alt_name.
/// When the register is narrower than \a reg_byte_size the value is followed
/// by zero bytes; when it cannot be found or read, only zero bytes are
/// written. The output size is therefore always \a reg_byte_size.
///
/// \param reg_ctx        Register context to read from (must not be null).
/// \param name           Primary register name.
/// \param alt_name       Fallback register name; callers in this file pass
///                       nullptr.
/// \param reg_byte_size  Number of bytes to emit.
/// \param data           Destination stream.
static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
  if (reg_info == nullptr)
    reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
  if (reg_info) {
    lldb_private::RegisterValue reg_value;
    if (reg_ctx->ReadRegister(reg_info, reg_value)) {
      if (reg_info->byte_size >= reg_byte_size)
        data.Write(reg_value.GetBytes(), reg_byte_size);
      else {
        data.Write(reg_value.GetBytes(), reg_info->byte_size);
        // Pad the remainder so the caller's fixed layout is preserved.
        for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
          data.PutChar(0);
      }
      return;
    }
  }
  // Just write zeros if all else fails
  for (size_t i = 0; i < reg_byte_size; ++i)
    data.PutChar(0);
}

/// x86_64 register context whose contents are parsed once, in the
/// constructor, from LC_THREAD register data.
class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
public:
  RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread,
                                    const DataExtractor &data)
      : RegisterContextDarwin_x86_64(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Fill gpr/exc from a sequence of (flavor, count, payload) records.
  ///
  /// All three register sets start out marked as failed (-1); a set is
  /// marked readable (0) only after its record has been consumed. Parsing
  /// stops at a zero flavor, an unknown flavor, or after the first
  /// GPR/FPU/EXC record.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      if (flavor == 0)
        done = true;
      else {
        uint32_t i;
        uint32_t count = data.GetU32(&offset);
        switch (flavor) {
        case GPRRegSet: {
          // `count` comes straight from the file. The loop stores one 64-bit
          // value per iteration starting at gpr.rax, so it must never run
          // past the last register in `gpr`.
          // NOTE(review): Create_LC_THREAD writes GPRWordCount here, which
          // looks like a 32-bit word count rather than a register count --
          // confirm the intended unit.
          const uint32_t gpr_reg_count = sizeof(gpr) / sizeof(gpr.rax);
          for (i = 0; i < count && i < gpr_reg_count; ++i)
            (&gpr.rax)[i] = data.GetU64(&offset);
          SetError(GPRRegSet, Read, 0);
          done = true;
          break;
        }
        case FPURegSet:
          // TODO: fill in FPU regs....
          // SetError (FPURegSet, Read, -1);
          done = true;

          break;
        case EXCRegSet:
          exc.trapno = data.GetU32(&offset);
          exc.err = data.GetU32(&offset);
          exc.faultvaddr = data.GetU64(&offset);
          SetError(EXCRegSet, Read, 0);
          done = true;
          break;
        case 7:
        case 8:
        case 9:
          // fancy flavors that encapsulate of the above flavors...
          break;

        default:
          done = true;
          break;
        }
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data, each
  /// preceded by its flavor and word count.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (reg_ctx_sp) {
      RegisterContext *reg_ctx = reg_ctx_sp.get();

      data.PutHex32(GPRRegSet); // Flavor
      data.PutHex32(GPRWordCount);
      PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);

      // The FPU register set is not written. The disabled sketch below is
      // kept for reference; it relies on a WriteRegister helper that is not
      // defined in the visible part of this file.
      //
      //    // Write out the FPU registers
      //    const size_t fpu_byte_size = sizeof(FPU);
      //    size_t bytes_written = 0;
      //    data.PutHex32 (FPURegSet);
      //    data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
      //    bytes_written += data.PutHex32(0); // uint32_t pad[0]
      //    bytes_written += data.PutHex32(0); // uint32_t pad[1]
      //    bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
      //    data);   // uint16_t    fcw;    // "fctrl"
      //    bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
      //    data);  // uint16_t    fsw;    // "fstat"
      //    bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
      //    data);   // uint8_t     ftw;    // "ftag"
      //    bytes_written += data.PutHex8  (0); // uint8_t pad1;
      //    bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
      //    data);     // uint16_t    fop;    // "fop"
      //    bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
      //    data);    // uint32_t    ip;     // "fioff"
      //    bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
      //    data);    // uint16_t    cs;     // "fiseg"
      //    bytes_written += data.PutHex16 (0); // uint16_t    pad2;
      //    bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
      //    data);   // uint32_t    dp;     // "fooff"
      //    bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
      //    data);    // uint16_t    ds;     // "foseg"
      //    bytes_written += data.PutHex16 (0); // uint16_t    pad3;
      //    bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
      //    data);    // uint32_t    mxcsr;
      //    bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
      //    4, data);// uint32_t    mxcsrmask;
      //    bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
      //    sizeof(MMSReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
      //    sizeof(XMMReg), data);
      //    bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
      //    sizeof(XMMReg), data);
      //
      //    // Fill rest with zeros
      //    for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
      //    i)
      //        data.PutChar(0);

      // Write out the EXC registers
      data.PutHex32(EXCRegSet);
      data.PutHex32(EXCWordCount);
      PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
      return true;
    }
    return false;
  }

protected:
  // The register data was parsed in the constructor, so these hooks do no
  // work and simply return 0.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }
};

/// i386 register context whose contents are parsed once, in the constructor,
/// from LC_THREAD register data.
class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
public:
  RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread,
                                  const DataExtractor &data)
      : RegisterContextDarwin_i386(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Fill gpr/exc from a sequence of (flavor, count, payload) records.
  ///
  /// All three register sets start out marked as failed (-1); a set is
  /// marked readable (0) only after its record has been consumed. Parsing
  /// stops at a zero flavor, an unknown flavor, or after the first
  /// GPR/FPU/EXC record.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      if (flavor == 0)
        done = true;
      else {
        uint32_t i;
        uint32_t count = data.GetU32(&offset);
        switch (flavor) {
        case GPRRegSet: {
          // `count` is read from the file; clamp it so the stores that start
          // at gpr.eax stay inside `gpr`.
          const uint32_t gpr_reg_count = sizeof(gpr) / sizeof(gpr.eax);
          for (i = 0; i < count && i < gpr_reg_count; ++i)
            (&gpr.eax)[i] = data.GetU32(&offset);
          SetError(GPRRegSet, Read, 0);
          done = true;
          break;
        }
        case FPURegSet:
          // TODO: fill in FPU regs....
          // SetError (FPURegSet, Read, -1);
          done = true;

          break;
        case EXCRegSet:
          exc.trapno = data.GetU32(&offset);
          exc.err = data.GetU32(&offset);
          exc.faultvaddr = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
          done = true;
          break;
        case 7:
        case 8:
        case 9:
          // fancy flavors that encapsulate of the above flavors...
          break;

        default:
          done = true;
          break;
        }
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data, each
  /// preceded by its flavor and word count.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (reg_ctx_sp) {
      RegisterContext *reg_ctx = reg_ctx_sp.get();

      data.PutHex32(GPRRegSet); // Flavor
      data.PutHex32(GPRWordCount);
      PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);

      // Write out the EXC registers
      data.PutHex32(EXCRegSet);
      data.PutHex32(EXCWordCount);
      PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
      return true;
    }
    return false;
  }

protected:
  // The register data was parsed in the constructor, so these hooks do no
  // work and simply return 0.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }
};

/// 32-bit ARM register context whose contents are parsed once, in the
/// constructor, from LC_THREAD register data.
class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
public:
  RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread,
                                 const DataExtractor &data)
      : RegisterContextDarwin_arm(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Fill gpr/fpu/exc from a sequence of (flavor, count, payload) records.
  ///
  /// Each record's payload is `count` 32-bit words; after handling a known
  /// record the cursor jumps to the start of the next one regardless of how
  /// much of the payload was consumed. Parsing stops after an EXC record, on
  /// an unknown flavor, or when the FPU payload cannot be extracted.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      uint32_t count = data.GetU32(&offset);
      // Widen before multiplying so a large file-supplied count cannot wrap
      // in 32-bit arithmetic.
      lldb::offset_t next_thread_state =
          offset + (static_cast<lldb::offset_t>(count) * 4);
      switch (flavor) {
      case GPRAltRegSet:
      case GPRRegSet: {
        // r0-r15, plus CPSR
        uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
        if (count == gpr_buf_count) {
          for (uint32_t i = 0; i < (count - 1); ++i) {
            gpr.r[i] = data.GetU32(&offset);
          }
          gpr.cpsr = data.GetU32(&offset);

          SetError(GPRRegSet, Read, 0);
        }
      }
        offset = next_thread_state;
        break;

      case FPURegSet: {
        uint8_t *fpu_reg_buf = reinterpret_cast<uint8_t *>(&fpu.floats);
        const int fpu_reg_buf_size = sizeof(fpu.floats);
        if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
                              fpu_reg_buf) == fpu_reg_buf_size) {
          offset += fpu_reg_buf_size;
          fpu.fpscr = data.GetU32(&offset);
          SetError(FPURegSet, Read, 0);
        } else {
          done = true;
        }
      }
        offset = next_thread_state;
        break;

      case EXCRegSet:
        if (count == 3) {
          exc.exception = data.GetU32(&offset);
          exc.fsr = data.GetU32(&offset);
          exc.far = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
        }
        done = true;
        offset = next_thread_state;
        break;

      // Unknown register set flavor, stop trying to parse.
      default:
        done = true;
      }
    }
  }

  /// Serialize the GPR register set of \a thread into \a data, preceded by
  /// its flavor and word count. The EXC set is not written.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (reg_ctx_sp) {
      RegisterContext *reg_ctx = reg_ctx_sp.get();

      data.PutHex32(GPRRegSet); // Flavor
      data.PutHex32(GPRWordCount);
      PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);

      // Write out the EXC registers
      //            data.PutHex32 (EXCRegSet);
      //            data.PutHex32 (EXCWordCount);
      //            WriteRegister (reg_ctx, "exception", NULL, 4, data);
      //            WriteRegister (reg_ctx, "fsr", NULL, 4, data);
      //            WriteRegister (reg_ctx, "far", NULL, 4, data);
      return true;
    }
    return false;
  }

protected:
  // No thread is queried by these hooks: reads and DBG accesses return -1,
  // GPR/FPU/EXC writes return 0.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }

  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }

  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
    return -1;
  }
};

/// arm64 register context whose contents are parsed once, in the
/// constructor, from LC_THREAD register data.
class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
public:
  RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread,
                                   const DataExtractor &data)
      : RegisterContextDarwin_arm64(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Fill gpr/fpu/exc from a sequence of (flavor, count, payload) records.
  ///
  /// Each record's payload is `count` 32-bit words; after handling a known
  /// record the cursor jumps to the start of the next one. Parsing stops on
  /// an unknown flavor or when the FPU payload has an unexpected size or
  /// cannot be extracted.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;
    while (!done) {
      int flavor = data.GetU32(&offset);
      uint32_t count = data.GetU32(&offset);
      // Widen before multiplying so a large file-supplied count cannot wrap
      // in 32-bit arithmetic.
      lldb::offset_t next_thread_state =
          offset + (static_cast<lldb::offset_t>(count) * 4);
      switch (flavor) {
      case GPRRegSet:
        // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
        // 32-bit register)
        if (count >= (33 * 2) + 1) {
          for (uint32_t i = 0; i < 29; ++i)
            gpr.x[i] = data.GetU64(&offset);
          gpr.fp = data.GetU64(&offset);
          gpr.lr = data.GetU64(&offset);
          gpr.sp = data.GetU64(&offset);
          gpr.pc = data.GetU64(&offset);
          gpr.cpsr = data.GetU32(&offset);
          SetError(GPRRegSet, Read, 0);
        }
        offset = next_thread_state;
        break;
      case FPURegSet: {
        uint8_t *fpu_reg_buf = reinterpret_cast<uint8_t *>(&fpu.v[0]);
        const int fpu_reg_buf_size = sizeof(fpu);
        // Only accept a payload whose size matches the FPU struct exactly.
        if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
            data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
                              fpu_reg_buf) == fpu_reg_buf_size) {
          SetError(FPURegSet, Read, 0);
        } else {
          done = true;
        }
      }
        offset = next_thread_state;
        break;
      case EXCRegSet:
        if (count == 4) {
          exc.far = data.GetU64(&offset);
          exc.esr = data.GetU32(&offset);
          exc.exception = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
        }
        offset = next_thread_state;
        break;
      default:
        done = true;
        break;
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data, each
  /// preceded by its flavor and word count.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (reg_ctx_sp) {
      RegisterContext *reg_ctx = reg_ctx_sp.get();

      data.PutHex32(GPRRegSet); // Flavor
      data.PutHex32(GPRWordCount);
      PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
      data.PutHex32(0); // uint32_t pad at the end

      // Write out the EXC registers
      data.PutHex32(EXCRegSet);
      data.PutHex32(EXCWordCount);
      PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
      PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
      PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
      return true;
    }
    return false;
  }

protected:
  // No thread is queried by these hooks: reads and DBG accesses return -1,
  // GPR/FPU/EXC writes return 0.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }

  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }

  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
    return -1;
  }
};

/// Return the size in bytes of the Mach-O header identified by \a magic
/// (32- or 64-bit, either byte order), or 0 when \a magic is not one of the
/// four recognized Mach-O magic values.
static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
  switch (magic) {
  case MH_MAGIC:
  case MH_CIGAM:
    return sizeof(struct llvm::MachO::mach_header);

  case MH_MAGIC_64:
  case MH_CIGAM_64:
    return sizeof(struct llvm::MachO::mach_header_64);

  default:
    break;
  }
  return 0;
}

#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008

char ObjectFileMachO::ID;

/// Register this plugin's factory and helper callbacks with the
/// PluginManager.
void ObjectFileMachO::Initialize() {
  PluginManager::RegisterPlugin(
      GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
      CreateMemoryInstance, GetModuleSpecifications, SaveCore);
}

/// Remove the registration made by Initialize().
void ObjectFileMachO::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}

/// Create an ObjectFileMachO for file-backed data.
///
/// When \a data_sp is null, or holds fewer than \a length bytes, the data is
/// (re)mapped from \a file at \a file_offset and \a data_offset is reset to 0.
///
/// \return A heap-allocated object owned by the caller, or nullptr when the
///         data cannot be mapped (including when mapping is needed but
///         \a file is null), the magic bytes do not match, or the header
///         fails to parse.
ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
                                            DataBufferSP data_sp,
                                            lldb::offset_t data_offset,
                                            const FileSpec *file,
                                            lldb::offset_t file_offset,
                                            lldb::offset_t length) {
  if (!data_sp) {
    // Mapping needs a file to read from.
    if (!file)
      return nullptr;
    data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    data_offset = 0;
  }

  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
    return nullptr;

  // Update the data to contain the entire file if it doesn't already
  if (data_sp->GetByteSize() < length) {
    if (!file)
      return nullptr;
    data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    data_offset = 0;
  }
  auto objfile_up = std::make_unique<ObjectFileMachO>(
      module_sp, data_sp, data_offset, file, file_offset, length);
  if (!objfile_up->ParseHeader())
    return nullptr;

  return objfile_up.release();
}

/// Create an ObjectFileMachO for an image whose header bytes were read from
/// process memory at \a header_addr.
///
/// \return A heap-allocated object owned by the caller, or nullptr when
///         \a data_sp is null, the magic bytes do not match, or the header
///         fails to parse.
ObjectFile *ObjectFileMachO::CreateMemoryInstance(
    const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
    const ProcessSP &process_sp, lldb::addr_t header_addr) {
  if (!data_sp)
    return nullptr;

  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
    std::unique_ptr<ObjectFile> objfile_up = std::make_unique<ObjectFileMachO>(
        module_sp, data_sp, process_sp, header_addr);
    if (objfile_up->ParseHeader())
      return objfile_up.release();
  }
  return nullptr;
}

/// Append one ModuleSpec per architecture found in the Mach-O data to
/// \a specs.
///
/// If the buffer does not extend past the header plus load commands, that
/// region is remapped from \a file and \a data_sp is replaced.
///
/// \return The number of specs added by this call.
size_t ObjectFileMachO::GetModuleSpecifications(
    const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
    lldb::offset_t data_offset, lldb::offset_t file_offset,
    lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
  const size_t initial_count = specs.GetSize();

  // Nothing to inspect without data.
  if (!data_sp)
    return 0;

  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
    DataExtractor data;
    data.SetData(data_sp);
    llvm::MachO::mach_header header;
    if (ParseHeader(data, &data_offset, header)) {
      size_t header_and_load_cmds =
          header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
      if (header_and_load_cmds >= data_sp->GetByteSize()) {
        data_sp = MapFileData(file, header_and_load_cmds, file_offset);
        data.SetData(data_sp);
        data_offset = MachHeaderSizeFromMagic(header.magic);
      }
      if (data_sp) {
        ModuleSpec base_spec;
        base_spec.GetFileSpec() = file;
        base_spec.SetObjectOffset(file_offset);
        base_spec.SetObjectSize(length);
        GetAllArchSpecs(header, data, data_offset, base_spec, specs);
      }
    }
  }
  return specs.GetSize() - initial_count;
}

// Accessors for well-known Mach-O segment/section names. Each returns a
// function-local static ConstString built from the literal shown.

ConstString ObjectFileMachO::GetSegmentNameTEXT() {
  static ConstString g_segment_name_TEXT("__TEXT");
  return g_segment_name_TEXT;
}

ConstString ObjectFileMachO::GetSegmentNameDATA() {
  static ConstString g_segment_name_DATA("__DATA");
  return g_segment_name_DATA;
}

ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() {
  static ConstString g_segment_name("__DATA_DIRTY");
  return g_segment_name;
}

ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() {
  static ConstString g_segment_name("__DATA_CONST");
  return g_segment_name;
}

ConstString ObjectFileMachO::GetSegmentNameOBJC() {
  static ConstString g_segment_name_OBJC("__OBJC");
  return g_segment_name_OBJC;
}

ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() {
  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
  return g_section_name_LINKEDIT;
}

ConstString ObjectFileMachO::GetSegmentNameDWARF() {
  static ConstString g_section_name("__DWARF");
  return g_section_name;
}

ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() {
  static ConstString g_section_name("__LLVM_COV");
  return g_section_name;
}

ConstString ObjectFileMachO::GetSectionNameEHFrame() {
  static ConstString g_section_name_eh_frame("__eh_frame");
  return g_section_name_eh_frame;
}

/// Check whether the bytes at \a data_offset start with a Mach-O header that
/// this plugin handles.
///
/// \return false for an MH_FILESET image or an unrecognized magic value,
///         true otherwise.
bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
                                      lldb::addr_t data_offset,
                                      lldb::addr_t data_length) {
  DataExtractor data;
  data.SetData(data_sp, data_offset, data_length);
  lldb::offset_t offset = 0;
  uint32_t magic = data.GetU32(&offset);

  offset += 4; // cputype
  offset += 4; // cpusubtype
  uint32_t filetype = data.GetU32(&offset);

  // A fileset has a Mach-O header but is not an
  // individual file and must be handled via an
  // ObjectContainer plugin.
  if (filetype == llvm::MachO::MH_FILESET)
    return false;

  return MachHeaderSizeFromMagic(magic) != 0;
}

/// Construct from file-backed data; the header and dysymtab members are
/// zero-filled until parsed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 DataBufferSP data_sp,
                                 lldb::offset_t data_offset,
                                 const FileSpec *file,
                                 lldb::offset_t file_offset,
                                 lldb::offset_t length)
    : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}

/// Construct from header bytes read out of a process at \a header_addr; the
/// header and dysymtab members are zero-filled until parsed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 lldb::WritableDataBufferSP header_data_sp,
                                 const lldb::ProcessSP &process_sp,
                                 lldb::addr_t header_addr)
    : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}

/// Parse a Mach-O header from \a data starting at \a *data_offset_ptr.
///
/// The magic is read in host byte order and left as-is; it then selects the
/// byte order and address size set on \a data for the remaining fields. On
/// success \a *data_offset_ptr is left just past the header (including the
/// extra 4 bytes a 64-bit header carries). On failure \a header is zeroed.
///
/// \return true when the magic was recognized, false otherwise.
bool ObjectFileMachO::ParseHeader(DataExtractor &data,
                                  lldb::offset_t *data_offset_ptr,
                                  llvm::MachO::mach_header &header) {
  data.SetByteOrder(endian::InlHostByteOrder());
  // Leave magic in the original byte order
  header.magic = data.GetU32(data_offset_ptr);
  bool can_parse = false;
  bool is_64_bit = false;
  switch (header.magic) {
  case MH_MAGIC:
    data.SetByteOrder(endian::InlHostByteOrder());
    data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_MAGIC_64:
    data.SetByteOrder(endian::InlHostByteOrder());
    data.SetAddressByteSize(8);
    can_parse = true;
    is_64_bit = true;
    break;

  case MH_CIGAM:
    data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                          ? eByteOrderLittle
                          : eByteOrderBig);
    data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_CIGAM_64:
    data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                          ? eByteOrderLittle
                          : eByteOrderBig);
    data.SetAddressByteSize(8);
    is_64_bit = true;
    can_parse = true;
    break;

  default:
    break;
  }

  if (can_parse) {
    // Read the six 32-bit fields that follow the magic, starting at cputype.
    data.GetU32(data_offset_ptr, &header.cputype, 6);
    if (is_64_bit)
      *data_offset_ptr += 4;
    return true;
  } else {
    memset(&header, 0, sizeof(header));
  }
  return false;
}

bool ObjectFileMachO::ParseHeader() {
  ModuleSP module_sp(GetModule());
  if (!module_sp)
    return false;

  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
  bool can_parse = false;
  lldb::offset_t offset = 0;
  m_data.SetByteOrder(endian::InlHostByteOrder());
  // Leave magic in the original byte order
  m_header.magic = m_data.GetU32(&offset);
  switch (m_header.magic) {
  case MH_MAGIC:
    m_data.SetByteOrder(endian::InlHostByteOrder());
    m_data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_MAGIC_64:
    m_data.SetByteOrder(endian::InlHostByteOrder());
    m_data.SetAddressByteSize(8);
    can_parse = true;
    break;

  case MH_CIGAM:
    m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                            ? eByteOrderLittle
                            : eByteOrderBig);
    m_data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_CIGAM_64:
    m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                            ? eByteOrderLittle
                            : eByteOrderBig);
    m_data.SetAddressByteSize(8);
    can_parse = true;
    break;

  default:
    break;
  }

  if (can_parse) {
    m_data.GetU32(&offset, &m_header.cputype, 6);

    ModuleSpecList all_specs;
    ModuleSpec base_spec;
    GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic),
                    base_spec, all_specs);

    for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
      ArchSpec mach_arch =
          all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();

      // Check if the module has a required architecture
      const ArchSpec &module_arch = module_sp->GetArchitecture();
      if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
        continue;

      if (SetModulesArchitecture(mach_arch)) {
        const size_t header_and_lc_size =
            m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
        if (m_data.GetByteSize() < header_and_lc_size) {
          DataBufferSP data_sp;
          ProcessSP process_sp(m_process_wp.lock());
          if (process_sp) {
            data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
          } else {
            // Read in all only the load command data from the file on disk
            data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
            if (data_sp->GetByteSize() != header_and_lc_size)
              continue;
          }
          if (data_sp)
            m_data.SetData(data_sp);
        }
      }
      return true;
    }
    // None found.
1104 return false; 1105 } else { 1106 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header)); 1107 } 1108 return false; 1109 } 1110 1111 ByteOrder ObjectFileMachO::GetByteOrder() const { 1112 return m_data.GetByteOrder(); 1113 } 1114 1115 bool ObjectFileMachO::IsExecutable() const { 1116 return m_header.filetype == MH_EXECUTE; 1117 } 1118 1119 bool ObjectFileMachO::IsDynamicLoader() const { 1120 return m_header.filetype == MH_DYLINKER; 1121 } 1122 1123 bool ObjectFileMachO::IsSharedCacheBinary() const { 1124 return m_header.flags & MH_DYLIB_IN_CACHE; 1125 } 1126 1127 bool ObjectFileMachO::IsKext() const { 1128 return m_header.filetype == MH_KEXT_BUNDLE; 1129 } 1130 1131 uint32_t ObjectFileMachO::GetAddressByteSize() const { 1132 return m_data.GetAddressByteSize(); 1133 } 1134 1135 AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { 1136 Symtab *symtab = GetSymtab(); 1137 if (!symtab) 1138 return AddressClass::eUnknown; 1139 1140 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr); 1141 if (symbol) { 1142 if (symbol->ValueIsAddress()) { 1143 SectionSP section_sp(symbol->GetAddressRef().GetSection()); 1144 if (section_sp) { 1145 const lldb::SectionType section_type = section_sp->GetType(); 1146 switch (section_type) { 1147 case eSectionTypeInvalid: 1148 return AddressClass::eUnknown; 1149 1150 case eSectionTypeCode: 1151 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1152 // For ARM we have a bit in the n_desc field of the symbol that 1153 // tells us ARM/Thumb which is bit 0x0008. 
if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
              return AddressClass::eCodeAlternateISA;
          }
          return AddressClass::eCode;

        case eSectionTypeContainer:
          return AddressClass::eUnknown;

        // Plain data sections of every flavor.
        case eSectionTypeData:
        case eSectionTypeDataCString:
        case eSectionTypeDataCStringPointers:
        case eSectionTypeDataSymbolAddress:
        case eSectionTypeData4:
        case eSectionTypeData8:
        case eSectionTypeData16:
        case eSectionTypeDataPointers:
        case eSectionTypeZeroFill:
        case eSectionTypeDataObjCMessageRefs:
        case eSectionTypeDataObjCCFStrings:
        case eSectionTypeGoSymtab:
          return AddressClass::eData;

        // Debug information sections (DWARF, Apple accelerator tables, CTF,
        // Swift modules).
        case eSectionTypeDebug:
        case eSectionTypeDWARFDebugAbbrev:
        case eSectionTypeDWARFDebugAbbrevDwo:
        case eSectionTypeDWARFDebugAddr:
        case eSectionTypeDWARFDebugAranges:
        case eSectionTypeDWARFDebugCuIndex:
        case eSectionTypeDWARFDebugFrame:
        case eSectionTypeDWARFDebugInfo:
        case eSectionTypeDWARFDebugInfoDwo:
        case eSectionTypeDWARFDebugLine:
        case eSectionTypeDWARFDebugLineStr:
        case eSectionTypeDWARFDebugLoc:
        case eSectionTypeDWARFDebugLocDwo:
        case eSectionTypeDWARFDebugLocLists:
        case eSectionTypeDWARFDebugLocListsDwo:
        case eSectionTypeDWARFDebugMacInfo:
        case eSectionTypeDWARFDebugMacro:
        case eSectionTypeDWARFDebugNames:
        case eSectionTypeDWARFDebugPubNames:
        case eSectionTypeDWARFDebugPubTypes:
        case eSectionTypeDWARFDebugRanges:
        case eSectionTypeDWARFDebugRngLists:
        case eSectionTypeDWARFDebugRngListsDwo:
        case eSectionTypeDWARFDebugStr:
        case eSectionTypeDWARFDebugStrDwo:
        case eSectionTypeDWARFDebugStrOffsets:
        case eSectionTypeDWARFDebugStrOffsetsDwo:
        case eSectionTypeDWARFDebugTuIndex:
        case eSectionTypeDWARFDebugTypes:
        case eSectionTypeDWARFDebugTypesDwo:
        case eSectionTypeDWARFAppleNames:
        case eSectionTypeDWARFAppleTypes:
        case eSectionTypeDWARFAppleNamespaces:
        case eSectionTypeDWARFAppleObjC:
        case eSectionTypeDWARFGNUDebugAltLink:
        case eSectionTypeCTF:
        case eSectionTypeSwiftModules:
          return AddressClass::eDebug;

        // Unwind tables are classified as runtime data.
        case eSectionTypeEHFrame:
        case eSectionTypeARMexidx:
        case eSectionTypeARMextab:
        case eSectionTypeCompactUnwind:
          return AddressClass::eRuntime;

        case eSectionTypeAbsoluteAddress:
        case eSectionTypeELFSymbolTable:
        case eSectionTypeELFDynamicSymbols:
        case eSectionTypeELFRelocationEntries:
        case eSectionTypeELFDynamicLinkInfo:
        case eSectionTypeOther:
          return AddressClass::eUnknown;
        }
      }
    }

    // The symbol has no usable section (or its value is not an address):
    // fall back to classifying by the symbol's own type.
    const SymbolType symbol_type = symbol->GetType();
    switch (symbol_type) {
    case eSymbolTypeAny:
      return AddressClass::eUnknown;
    case eSymbolTypeAbsolute:
      return AddressClass::eUnknown;

    case eSymbolTypeCode:
    case eSymbolTypeTrampoline:
    case eSymbolTypeResolver:
      if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
        // For ARM we have a bit in the n_desc field of the symbol that tells
        // us ARM/Thumb which is bit 0x0008.
        if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
          return AddressClass::eCodeAlternateISA;
      }
      return AddressClass::eCode;

    case eSymbolTypeData:
      return AddressClass::eData;
    case eSymbolTypeRuntime:
      return AddressClass::eRuntime;
    case eSymbolTypeException:
      return AddressClass::eRuntime;
    case eSymbolTypeSourceFile:
      return AddressClass::eDebug;
    case eSymbolTypeHeaderFile:
      return AddressClass::eDebug;
    case eSymbolTypeObjectFile:
      return AddressClass::eDebug;
    case eSymbolTypeCommonBlock:
      return AddressClass::eDebug;
    case eSymbolTypeBlock:
      return AddressClass::eDebug;
    case eSymbolTypeLocal:
      return AddressClass::eData;
    case eSymbolTypeParam:
      return AddressClass::eData;
    case eSymbolTypeVariable:
      return AddressClass::eData;
    case eSymbolTypeVariableType:
      return AddressClass::eDebug;
    case eSymbolTypeLineEntry:
      return AddressClass::eDebug;
    case eSymbolTypeLineHeader:
      return AddressClass::eDebug;
    case eSymbolTypeScopeBegin:
      return AddressClass::eDebug;
    case eSymbolTypeScopeEnd:
      return AddressClass::eDebug;
    case eSymbolTypeAdditional:
      return AddressClass::eUnknown;
    case eSymbolTypeCompiler:
      return AddressClass::eDebug;
    case eSymbolTypeInstrumentation:
      return AddressClass::eDebug;
    case eSymbolTypeUndefined:
      return AddressClass::eUnknown;
    case eSymbolTypeObjCClass:
      return AddressClass::eRuntime;
    case eSymbolTypeObjCMetaClass:
      return AddressClass::eRuntime;
    case eSymbolTypeObjCIVar:
      return AddressClass::eRuntime;
    case eSymbolTypeReExported:
      return AddressClass::eRuntime;
    }
  }
  // No symbol contains the address.
  return AddressClass::eUnknown;
}

/// Report whether this file looks stripped, judged by the number of local
/// symbols recorded in its LC_DYSYMTAB load command.
///
/// The LC_DYSYMTAB command is located and cached in m_dysymtab on first use
/// (while m_dysymtab.cmd is still zero). Returns false when no such command
/// could be read.
bool ObjectFileMachO::IsStripped() {
  if (m_dysymtab.cmd == 0) {
    ModuleSP module_sp(GetModule());
    if (module_sp) {
      // Load commands start right after the Mach-O header.
      lldb::offset_t offset =
MachHeaderSizeFromMagic(m_header.magic); 1308 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1309 const lldb::offset_t load_cmd_offset = offset; 1310 1311 llvm::MachO::load_command lc = {}; 1312 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 1313 break; 1314 if (lc.cmd == LC_DYSYMTAB) { 1315 m_dysymtab.cmd = lc.cmd; 1316 m_dysymtab.cmdsize = lc.cmdsize; 1317 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1318 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) == 1319 nullptr) { 1320 // Clear m_dysymtab if we were unable to read all items from the 1321 // load command 1322 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 1323 } 1324 } 1325 offset = load_cmd_offset + lc.cmdsize; 1326 } 1327 } 1328 } 1329 if (m_dysymtab.cmd) 1330 return m_dysymtab.nlocalsym <= 1; 1331 return false; 1332 } 1333 1334 ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() { 1335 EncryptedFileRanges result; 1336 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1337 1338 llvm::MachO::encryption_info_command encryption_cmd; 1339 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1340 const lldb::offset_t load_cmd_offset = offset; 1341 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr) 1342 break; 1343 1344 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the 1345 // 3 fields we care about, so treat them the same. 
1346 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO || 1347 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) { 1348 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) { 1349 if (encryption_cmd.cryptid != 0) { 1350 EncryptedFileRanges::Entry entry; 1351 entry.SetRangeBase(encryption_cmd.cryptoff); 1352 entry.SetByteSize(encryption_cmd.cryptsize); 1353 result.Append(entry); 1354 } 1355 } 1356 } 1357 offset = load_cmd_offset + encryption_cmd.cmdsize; 1358 } 1359 1360 return result; 1361 } 1362 1363 void ObjectFileMachO::SanitizeSegmentCommand( 1364 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) { 1365 if (m_length == 0 || seg_cmd.filesize == 0) 1366 return; 1367 1368 if (IsSharedCacheBinary() && !IsInMemory()) { 1369 // In shared cache images, the load commands are relative to the 1370 // shared cache file, and not the specific image we are 1371 // examining. Let's fix this up so that it looks like a normal 1372 // image. 1373 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(), 1374 sizeof(seg_cmd.segname)) == 0) 1375 m_text_address = seg_cmd.vmaddr; 1376 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(), 1377 sizeof(seg_cmd.segname)) == 0) 1378 m_linkedit_original_offset = seg_cmd.fileoff; 1379 1380 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address; 1381 } 1382 1383 if (seg_cmd.fileoff > m_length) { 1384 // We have a load command that says it extends past the end of the file. 1385 // This is likely a corrupt file. We don't have any way to return an error 1386 // condition here (this method was likely invoked from something like 1387 // ObjectFile::GetSectionList()), so we just null out the section contents, 1388 // and dump a message to stdout. The most common case here is core file 1389 // debugging with a truncated file. 1390 const char *lc_segment_name = 1391 seg_cmd.cmd == LC_SEGMENT_64 ? 
"LC_SEGMENT_64" : "LC_SEGMENT"; 1392 GetModule()->ReportWarning( 1393 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond " 1394 "the end of the file ({3:x16}), ignoring this section", 1395 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length); 1396 1397 seg_cmd.fileoff = 0; 1398 seg_cmd.filesize = 0; 1399 } 1400 1401 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) { 1402 // We have a load command that says it extends past the end of the file. 1403 // This is likely a corrupt file. We don't have any way to return an error 1404 // condition here (this method was likely invoked from something like 1405 // ObjectFile::GetSectionList()), so we just null out the section contents, 1406 // and dump a message to stdout. The most common case here is core file 1407 // debugging with a truncated file. 1408 const char *lc_segment_name = 1409 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1410 GetModule()->ReportWarning( 1411 "load command {0} {1} has a fileoff + filesize ({2:x16}) that " 1412 "extends beyond the end of the file ({4:x16}), the segment will be " 1413 "truncated to match", 1414 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length); 1415 1416 // Truncate the length 1417 seg_cmd.filesize = m_length - seg_cmd.fileoff; 1418 } 1419 } 1420 1421 static uint32_t 1422 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) { 1423 uint32_t result = 0; 1424 if (seg_cmd.initprot & VM_PROT_READ) 1425 result |= ePermissionsReadable; 1426 if (seg_cmd.initprot & VM_PROT_WRITE) 1427 result |= ePermissionsWritable; 1428 if (seg_cmd.initprot & VM_PROT_EXECUTE) 1429 result |= ePermissionsExecutable; 1430 return result; 1431 } 1432 1433 static lldb::SectionType GetSectionType(uint32_t flags, 1434 ConstString section_name) { 1435 1436 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS)) 1437 return eSectionTypeCode; 1438 1439 uint32_t mach_sect_type = flags & SECTION_TYPE; 1440 static ConstString 
g_sect_name_objc_data("__objc_data"); 1441 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs"); 1442 static ConstString g_sect_name_objc_selrefs("__objc_selrefs"); 1443 static ConstString g_sect_name_objc_classrefs("__objc_classrefs"); 1444 static ConstString g_sect_name_objc_superrefs("__objc_superrefs"); 1445 static ConstString g_sect_name_objc_const("__objc_const"); 1446 static ConstString g_sect_name_objc_classlist("__objc_classlist"); 1447 static ConstString g_sect_name_cfstring("__cfstring"); 1448 1449 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev"); 1450 static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo"); 1451 static ConstString g_sect_name_dwarf_debug_addr("__debug_addr"); 1452 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges"); 1453 static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index"); 1454 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame"); 1455 static ConstString g_sect_name_dwarf_debug_info("__debug_info"); 1456 static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo"); 1457 static ConstString g_sect_name_dwarf_debug_line("__debug_line"); 1458 static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo"); 1459 static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str"); 1460 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc"); 1461 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists"); 1462 static ConstString g_sect_name_dwarf_debug_loclists_dwo("__debug_loclists.dwo"); 1463 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo"); 1464 static ConstString g_sect_name_dwarf_debug_macro("__debug_macro"); 1465 static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo"); 1466 static ConstString g_sect_name_dwarf_debug_names("__debug_names"); 1467 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames"); 1468 static ConstString 
g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes"); 1469 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges"); 1470 static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists"); 1471 static ConstString g_sect_name_dwarf_debug_str("__debug_str"); 1472 static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo"); 1473 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs"); 1474 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo"); 1475 static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index"); 1476 static ConstString g_sect_name_dwarf_debug_types("__debug_types"); 1477 static ConstString g_sect_name_dwarf_apple_names("__apple_names"); 1478 static ConstString g_sect_name_dwarf_apple_types("__apple_types"); 1479 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac"); 1480 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc"); 1481 static ConstString g_sect_name_eh_frame("__eh_frame"); 1482 static ConstString g_sect_name_compact_unwind("__unwind_info"); 1483 static ConstString g_sect_name_text("__text"); 1484 static ConstString g_sect_name_data("__data"); 1485 static ConstString g_sect_name_go_symtab("__gosymtab"); 1486 static ConstString g_sect_name_ctf("__ctf"); 1487 static ConstString g_sect_name_swift_ast("__swift_ast"); 1488 1489 if (section_name == g_sect_name_dwarf_debug_abbrev) 1490 return eSectionTypeDWARFDebugAbbrev; 1491 if (section_name == g_sect_name_dwarf_debug_abbrev_dwo) 1492 return eSectionTypeDWARFDebugAbbrevDwo; 1493 if (section_name == g_sect_name_dwarf_debug_addr) 1494 return eSectionTypeDWARFDebugAddr; 1495 if (section_name == g_sect_name_dwarf_debug_aranges) 1496 return eSectionTypeDWARFDebugAranges; 1497 if (section_name == g_sect_name_dwarf_debug_cu_index) 1498 return eSectionTypeDWARFDebugCuIndex; 1499 if (section_name == g_sect_name_dwarf_debug_frame) 1500 return eSectionTypeDWARFDebugFrame; 1501 if (section_name == 
g_sect_name_dwarf_debug_info) 1502 return eSectionTypeDWARFDebugInfo; 1503 if (section_name == g_sect_name_dwarf_debug_info_dwo) 1504 return eSectionTypeDWARFDebugInfoDwo; 1505 if (section_name == g_sect_name_dwarf_debug_line) 1506 return eSectionTypeDWARFDebugLine; 1507 if (section_name == g_sect_name_dwarf_debug_line_dwo) 1508 return eSectionTypeDWARFDebugLine; // Same as debug_line. 1509 if (section_name == g_sect_name_dwarf_debug_line_str) 1510 return eSectionTypeDWARFDebugLineStr; 1511 if (section_name == g_sect_name_dwarf_debug_loc) 1512 return eSectionTypeDWARFDebugLoc; 1513 if (section_name == g_sect_name_dwarf_debug_loclists) 1514 return eSectionTypeDWARFDebugLocLists; 1515 if (section_name == g_sect_name_dwarf_debug_loclists_dwo) 1516 return eSectionTypeDWARFDebugLocListsDwo; 1517 if (section_name == g_sect_name_dwarf_debug_macinfo) 1518 return eSectionTypeDWARFDebugMacInfo; 1519 if (section_name == g_sect_name_dwarf_debug_macro) 1520 return eSectionTypeDWARFDebugMacro; 1521 if (section_name == g_sect_name_dwarf_debug_macro_dwo) 1522 return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro. 
1523 if (section_name == g_sect_name_dwarf_debug_names) 1524 return eSectionTypeDWARFDebugNames; 1525 if (section_name == g_sect_name_dwarf_debug_pubnames) 1526 return eSectionTypeDWARFDebugPubNames; 1527 if (section_name == g_sect_name_dwarf_debug_pubtypes) 1528 return eSectionTypeDWARFDebugPubTypes; 1529 if (section_name == g_sect_name_dwarf_debug_ranges) 1530 return eSectionTypeDWARFDebugRanges; 1531 if (section_name == g_sect_name_dwarf_debug_rnglists) 1532 return eSectionTypeDWARFDebugRngLists; 1533 if (section_name == g_sect_name_dwarf_debug_str) 1534 return eSectionTypeDWARFDebugStr; 1535 if (section_name == g_sect_name_dwarf_debug_str_dwo) 1536 return eSectionTypeDWARFDebugStrDwo; 1537 if (section_name == g_sect_name_dwarf_debug_str_offs) 1538 return eSectionTypeDWARFDebugStrOffsets; 1539 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo) 1540 return eSectionTypeDWARFDebugStrOffsetsDwo; 1541 if (section_name == g_sect_name_dwarf_debug_tu_index) 1542 return eSectionTypeDWARFDebugTuIndex; 1543 if (section_name == g_sect_name_dwarf_debug_types) 1544 return eSectionTypeDWARFDebugTypes; 1545 if (section_name == g_sect_name_dwarf_apple_names) 1546 return eSectionTypeDWARFAppleNames; 1547 if (section_name == g_sect_name_dwarf_apple_types) 1548 return eSectionTypeDWARFAppleTypes; 1549 if (section_name == g_sect_name_dwarf_apple_namespaces) 1550 return eSectionTypeDWARFAppleNamespaces; 1551 if (section_name == g_sect_name_dwarf_apple_objc) 1552 return eSectionTypeDWARFAppleObjC; 1553 if (section_name == g_sect_name_objc_selrefs) 1554 return eSectionTypeDataCStringPointers; 1555 if (section_name == g_sect_name_objc_msgrefs) 1556 return eSectionTypeDataObjCMessageRefs; 1557 if (section_name == g_sect_name_eh_frame) 1558 return eSectionTypeEHFrame; 1559 if (section_name == g_sect_name_compact_unwind) 1560 return eSectionTypeCompactUnwind; 1561 if (section_name == g_sect_name_cfstring) 1562 return eSectionTypeDataObjCCFStrings; 1563 if (section_name == 
g_sect_name_go_symtab) 1564 return eSectionTypeGoSymtab; 1565 if (section_name == g_sect_name_ctf) 1566 return eSectionTypeCTF; 1567 if (section_name == g_sect_name_swift_ast) 1568 return eSectionTypeSwiftModules; 1569 if (section_name == g_sect_name_objc_data || 1570 section_name == g_sect_name_objc_classrefs || 1571 section_name == g_sect_name_objc_superrefs || 1572 section_name == g_sect_name_objc_const || 1573 section_name == g_sect_name_objc_classlist) { 1574 return eSectionTypeDataPointers; 1575 } 1576 1577 switch (mach_sect_type) { 1578 // TODO: categorize sections by other flags for regular sections 1579 case S_REGULAR: 1580 if (section_name == g_sect_name_text) 1581 return eSectionTypeCode; 1582 if (section_name == g_sect_name_data) 1583 return eSectionTypeData; 1584 return eSectionTypeOther; 1585 case S_ZEROFILL: 1586 return eSectionTypeZeroFill; 1587 case S_CSTRING_LITERALS: // section with only literal C strings 1588 return eSectionTypeDataCString; 1589 case S_4BYTE_LITERALS: // section with only 4 byte literals 1590 return eSectionTypeData4; 1591 case S_8BYTE_LITERALS: // section with only 8 byte literals 1592 return eSectionTypeData8; 1593 case S_LITERAL_POINTERS: // section with only pointers to literals 1594 return eSectionTypeDataPointers; 1595 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers 1596 return eSectionTypeDataPointers; 1597 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers 1598 return eSectionTypeDataPointers; 1599 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in 1600 // the reserved2 field 1601 return eSectionTypeCode; 1602 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for 1603 // initialization 1604 return eSectionTypeDataPointers; 1605 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for 1606 // termination 1607 return eSectionTypeDataPointers; 1608 case S_COALESCED: 1609 return eSectionTypeOther; 1610 case 
S_GB_ZEROFILL: 1611 return eSectionTypeZeroFill; 1612 case S_INTERPOSING: // section with only pairs of function pointers for 1613 // interposing 1614 return eSectionTypeCode; 1615 case S_16BYTE_LITERALS: // section with only 16 byte literals 1616 return eSectionTypeData16; 1617 case S_DTRACE_DOF: 1618 return eSectionTypeDebug; 1619 case S_LAZY_DYLIB_SYMBOL_POINTERS: 1620 return eSectionTypeDataPointers; 1621 default: 1622 return eSectionTypeOther; 1623 } 1624 } 1625 1626 struct ObjectFileMachO::SegmentParsingContext { 1627 const EncryptedFileRanges EncryptedRanges; 1628 lldb_private::SectionList &UnifiedList; 1629 uint32_t NextSegmentIdx = 0; 1630 uint32_t NextSectionIdx = 0; 1631 bool FileAddressesChanged = false; 1632 1633 SegmentParsingContext(EncryptedFileRanges EncryptedRanges, 1634 lldb_private::SectionList &UnifiedList) 1635 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {} 1636 }; 1637 1638 void ObjectFileMachO::ProcessSegmentCommand( 1639 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset, 1640 uint32_t cmd_idx, SegmentParsingContext &context) { 1641 llvm::MachO::segment_command_64 load_cmd; 1642 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_)); 1643 1644 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16)) 1645 return; 1646 1647 ModuleSP module_sp = GetModule(); 1648 const bool is_core = GetType() == eTypeCoreFile; 1649 const bool is_dsym = (m_header.filetype == MH_DSYM); 1650 bool add_section = true; 1651 bool add_to_unified = true; 1652 ConstString const_segname( 1653 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname))); 1654 1655 SectionSP unified_section_sp( 1656 context.UnifiedList.FindSectionByName(const_segname)); 1657 if (is_dsym && unified_section_sp) { 1658 if (const_segname == GetSegmentNameLINKEDIT()) { 1659 // We need to keep the __LINKEDIT segment private to this object file 1660 // only 1661 add_to_unified = false; 1662 } else { 1663 // This is the dSYM file and this section 
has already been created by the 1664 // object file, no need to create it. 1665 add_section = false; 1666 } 1667 } 1668 load_cmd.vmaddr = m_data.GetAddress(&offset); 1669 load_cmd.vmsize = m_data.GetAddress(&offset); 1670 load_cmd.fileoff = m_data.GetAddress(&offset); 1671 load_cmd.filesize = m_data.GetAddress(&offset); 1672 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4)) 1673 return; 1674 1675 SanitizeSegmentCommand(load_cmd, cmd_idx); 1676 1677 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd); 1678 const bool segment_is_encrypted = 1679 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0; 1680 1681 // Use a segment ID of the segment index shifted left by 8 so they never 1682 // conflict with any of the sections. 1683 SectionSP segment_sp; 1684 if (add_section && (const_segname || is_core)) { 1685 segment_sp = std::make_shared<Section>( 1686 module_sp, // Module to which this section belongs 1687 this, // Object file to which this sections belongs 1688 ++context.NextSegmentIdx 1689 << 8, // Section ID is the 1 based segment index 1690 // shifted right by 8 bits as not to collide with any of the 256 1691 // section IDs that are possible 1692 const_segname, // Name of this section 1693 eSectionTypeContainer, // This section is a container of other 1694 // sections. 
1695 load_cmd.vmaddr, // File VM address == addresses as they are 1696 // found in the object file 1697 load_cmd.vmsize, // VM size in bytes of this section 1698 load_cmd.fileoff, // Offset to the data for this section in 1699 // the file 1700 load_cmd.filesize, // Size in bytes of this section as found 1701 // in the file 1702 0, // Segments have no alignment information 1703 load_cmd.flags); // Flags for this section 1704 1705 segment_sp->SetIsEncrypted(segment_is_encrypted); 1706 m_sections_up->AddSection(segment_sp); 1707 segment_sp->SetPermissions(segment_permissions); 1708 if (add_to_unified) 1709 context.UnifiedList.AddSection(segment_sp); 1710 } else if (unified_section_sp) { 1711 // If this is a dSYM and the file addresses in the dSYM differ from the 1712 // file addresses in the ObjectFile, we must use the file base address for 1713 // the Section from the dSYM for the DWARF to resolve correctly. 1714 // This only happens with binaries in the shared cache in practice; 1715 // normally a mismatch like this would give a binary & dSYM that do not 1716 // match UUIDs. When a binary is included in the shared cache, its 1717 // segments are rearranged to optimize the shared cache, so its file 1718 // addresses will differ from what the ObjectFile had originally, 1719 // and what the dSYM has. 1720 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) { 1721 Log *log = GetLog(LLDBLog::Symbols); 1722 if (log) { 1723 log->Printf( 1724 "Installing dSYM's %s segment file address over ObjectFile's " 1725 "so symbol table/debug info resolves correctly for %s", 1726 const_segname.AsCString(), 1727 module_sp->GetFileSpec().GetFilename().AsCString()); 1728 } 1729 1730 // Make sure we've parsed the symbol table from the ObjectFile before 1731 // we go around changing its Sections. 
1732 module_sp->GetObjectFile()->GetSymtab(); 1733 // eh_frame would present the same problems but we parse that on a per- 1734 // function basis as-needed so it's more difficult to remove its use of 1735 // the Sections. Realistically, the environments where this code path 1736 // will be taken will not have eh_frame sections. 1737 1738 unified_section_sp->SetFileAddress(load_cmd.vmaddr); 1739 1740 // Notify the module that the section addresses have been changed once 1741 // we're done so any file-address caches can be updated. 1742 context.FileAddressesChanged = true; 1743 } 1744 m_sections_up->AddSection(unified_section_sp); 1745 } 1746 1747 llvm::MachO::section_64 sect64; 1748 ::memset(§64, 0, sizeof(sect64)); 1749 // Push a section into our mach sections for the section at index zero 1750 // (NO_SECT) if we don't have any mach sections yet... 1751 if (m_mach_sections.empty()) 1752 m_mach_sections.push_back(sect64); 1753 uint32_t segment_sect_idx; 1754 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; 1755 1756 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; 1757 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; 1758 ++segment_sect_idx) { 1759 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname, 1760 sizeof(sect64.sectname)) == nullptr) 1761 break; 1762 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname, 1763 sizeof(sect64.segname)) == nullptr) 1764 break; 1765 sect64.addr = m_data.GetAddress(&offset); 1766 sect64.size = m_data.GetAddress(&offset); 1767 1768 if (m_data.GetU32(&offset, §64.offset, num_u32s) == nullptr) 1769 break; 1770 1771 if (IsSharedCacheBinary() && !IsInMemory()) { 1772 sect64.offset = sect64.addr - m_text_address; 1773 } 1774 1775 // Keep a list of mach sections around in case we need to get at data that 1776 // isn't stored in the abstracted Sections. 
1777 m_mach_sections.push_back(sect64); 1778 1779 if (add_section) { 1780 ConstString section_name( 1781 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); 1782 if (!const_segname) { 1783 // We have a segment with no name so we need to conjure up segments 1784 // that correspond to the section's segname if there isn't already such 1785 // a section. If there is such a section, we resize the section so that 1786 // it spans all sections. We also mark these sections as fake so 1787 // address matches don't hit if they land in the gaps between the child 1788 // sections. 1789 const_segname.SetTrimmedCStringWithLength(sect64.segname, 1790 sizeof(sect64.segname)); 1791 segment_sp = context.UnifiedList.FindSectionByName(const_segname); 1792 if (segment_sp.get()) { 1793 Section *segment = segment_sp.get(); 1794 // Grow the section size as needed. 1795 const lldb::addr_t sect64_min_addr = sect64.addr; 1796 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size; 1797 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize(); 1798 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress(); 1799 const lldb::addr_t curr_seg_max_addr = 1800 curr_seg_min_addr + curr_seg_byte_size; 1801 if (sect64_min_addr >= curr_seg_min_addr) { 1802 const lldb::addr_t new_seg_byte_size = 1803 sect64_max_addr - curr_seg_min_addr; 1804 // Only grow the section size if needed 1805 if (new_seg_byte_size > curr_seg_byte_size) 1806 segment->SetByteSize(new_seg_byte_size); 1807 } else { 1808 // We need to change the base address of the segment and adjust the 1809 // child section offsets for all existing children. 1810 const lldb::addr_t slide_amount = 1811 sect64_min_addr - curr_seg_min_addr; 1812 segment->Slide(slide_amount, false); 1813 segment->GetChildren().Slide(-slide_amount, false); 1814 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr); 1815 } 1816 1817 // Grow the section size as needed. 
1818 if (sect64.offset) { 1819 const lldb::addr_t segment_min_file_offset = 1820 segment->GetFileOffset(); 1821 const lldb::addr_t segment_max_file_offset = 1822 segment_min_file_offset + segment->GetFileSize(); 1823 1824 const lldb::addr_t section_min_file_offset = sect64.offset; 1825 const lldb::addr_t section_max_file_offset = 1826 section_min_file_offset + sect64.size; 1827 const lldb::addr_t new_file_offset = 1828 std::min(section_min_file_offset, segment_min_file_offset); 1829 const lldb::addr_t new_file_size = 1830 std::max(section_max_file_offset, segment_max_file_offset) - 1831 new_file_offset; 1832 segment->SetFileOffset(new_file_offset); 1833 segment->SetFileSize(new_file_size); 1834 } 1835 } else { 1836 // Create a fake section for the section's named segment 1837 segment_sp = std::make_shared<Section>( 1838 segment_sp, // Parent section 1839 module_sp, // Module to which this section belongs 1840 this, // Object file to which this section belongs 1841 ++context.NextSegmentIdx 1842 << 8, // Section ID is the 1 based segment index 1843 // shifted right by 8 bits as not to 1844 // collide with any of the 256 section IDs 1845 // that are possible 1846 const_segname, // Name of this section 1847 eSectionTypeContainer, // This section is a container of 1848 // other sections. 1849 sect64.addr, // File VM address == addresses as they are 1850 // found in the object file 1851 sect64.size, // VM size in bytes of this section 1852 sect64.offset, // Offset to the data for this section in 1853 // the file 1854 sect64.offset ? 
sect64.size : 0, // Size in bytes of 1855 // this section as 1856 // found in the file 1857 sect64.align, 1858 load_cmd.flags); // Flags for this section 1859 segment_sp->SetIsFake(true); 1860 segment_sp->SetPermissions(segment_permissions); 1861 m_sections_up->AddSection(segment_sp); 1862 if (add_to_unified) 1863 context.UnifiedList.AddSection(segment_sp); 1864 segment_sp->SetIsEncrypted(segment_is_encrypted); 1865 } 1866 } 1867 assert(segment_sp.get()); 1868 1869 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name); 1870 1871 SectionSP section_sp(new Section( 1872 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, 1873 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, 1874 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, 1875 sect64.flags)); 1876 // Set the section to be encrypted to match the segment 1877 1878 bool section_is_encrypted = false; 1879 if (!segment_is_encrypted && load_cmd.filesize != 0) 1880 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( 1881 sect64.offset) != nullptr; 1882 1883 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); 1884 section_sp->SetPermissions(segment_permissions); 1885 segment_sp->GetChildren().AddSection(section_sp); 1886 1887 if (segment_sp->IsFake()) { 1888 segment_sp.reset(); 1889 const_segname.Clear(); 1890 } 1891 } 1892 } 1893 if (segment_sp && is_dsym) { 1894 if (first_segment_sectID <= context.NextSectionIdx) { 1895 lldb::user_id_t sect_uid; 1896 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx; 1897 ++sect_uid) { 1898 SectionSP curr_section_sp( 1899 segment_sp->GetChildren().FindSectionByID(sect_uid)); 1900 SectionSP next_section_sp; 1901 if (sect_uid + 1 <= context.NextSectionIdx) 1902 next_section_sp = 1903 segment_sp->GetChildren().FindSectionByID(sect_uid + 1); 1904 1905 if (curr_section_sp.get()) { 1906 if (curr_section_sp->GetByteSize() == 0) { 1907 if 
(next_section_sp.get() != nullptr) 1908 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() - 1909 curr_section_sp->GetFileAddress()); 1910 else 1911 curr_section_sp->SetByteSize(load_cmd.vmsize); 1912 } 1913 } 1914 } 1915 } 1916 } 1917 } 1918 1919 void ObjectFileMachO::ProcessDysymtabCommand( 1920 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) { 1921 m_dysymtab.cmd = load_cmd.cmd; 1922 m_dysymtab.cmdsize = load_cmd.cmdsize; 1923 m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1924 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2); 1925 } 1926 1927 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) { 1928 if (m_sections_up) 1929 return; 1930 1931 m_sections_up = std::make_unique<SectionList>(); 1932 1933 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1934 // bool dump_sections = false; 1935 ModuleSP module_sp(GetModule()); 1936 1937 offset = MachHeaderSizeFromMagic(m_header.magic); 1938 1939 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list); 1940 llvm::MachO::load_command load_cmd; 1941 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1942 const lldb::offset_t load_cmd_offset = offset; 1943 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 1944 break; 1945 1946 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64) 1947 ProcessSegmentCommand(load_cmd, offset, i, context); 1948 else if (load_cmd.cmd == LC_DYSYMTAB) 1949 ProcessDysymtabCommand(load_cmd, offset); 1950 1951 offset = load_cmd_offset + load_cmd.cmdsize; 1952 } 1953 1954 if (context.FileAddressesChanged && module_sp) 1955 module_sp->SectionFileAddressesChanged(); 1956 } 1957 1958 class MachSymtabSectionInfo { 1959 public: 1960 MachSymtabSectionInfo(SectionList *section_list) 1961 : m_section_list(section_list), m_section_infos() { 1962 // Get the number of sections down to a depth of 1 to include all segments 1963 // and their sections, but no other sections that may be added for debug 1964 // map or 1965 
m_section_infos.resize(section_list->GetNumSections(1)); 1966 } 1967 1968 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) { 1969 if (n_sect == 0) 1970 return SectionSP(); 1971 if (n_sect < m_section_infos.size()) { 1972 if (!m_section_infos[n_sect].section_sp) { 1973 SectionSP section_sp(m_section_list->FindSectionByID(n_sect)); 1974 m_section_infos[n_sect].section_sp = section_sp; 1975 if (section_sp) { 1976 m_section_infos[n_sect].vm_range.SetBaseAddress( 1977 section_sp->GetFileAddress()); 1978 m_section_infos[n_sect].vm_range.SetByteSize( 1979 section_sp->GetByteSize()); 1980 } else { 1981 std::string filename = "<unknown>"; 1982 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0)); 1983 if (first_section_sp) 1984 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath(); 1985 1986 Debugger::ReportError( 1987 llvm::formatv("unable to find section {0} for a symbol in " 1988 "{1}, corrupt file?", 1989 n_sect, filename)); 1990 } 1991 } 1992 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) { 1993 // Symbol is in section. 1994 return m_section_infos[n_sect].section_sp; 1995 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 && 1996 m_section_infos[n_sect].vm_range.GetBaseAddress() == 1997 file_addr) { 1998 // Symbol is in section with zero size, but has the same start address 1999 // as the section. This can happen with linker symbols (symbols that 2000 // start with the letter 'l' or 'L'. 
2001 return m_section_infos[n_sect].section_sp; 2002 } 2003 } 2004 return m_section_list->FindSectionContainingFileAddress(file_addr); 2005 } 2006 2007 protected: 2008 struct SectionInfo { 2009 SectionInfo() : vm_range(), section_sp() {} 2010 2011 VMRange vm_range; 2012 SectionSP section_sp; 2013 }; 2014 SectionList *m_section_list; 2015 std::vector<SectionInfo> m_section_infos; 2016 }; 2017 2018 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63) 2019 struct TrieEntry { 2020 void Dump() const { 2021 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", 2022 static_cast<unsigned long long>(address), 2023 static_cast<unsigned long long>(flags), 2024 static_cast<unsigned long long>(other), name.GetCString()); 2025 if (import_name) 2026 printf(" -> \"%s\"\n", import_name.GetCString()); 2027 else 2028 printf("\n"); 2029 } 2030 ConstString name; 2031 uint64_t address = LLDB_INVALID_ADDRESS; 2032 uint64_t flags = 2033 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, 2034 // TRIE_SYMBOL_IS_THUMB 2035 uint64_t other = 0; 2036 ConstString import_name; 2037 }; 2038 2039 struct TrieEntryWithOffset { 2040 lldb::offset_t nodeOffset; 2041 TrieEntry entry; 2042 2043 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {} 2044 2045 void Dump(uint32_t idx) const { 2046 printf("[%3u] 0x%16.16llx: ", idx, 2047 static_cast<unsigned long long>(nodeOffset)); 2048 entry.Dump(); 2049 } 2050 2051 bool operator<(const TrieEntryWithOffset &other) const { 2052 return (nodeOffset < other.nodeOffset); 2053 } 2054 }; 2055 2056 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset, 2057 const bool is_arm, addr_t text_seg_base_addr, 2058 std::vector<llvm::StringRef> &nameSlices, 2059 std::set<lldb::addr_t> &resolver_addresses, 2060 std::vector<TrieEntryWithOffset> &reexports, 2061 std::vector<TrieEntryWithOffset> &ext_symbols) { 2062 if (!data.ValidOffset(offset)) 2063 return true; 2064 2065 // Terminal node -- end of a branch, possibly add this 
to 2066 // the symbol table or resolver table. 2067 const uint64_t terminalSize = data.GetULEB128(&offset); 2068 lldb::offset_t children_offset = offset + terminalSize; 2069 if (terminalSize != 0) { 2070 TrieEntryWithOffset e(offset); 2071 e.entry.flags = data.GetULEB128(&offset); 2072 const char *import_name = nullptr; 2073 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 2074 e.entry.address = 0; 2075 e.entry.other = data.GetULEB128(&offset); // dylib ordinal 2076 import_name = data.GetCStr(&offset); 2077 } else { 2078 e.entry.address = data.GetULEB128(&offset); 2079 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2080 e.entry.address += text_seg_base_addr; 2081 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 2082 e.entry.other = data.GetULEB128(&offset); 2083 uint64_t resolver_addr = e.entry.other; 2084 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2085 resolver_addr += text_seg_base_addr; 2086 if (is_arm) 2087 resolver_addr &= THUMB_ADDRESS_BIT_MASK; 2088 resolver_addresses.insert(resolver_addr); 2089 } else 2090 e.entry.other = 0; 2091 } 2092 bool add_this_entry = false; 2093 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && 2094 import_name && import_name[0]) { 2095 // add symbols that are reexport symbols with a valid import name. 2096 add_this_entry = true; 2097 } else if (e.entry.flags == 0 && 2098 (import_name == nullptr || import_name[0] == '\0')) { 2099 // add externally visible symbols, in case the nlist record has 2100 // been stripped/omitted. 
2101 add_this_entry = true; 2102 } 2103 if (add_this_entry) { 2104 std::string name; 2105 if (!nameSlices.empty()) { 2106 for (auto name_slice : nameSlices) 2107 name.append(name_slice.data(), name_slice.size()); 2108 } 2109 if (name.size() > 1) { 2110 // Skip the leading '_' 2111 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1); 2112 } 2113 if (import_name) { 2114 // Skip the leading '_' 2115 e.entry.import_name.SetCString(import_name + 1); 2116 } 2117 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { 2118 reexports.push_back(e); 2119 } else { 2120 if (is_arm && (e.entry.address & 1)) { 2121 e.entry.flags |= TRIE_SYMBOL_IS_THUMB; 2122 e.entry.address &= THUMB_ADDRESS_BIT_MASK; 2123 } 2124 ext_symbols.push_back(e); 2125 } 2126 } 2127 } 2128 2129 const uint8_t childrenCount = data.GetU8(&children_offset); 2130 for (uint8_t i = 0; i < childrenCount; ++i) { 2131 const char *cstr = data.GetCStr(&children_offset); 2132 if (cstr) 2133 nameSlices.push_back(llvm::StringRef(cstr)); 2134 else 2135 return false; // Corrupt data 2136 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); 2137 if (childNodeOffset) { 2138 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr, 2139 nameSlices, resolver_addresses, reexports, 2140 ext_symbols)) { 2141 return false; 2142 } 2143 } 2144 nameSlices.pop_back(); 2145 } 2146 return true; 2147 } 2148 2149 static SymbolType GetSymbolType(const char *&symbol_name, 2150 bool &demangled_is_synthesized, 2151 const SectionSP &text_section_sp, 2152 const SectionSP &data_section_sp, 2153 const SectionSP &data_dirty_section_sp, 2154 const SectionSP &data_const_section_sp, 2155 const SectionSP &symbol_section) { 2156 SymbolType type = eSymbolTypeInvalid; 2157 2158 const char *symbol_sect_name = symbol_section->GetName().AsCString(); 2159 if (symbol_section->IsDescendant(text_section_sp.get())) { 2160 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 2161 S_ATTR_SELF_MODIFYING_CODE 
| 2162 S_ATTR_SOME_INSTRUCTIONS)) 2163 type = eSymbolTypeData; 2164 else 2165 type = eSymbolTypeCode; 2166 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 2167 symbol_section->IsDescendant(data_dirty_section_sp.get()) || 2168 symbol_section->IsDescendant(data_const_section_sp.get())) { 2169 if (symbol_sect_name && 2170 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 2171 type = eSymbolTypeRuntime; 2172 2173 if (symbol_name) { 2174 llvm::StringRef symbol_name_ref(symbol_name); 2175 if (symbol_name_ref.starts_with("OBJC_")) { 2176 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_"); 2177 static const llvm::StringRef g_objc_v2_prefix_metaclass( 2178 "OBJC_METACLASS_$_"); 2179 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_"); 2180 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 2181 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 2182 type = eSymbolTypeObjCClass; 2183 demangled_is_synthesized = true; 2184 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_metaclass)) { 2185 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 2186 type = eSymbolTypeObjCMetaClass; 2187 demangled_is_synthesized = true; 2188 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 2189 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 2190 type = eSymbolTypeObjCIVar; 2191 demangled_is_synthesized = true; 2192 } 2193 } 2194 } 2195 } else if (symbol_sect_name && 2196 ::strstr(symbol_sect_name, "__gcc_except_tab") == 2197 symbol_sect_name) { 2198 type = eSymbolTypeException; 2199 } else { 2200 type = eSymbolTypeData; 2201 } 2202 } else if (symbol_sect_name && 2203 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) { 2204 type = eSymbolTypeTrampoline; 2205 } 2206 return type; 2207 } 2208 2209 static std::optional<struct nlist_64> 2210 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, 2211 size_t nlist_byte_size) { 2212 struct nlist_64 nlist; 
2213 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) 2214 return {}; 2215 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); 2216 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); 2217 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); 2218 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); 2219 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); 2220 return nlist; 2221 } 2222 2223 enum { DebugSymbols = true, NonDebugSymbols = false }; 2224 2225 void ObjectFileMachO::ParseSymtab(Symtab &symtab) { 2226 ModuleSP module_sp(GetModule()); 2227 if (!module_sp) 2228 return; 2229 2230 Log *log = GetLog(LLDBLog::Symbols); 2231 2232 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec(); 2233 const char *file_name = file.GetFilename().AsCString("<Unknown>"); 2234 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name); 2235 LLDB_LOG(log, "Parsing symbol table for {0}", file_name); 2236 Progress progress("Parsing symbol table", file_name); 2237 2238 llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0}; 2239 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; 2240 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0}; 2241 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 2242 llvm::MachO::dysymtab_command dysymtab = m_dysymtab; 2243 // The data element of type bool indicates that this entry is thumb 2244 // code. 2245 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts; 2246 2247 // Record the address of every function/data that we add to the symtab. 2248 // We add symbols to the table in the order of most information (nlist 2249 // records) to least (function starts), and avoid duplicating symbols 2250 // via this set. 
2251 llvm::DenseSet<addr_t> symbols_added; 2252 2253 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we 2254 // do not add the tombstone or empty keys to the set. 2255 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) { 2256 // Don't add the tombstone or empty keys. 2257 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1) 2258 return; 2259 symbols_added.insert(file_addr); 2260 }; 2261 FunctionStarts function_starts; 2262 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 2263 uint32_t i; 2264 FileSpecList dylib_files; 2265 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_"); 2266 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_"); 2267 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 2268 UUID image_uuid; 2269 2270 for (i = 0; i < m_header.ncmds; ++i) { 2271 const lldb::offset_t cmd_offset = offset; 2272 // Read in the load command and load command size 2273 llvm::MachO::load_command lc; 2274 if (m_data.GetU32(&offset, &lc, 2) == nullptr) 2275 break; 2276 // Watch for the symbol table load command 2277 switch (lc.cmd) { 2278 case LC_SYMTAB: 2279 symtab_load_command.cmd = lc.cmd; 2280 symtab_load_command.cmdsize = lc.cmdsize; 2281 // Read in the rest of the symtab load command 2282 if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) == 2283 nullptr) // fill in symoff, nsyms, stroff, strsize fields 2284 return; 2285 break; 2286 2287 case LC_DYLD_INFO: 2288 case LC_DYLD_INFO_ONLY: 2289 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) { 2290 dyld_info.cmd = lc.cmd; 2291 dyld_info.cmdsize = lc.cmdsize; 2292 } else { 2293 memset(&dyld_info, 0, sizeof(dyld_info)); 2294 } 2295 break; 2296 2297 case LC_LOAD_DYLIB: 2298 case LC_LOAD_WEAK_DYLIB: 2299 case LC_REEXPORT_DYLIB: 2300 case LC_LOADFVMLIB: 2301 case LC_LOAD_UPWARD_DYLIB: { 2302 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 2303 const char *path = m_data.PeekCStr(name_offset); 2304 if (path) { 2305 FileSpec 
file_spec(path); 2306 // Strip the path if there is @rpath, @executable, etc so we just use 2307 // the basename 2308 if (path[0] == '@') 2309 file_spec.ClearDirectory(); 2310 2311 if (lc.cmd == LC_REEXPORT_DYLIB) { 2312 m_reexported_dylibs.AppendIfUnique(file_spec); 2313 } 2314 2315 dylib_files.Append(file_spec); 2316 } 2317 } break; 2318 2319 case LC_DYLD_EXPORTS_TRIE: 2320 exports_trie_load_command.cmd = lc.cmd; 2321 exports_trie_load_command.cmdsize = lc.cmdsize; 2322 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) == 2323 nullptr) // fill in offset and size fields 2324 memset(&exports_trie_load_command, 0, 2325 sizeof(exports_trie_load_command)); 2326 break; 2327 case LC_FUNCTION_STARTS: 2328 function_starts_load_command.cmd = lc.cmd; 2329 function_starts_load_command.cmdsize = lc.cmdsize; 2330 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) == 2331 nullptr) // fill in data offset and size fields 2332 memset(&function_starts_load_command, 0, 2333 sizeof(function_starts_load_command)); 2334 break; 2335 2336 case LC_UUID: { 2337 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); 2338 2339 if (uuid_bytes) 2340 image_uuid = UUID(uuid_bytes, 16); 2341 break; 2342 } 2343 2344 default: 2345 break; 2346 } 2347 offset = cmd_offset + lc.cmdsize; 2348 } 2349 2350 if (!symtab_load_command.cmd) 2351 return; 2352 2353 SectionList *section_list = GetSectionList(); 2354 if (section_list == nullptr) 2355 return; 2356 2357 const uint32_t addr_byte_size = m_data.GetAddressByteSize(); 2358 const ByteOrder byte_order = m_data.GetByteOrder(); 2359 bool bit_width_32 = addr_byte_size == 4; 2360 const size_t nlist_byte_size = 2361 bit_width_32 ? 
sizeof(struct nlist) : sizeof(struct nlist_64); 2362 2363 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size); 2364 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size); 2365 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size); 2366 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order, 2367 addr_byte_size); 2368 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size); 2369 2370 const addr_t nlist_data_byte_size = 2371 symtab_load_command.nsyms * nlist_byte_size; 2372 const addr_t strtab_data_byte_size = symtab_load_command.strsize; 2373 addr_t strtab_addr = LLDB_INVALID_ADDRESS; 2374 2375 ProcessSP process_sp(m_process_wp.lock()); 2376 Process *process = process_sp.get(); 2377 2378 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete; 2379 bool is_shared_cache_image = IsSharedCacheBinary(); 2380 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory(); 2381 SectionSP linkedit_section_sp( 2382 section_list->FindSectionByName(GetSegmentNameLINKEDIT())); 2383 2384 if (process && m_header.filetype != llvm::MachO::MH_OBJECT && 2385 !is_local_shared_cache_image) { 2386 Target &target = process->GetTarget(); 2387 2388 memory_module_load_level = target.GetMemoryModuleLoadLevel(); 2389 2390 // Reading mach file from memory in a process or core file... 2391 2392 if (linkedit_section_sp) { 2393 addr_t linkedit_load_addr = 2394 linkedit_section_sp->GetLoadBaseAddress(&target); 2395 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) { 2396 // We might be trying to access the symbol table before the 2397 // __LINKEDIT's load address has been set in the target. 
We can't 2398 // fail to read the symbol table, so calculate the right address 2399 // manually 2400 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage( 2401 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get()); 2402 } 2403 2404 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset(); 2405 const addr_t symoff_addr = linkedit_load_addr + 2406 symtab_load_command.symoff - 2407 linkedit_file_offset; 2408 strtab_addr = linkedit_load_addr + symtab_load_command.stroff - 2409 linkedit_file_offset; 2410 2411 // Always load dyld - the dynamic linker - from memory if we didn't 2412 // find a binary anywhere else. lldb will not register 2413 // dylib/framework/bundle loads/unloads if we don't have the dyld 2414 // symbols, we force dyld to load from memory despite the user's 2415 // target.memory-module-load-level setting. 2416 if (memory_module_load_level == eMemoryModuleLoadLevelComplete || 2417 m_header.filetype == llvm::MachO::MH_DYLINKER) { 2418 DataBufferSP nlist_data_sp( 2419 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size)); 2420 if (nlist_data_sp) 2421 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize()); 2422 if (dysymtab.nindirectsyms != 0) { 2423 const addr_t indirect_syms_addr = linkedit_load_addr + 2424 dysymtab.indirectsymoff - 2425 linkedit_file_offset; 2426 DataBufferSP indirect_syms_data_sp(ReadMemory( 2427 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4)); 2428 if (indirect_syms_data_sp) 2429 indirect_symbol_index_data.SetData( 2430 indirect_syms_data_sp, 0, 2431 indirect_syms_data_sp->GetByteSize()); 2432 // If this binary is outside the shared cache, 2433 // cache the string table. 2434 // Binaries in the shared cache all share a giant string table, 2435 // and we can't share the string tables across multiple 2436 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab 2437 // for every binary in the shared cache - it would be a big perf 2438 // problem. 
For binaries outside the shared cache, it's faster to 2439 // read the entire strtab at once instead of piece-by-piece as we 2440 // process the nlist records. 2441 if (!is_shared_cache_image) { 2442 DataBufferSP strtab_data_sp( 2443 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size)); 2444 if (strtab_data_sp) { 2445 strtab_data.SetData(strtab_data_sp, 0, 2446 strtab_data_sp->GetByteSize()); 2447 } 2448 } 2449 } 2450 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) { 2451 if (function_starts_load_command.cmd) { 2452 const addr_t func_start_addr = 2453 linkedit_load_addr + function_starts_load_command.dataoff - 2454 linkedit_file_offset; 2455 DataBufferSP func_start_data_sp( 2456 ReadMemory(process_sp, func_start_addr, 2457 function_starts_load_command.datasize)); 2458 if (func_start_data_sp) 2459 function_starts_data.SetData(func_start_data_sp, 0, 2460 func_start_data_sp->GetByteSize()); 2461 } 2462 } 2463 } 2464 } 2465 } else { 2466 if (is_local_shared_cache_image) { 2467 // The load commands in shared cache images are relative to the 2468 // beginning of the shared cache, not the library image. The 2469 // data we get handed when creating the ObjectFileMachO starts 2470 // at the beginning of a specific library and spans to the end 2471 // of the cache to be able to reach the shared LINKEDIT 2472 // segments. We need to convert the load command offsets to be 2473 // relative to the beginning of our specific image. 
2474 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset(); 2475 lldb::offset_t linkedit_slide = 2476 linkedit_offset - m_linkedit_original_offset; 2477 symtab_load_command.symoff += linkedit_slide; 2478 symtab_load_command.stroff += linkedit_slide; 2479 dyld_info.export_off += linkedit_slide; 2480 dysymtab.indirectsymoff += linkedit_slide; 2481 function_starts_load_command.dataoff += linkedit_slide; 2482 exports_trie_load_command.dataoff += linkedit_slide; 2483 } 2484 2485 nlist_data.SetData(m_data, symtab_load_command.symoff, 2486 nlist_data_byte_size); 2487 strtab_data.SetData(m_data, symtab_load_command.stroff, 2488 strtab_data_byte_size); 2489 2490 // We shouldn't have exports data from both the LC_DYLD_INFO command 2491 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary: 2492 lldbassert(!((dyld_info.export_size > 0) 2493 && (exports_trie_load_command.datasize > 0))); 2494 if (dyld_info.export_size > 0) { 2495 dyld_trie_data.SetData(m_data, dyld_info.export_off, 2496 dyld_info.export_size); 2497 } else if (exports_trie_load_command.datasize > 0) { 2498 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff, 2499 exports_trie_load_command.datasize); 2500 } 2501 2502 if (dysymtab.nindirectsyms != 0) { 2503 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff, 2504 dysymtab.nindirectsyms * 4); 2505 } 2506 if (function_starts_load_command.cmd) { 2507 function_starts_data.SetData(m_data, function_starts_load_command.dataoff, 2508 function_starts_load_command.datasize); 2509 } 2510 } 2511 2512 const bool have_strtab_data = strtab_data.GetByteSize() > 0; 2513 2514 ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); 2515 ConstString g_segment_name_DATA = GetSegmentNameDATA(); 2516 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY(); 2517 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST(); 2518 ConstString g_segment_name_OBJC = GetSegmentNameOBJC(); 2519 ConstString g_section_name_eh_frame 
= GetSectionNameEHFrame(); 2520 SectionSP text_section_sp( 2521 section_list->FindSectionByName(g_segment_name_TEXT)); 2522 SectionSP data_section_sp( 2523 section_list->FindSectionByName(g_segment_name_DATA)); 2524 SectionSP data_dirty_section_sp( 2525 section_list->FindSectionByName(g_segment_name_DATA_DIRTY)); 2526 SectionSP data_const_section_sp( 2527 section_list->FindSectionByName(g_segment_name_DATA_CONST)); 2528 SectionSP objc_section_sp( 2529 section_list->FindSectionByName(g_segment_name_OBJC)); 2530 SectionSP eh_frame_section_sp; 2531 if (text_section_sp.get()) 2532 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName( 2533 g_section_name_eh_frame); 2534 else 2535 eh_frame_section_sp = 2536 section_list->FindSectionByName(g_section_name_eh_frame); 2537 2538 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM); 2539 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions(); 2540 2541 // lldb works best if it knows the start address of all functions in a 2542 // module. Linker symbols or debug info are normally the best source of 2543 // information for start addr / size but they may be stripped in a released 2544 // binary. Two additional sources of information exist in Mach-O binaries: 2545 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each 2546 // function's start address in the 2547 // binary, relative to the text section. 2548 // eh_frame - the eh_frame FDEs have the start addr & size of 2549 // each function 2550 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on 2551 // all modern binaries. 2552 // Binaries built to run on older releases may need to use eh_frame 2553 // information. 
2554 2555 if (text_section_sp && function_starts_data.GetByteSize()) { 2556 FunctionStarts::Entry function_start_entry; 2557 function_start_entry.data = false; 2558 lldb::offset_t function_start_offset = 0; 2559 function_start_entry.addr = text_section_sp->GetFileAddress(); 2560 uint64_t delta; 2561 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) > 2562 0) { 2563 // Now append the current entry 2564 function_start_entry.addr += delta; 2565 if (is_arm) { 2566 if (function_start_entry.addr & 1) { 2567 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2568 function_start_entry.data = true; 2569 } else if (always_thumb) { 2570 function_start_entry.data = true; 2571 } 2572 } 2573 function_starts.Append(function_start_entry); 2574 } 2575 } else { 2576 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the 2577 // load command claiming an eh_frame but it doesn't actually have the 2578 // eh_frame content. And if we have a dSYM, we don't need to do any of 2579 // this fill-in-the-missing-symbols works anyway - the debug info should 2580 // give us all the functions in the module. 
2581 if (text_section_sp.get() && eh_frame_section_sp.get() && 2582 m_type != eTypeDebugInfo) { 2583 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp, 2584 DWARFCallFrameInfo::EH); 2585 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions; 2586 eh_frame.GetFunctionAddressAndSizeVector(functions); 2587 addr_t text_base_addr = text_section_sp->GetFileAddress(); 2588 size_t count = functions.GetSize(); 2589 for (size_t i = 0; i < count; ++i) { 2590 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func = 2591 functions.GetEntryAtIndex(i); 2592 if (func) { 2593 FunctionStarts::Entry function_start_entry; 2594 function_start_entry.addr = func->base - text_base_addr; 2595 if (is_arm) { 2596 if (function_start_entry.addr & 1) { 2597 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2598 function_start_entry.data = true; 2599 } else if (always_thumb) { 2600 function_start_entry.data = true; 2601 } 2602 } 2603 function_starts.Append(function_start_entry); 2604 } 2605 } 2606 } 2607 } 2608 2609 const size_t function_starts_count = function_starts.GetSize(); 2610 2611 // For user process binaries (executables, dylibs, frameworks, bundles), if 2612 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're 2613 // going to assume the binary has been stripped. Don't allow assembly 2614 // language instruction emulation because we don't know proper function 2615 // start boundaries. 2616 // 2617 // For all other types of binaries (kernels, stand-alone bare board 2618 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame 2619 // sections - we should not make any assumptions about them based on that. 
2620 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) { 2621 m_allow_assembly_emulation_unwind_plans = false; 2622 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind)); 2623 2624 if (unwind_or_symbol_log) 2625 module_sp->LogMessage( 2626 unwind_or_symbol_log, 2627 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds"); 2628 } 2629 2630 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get() 2631 ? eh_frame_section_sp->GetID() 2632 : static_cast<user_id_t>(NO_SECT); 2633 2634 uint32_t N_SO_index = UINT32_MAX; 2635 2636 MachSymtabSectionInfo section_info(section_list); 2637 std::vector<uint32_t> N_FUN_indexes; 2638 std::vector<uint32_t> N_NSYM_indexes; 2639 std::vector<uint32_t> N_INCL_indexes; 2640 std::vector<uint32_t> N_BRAC_indexes; 2641 std::vector<uint32_t> N_COMM_indexes; 2642 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap; 2643 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap; 2644 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap; 2645 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx; 2646 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx; 2647 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx; 2648 // Any symbols that get merged into another will get an entry in this map 2649 // so we know 2650 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx; 2651 uint32_t nlist_idx = 0; 2652 Symbol *symbol_ptr = nullptr; 2653 2654 uint32_t sym_idx = 0; 2655 Symbol *sym = nullptr; 2656 size_t num_syms = 0; 2657 std::string memory_symbol_name; 2658 uint32_t unmapped_local_symbols_found = 0; 2659 2660 std::vector<TrieEntryWithOffset> reexport_trie_entries; 2661 std::vector<TrieEntryWithOffset> external_sym_trie_entries; 2662 std::set<lldb::addr_t> resolver_addresses; 2663 2664 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize(); 2665 if (dyld_trie_data_size > 0) { 2666 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size); 2667 
SectionSP text_segment_sp = 2668 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 2669 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS; 2670 if (text_segment_sp) 2671 text_segment_file_addr = text_segment_sp->GetFileAddress(); 2672 std::vector<llvm::StringRef> nameSlices; 2673 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr, 2674 nameSlices, resolver_addresses, reexport_trie_entries, 2675 external_sym_trie_entries); 2676 } 2677 2678 typedef std::set<ConstString> IndirectSymbols; 2679 IndirectSymbols indirect_symbol_names; 2680 2681 #if TARGET_OS_IPHONE 2682 2683 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been 2684 // optimized by moving LOCAL symbols out of the memory mapped portion of 2685 // the DSC. The symbol information has all been retained, but it isn't 2686 // available in the normal nlist data. However, there *are* duplicate 2687 // entries of *some* 2688 // LOCAL symbols in the normal nlist data. To handle this situation 2689 // correctly, we must first attempt 2690 // to parse any DSC unmapped symbol information. If we find any, we set a 2691 // flag that tells the normal nlist parser to ignore all LOCAL symbols. 2692 2693 if (IsSharedCacheBinary()) { 2694 // Before we can start mapping the DSC, we need to make certain the 2695 // target process is actually using the cache we can find. 2696 2697 // Next we need to determine the correct path for the dyld shared cache. 
2698 2699 ArchSpec header_arch = GetArchitecture(); 2700 2701 UUID dsc_uuid; 2702 UUID process_shared_cache_uuid; 2703 addr_t process_shared_cache_base_addr; 2704 2705 if (process) { 2706 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr, 2707 process_shared_cache_uuid); 2708 } 2709 2710 __block bool found_image = false; 2711 __block void *nlist_buffer = nullptr; 2712 __block unsigned nlist_count = 0; 2713 __block char *string_table = nullptr; 2714 __block vm_offset_t vm_nlist_memory = 0; 2715 __block mach_msg_type_number_t vm_nlist_bytes_read = 0; 2716 __block vm_offset_t vm_string_memory = 0; 2717 __block mach_msg_type_number_t vm_string_bytes_read = 0; 2718 2719 auto _ = llvm::make_scope_exit(^{ 2720 if (vm_nlist_memory) 2721 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read); 2722 if (vm_string_memory) 2723 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read); 2724 }); 2725 2726 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 2727 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 2728 UndefinedNameToDescMap undefined_name_to_desc; 2729 SymbolIndexToName reexport_shlib_needs_fixup; 2730 2731 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) { 2732 uuid_t cache_uuid; 2733 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid); 2734 if (found_image) 2735 return; 2736 2737 if (process_shared_cache_uuid.IsValid() && 2738 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16)) 2739 return; 2740 2741 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) { 2742 uuid_t dsc_image_uuid; 2743 if (found_image) 2744 return; 2745 2746 dyld_image_copy_uuid(image, &dsc_image_uuid); 2747 if (image_uuid != UUID::fromData(dsc_image_uuid, 16)) 2748 return; 2749 2750 found_image = true; 2751 2752 // Compute the size of the string table. We need to ask dyld for a 2753 // new SPI to avoid this step. 
2754 dyld_image_local_nlist_content_4Symbolication( 2755 image, ^(const void *nlistStart, uint64_t nlistCount, 2756 const char *stringTable) { 2757 if (!nlistStart || !nlistCount) 2758 return; 2759 2760 // The buffers passed here are valid only inside the block. 2761 // Use vm_read to make a cheap copy of them available for our 2762 // processing later. 2763 kern_return_t ret = 2764 vm_read(mach_task_self(), (vm_address_t)nlistStart, 2765 nlist_byte_size * nlistCount, &vm_nlist_memory, 2766 &vm_nlist_bytes_read); 2767 if (ret != KERN_SUCCESS) 2768 return; 2769 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount); 2770 2771 // We don't know the size of the string table. It's cheaper 2772 // to map the whole VM region than to determine the size by 2773 // parsing all the nlist entries. 2774 vm_address_t string_address = (vm_address_t)stringTable; 2775 vm_size_t region_size; 2776 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; 2777 vm_region_basic_info_data_t info; 2778 memory_object_name_t object; 2779 ret = vm_region_64(mach_task_self(), &string_address, 2780 ®ion_size, VM_REGION_BASIC_INFO_64, 2781 (vm_region_info_t)&info, &info_count, &object); 2782 if (ret != KERN_SUCCESS) 2783 return; 2784 2785 ret = vm_read(mach_task_self(), (vm_address_t)stringTable, 2786 region_size - 2787 ((vm_address_t)stringTable - string_address), 2788 &vm_string_memory, &vm_string_bytes_read); 2789 if (ret != KERN_SUCCESS) 2790 return; 2791 2792 nlist_buffer = (void *)vm_nlist_memory; 2793 string_table = (char *)vm_string_memory; 2794 nlist_count = nlistCount; 2795 }); 2796 }); 2797 }); 2798 if (nlist_buffer) { 2799 DataExtractor dsc_local_symbols_data(nlist_buffer, 2800 nlist_count * nlist_byte_size, 2801 byte_order, addr_byte_size); 2802 unmapped_local_symbols_found = nlist_count; 2803 2804 // The normal nlist code cannot correctly size the Symbols 2805 // array, we need to allocate it here. 
2806 sym = symtab.Resize( 2807 symtab_load_command.nsyms + m_dysymtab.nindirectsyms + 2808 unmapped_local_symbols_found - m_dysymtab.nlocalsym); 2809 num_syms = symtab.GetNumSymbols(); 2810 2811 lldb::offset_t nlist_data_offset = 0; 2812 2813 for (uint32_t nlist_index = 0; 2814 nlist_index < nlist_count; 2815 nlist_index++) { 2816 ///////////////////////////// 2817 { 2818 std::optional<struct nlist_64> nlist_maybe = 2819 ParseNList(dsc_local_symbols_data, nlist_data_offset, 2820 nlist_byte_size); 2821 if (!nlist_maybe) 2822 break; 2823 struct nlist_64 nlist = *nlist_maybe; 2824 2825 SymbolType type = eSymbolTypeInvalid; 2826 const char *symbol_name = string_table + nlist.n_strx; 2827 2828 if (symbol_name == NULL) { 2829 // No symbol should be NULL, even the symbols with no 2830 // string values should have an offset zero which 2831 // points to an empty C-string 2832 Debugger::ReportError(llvm::formatv( 2833 "DSC unmapped local symbol[{0}] has invalid " 2834 "string table offset {1:x} in {2}, ignoring symbol", 2835 nlist_index, nlist.n_strx, 2836 module_sp->GetFileSpec().GetPath()); 2837 continue; 2838 } 2839 if (symbol_name[0] == '\0') 2840 symbol_name = NULL; 2841 2842 const char *symbol_name_non_abi_mangled = NULL; 2843 2844 SectionSP symbol_section; 2845 uint32_t symbol_byte_size = 0; 2846 bool add_nlist = true; 2847 bool is_debug = ((nlist.n_type & N_STAB) != 0); 2848 bool demangled_is_synthesized = false; 2849 bool is_gsym = false; 2850 bool set_value = true; 2851 2852 assert(sym_idx < num_syms); 2853 2854 sym[sym_idx].SetDebug(is_debug); 2855 2856 if (is_debug) { 2857 switch (nlist.n_type) { 2858 case N_GSYM: 2859 // global symbol: name,,NO_SECT,type,0 2860 // Sometimes the N_GSYM value contains the address. 2861 2862 // FIXME: In the .o files, we have a GSYM and a debug 2863 // symbol for all the ObjC data. 
They 2864 // have the same address, but we want to ensure that 2865 // we always find only the real symbol, 'cause we 2866 // don't currently correctly attribute the 2867 // GSYM one to the ObjCClass/Ivar/MetaClass 2868 // symbol type. This is a temporary hack to make 2869 // sure the ObjectiveC symbols get treated correctly. 2870 // To do this right, we should coalesce all the GSYM 2871 // & global symbols that have the same address. 2872 2873 is_gsym = true; 2874 sym[sym_idx].SetExternal(true); 2875 2876 if (symbol_name && symbol_name[0] == '_' && 2877 symbol_name[1] == 'O') { 2878 llvm::StringRef symbol_name_ref(symbol_name); 2879 if (symbol_name_ref.starts_with( 2880 g_objc_v2_prefix_class)) { 2881 symbol_name_non_abi_mangled = symbol_name + 1; 2882 symbol_name = 2883 symbol_name + g_objc_v2_prefix_class.size(); 2884 type = eSymbolTypeObjCClass; 2885 demangled_is_synthesized = true; 2886 2887 } else if (symbol_name_ref.starts_with( 2888 g_objc_v2_prefix_metaclass)) { 2889 symbol_name_non_abi_mangled = symbol_name + 1; 2890 symbol_name = 2891 symbol_name + g_objc_v2_prefix_metaclass.size(); 2892 type = eSymbolTypeObjCMetaClass; 2893 demangled_is_synthesized = true; 2894 } else if (symbol_name_ref.starts_with( 2895 g_objc_v2_prefix_ivar)) { 2896 symbol_name_non_abi_mangled = symbol_name + 1; 2897 symbol_name = 2898 symbol_name + g_objc_v2_prefix_ivar.size(); 2899 type = eSymbolTypeObjCIVar; 2900 demangled_is_synthesized = true; 2901 } 2902 } else { 2903 if (nlist.n_value != 0) 2904 symbol_section = section_info.GetSection( 2905 nlist.n_sect, nlist.n_value); 2906 type = eSymbolTypeData; 2907 } 2908 break; 2909 2910 case N_FNAME: 2911 // procedure name (f77 kludge): name,,NO_SECT,0,0 2912 type = eSymbolTypeCompiler; 2913 break; 2914 2915 case N_FUN: 2916 // procedure: name,,n_sect,linenumber,address 2917 if (symbol_name) { 2918 type = eSymbolTypeCode; 2919 symbol_section = section_info.GetSection( 2920 nlist.n_sect, nlist.n_value); 2921 2922 
N_FUN_addr_to_sym_idx.insert( 2923 std::make_pair(nlist.n_value, sym_idx)); 2924 // We use the current number of symbols in the 2925 // symbol table in lieu of using nlist_idx in case 2926 // we ever start trimming entries out 2927 N_FUN_indexes.push_back(sym_idx); 2928 } else { 2929 type = eSymbolTypeCompiler; 2930 2931 if (!N_FUN_indexes.empty()) { 2932 // Copy the size of the function into the 2933 // original 2934 // STAB entry so we don't have 2935 // to hunt for it later 2936 symtab.SymbolAtIndex(N_FUN_indexes.back()) 2937 ->SetByteSize(nlist.n_value); 2938 N_FUN_indexes.pop_back(); 2939 // We don't really need the end function STAB as 2940 // it contains the size which we already placed 2941 // with the original symbol, so don't add it if 2942 // we want a minimal symbol table 2943 add_nlist = false; 2944 } 2945 } 2946 break; 2947 2948 case N_STSYM: 2949 // static symbol: name,,n_sect,type,address 2950 N_STSYM_addr_to_sym_idx.insert( 2951 std::make_pair(nlist.n_value, sym_idx)); 2952 symbol_section = section_info.GetSection(nlist.n_sect, 2953 nlist.n_value); 2954 if (symbol_name && symbol_name[0]) { 2955 type = ObjectFile::GetSymbolTypeFromName( 2956 symbol_name + 1, eSymbolTypeData); 2957 } 2958 break; 2959 2960 case N_LCSYM: 2961 // .lcomm symbol: name,,n_sect,type,address 2962 symbol_section = section_info.GetSection(nlist.n_sect, 2963 nlist.n_value); 2964 type = eSymbolTypeCommonBlock; 2965 break; 2966 2967 case N_BNSYM: 2968 // We use the current number of symbols in the symbol 2969 // table in lieu of using nlist_idx in case we ever 2970 // start trimming entries out Skip these if we want 2971 // minimal symbol tables 2972 add_nlist = false; 2973 break; 2974 2975 case N_ENSYM: 2976 // Set the size of the N_BNSYM to the terminating 2977 // index of this N_ENSYM so that we can always skip 2978 // the entire symbol if we need to navigate more 2979 // quickly at the source level when parsing STABS 2980 // Skip these if we want minimal symbol tables 2981 
add_nlist = false; 2982 break; 2983 2984 case N_OPT: 2985 // emitted with gcc2_compiled and in gcc source 2986 type = eSymbolTypeCompiler; 2987 break; 2988 2989 case N_RSYM: 2990 // register sym: name,,NO_SECT,type,register 2991 type = eSymbolTypeVariable; 2992 break; 2993 2994 case N_SLINE: 2995 // src line: 0,,n_sect,linenumber,address 2996 symbol_section = section_info.GetSection(nlist.n_sect, 2997 nlist.n_value); 2998 type = eSymbolTypeLineEntry; 2999 break; 3000 3001 case N_SSYM: 3002 // structure elt: name,,NO_SECT,type,struct_offset 3003 type = eSymbolTypeVariableType; 3004 break; 3005 3006 case N_SO: 3007 // source file name 3008 type = eSymbolTypeSourceFile; 3009 if (symbol_name == NULL) { 3010 add_nlist = false; 3011 if (N_SO_index != UINT32_MAX) { 3012 // Set the size of the N_SO to the terminating 3013 // index of this N_SO so that we can always skip 3014 // the entire N_SO if we need to navigate more 3015 // quickly at the source level when parsing STABS 3016 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3017 symbol_ptr->SetByteSize(sym_idx); 3018 symbol_ptr->SetSizeIsSibling(true); 3019 } 3020 N_NSYM_indexes.clear(); 3021 N_INCL_indexes.clear(); 3022 N_BRAC_indexes.clear(); 3023 N_COMM_indexes.clear(); 3024 N_FUN_indexes.clear(); 3025 N_SO_index = UINT32_MAX; 3026 } else { 3027 // We use the current number of symbols in the 3028 // symbol table in lieu of using nlist_idx in case 3029 // we ever start trimming entries out 3030 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3031 if (N_SO_has_full_path) { 3032 if ((N_SO_index == sym_idx - 1) && 3033 ((sym_idx - 1) < num_syms)) { 3034 // We have two consecutive N_SO entries where 3035 // the first contains a directory and the 3036 // second contains a full path. 
3037 sym[sym_idx - 1].GetMangled().SetValue( 3038 ConstString(symbol_name)); 3039 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3040 add_nlist = false; 3041 } else { 3042 // This is the first entry in a N_SO that 3043 // contains a directory or 3044 // a full path to the source file 3045 N_SO_index = sym_idx; 3046 } 3047 } else if ((N_SO_index == sym_idx - 1) && 3048 ((sym_idx - 1) < num_syms)) { 3049 // This is usually the second N_SO entry that 3050 // contains just the filename, so here we combine 3051 // it with the first one if we are minimizing the 3052 // symbol table 3053 const char *so_path = sym[sym_idx - 1] 3054 .GetMangled() 3055 .GetDemangledName() 3056 .AsCString(); 3057 if (so_path && so_path[0]) { 3058 std::string full_so_path(so_path); 3059 const size_t double_slash_pos = 3060 full_so_path.find("//"); 3061 if (double_slash_pos != std::string::npos) { 3062 // The linker has been generating bad N_SO 3063 // entries with doubled up paths 3064 // in the format "%s%s" where the first 3065 // string in the DW_AT_comp_dir, and the 3066 // second is the directory for the source 3067 // file so you end up with a path that looks 3068 // like "/tmp/src//tmp/src/" 3069 FileSpec so_dir(so_path); 3070 if (!FileSystem::Instance().Exists(so_dir)) { 3071 so_dir.SetFile( 3072 &full_so_path[double_slash_pos + 1], 3073 FileSpec::Style::native); 3074 if (FileSystem::Instance().Exists(so_dir)) { 3075 // Trim off the incorrect path 3076 full_so_path.erase(0, double_slash_pos + 1); 3077 } 3078 } 3079 } 3080 if (*full_so_path.rbegin() != '/') 3081 full_so_path += '/'; 3082 full_so_path += symbol_name; 3083 sym[sym_idx - 1].GetMangled().SetValue( 3084 ConstString(full_so_path.c_str())); 3085 add_nlist = false; 3086 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3087 } 3088 } else { 3089 // This could be a relative path to a N_SO 3090 N_SO_index = sym_idx; 3091 } 3092 } 3093 break; 3094 3095 case N_OSO: 3096 // object file name: name,,0,0,st_mtime 3097 type = 
eSymbolTypeObjectFile; 3098 break; 3099 3100 case N_LSYM: 3101 // local sym: name,,NO_SECT,type,offset 3102 type = eSymbolTypeLocal; 3103 break; 3104 3105 // INCL scopes 3106 case N_BINCL: 3107 // include file beginning: name,,NO_SECT,0,sum We use 3108 // the current number of symbols in the symbol table 3109 // in lieu of using nlist_idx in case we ever start 3110 // trimming entries out 3111 N_INCL_indexes.push_back(sym_idx); 3112 type = eSymbolTypeScopeBegin; 3113 break; 3114 3115 case N_EINCL: 3116 // include file end: name,,NO_SECT,0,0 3117 // Set the size of the N_BINCL to the terminating 3118 // index of this N_EINCL so that we can always skip 3119 // the entire symbol if we need to navigate more 3120 // quickly at the source level when parsing STABS 3121 if (!N_INCL_indexes.empty()) { 3122 symbol_ptr = 3123 symtab.SymbolAtIndex(N_INCL_indexes.back()); 3124 symbol_ptr->SetByteSize(sym_idx + 1); 3125 symbol_ptr->SetSizeIsSibling(true); 3126 N_INCL_indexes.pop_back(); 3127 } 3128 type = eSymbolTypeScopeEnd; 3129 break; 3130 3131 case N_SOL: 3132 // #included file name: name,,n_sect,0,address 3133 type = eSymbolTypeHeaderFile; 3134 3135 // We currently don't use the header files on darwin 3136 add_nlist = false; 3137 break; 3138 3139 case N_PARAMS: 3140 // compiler parameters: name,,NO_SECT,0,0 3141 type = eSymbolTypeCompiler; 3142 break; 3143 3144 case N_VERSION: 3145 // compiler version: name,,NO_SECT,0,0 3146 type = eSymbolTypeCompiler; 3147 break; 3148 3149 case N_OLEVEL: 3150 // compiler -O level: name,,NO_SECT,0,0 3151 type = eSymbolTypeCompiler; 3152 break; 3153 3154 case N_PSYM: 3155 // parameter: name,,NO_SECT,type,offset 3156 type = eSymbolTypeVariable; 3157 break; 3158 3159 case N_ENTRY: 3160 // alternate entry: name,,n_sect,linenumber,address 3161 symbol_section = section_info.GetSection(nlist.n_sect, 3162 nlist.n_value); 3163 type = eSymbolTypeLineEntry; 3164 break; 3165 3166 // Left and Right Braces 3167 case N_LBRAC: 3168 // left bracket: 
0,,NO_SECT,nesting level,address We 3169 // use the current number of symbols in the symbol 3170 // table in lieu of using nlist_idx in case we ever 3171 // start trimming entries out 3172 symbol_section = section_info.GetSection(nlist.n_sect, 3173 nlist.n_value); 3174 N_BRAC_indexes.push_back(sym_idx); 3175 type = eSymbolTypeScopeBegin; 3176 break; 3177 3178 case N_RBRAC: 3179 // right bracket: 0,,NO_SECT,nesting level,address 3180 // Set the size of the N_LBRAC to the terminating 3181 // index of this N_RBRAC so that we can always skip 3182 // the entire symbol if we need to navigate more 3183 // quickly at the source level when parsing STABS 3184 symbol_section = section_info.GetSection(nlist.n_sect, 3185 nlist.n_value); 3186 if (!N_BRAC_indexes.empty()) { 3187 symbol_ptr = 3188 symtab.SymbolAtIndex(N_BRAC_indexes.back()); 3189 symbol_ptr->SetByteSize(sym_idx + 1); 3190 symbol_ptr->SetSizeIsSibling(true); 3191 N_BRAC_indexes.pop_back(); 3192 } 3193 type = eSymbolTypeScopeEnd; 3194 break; 3195 3196 case N_EXCL: 3197 // deleted include file: name,,NO_SECT,0,sum 3198 type = eSymbolTypeHeaderFile; 3199 break; 3200 3201 // COMM scopes 3202 case N_BCOMM: 3203 // begin common: name,,NO_SECT,0,0 3204 // We use the current number of symbols in the symbol 3205 // table in lieu of using nlist_idx in case we ever 3206 // start trimming entries out 3207 type = eSymbolTypeScopeBegin; 3208 N_COMM_indexes.push_back(sym_idx); 3209 break; 3210 3211 case N_ECOML: 3212 // end common (local name): 0,,n_sect,0,address 3213 symbol_section = section_info.GetSection(nlist.n_sect, 3214 nlist.n_value); 3215 // Fall through 3216 3217 case N_ECOMM: 3218 // end common: name,,n_sect,0,0 3219 // Set the size of the N_BCOMM to the terminating 3220 // index of this N_ECOMM/N_ECOML so that we can 3221 // always skip the entire symbol if we need to 3222 // navigate more quickly at the source level when 3223 // parsing STABS 3224 if (!N_COMM_indexes.empty()) { 3225 symbol_ptr = 3226 
symtab.SymbolAtIndex(N_COMM_indexes.back()); 3227 symbol_ptr->SetByteSize(sym_idx + 1); 3228 symbol_ptr->SetSizeIsSibling(true); 3229 N_COMM_indexes.pop_back(); 3230 } 3231 type = eSymbolTypeScopeEnd; 3232 break; 3233 3234 case N_LENG: 3235 // second stab entry with length information 3236 type = eSymbolTypeAdditional; 3237 break; 3238 3239 default: 3240 break; 3241 } 3242 } else { 3243 // uint8_t n_pext = N_PEXT & nlist.n_type; 3244 uint8_t n_type = N_TYPE & nlist.n_type; 3245 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 3246 3247 switch (n_type) { 3248 case N_INDR: { 3249 const char *reexport_name_cstr = 3250 strtab_data.PeekCStr(nlist.n_value); 3251 if (reexport_name_cstr && reexport_name_cstr[0]) { 3252 type = eSymbolTypeReExported; 3253 ConstString reexport_name( 3254 reexport_name_cstr + 3255 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 3256 sym[sym_idx].SetReExportedSymbolName(reexport_name); 3257 set_value = false; 3258 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 3259 indirect_symbol_names.insert(ConstString( 3260 symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 3261 } else 3262 type = eSymbolTypeUndefined; 3263 } break; 3264 3265 case N_UNDF: 3266 if (symbol_name && symbol_name[0]) { 3267 ConstString undefined_name( 3268 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)); 3269 undefined_name_to_desc[undefined_name] = nlist.n_desc; 3270 } 3271 // Fall through 3272 case N_PBUD: 3273 type = eSymbolTypeUndefined; 3274 break; 3275 3276 case N_ABS: 3277 type = eSymbolTypeAbsolute; 3278 break; 3279 3280 case N_SECT: { 3281 symbol_section = section_info.GetSection(nlist.n_sect, 3282 nlist.n_value); 3283 3284 if (symbol_section == NULL) { 3285 // TODO: warn about this? 
3286 add_nlist = false; 3287 break; 3288 } 3289 3290 if (TEXT_eh_frame_sectID == nlist.n_sect) { 3291 type = eSymbolTypeException; 3292 } else { 3293 uint32_t section_type = 3294 symbol_section->Get() & SECTION_TYPE; 3295 3296 switch (section_type) { 3297 case S_CSTRING_LITERALS: 3298 type = eSymbolTypeData; 3299 break; // section with only literal C strings 3300 case S_4BYTE_LITERALS: 3301 type = eSymbolTypeData; 3302 break; // section with only 4 byte literals 3303 case S_8BYTE_LITERALS: 3304 type = eSymbolTypeData; 3305 break; // section with only 8 byte literals 3306 case S_LITERAL_POINTERS: 3307 type = eSymbolTypeTrampoline; 3308 break; // section with only pointers to literals 3309 case S_NON_LAZY_SYMBOL_POINTERS: 3310 type = eSymbolTypeTrampoline; 3311 break; // section with only non-lazy symbol 3312 // pointers 3313 case S_LAZY_SYMBOL_POINTERS: 3314 type = eSymbolTypeTrampoline; 3315 break; // section with only lazy symbol pointers 3316 case S_SYMBOL_STUBS: 3317 type = eSymbolTypeTrampoline; 3318 break; // section with only symbol stubs, byte 3319 // size of stub in the reserved2 field 3320 case S_MOD_INIT_FUNC_POINTERS: 3321 type = eSymbolTypeCode; 3322 break; // section with only function pointers for 3323 // initialization 3324 case S_MOD_TERM_FUNC_POINTERS: 3325 type = eSymbolTypeCode; 3326 break; // section with only function pointers for 3327 // termination 3328 case S_INTERPOSING: 3329 type = eSymbolTypeTrampoline; 3330 break; // section with only pairs of function 3331 // pointers for interposing 3332 case S_16BYTE_LITERALS: 3333 type = eSymbolTypeData; 3334 break; // section with only 16 byte literals 3335 case S_DTRACE_DOF: 3336 type = eSymbolTypeInstrumentation; 3337 break; 3338 case S_LAZY_DYLIB_SYMBOL_POINTERS: 3339 type = eSymbolTypeTrampoline; 3340 break; 3341 default: 3342 switch (symbol_section->GetType()) { 3343 case lldb::eSectionTypeCode: 3344 type = eSymbolTypeCode; 3345 break; 3346 case eSectionTypeData: 3347 case 
eSectionTypeDataCString: // Inlined C string 3348 // data 3349 case eSectionTypeDataCStringPointers: // Pointers 3350 // to C 3351 // string 3352 // data 3353 case eSectionTypeDataSymbolAddress: // Address of 3354 // a symbol in 3355 // the symbol 3356 // table 3357 case eSectionTypeData4: 3358 case eSectionTypeData8: 3359 case eSectionTypeData16: 3360 type = eSymbolTypeData; 3361 break; 3362 default: 3363 break; 3364 } 3365 break; 3366 } 3367 3368 if (type == eSymbolTypeInvalid) { 3369 const char *symbol_sect_name = 3370 symbol_section->GetName().AsCString(); 3371 if (symbol_section->IsDescendant( 3372 text_section_sp.get())) { 3373 if (symbol_section->IsClear( 3374 S_ATTR_PURE_INSTRUCTIONS | 3375 S_ATTR_SELF_MODIFYING_CODE | 3376 S_ATTR_SOME_INSTRUCTIONS)) 3377 type = eSymbolTypeData; 3378 else 3379 type = eSymbolTypeCode; 3380 } else if (symbol_section->IsDescendant( 3381 data_section_sp.get()) || 3382 symbol_section->IsDescendant( 3383 data_dirty_section_sp.get()) || 3384 symbol_section->IsDescendant( 3385 data_const_section_sp.get())) { 3386 if (symbol_sect_name && 3387 ::strstr(symbol_sect_name, "__objc") == 3388 symbol_sect_name) { 3389 type = eSymbolTypeRuntime; 3390 3391 if (symbol_name) { 3392 llvm::StringRef symbol_name_ref(symbol_name); 3393 if (symbol_name_ref.starts_with("_OBJC_")) { 3394 llvm::StringRef 3395 g_objc_v2_prefix_class( 3396 "_OBJC_CLASS_$_"); 3397 llvm::StringRef 3398 g_objc_v2_prefix_metaclass( 3399 "_OBJC_METACLASS_$_"); 3400 llvm::StringRef 3401 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 3402 if (symbol_name_ref.starts_with( 3403 g_objc_v2_prefix_class)) { 3404 symbol_name_non_abi_mangled = 3405 symbol_name + 1; 3406 symbol_name = 3407 symbol_name + 3408 g_objc_v2_prefix_class.size(); 3409 type = eSymbolTypeObjCClass; 3410 demangled_is_synthesized = true; 3411 } else if ( 3412 symbol_name_ref.starts_with( 3413 g_objc_v2_prefix_metaclass)) { 3414 symbol_name_non_abi_mangled = 3415 symbol_name + 1; 3416 symbol_name = 3417 symbol_name + 
3418 g_objc_v2_prefix_metaclass.size(); 3419 type = eSymbolTypeObjCMetaClass; 3420 demangled_is_synthesized = true; 3421 } else if (symbol_name_ref.starts_with( 3422 g_objc_v2_prefix_ivar)) { 3423 symbol_name_non_abi_mangled = 3424 symbol_name + 1; 3425 symbol_name = 3426 symbol_name + 3427 g_objc_v2_prefix_ivar.size(); 3428 type = eSymbolTypeObjCIVar; 3429 demangled_is_synthesized = true; 3430 } 3431 } 3432 } 3433 } else if (symbol_sect_name && 3434 ::strstr(symbol_sect_name, 3435 "__gcc_except_tab") == 3436 symbol_sect_name) { 3437 type = eSymbolTypeException; 3438 } else { 3439 type = eSymbolTypeData; 3440 } 3441 } else if (symbol_sect_name && 3442 ::strstr(symbol_sect_name, "__IMPORT") == 3443 symbol_sect_name) { 3444 type = eSymbolTypeTrampoline; 3445 } else if (symbol_section->IsDescendant( 3446 objc_section_sp.get())) { 3447 type = eSymbolTypeRuntime; 3448 if (symbol_name && symbol_name[0] == '.') { 3449 llvm::StringRef symbol_name_ref(symbol_name); 3450 llvm::StringRef 3451 g_objc_v1_prefix_class(".objc_class_name_"); 3452 if (symbol_name_ref.starts_with( 3453 g_objc_v1_prefix_class)) { 3454 symbol_name_non_abi_mangled = symbol_name; 3455 symbol_name = symbol_name + 3456 g_objc_v1_prefix_class.size(); 3457 type = eSymbolTypeObjCClass; 3458 demangled_is_synthesized = true; 3459 } 3460 } 3461 } 3462 } 3463 } 3464 } break; 3465 } 3466 } 3467 3468 if (add_nlist) { 3469 uint64_t symbol_value = nlist.n_value; 3470 if (symbol_name_non_abi_mangled) { 3471 sym[sym_idx].GetMangled().SetMangledName( 3472 ConstString(symbol_name_non_abi_mangled)); 3473 sym[sym_idx].GetMangled().SetDemangledName( 3474 ConstString(symbol_name)); 3475 } else { 3476 if (symbol_name && symbol_name[0] == '_') { 3477 symbol_name++; // Skip the leading underscore 3478 } 3479 3480 if (symbol_name) { 3481 ConstString const_symbol_name(symbol_name); 3482 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 3483 if (is_gsym && is_debug) { 3484 const char *gsym_name = 3485 sym[sym_idx] 3486 
.GetMangled() 3487 .GetName(Mangled::ePreferMangled) 3488 .GetCString(); 3489 if (gsym_name) 3490 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 3491 } 3492 } 3493 } 3494 if (symbol_section) { 3495 const addr_t section_file_addr = 3496 symbol_section->GetFileAddress(); 3497 if (symbol_byte_size == 0 && 3498 function_starts_count > 0) { 3499 addr_t symbol_lookup_file_addr = nlist.n_value; 3500 // Do an exact address match for non-ARM addresses, 3501 // else get the closest since the symbol might be a 3502 // thumb symbol which has an address with bit zero 3503 // set 3504 FunctionStarts::Entry *func_start_entry = 3505 function_starts.FindEntry(symbol_lookup_file_addr, 3506 !is_arm); 3507 if (is_arm && func_start_entry) { 3508 // Verify that the function start address is the 3509 // symbol address (ARM) or the symbol address + 1 3510 // (thumb) 3511 if (func_start_entry->addr != 3512 symbol_lookup_file_addr && 3513 func_start_entry->addr != 3514 (symbol_lookup_file_addr + 1)) { 3515 // Not the right entry, NULL it out... 
3516 func_start_entry = NULL; 3517 } 3518 } 3519 if (func_start_entry) { 3520 func_start_entry->data = true; 3521 3522 addr_t symbol_file_addr = func_start_entry->addr; 3523 uint32_t symbol_flags = 0; 3524 if (is_arm) { 3525 if (symbol_file_addr & 1) 3526 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 3527 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3528 } 3529 3530 const FunctionStarts::Entry *next_func_start_entry = 3531 function_starts.FindNextEntry(func_start_entry); 3532 const addr_t section_end_file_addr = 3533 section_file_addr + 3534 symbol_section->GetByteSize(); 3535 if (next_func_start_entry) { 3536 addr_t next_symbol_file_addr = 3537 next_func_start_entry->addr; 3538 // Be sure the clear the Thumb address bit when 3539 // we calculate the size from the current and 3540 // next address 3541 if (is_arm) 3542 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3543 symbol_byte_size = std::min<lldb::addr_t>( 3544 next_symbol_file_addr - symbol_file_addr, 3545 section_end_file_addr - symbol_file_addr); 3546 } else { 3547 symbol_byte_size = 3548 section_end_file_addr - symbol_file_addr; 3549 } 3550 } 3551 } 3552 symbol_value -= section_file_addr; 3553 } 3554 3555 if (is_debug == false) { 3556 if (type == eSymbolTypeCode) { 3557 // See if we can find a N_FUN entry for any code 3558 // symbols. 
If we do find a match, and the name 3559 // matches, then we can merge the two into just the 3560 // function symbol to avoid duplicate entries in 3561 // the symbol table 3562 auto range = 3563 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 3564 if (range.first != range.second) { 3565 bool found_it = false; 3566 for (auto pos = range.first; pos != range.second; 3567 ++pos) { 3568 if (sym[sym_idx].GetMangled().GetName( 3569 Mangled::ePreferMangled) == 3570 sym[pos->second].GetMangled().GetName( 3571 Mangled::ePreferMangled)) { 3572 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3573 // We just need the flags from the linker 3574 // symbol, so put these flags 3575 // into the N_FUN flags to avoid duplicate 3576 // symbols in the symbol table 3577 sym[pos->second].SetExternal( 3578 sym[sym_idx].IsExternal()); 3579 sym[pos->second].SetFlags(nlist.n_type << 16 | 3580 nlist.n_desc); 3581 if (resolver_addresses.find(nlist.n_value) != 3582 resolver_addresses.end()) 3583 sym[pos->second].SetType(eSymbolTypeResolver); 3584 sym[sym_idx].Clear(); 3585 found_it = true; 3586 break; 3587 } 3588 } 3589 if (found_it) 3590 continue; 3591 } else { 3592 if (resolver_addresses.find(nlist.n_value) != 3593 resolver_addresses.end()) 3594 type = eSymbolTypeResolver; 3595 } 3596 } else if (type == eSymbolTypeData || 3597 type == eSymbolTypeObjCClass || 3598 type == eSymbolTypeObjCMetaClass || 3599 type == eSymbolTypeObjCIVar) { 3600 // See if we can find a N_STSYM entry for any data 3601 // symbols. 
If we do find a match, and the name 3602 // matches, then we can merge the two into just the 3603 // Static symbol to avoid duplicate entries in the 3604 // symbol table 3605 auto range = N_STSYM_addr_to_sym_idx.equal_range( 3606 nlist.n_value); 3607 if (range.first != range.second) { 3608 bool found_it = false; 3609 for (auto pos = range.first; pos != range.second; 3610 ++pos) { 3611 if (sym[sym_idx].GetMangled().GetName( 3612 Mangled::ePreferMangled) == 3613 sym[pos->second].GetMangled().GetName( 3614 Mangled::ePreferMangled)) { 3615 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3616 // We just need the flags from the linker 3617 // symbol, so put these flags 3618 // into the N_STSYM flags to avoid duplicate 3619 // symbols in the symbol table 3620 sym[pos->second].SetExternal( 3621 sym[sym_idx].IsExternal()); 3622 sym[pos->second].SetFlags(nlist.n_type << 16 | 3623 nlist.n_desc); 3624 sym[sym_idx].Clear(); 3625 found_it = true; 3626 break; 3627 } 3628 } 3629 if (found_it) 3630 continue; 3631 } else { 3632 const char *gsym_name = 3633 sym[sym_idx] 3634 .GetMangled() 3635 .GetName(Mangled::ePreferMangled) 3636 .GetCString(); 3637 if (gsym_name) { 3638 // Combine N_GSYM stab entries with the non 3639 // stab symbol 3640 ConstNameToSymbolIndexMap::const_iterator pos = 3641 N_GSYM_name_to_sym_idx.find(gsym_name); 3642 if (pos != N_GSYM_name_to_sym_idx.end()) { 3643 const uint32_t GSYM_sym_idx = pos->second; 3644 m_nlist_idx_to_sym_idx[nlist_idx] = 3645 GSYM_sym_idx; 3646 // Copy the address, because often the N_GSYM 3647 // address has an invalid address of zero 3648 // when the global is a common symbol 3649 sym[GSYM_sym_idx].GetAddressRef().SetSection( 3650 symbol_section); 3651 sym[GSYM_sym_idx].GetAddressRef().SetOffset( 3652 symbol_value); 3653 add_symbol_addr(sym[GSYM_sym_idx] 3654 .GetAddress() 3655 .GetFileAddress()); 3656 // We just need the flags from the linker 3657 // symbol, so put these flags 3658 // into the N_GSYM flags to avoid duplicate 3659 // 
symbols in the symbol table 3660 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | 3661 nlist.n_desc); 3662 sym[sym_idx].Clear(); 3663 continue; 3664 } 3665 } 3666 } 3667 } 3668 } 3669 3670 sym[sym_idx].SetID(nlist_idx); 3671 sym[sym_idx].SetType(type); 3672 if (set_value) { 3673 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 3674 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 3675 add_symbol_addr( 3676 sym[sym_idx].GetAddress().GetFileAddress()); 3677 } 3678 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 3679 3680 if (symbol_byte_size > 0) 3681 sym[sym_idx].SetByteSize(symbol_byte_size); 3682 3683 if (demangled_is_synthesized) 3684 sym[sym_idx].SetDemangledNameIsSynthesized(true); 3685 ++sym_idx; 3686 } else { 3687 sym[sym_idx].Clear(); 3688 } 3689 } 3690 ///////////////////////////// 3691 } 3692 } 3693 3694 for (const auto &pos : reexport_shlib_needs_fixup) { 3695 const auto undef_pos = undefined_name_to_desc.find(pos.second); 3696 if (undef_pos != undefined_name_to_desc.end()) { 3697 const uint8_t dylib_ordinal = 3698 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 3699 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 3700 sym[pos.first].SetReExportedSymbolSharedLibrary( 3701 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 3702 } 3703 } 3704 } 3705 3706 #endif 3707 lldb::offset_t nlist_data_offset = 0; 3708 3709 if (nlist_data.GetByteSize() > 0) { 3710 3711 // If the sym array was not created while parsing the DSC unmapped 3712 // symbols, create it now. 
3713 if (sym == nullptr) { 3714 sym = 3715 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms); 3716 num_syms = symtab.GetNumSymbols(); 3717 } 3718 3719 if (unmapped_local_symbols_found) { 3720 assert(m_dysymtab.ilocalsym == 0); 3721 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size); 3722 nlist_idx = m_dysymtab.nlocalsym; 3723 } else { 3724 nlist_idx = 0; 3725 } 3726 3727 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 3728 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 3729 UndefinedNameToDescMap undefined_name_to_desc; 3730 SymbolIndexToName reexport_shlib_needs_fixup; 3731 3732 // Symtab parsing is a huge mess. Everything is entangled and the code 3733 // requires access to a ridiculous amount of variables. LLDB depends 3734 // heavily on the proper merging of symbols and to get that right we need 3735 // to make sure we have parsed all the debug symbols first. Therefore we 3736 // invoke the lambda twice, once to parse only the debug symbols and then 3737 // once more to parse the remaining symbols. 
3738 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, 3739 bool debug_only) { 3740 const bool is_debug = ((nlist.n_type & N_STAB) != 0); 3741 if (is_debug != debug_only) 3742 return true; 3743 3744 const char *symbol_name_non_abi_mangled = nullptr; 3745 const char *symbol_name = nullptr; 3746 3747 if (have_strtab_data) { 3748 symbol_name = strtab_data.PeekCStr(nlist.n_strx); 3749 3750 if (symbol_name == nullptr) { 3751 // No symbol should be NULL, even the symbols with no string values 3752 // should have an offset zero which points to an empty C-string 3753 Debugger::ReportError(llvm::formatv( 3754 "symbol[{0}] has invalid string table offset {1:x} in {2}, " 3755 "ignoring symbol", 3756 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath())); 3757 return true; 3758 } 3759 if (symbol_name[0] == '\0') 3760 symbol_name = nullptr; 3761 } else { 3762 const addr_t str_addr = strtab_addr + nlist.n_strx; 3763 Status str_error; 3764 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name, 3765 str_error)) 3766 symbol_name = memory_symbol_name.c_str(); 3767 } 3768 3769 SymbolType type = eSymbolTypeInvalid; 3770 SectionSP symbol_section; 3771 bool add_nlist = true; 3772 bool is_gsym = false; 3773 bool demangled_is_synthesized = false; 3774 bool set_value = true; 3775 3776 assert(sym_idx < num_syms); 3777 sym[sym_idx].SetDebug(is_debug); 3778 3779 if (is_debug) { 3780 switch (nlist.n_type) { 3781 case N_GSYM: 3782 // global symbol: name,,NO_SECT,type,0 3783 // Sometimes the N_GSYM value contains the address. 3784 3785 // FIXME: In the .o files, we have a GSYM and a debug symbol for all 3786 // the ObjC data. They 3787 // have the same address, but we want to ensure that we always find 3788 // only the real symbol, 'cause we don't currently correctly 3789 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol 3790 // type. This is a temporary hack to make sure the ObjectiveC 3791 // symbols get treated correctly. 
To do this right, we should 3792 // coalesce all the GSYM & global symbols that have the same 3793 // address. 3794 is_gsym = true; 3795 sym[sym_idx].SetExternal(true); 3796 3797 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') { 3798 llvm::StringRef symbol_name_ref(symbol_name); 3799 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 3800 symbol_name_non_abi_mangled = symbol_name + 1; 3801 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 3802 type = eSymbolTypeObjCClass; 3803 demangled_is_synthesized = true; 3804 3805 } else if (symbol_name_ref.starts_with( 3806 g_objc_v2_prefix_metaclass)) { 3807 symbol_name_non_abi_mangled = symbol_name + 1; 3808 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 3809 type = eSymbolTypeObjCMetaClass; 3810 demangled_is_synthesized = true; 3811 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 3812 symbol_name_non_abi_mangled = symbol_name + 1; 3813 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 3814 type = eSymbolTypeObjCIVar; 3815 demangled_is_synthesized = true; 3816 } 3817 } else { 3818 if (nlist.n_value != 0) 3819 symbol_section = 3820 section_info.GetSection(nlist.n_sect, nlist.n_value); 3821 type = eSymbolTypeData; 3822 } 3823 break; 3824 3825 case N_FNAME: 3826 // procedure name (f77 kludge): name,,NO_SECT,0,0 3827 type = eSymbolTypeCompiler; 3828 break; 3829 3830 case N_FUN: 3831 // procedure: name,,n_sect,linenumber,address 3832 if (symbol_name) { 3833 type = eSymbolTypeCode; 3834 symbol_section = 3835 section_info.GetSection(nlist.n_sect, nlist.n_value); 3836 3837 N_FUN_addr_to_sym_idx.insert( 3838 std::make_pair(nlist.n_value, sym_idx)); 3839 // We use the current number of symbols in the symbol table in 3840 // lieu of using nlist_idx in case we ever start trimming entries 3841 // out 3842 N_FUN_indexes.push_back(sym_idx); 3843 } else { 3844 type = eSymbolTypeCompiler; 3845 3846 if (!N_FUN_indexes.empty()) { 3847 // Copy the size of the function 
into the original STAB entry 3848 // so we don't have to hunt for it later 3849 symtab.SymbolAtIndex(N_FUN_indexes.back()) 3850 ->SetByteSize(nlist.n_value); 3851 N_FUN_indexes.pop_back(); 3852 // We don't really need the end function STAB as it contains 3853 // the size which we already placed with the original symbol, 3854 // so don't add it if we want a minimal symbol table 3855 add_nlist = false; 3856 } 3857 } 3858 break; 3859 3860 case N_STSYM: 3861 // static symbol: name,,n_sect,type,address 3862 N_STSYM_addr_to_sym_idx.insert( 3863 std::make_pair(nlist.n_value, sym_idx)); 3864 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3865 if (symbol_name && symbol_name[0]) { 3866 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1, 3867 eSymbolTypeData); 3868 } 3869 break; 3870 3871 case N_LCSYM: 3872 // .lcomm symbol: name,,n_sect,type,address 3873 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3874 type = eSymbolTypeCommonBlock; 3875 break; 3876 3877 case N_BNSYM: 3878 // We use the current number of symbols in the symbol table in lieu 3879 // of using nlist_idx in case we ever start trimming entries out 3880 // Skip these if we want minimal symbol tables 3881 add_nlist = false; 3882 break; 3883 3884 case N_ENSYM: 3885 // Set the size of the N_BNSYM to the terminating index of this 3886 // N_ENSYM so that we can always skip the entire symbol if we need 3887 // to navigate more quickly at the source level when parsing STABS 3888 // Skip these if we want minimal symbol tables 3889 add_nlist = false; 3890 break; 3891 3892 case N_OPT: 3893 // emitted with gcc2_compiled and in gcc source 3894 type = eSymbolTypeCompiler; 3895 break; 3896 3897 case N_RSYM: 3898 // register sym: name,,NO_SECT,type,register 3899 type = eSymbolTypeVariable; 3900 break; 3901 3902 case N_SLINE: 3903 // src line: 0,,n_sect,linenumber,address 3904 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3905 type = 
eSymbolTypeLineEntry; 3906 break; 3907 3908 case N_SSYM: 3909 // structure elt: name,,NO_SECT,type,struct_offset 3910 type = eSymbolTypeVariableType; 3911 break; 3912 3913 case N_SO: 3914 // source file name 3915 type = eSymbolTypeSourceFile; 3916 if (symbol_name == nullptr) { 3917 add_nlist = false; 3918 if (N_SO_index != UINT32_MAX) { 3919 // Set the size of the N_SO to the terminating index of this 3920 // N_SO so that we can always skip the entire N_SO if we need 3921 // to navigate more quickly at the source level when parsing 3922 // STABS 3923 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3924 symbol_ptr->SetByteSize(sym_idx); 3925 symbol_ptr->SetSizeIsSibling(true); 3926 } 3927 N_NSYM_indexes.clear(); 3928 N_INCL_indexes.clear(); 3929 N_BRAC_indexes.clear(); 3930 N_COMM_indexes.clear(); 3931 N_FUN_indexes.clear(); 3932 N_SO_index = UINT32_MAX; 3933 } else { 3934 // We use the current number of symbols in the symbol table in 3935 // lieu of using nlist_idx in case we ever start trimming entries 3936 // out 3937 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3938 if (N_SO_has_full_path) { 3939 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) { 3940 // We have two consecutive N_SO entries where the first 3941 // contains a directory and the second contains a full path. 
3942 sym[sym_idx - 1].GetMangled().SetValue( 3943 ConstString(symbol_name)); 3944 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3945 add_nlist = false; 3946 } else { 3947 // This is the first entry in a N_SO that contains a 3948 // directory or a full path to the source file 3949 N_SO_index = sym_idx; 3950 } 3951 } else if ((N_SO_index == sym_idx - 1) && 3952 ((sym_idx - 1) < num_syms)) { 3953 // This is usually the second N_SO entry that contains just the 3954 // filename, so here we combine it with the first one if we are 3955 // minimizing the symbol table 3956 const char *so_path = 3957 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString(); 3958 if (so_path && so_path[0]) { 3959 std::string full_so_path(so_path); 3960 const size_t double_slash_pos = full_so_path.find("//"); 3961 if (double_slash_pos != std::string::npos) { 3962 // The linker has been generating bad N_SO entries with 3963 // doubled up paths in the format "%s%s" where the first 3964 // string in the DW_AT_comp_dir, and the second is the 3965 // directory for the source file so you end up with a path 3966 // that looks like "/tmp/src//tmp/src/" 3967 FileSpec so_dir(so_path); 3968 if (!FileSystem::Instance().Exists(so_dir)) { 3969 so_dir.SetFile(&full_so_path[double_slash_pos + 1], 3970 FileSpec::Style::native); 3971 if (FileSystem::Instance().Exists(so_dir)) { 3972 // Trim off the incorrect path 3973 full_so_path.erase(0, double_slash_pos + 1); 3974 } 3975 } 3976 } 3977 if (*full_so_path.rbegin() != '/') 3978 full_so_path += '/'; 3979 full_so_path += symbol_name; 3980 sym[sym_idx - 1].GetMangled().SetValue( 3981 ConstString(full_so_path.c_str())); 3982 add_nlist = false; 3983 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3984 } 3985 } else { 3986 // This could be a relative path to a N_SO 3987 N_SO_index = sym_idx; 3988 } 3989 } 3990 break; 3991 3992 case N_OSO: 3993 // object file name: name,,0,0,st_mtime 3994 type = eSymbolTypeObjectFile; 3995 break; 3996 3997 case N_LSYM: 3998 // 
local sym: name,,NO_SECT,type,offset 3999 type = eSymbolTypeLocal; 4000 break; 4001 4002 // INCL scopes 4003 case N_BINCL: 4004 // include file beginning: name,,NO_SECT,0,sum We use the current 4005 // number of symbols in the symbol table in lieu of using nlist_idx 4006 // in case we ever start trimming entries out 4007 N_INCL_indexes.push_back(sym_idx); 4008 type = eSymbolTypeScopeBegin; 4009 break; 4010 4011 case N_EINCL: 4012 // include file end: name,,NO_SECT,0,0 4013 // Set the size of the N_BINCL to the terminating index of this 4014 // N_EINCL so that we can always skip the entire symbol if we need 4015 // to navigate more quickly at the source level when parsing STABS 4016 if (!N_INCL_indexes.empty()) { 4017 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back()); 4018 symbol_ptr->SetByteSize(sym_idx + 1); 4019 symbol_ptr->SetSizeIsSibling(true); 4020 N_INCL_indexes.pop_back(); 4021 } 4022 type = eSymbolTypeScopeEnd; 4023 break; 4024 4025 case N_SOL: 4026 // #included file name: name,,n_sect,0,address 4027 type = eSymbolTypeHeaderFile; 4028 4029 // We currently don't use the header files on darwin 4030 add_nlist = false; 4031 break; 4032 4033 case N_PARAMS: 4034 // compiler parameters: name,,NO_SECT,0,0 4035 type = eSymbolTypeCompiler; 4036 break; 4037 4038 case N_VERSION: 4039 // compiler version: name,,NO_SECT,0,0 4040 type = eSymbolTypeCompiler; 4041 break; 4042 4043 case N_OLEVEL: 4044 // compiler -O level: name,,NO_SECT,0,0 4045 type = eSymbolTypeCompiler; 4046 break; 4047 4048 case N_PSYM: 4049 // parameter: name,,NO_SECT,type,offset 4050 type = eSymbolTypeVariable; 4051 break; 4052 4053 case N_ENTRY: 4054 // alternate entry: name,,n_sect,linenumber,address 4055 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4056 type = eSymbolTypeLineEntry; 4057 break; 4058 4059 // Left and Right Braces 4060 case N_LBRAC: 4061 // left bracket: 0,,NO_SECT,nesting level,address We use the 4062 // current number of symbols in the symbol table 
in lieu of using 4063 // nlist_idx in case we ever start trimming entries out 4064 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4065 N_BRAC_indexes.push_back(sym_idx); 4066 type = eSymbolTypeScopeBegin; 4067 break; 4068 4069 case N_RBRAC: 4070 // right bracket: 0,,NO_SECT,nesting level,address Set the size of 4071 // the N_LBRAC to the terminating index of this N_RBRAC so that we 4072 // can always skip the entire symbol if we need to navigate more 4073 // quickly at the source level when parsing STABS 4074 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4075 if (!N_BRAC_indexes.empty()) { 4076 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back()); 4077 symbol_ptr->SetByteSize(sym_idx + 1); 4078 symbol_ptr->SetSizeIsSibling(true); 4079 N_BRAC_indexes.pop_back(); 4080 } 4081 type = eSymbolTypeScopeEnd; 4082 break; 4083 4084 case N_EXCL: 4085 // deleted include file: name,,NO_SECT,0,sum 4086 type = eSymbolTypeHeaderFile; 4087 break; 4088 4089 // COMM scopes 4090 case N_BCOMM: 4091 // begin common: name,,NO_SECT,0,0 4092 // We use the current number of symbols in the symbol table in lieu 4093 // of using nlist_idx in case we ever start trimming entries out 4094 type = eSymbolTypeScopeBegin; 4095 N_COMM_indexes.push_back(sym_idx); 4096 break; 4097 4098 case N_ECOML: 4099 // end common (local name): 0,,n_sect,0,address 4100 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4101 [[fallthrough]]; 4102 4103 case N_ECOMM: 4104 // end common: name,,n_sect,0,0 4105 // Set the size of the N_BCOMM to the terminating index of this 4106 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if 4107 // we need to navigate more quickly at the source level when 4108 // parsing STABS 4109 if (!N_COMM_indexes.empty()) { 4110 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back()); 4111 symbol_ptr->SetByteSize(sym_idx + 1); 4112 symbol_ptr->SetSizeIsSibling(true); 4113 N_COMM_indexes.pop_back(); 4114 } 
4115 type = eSymbolTypeScopeEnd; 4116 break; 4117 4118 case N_LENG: 4119 // second stab entry with length information 4120 type = eSymbolTypeAdditional; 4121 break; 4122 4123 default: 4124 break; 4125 } 4126 } else { 4127 uint8_t n_type = N_TYPE & nlist.n_type; 4128 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 4129 4130 switch (n_type) { 4131 case N_INDR: { 4132 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value); 4133 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) { 4134 type = eSymbolTypeReExported; 4135 ConstString reexport_name(reexport_name_cstr + 4136 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 4137 sym[sym_idx].SetReExportedSymbolName(reexport_name); 4138 set_value = false; 4139 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 4140 indirect_symbol_names.insert( 4141 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 4142 } else 4143 type = eSymbolTypeUndefined; 4144 } break; 4145 4146 case N_UNDF: 4147 if (symbol_name && symbol_name[0]) { 4148 ConstString undefined_name(symbol_name + 4149 ((symbol_name[0] == '_') ? 1 : 0)); 4150 undefined_name_to_desc[undefined_name] = nlist.n_desc; 4151 } 4152 [[fallthrough]]; 4153 4154 case N_PBUD: 4155 type = eSymbolTypeUndefined; 4156 break; 4157 4158 case N_ABS: 4159 type = eSymbolTypeAbsolute; 4160 break; 4161 4162 case N_SECT: { 4163 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4164 4165 if (!symbol_section) { 4166 // TODO: warn about this? 
4167 add_nlist = false; 4168 break; 4169 } 4170 4171 if (TEXT_eh_frame_sectID == nlist.n_sect) { 4172 type = eSymbolTypeException; 4173 } else { 4174 uint32_t section_type = symbol_section->Get() & SECTION_TYPE; 4175 4176 switch (section_type) { 4177 case S_CSTRING_LITERALS: 4178 type = eSymbolTypeData; 4179 break; // section with only literal C strings 4180 case S_4BYTE_LITERALS: 4181 type = eSymbolTypeData; 4182 break; // section with only 4 byte literals 4183 case S_8BYTE_LITERALS: 4184 type = eSymbolTypeData; 4185 break; // section with only 8 byte literals 4186 case S_LITERAL_POINTERS: 4187 type = eSymbolTypeTrampoline; 4188 break; // section with only pointers to literals 4189 case S_NON_LAZY_SYMBOL_POINTERS: 4190 type = eSymbolTypeTrampoline; 4191 break; // section with only non-lazy symbol pointers 4192 case S_LAZY_SYMBOL_POINTERS: 4193 type = eSymbolTypeTrampoline; 4194 break; // section with only lazy symbol pointers 4195 case S_SYMBOL_STUBS: 4196 type = eSymbolTypeTrampoline; 4197 break; // section with only symbol stubs, byte size of stub in 4198 // the reserved2 field 4199 case S_MOD_INIT_FUNC_POINTERS: 4200 type = eSymbolTypeCode; 4201 break; // section with only function pointers for initialization 4202 case S_MOD_TERM_FUNC_POINTERS: 4203 type = eSymbolTypeCode; 4204 break; // section with only function pointers for termination 4205 case S_INTERPOSING: 4206 type = eSymbolTypeTrampoline; 4207 break; // section with only pairs of function pointers for 4208 // interposing 4209 case S_16BYTE_LITERALS: 4210 type = eSymbolTypeData; 4211 break; // section with only 16 byte literals 4212 case S_DTRACE_DOF: 4213 type = eSymbolTypeInstrumentation; 4214 break; 4215 case S_LAZY_DYLIB_SYMBOL_POINTERS: 4216 type = eSymbolTypeTrampoline; 4217 break; 4218 default: 4219 switch (symbol_section->GetType()) { 4220 case lldb::eSectionTypeCode: 4221 type = eSymbolTypeCode; 4222 break; 4223 case eSectionTypeData: 4224 case eSectionTypeDataCString: // Inlined C string data 
4225 case eSectionTypeDataCStringPointers: // Pointers to C string 4226 // data 4227 case eSectionTypeDataSymbolAddress: // Address of a symbol in 4228 // the symbol table 4229 case eSectionTypeData4: 4230 case eSectionTypeData8: 4231 case eSectionTypeData16: 4232 type = eSymbolTypeData; 4233 break; 4234 default: 4235 break; 4236 } 4237 break; 4238 } 4239 4240 if (type == eSymbolTypeInvalid) { 4241 const char *symbol_sect_name = 4242 symbol_section->GetName().AsCString(); 4243 if (symbol_section->IsDescendant(text_section_sp.get())) { 4244 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 4245 S_ATTR_SELF_MODIFYING_CODE | 4246 S_ATTR_SOME_INSTRUCTIONS)) 4247 type = eSymbolTypeData; 4248 else 4249 type = eSymbolTypeCode; 4250 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 4251 symbol_section->IsDescendant( 4252 data_dirty_section_sp.get()) || 4253 symbol_section->IsDescendant( 4254 data_const_section_sp.get())) { 4255 if (symbol_sect_name && 4256 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 4257 type = eSymbolTypeRuntime; 4258 4259 if (symbol_name) { 4260 llvm::StringRef symbol_name_ref(symbol_name); 4261 if (symbol_name_ref.starts_with("_OBJC_")) { 4262 llvm::StringRef g_objc_v2_prefix_class( 4263 "_OBJC_CLASS_$_"); 4264 llvm::StringRef g_objc_v2_prefix_metaclass( 4265 "_OBJC_METACLASS_$_"); 4266 llvm::StringRef g_objc_v2_prefix_ivar( 4267 "_OBJC_IVAR_$_"); 4268 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 4269 symbol_name_non_abi_mangled = symbol_name + 1; 4270 symbol_name = 4271 symbol_name + g_objc_v2_prefix_class.size(); 4272 type = eSymbolTypeObjCClass; 4273 demangled_is_synthesized = true; 4274 } else if (symbol_name_ref.starts_with( 4275 g_objc_v2_prefix_metaclass)) { 4276 symbol_name_non_abi_mangled = symbol_name + 1; 4277 symbol_name = 4278 symbol_name + g_objc_v2_prefix_metaclass.size(); 4279 type = eSymbolTypeObjCMetaClass; 4280 demangled_is_synthesized = true; 4281 } else if 
(symbol_name_ref.starts_with( 4282 g_objc_v2_prefix_ivar)) { 4283 symbol_name_non_abi_mangled = symbol_name + 1; 4284 symbol_name = 4285 symbol_name + g_objc_v2_prefix_ivar.size(); 4286 type = eSymbolTypeObjCIVar; 4287 demangled_is_synthesized = true; 4288 } 4289 } 4290 } 4291 } else if (symbol_sect_name && 4292 ::strstr(symbol_sect_name, "__gcc_except_tab") == 4293 symbol_sect_name) { 4294 type = eSymbolTypeException; 4295 } else { 4296 type = eSymbolTypeData; 4297 } 4298 } else if (symbol_sect_name && 4299 ::strstr(symbol_sect_name, "__IMPORT") == 4300 symbol_sect_name) { 4301 type = eSymbolTypeTrampoline; 4302 } else if (symbol_section->IsDescendant(objc_section_sp.get())) { 4303 type = eSymbolTypeRuntime; 4304 if (symbol_name && symbol_name[0] == '.') { 4305 llvm::StringRef symbol_name_ref(symbol_name); 4306 llvm::StringRef g_objc_v1_prefix_class( 4307 ".objc_class_name_"); 4308 if (symbol_name_ref.starts_with(g_objc_v1_prefix_class)) { 4309 symbol_name_non_abi_mangled = symbol_name; 4310 symbol_name = symbol_name + g_objc_v1_prefix_class.size(); 4311 type = eSymbolTypeObjCClass; 4312 demangled_is_synthesized = true; 4313 } 4314 } 4315 } 4316 } 4317 } 4318 } break; 4319 } 4320 } 4321 4322 if (!add_nlist) { 4323 sym[sym_idx].Clear(); 4324 return true; 4325 } 4326 4327 uint64_t symbol_value = nlist.n_value; 4328 4329 if (symbol_name_non_abi_mangled) { 4330 sym[sym_idx].GetMangled().SetMangledName( 4331 ConstString(symbol_name_non_abi_mangled)); 4332 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); 4333 } else { 4334 4335 if (symbol_name && symbol_name[0] == '_') { 4336 symbol_name++; // Skip the leading underscore 4337 } 4338 4339 if (symbol_name) { 4340 ConstString const_symbol_name(symbol_name); 4341 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 4342 } 4343 } 4344 4345 if (is_gsym) { 4346 const char *gsym_name = sym[sym_idx] 4347 .GetMangled() 4348 .GetName(Mangled::ePreferMangled) 4349 .GetCString(); 4350 if (gsym_name) 4351 
N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 4352 } 4353 4354 if (symbol_section) { 4355 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4356 symbol_value -= section_file_addr; 4357 } 4358 4359 if (!is_debug) { 4360 if (type == eSymbolTypeCode) { 4361 // See if we can find a N_FUN entry for any code symbols. If we do 4362 // find a match, and the name matches, then we can merge the two into 4363 // just the function symbol to avoid duplicate entries in the symbol 4364 // table. 4365 std::pair<ValueToSymbolIndexMap::const_iterator, 4366 ValueToSymbolIndexMap::const_iterator> 4367 range; 4368 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 4369 if (range.first != range.second) { 4370 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4371 pos != range.second; ++pos) { 4372 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4373 sym[pos->second].GetMangled().GetName( 4374 Mangled::ePreferMangled)) { 4375 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4376 // We just need the flags from the linker symbol, so put these 4377 // flags into the N_FUN flags to avoid duplicate symbols in the 4378 // symbol table. 4379 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4380 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4381 if (resolver_addresses.find(nlist.n_value) != 4382 resolver_addresses.end()) 4383 sym[pos->second].SetType(eSymbolTypeResolver); 4384 sym[sym_idx].Clear(); 4385 return true; 4386 } 4387 } 4388 } else { 4389 if (resolver_addresses.find(nlist.n_value) != 4390 resolver_addresses.end()) 4391 type = eSymbolTypeResolver; 4392 } 4393 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || 4394 type == eSymbolTypeObjCMetaClass || 4395 type == eSymbolTypeObjCIVar) { 4396 // See if we can find a N_STSYM entry for any data symbols. 
If we do 4397 // find a match, and the name matches, then we can merge the two into 4398 // just the Static symbol to avoid duplicate entries in the symbol 4399 // table. 4400 std::pair<ValueToSymbolIndexMap::const_iterator, 4401 ValueToSymbolIndexMap::const_iterator> 4402 range; 4403 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); 4404 if (range.first != range.second) { 4405 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4406 pos != range.second; ++pos) { 4407 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4408 sym[pos->second].GetMangled().GetName( 4409 Mangled::ePreferMangled)) { 4410 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4411 // We just need the flags from the linker symbol, so put these 4412 // flags into the N_STSYM flags to avoid duplicate symbols in 4413 // the symbol table. 4414 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4415 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4416 sym[sym_idx].Clear(); 4417 return true; 4418 } 4419 } 4420 } else { 4421 // Combine N_GSYM stab entries with the non stab symbol. 4422 const char *gsym_name = sym[sym_idx] 4423 .GetMangled() 4424 .GetName(Mangled::ePreferMangled) 4425 .GetCString(); 4426 if (gsym_name) { 4427 ConstNameToSymbolIndexMap::const_iterator pos = 4428 N_GSYM_name_to_sym_idx.find(gsym_name); 4429 if (pos != N_GSYM_name_to_sym_idx.end()) { 4430 const uint32_t GSYM_sym_idx = pos->second; 4431 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; 4432 // Copy the address, because often the N_GSYM address has an 4433 // invalid address of zero when the global is a common symbol. 
4434 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); 4435 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); 4436 add_symbol_addr( 4437 sym[GSYM_sym_idx].GetAddress().GetFileAddress()); 4438 // We just need the flags from the linker symbol, so put these 4439 // flags into the N_GSYM flags to avoid duplicate symbols in 4440 // the symbol table. 4441 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4442 sym[sym_idx].Clear(); 4443 return true; 4444 } 4445 } 4446 } 4447 } 4448 } 4449 4450 sym[sym_idx].SetID(nlist_idx); 4451 sym[sym_idx].SetType(type); 4452 if (set_value) { 4453 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 4454 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 4455 if (symbol_section) 4456 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress()); 4457 } 4458 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4459 if (nlist.n_desc & N_WEAK_REF) 4460 sym[sym_idx].SetIsWeak(true); 4461 4462 if (demangled_is_synthesized) 4463 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4464 4465 ++sym_idx; 4466 return true; 4467 }; 4468 4469 // First parse all the nlists but don't process them yet. See the next 4470 // comment for an explanation why. 4471 std::vector<struct nlist_64> nlists; 4472 nlists.reserve(symtab_load_command.nsyms); 4473 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { 4474 if (auto nlist = 4475 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) 4476 nlists.push_back(*nlist); 4477 else 4478 break; 4479 } 4480 4481 // Now parse all the debug symbols. This is needed to merge non-debug 4482 // symbols in the next step. Non-debug symbols are always coalesced into 4483 // the debug symbol. Doing this in one step would mean that some symbols 4484 // won't be merged. 4485 nlist_idx = 0; 4486 for (auto &nlist : nlists) { 4487 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) 4488 break; 4489 } 4490 4491 // Finally parse all the non debug symbols. 
4492 nlist_idx = 0; 4493 for (auto &nlist : nlists) { 4494 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) 4495 break; 4496 } 4497 4498 for (const auto &pos : reexport_shlib_needs_fixup) { 4499 const auto undef_pos = undefined_name_to_desc.find(pos.second); 4500 if (undef_pos != undefined_name_to_desc.end()) { 4501 const uint8_t dylib_ordinal = 4502 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 4503 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 4504 sym[pos.first].SetReExportedSymbolSharedLibrary( 4505 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 4506 } 4507 } 4508 } 4509 4510 // Count how many trie symbols we'll add to the symbol table 4511 int trie_symbol_table_augment_count = 0; 4512 for (auto &e : external_sym_trie_entries) { 4513 if (!symbols_added.contains(e.entry.address)) 4514 trie_symbol_table_augment_count++; 4515 } 4516 4517 if (num_syms < sym_idx + trie_symbol_table_augment_count) { 4518 num_syms = sym_idx + trie_symbol_table_augment_count; 4519 sym = symtab.Resize(num_syms); 4520 } 4521 uint32_t synthetic_sym_id = symtab_load_command.nsyms; 4522 4523 // Add symbols from the trie to the symbol table. 4524 for (auto &e : external_sym_trie_entries) { 4525 if (symbols_added.contains(e.entry.address)) 4526 continue; 4527 4528 // Find the section that this trie address is in, use that to annotate 4529 // symbol type as we add the trie address and name to the symbol table. 
4530 Address symbol_addr; 4531 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { 4532 SectionSP symbol_section(symbol_addr.GetSection()); 4533 const char *symbol_name = e.entry.name.GetCString(); 4534 bool demangled_is_synthesized = false; 4535 SymbolType type = 4536 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp, 4537 data_section_sp, data_dirty_section_sp, 4538 data_const_section_sp, symbol_section); 4539 4540 sym[sym_idx].SetType(type); 4541 if (symbol_section) { 4542 sym[sym_idx].SetID(synthetic_sym_id++); 4543 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name)); 4544 if (demangled_is_synthesized) 4545 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4546 sym[sym_idx].SetIsSynthetic(true); 4547 sym[sym_idx].SetExternal(true); 4548 sym[sym_idx].GetAddressRef() = symbol_addr; 4549 add_symbol_addr(symbol_addr.GetFileAddress()); 4550 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) 4551 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); 4552 ++sym_idx; 4553 } 4554 } 4555 } 4556 4557 if (function_starts_count > 0) { 4558 uint32_t num_synthetic_function_symbols = 0; 4559 for (i = 0; i < function_starts_count; ++i) { 4560 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr)) 4561 ++num_synthetic_function_symbols; 4562 } 4563 4564 if (num_synthetic_function_symbols > 0) { 4565 if (num_syms < sym_idx + num_synthetic_function_symbols) { 4566 num_syms = sym_idx + num_synthetic_function_symbols; 4567 sym = symtab.Resize(num_syms); 4568 } 4569 for (i = 0; i < function_starts_count; ++i) { 4570 const FunctionStarts::Entry *func_start_entry = 4571 function_starts.GetEntryAtIndex(i); 4572 if (!symbols_added.contains(func_start_entry->addr)) { 4573 addr_t symbol_file_addr = func_start_entry->addr; 4574 uint32_t symbol_flags = 0; 4575 if (func_start_entry->data) 4576 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 4577 Address symbol_addr; 4578 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { 
4579 SectionSP symbol_section(symbol_addr.GetSection()); 4580 if (symbol_section) { 4581 sym[sym_idx].SetID(synthetic_sym_id++); 4582 // Don't set the name for any synthetic symbols, the Symbol 4583 // object will generate one if needed when the name is accessed 4584 // via accessors. 4585 sym[sym_idx].GetMangled().SetDemangledName(ConstString()); 4586 sym[sym_idx].SetType(eSymbolTypeCode); 4587 sym[sym_idx].SetIsSynthetic(true); 4588 sym[sym_idx].GetAddressRef() = symbol_addr; 4589 add_symbol_addr(symbol_addr.GetFileAddress()); 4590 if (symbol_flags) 4591 sym[sym_idx].SetFlags(symbol_flags); 4592 ++sym_idx; 4593 } 4594 } 4595 } 4596 } 4597 } 4598 } 4599 4600 // Trim our symbols down to just what we ended up with after removing any 4601 // symbols. 4602 if (sym_idx < num_syms) { 4603 num_syms = sym_idx; 4604 sym = symtab.Resize(num_syms); 4605 } 4606 4607 // Now synthesize indirect symbols 4608 if (m_dysymtab.nindirectsyms != 0) { 4609 if (indirect_symbol_index_data.GetByteSize()) { 4610 NListIndexToSymbolIndexMap::const_iterator end_index_pos = 4611 m_nlist_idx_to_sym_idx.end(); 4612 4613 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size(); 4614 ++sect_idx) { 4615 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) == 4616 S_SYMBOL_STUBS) { 4617 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2; 4618 if (symbol_stub_byte_size == 0) 4619 continue; 4620 4621 const uint32_t num_symbol_stubs = 4622 m_mach_sections[sect_idx].size / symbol_stub_byte_size; 4623 4624 if (num_symbol_stubs == 0) 4625 continue; 4626 4627 const uint32_t symbol_stub_index_offset = 4628 m_mach_sections[sect_idx].reserved1; 4629 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) { 4630 const uint32_t symbol_stub_index = 4631 symbol_stub_index_offset + stub_idx; 4632 const lldb::addr_t symbol_stub_addr = 4633 m_mach_sections[sect_idx].addr + 4634 (stub_idx * symbol_stub_byte_size); 4635 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4; 4636 
if (indirect_symbol_index_data.ValidOffsetForDataOfSize( 4637 symbol_stub_offset, 4)) { 4638 const uint32_t stub_sym_id = 4639 indirect_symbol_index_data.GetU32(&symbol_stub_offset); 4640 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL)) 4641 continue; 4642 4643 NListIndexToSymbolIndexMap::const_iterator index_pos = 4644 m_nlist_idx_to_sym_idx.find(stub_sym_id); 4645 Symbol *stub_symbol = nullptr; 4646 if (index_pos != end_index_pos) { 4647 // We have a remapping from the original nlist index to a 4648 // current symbol index, so just look this up by index 4649 stub_symbol = symtab.SymbolAtIndex(index_pos->second); 4650 } else { 4651 // We need to lookup a symbol using the original nlist symbol 4652 // index since this index is coming from the S_SYMBOL_STUBS 4653 stub_symbol = symtab.FindSymbolByID(stub_sym_id); 4654 } 4655 4656 if (stub_symbol) { 4657 Address so_addr(symbol_stub_addr, section_list); 4658 4659 if (stub_symbol->GetType() == eSymbolTypeUndefined) { 4660 // Change the external symbol into a trampoline that makes 4661 // sense These symbols were N_UNDF N_EXT, and are useless 4662 // to us, so we can re-use them so we don't have to make up 4663 // a synthetic symbol for no good reason. 
4664 if (resolver_addresses.find(symbol_stub_addr) == 4665 resolver_addresses.end()) 4666 stub_symbol->SetType(eSymbolTypeTrampoline); 4667 else 4668 stub_symbol->SetType(eSymbolTypeResolver); 4669 stub_symbol->SetExternal(false); 4670 stub_symbol->GetAddressRef() = so_addr; 4671 stub_symbol->SetByteSize(symbol_stub_byte_size); 4672 } else { 4673 // Make a synthetic symbol to describe the trampoline stub 4674 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled()); 4675 if (sym_idx >= num_syms) { 4676 sym = symtab.Resize(++num_syms); 4677 stub_symbol = nullptr; // this pointer no longer valid 4678 } 4679 sym[sym_idx].SetID(synthetic_sym_id++); 4680 sym[sym_idx].GetMangled() = stub_symbol_mangled_name; 4681 if (resolver_addresses.find(symbol_stub_addr) == 4682 resolver_addresses.end()) 4683 sym[sym_idx].SetType(eSymbolTypeTrampoline); 4684 else 4685 sym[sym_idx].SetType(eSymbolTypeResolver); 4686 sym[sym_idx].SetIsSynthetic(true); 4687 sym[sym_idx].GetAddressRef() = so_addr; 4688 add_symbol_addr(so_addr.GetFileAddress()); 4689 sym[sym_idx].SetByteSize(symbol_stub_byte_size); 4690 ++sym_idx; 4691 } 4692 } else { 4693 if (log) 4694 log->Warning("symbol stub referencing symbol table symbol " 4695 "%u that isn't in our minimal symbol table, " 4696 "fix this!!!", 4697 stub_sym_id); 4698 } 4699 } 4700 } 4701 } 4702 } 4703 } 4704 } 4705 4706 if (!reexport_trie_entries.empty()) { 4707 for (const auto &e : reexport_trie_entries) { 4708 if (e.entry.import_name) { 4709 // Only add indirect symbols from the Trie entries if we didn't have 4710 // a N_INDR nlist entry for this already 4711 if (indirect_symbol_names.find(e.entry.name) == 4712 indirect_symbol_names.end()) { 4713 // Make a synthetic symbol to describe re-exported symbol. 
4714 if (sym_idx >= num_syms) 4715 sym = symtab.Resize(++num_syms); 4716 sym[sym_idx].SetID(synthetic_sym_id++); 4717 sym[sym_idx].GetMangled() = Mangled(e.entry.name); 4718 sym[sym_idx].SetType(eSymbolTypeReExported); 4719 sym[sym_idx].SetIsSynthetic(true); 4720 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name); 4721 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) { 4722 sym[sym_idx].SetReExportedSymbolSharedLibrary( 4723 dylib_files.GetFileSpecAtIndex(e.entry.other - 1)); 4724 } 4725 ++sym_idx; 4726 } 4727 } 4728 } 4729 } 4730 } 4731 4732 void ObjectFileMachO::Dump(Stream *s) { 4733 ModuleSP module_sp(GetModule()); 4734 if (module_sp) { 4735 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 4736 s->Printf("%p: ", static_cast<void *>(this)); 4737 s->Indent(); 4738 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64) 4739 s->PutCString("ObjectFileMachO64"); 4740 else 4741 s->PutCString("ObjectFileMachO32"); 4742 4743 *s << ", file = '" << m_file; 4744 ModuleSpecList all_specs; 4745 ModuleSpec base_spec; 4746 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 4747 base_spec, all_specs); 4748 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 4749 *s << "', triple"; 4750 if (e) 4751 s->Printf("[%d]", i); 4752 *s << " = "; 4753 *s << all_specs.GetModuleSpecRefAtIndex(i) 4754 .GetArchitecture() 4755 .GetTriple() 4756 .getTriple(); 4757 } 4758 *s << "\n"; 4759 SectionList *sections = GetSectionList(); 4760 if (sections) 4761 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 4762 UINT32_MAX); 4763 4764 if (m_symtab_up) 4765 m_symtab_up->Dump(s, nullptr, eSortOrderNone); 4766 } 4767 } 4768 4769 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header, 4770 const lldb_private::DataExtractor &data, 4771 lldb::offset_t lc_offset) { 4772 uint32_t i; 4773 llvm::MachO::uuid_command load_cmd; 4774 4775 lldb::offset_t offset = lc_offset; 4776 for (i = 0; i < 
header.ncmds; ++i) { 4777 const lldb::offset_t cmd_offset = offset; 4778 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4779 break; 4780 4781 if (load_cmd.cmd == LC_UUID) { 4782 const uint8_t *uuid_bytes = data.PeekData(offset, 16); 4783 4784 if (uuid_bytes) { 4785 // OpenCL on Mac OS X uses the same UUID for each of its object files. 4786 // We pretend these object files have no UUID to prevent crashing. 4787 4788 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8, 4789 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63, 4790 0xbb, 0x14, 0xf0, 0x0d}; 4791 4792 if (!memcmp(uuid_bytes, opencl_uuid, 16)) 4793 return UUID(); 4794 4795 return UUID(uuid_bytes, 16); 4796 } 4797 return UUID(); 4798 } 4799 offset = cmd_offset + load_cmd.cmdsize; 4800 } 4801 return UUID(); 4802 } 4803 4804 static llvm::StringRef GetOSName(uint32_t cmd) { 4805 switch (cmd) { 4806 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 4807 return llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4808 case llvm::MachO::LC_VERSION_MIN_MACOSX: 4809 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4810 case llvm::MachO::LC_VERSION_MIN_TVOS: 4811 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4812 case llvm::MachO::LC_VERSION_MIN_WATCHOS: 4813 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4814 default: 4815 llvm_unreachable("unexpected LC_VERSION load command"); 4816 } 4817 } 4818 4819 namespace { 4820 struct OSEnv { 4821 llvm::StringRef os_type; 4822 llvm::StringRef environment; 4823 OSEnv(uint32_t cmd) { 4824 switch (cmd) { 4825 case llvm::MachO::PLATFORM_MACOS: 4826 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4827 return; 4828 case llvm::MachO::PLATFORM_IOS: 4829 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4830 return; 4831 case llvm::MachO::PLATFORM_TVOS: 4832 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4833 return; 4834 case llvm::MachO::PLATFORM_WATCHOS: 4835 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4836 
return; 4837 case llvm::MachO::PLATFORM_BRIDGEOS: 4838 os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS); 4839 return; 4840 case llvm::MachO::PLATFORM_DRIVERKIT: 4841 os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit); 4842 return; 4843 case llvm::MachO::PLATFORM_MACCATALYST: 4844 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4845 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI); 4846 return; 4847 case llvm::MachO::PLATFORM_IOSSIMULATOR: 4848 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4849 environment = 4850 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4851 return; 4852 case llvm::MachO::PLATFORM_TVOSSIMULATOR: 4853 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4854 environment = 4855 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4856 return; 4857 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR: 4858 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4859 environment = 4860 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4861 return; 4862 case llvm::MachO::PLATFORM_XROS: 4863 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4864 return; 4865 case llvm::MachO::PLATFORM_XROS_SIMULATOR: 4866 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4867 environment = 4868 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4869 return; 4870 default: { 4871 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 4872 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION"); 4873 } 4874 } 4875 } 4876 }; 4877 4878 struct MinOS { 4879 uint32_t major_version, minor_version, patch_version; 4880 MinOS(uint32_t version) 4881 : major_version(version >> 16), minor_version((version >> 8) & 0xffu), 4882 patch_version(version & 0xffu) {} 4883 }; 4884 } // namespace 4885 4886 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, 4887 const lldb_private::DataExtractor &data, 4888 lldb::offset_t 
lc_offset, 4889 ModuleSpec &base_spec, 4890 lldb_private::ModuleSpecList &all_specs) { 4891 auto &base_arch = base_spec.GetArchitecture(); 4892 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype); 4893 if (!base_arch.IsValid()) 4894 return; 4895 4896 bool found_any = false; 4897 auto add_triple = [&](const llvm::Triple &triple) { 4898 auto spec = base_spec; 4899 spec.GetArchitecture().GetTriple() = triple; 4900 if (spec.GetArchitecture().IsValid()) { 4901 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset); 4902 all_specs.Append(spec); 4903 found_any = true; 4904 } 4905 }; 4906 4907 // Set OS to an unspecified unknown or a "*" so it can match any OS 4908 llvm::Triple base_triple = base_arch.GetTriple(); 4909 base_triple.setOS(llvm::Triple::UnknownOS); 4910 base_triple.setOSName(llvm::StringRef()); 4911 4912 if (header.filetype == MH_PRELOAD) { 4913 if (header.cputype == CPU_TYPE_ARM) { 4914 // If this is a 32-bit arm binary, and it's a standalone binary, force 4915 // the Vendor to Apple so we don't accidentally pick up the generic 4916 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the 4917 // frame pointer register; most other armv7 ABIs use a combination of 4918 // r7 and r11. 4919 base_triple.setVendor(llvm::Triple::Apple); 4920 } else { 4921 // Set vendor to an unspecified unknown or a "*" so it can match any 4922 // vendor This is required for correct behavior of EFI debugging on 4923 // x86_64 4924 base_triple.setVendor(llvm::Triple::UnknownVendor); 4925 base_triple.setVendorName(llvm::StringRef()); 4926 } 4927 return add_triple(base_triple); 4928 } 4929 4930 llvm::MachO::load_command load_cmd; 4931 4932 // See if there is an LC_VERSION_MIN_* load command that can give 4933 // us the OS type. 
4934 lldb::offset_t offset = lc_offset; 4935 for (uint32_t i = 0; i < header.ncmds; ++i) { 4936 const lldb::offset_t cmd_offset = offset; 4937 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4938 break; 4939 4940 llvm::MachO::version_min_command version_min; 4941 switch (load_cmd.cmd) { 4942 case llvm::MachO::LC_VERSION_MIN_MACOSX: 4943 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 4944 case llvm::MachO::LC_VERSION_MIN_TVOS: 4945 case llvm::MachO::LC_VERSION_MIN_WATCHOS: { 4946 if (load_cmd.cmdsize != sizeof(version_min)) 4947 break; 4948 if (data.ExtractBytes(cmd_offset, sizeof(version_min), 4949 data.GetByteOrder(), &version_min) == 0) 4950 break; 4951 MinOS min_os(version_min.version); 4952 llvm::SmallString<32> os_name; 4953 llvm::raw_svector_ostream os(os_name); 4954 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.' 4955 << min_os.minor_version << '.' << min_os.patch_version; 4956 4957 auto triple = base_triple; 4958 triple.setOSName(os.str()); 4959 4960 // Disambiguate legacy simulator platforms. 4961 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX && 4962 (base_triple.getArch() == llvm::Triple::x86_64 || 4963 base_triple.getArch() == llvm::Triple::x86)) { 4964 // The combination of legacy LC_VERSION_MIN load command and 4965 // x86 architecture always indicates a simulator environment. 4966 // The combination of LC_VERSION_MIN and arm architecture only 4967 // appears for native binaries. Back-deploying simulator 4968 // binaries on Apple Silicon Macs use the modern unambigous 4969 // LC_BUILD_VERSION load commands; no special handling required. 4970 triple.setEnvironment(llvm::Triple::Simulator); 4971 } 4972 add_triple(triple); 4973 break; 4974 } 4975 default: 4976 break; 4977 } 4978 4979 offset = cmd_offset + load_cmd.cmdsize; 4980 } 4981 4982 // See if there are LC_BUILD_VERSION load commands that can give 4983 // us the OS type. 
4984 offset = lc_offset; 4985 for (uint32_t i = 0; i < header.ncmds; ++i) { 4986 const lldb::offset_t cmd_offset = offset; 4987 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4988 break; 4989 4990 do { 4991 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) { 4992 llvm::MachO::build_version_command build_version; 4993 if (load_cmd.cmdsize < sizeof(build_version)) { 4994 // Malformed load command. 4995 break; 4996 } 4997 if (data.ExtractBytes(cmd_offset, sizeof(build_version), 4998 data.GetByteOrder(), &build_version) == 0) 4999 break; 5000 MinOS min_os(build_version.minos); 5001 OSEnv os_env(build_version.platform); 5002 llvm::SmallString<16> os_name; 5003 llvm::raw_svector_ostream os(os_name); 5004 os << os_env.os_type << min_os.major_version << '.' 5005 << min_os.minor_version << '.' << min_os.patch_version; 5006 auto triple = base_triple; 5007 triple.setOSName(os.str()); 5008 os_name.clear(); 5009 if (!os_env.environment.empty()) 5010 triple.setEnvironmentName(os_env.environment); 5011 add_triple(triple); 5012 } 5013 } while (false); 5014 offset = cmd_offset + load_cmd.cmdsize; 5015 } 5016 5017 if (!found_any) { 5018 add_triple(base_triple); 5019 } 5020 } 5021 5022 ArchSpec ObjectFileMachO::GetArchitecture( 5023 ModuleSP module_sp, const llvm::MachO::mach_header &header, 5024 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) { 5025 ModuleSpecList all_specs; 5026 ModuleSpec base_spec; 5027 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic), 5028 base_spec, all_specs); 5029 5030 // If the object file offers multiple alternative load commands, 5031 // pick the one that matches the module. 
5032 if (module_sp) { 5033 const ArchSpec &module_arch = module_sp->GetArchitecture(); 5034 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 5035 ArchSpec mach_arch = 5036 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 5037 if (module_arch.IsCompatibleMatch(mach_arch)) 5038 return mach_arch; 5039 } 5040 } 5041 5042 // Return the first arch we found. 5043 if (all_specs.GetSize() == 0) 5044 return {}; 5045 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture(); 5046 } 5047 5048 UUID ObjectFileMachO::GetUUID() { 5049 ModuleSP module_sp(GetModule()); 5050 if (module_sp) { 5051 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5052 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5053 return GetUUID(m_header, m_data, offset); 5054 } 5055 return UUID(); 5056 } 5057 5058 uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) { 5059 ModuleSP module_sp = GetModule(); 5060 if (!module_sp) 5061 return 0; 5062 5063 uint32_t count = 0; 5064 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5065 llvm::MachO::load_command load_cmd; 5066 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5067 std::vector<std::string> rpath_paths; 5068 std::vector<std::string> rpath_relative_paths; 5069 std::vector<std::string> at_exec_relative_paths; 5070 uint32_t i; 5071 for (i = 0; i < m_header.ncmds; ++i) { 5072 const uint32_t cmd_offset = offset; 5073 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5074 break; 5075 5076 switch (load_cmd.cmd) { 5077 case LC_RPATH: 5078 case LC_LOAD_DYLIB: 5079 case LC_LOAD_WEAK_DYLIB: 5080 case LC_REEXPORT_DYLIB: 5081 case LC_LOAD_DYLINKER: 5082 case LC_LOADFVMLIB: 5083 case LC_LOAD_UPWARD_DYLIB: { 5084 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 5085 // For LC_LOAD_DYLIB there is an alternate encoding 5086 // which adds a uint32_t `flags` field for `DYLD_USE_*` 5087 // flags. 
This can be detected by a timestamp field with 5088 // the `DYLIB_USE_MARKER` constant value. 5089 bool is_delayed_init = false; 5090 uint32_t use_command_marker = m_data.GetU32(&offset); 5091 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) { 5092 offset += 4; /* uint32_t current_version */ 5093 offset += 4; /* uint32_t compat_version */ 5094 uint32_t flags = m_data.GetU32(&offset); 5095 // If this LC_LOAD_DYLIB is marked delay-init, 5096 // don't report it as a dependent library -- it 5097 // may be loaded in the process at some point, 5098 // but will most likely not be load at launch. 5099 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */) 5100 is_delayed_init = true; 5101 } 5102 const char *path = m_data.PeekCStr(name_offset); 5103 if (path && !is_delayed_init) { 5104 if (load_cmd.cmd == LC_RPATH) 5105 rpath_paths.push_back(path); 5106 else { 5107 if (path[0] == '@') { 5108 if (strncmp(path, "@rpath", strlen("@rpath")) == 0) 5109 rpath_relative_paths.push_back(path + strlen("@rpath")); 5110 else if (strncmp(path, "@executable_path", 5111 strlen("@executable_path")) == 0) 5112 at_exec_relative_paths.push_back(path + 5113 strlen("@executable_path")); 5114 } else { 5115 FileSpec file_spec(path); 5116 if (files.AppendIfUnique(file_spec)) 5117 count++; 5118 } 5119 } 5120 } 5121 } break; 5122 5123 default: 5124 break; 5125 } 5126 offset = cmd_offset + load_cmd.cmdsize; 5127 } 5128 5129 FileSpec this_file_spec(m_file); 5130 FileSystem::Instance().Resolve(this_file_spec); 5131 5132 if (!rpath_paths.empty()) { 5133 // Fixup all LC_RPATH values to be absolute paths. 
5134 const std::string this_directory = 5135 this_file_spec.GetDirectory().GetString(); 5136 for (auto &rpath : rpath_paths) { 5137 if (llvm::StringRef(rpath).starts_with(g_loader_path)) 5138 rpath = this_directory + rpath.substr(g_loader_path.size()); 5139 else if (llvm::StringRef(rpath).starts_with(g_executable_path)) 5140 rpath = this_directory + rpath.substr(g_executable_path.size()); 5141 } 5142 5143 for (const auto &rpath_relative_path : rpath_relative_paths) { 5144 for (const auto &rpath : rpath_paths) { 5145 std::string path = rpath; 5146 path += rpath_relative_path; 5147 // It is OK to resolve this path because we must find a file on disk 5148 // for us to accept it anyway if it is rpath relative. 5149 FileSpec file_spec(path); 5150 FileSystem::Instance().Resolve(file_spec); 5151 if (FileSystem::Instance().Exists(file_spec) && 5152 files.AppendIfUnique(file_spec)) { 5153 count++; 5154 break; 5155 } 5156 } 5157 } 5158 } 5159 5160 // We may have @executable_paths but no RPATHS. Figure those out here. 5161 // Only do this if this object file is the executable. We have no way to 5162 // get back to the actual executable otherwise, so we won't get the right 5163 // path. 5164 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) { 5165 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent(); 5166 for (const auto &at_exec_relative_path : at_exec_relative_paths) { 5167 FileSpec file_spec = 5168 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path); 5169 if (FileSystem::Instance().Exists(file_spec) && 5170 files.AppendIfUnique(file_spec)) 5171 count++; 5172 } 5173 } 5174 return count; 5175 } 5176 5177 lldb_private::Address ObjectFileMachO::GetEntryPointAddress() { 5178 // If the object file is not an executable it can't hold the entry point. 5179 // m_entry_point_address is initialized to an invalid address, so we can just 5180 // return that. 
If m_entry_point_address is valid it means we've found it 5181 // already, so return the cached value. 5182 5183 if ((!IsExecutable() && !IsDynamicLoader()) || 5184 m_entry_point_address.IsValid()) { 5185 return m_entry_point_address; 5186 } 5187 5188 // Otherwise, look for the UnixThread or Thread command. The data for the 5189 // Thread command is given in /usr/include/mach-o.h, but it is basically: 5190 // 5191 // uint32_t flavor - this is the flavor argument you would pass to 5192 // thread_get_state 5193 // uint32_t count - this is the count of longs in the thread state data 5194 // struct XXX_thread_state state - this is the structure from 5195 // <machine/thread_status.h> corresponding to the flavor. 5196 // <repeat this trio> 5197 // 5198 // So we just keep reading the various register flavors till we find the GPR 5199 // one, then read the PC out of there. 5200 // FIXME: We will need to have a "RegisterContext data provider" class at some 5201 // point that can get all the registers 5202 // out of data in this form & attach them to a given thread. That should 5203 // underlie the MacOS X User process plugin, and we'll also need it for the 5204 // MacOS X Core File process plugin. When we have that we can also use it 5205 // here. 
5206 // 5207 // For now we hard-code the offsets and flavors we need: 5208 // 5209 // 5210 5211 ModuleSP module_sp(GetModule()); 5212 if (module_sp) { 5213 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5214 llvm::MachO::load_command load_cmd; 5215 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5216 uint32_t i; 5217 lldb::addr_t start_address = LLDB_INVALID_ADDRESS; 5218 bool done = false; 5219 5220 for (i = 0; i < m_header.ncmds; ++i) { 5221 const lldb::offset_t cmd_offset = offset; 5222 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5223 break; 5224 5225 switch (load_cmd.cmd) { 5226 case LC_UNIXTHREAD: 5227 case LC_THREAD: { 5228 while (offset < cmd_offset + load_cmd.cmdsize) { 5229 uint32_t flavor = m_data.GetU32(&offset); 5230 uint32_t count = m_data.GetU32(&offset); 5231 if (count == 0) { 5232 // We've gotten off somehow, log and exit; 5233 return m_entry_point_address; 5234 } 5235 5236 switch (m_header.cputype) { 5237 case llvm::MachO::CPU_TYPE_ARM: 5238 if (flavor == 1 || 5239 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32 5240 // from mach/arm/thread_status.h 5241 { 5242 offset += 60; // This is the offset of pc in the GPR thread state 5243 // data structure. 5244 start_address = m_data.GetU32(&offset); 5245 done = true; 5246 } 5247 break; 5248 case llvm::MachO::CPU_TYPE_ARM64: 5249 case llvm::MachO::CPU_TYPE_ARM64_32: 5250 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h 5251 { 5252 offset += 256; // This is the offset of pc in the GPR thread state 5253 // data structure. 5254 start_address = m_data.GetU64(&offset); 5255 done = true; 5256 } 5257 break; 5258 case llvm::MachO::CPU_TYPE_I386: 5259 if (flavor == 5260 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h 5261 { 5262 offset += 40; // This is the offset of eip in the GPR thread state 5263 // data structure. 
5264 start_address = m_data.GetU32(&offset); 5265 done = true; 5266 } 5267 break; 5268 case llvm::MachO::CPU_TYPE_X86_64: 5269 if (flavor == 5270 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h 5271 { 5272 offset += 16 * 8; // This is the offset of rip in the GPR thread 5273 // state data structure. 5274 start_address = m_data.GetU64(&offset); 5275 done = true; 5276 } 5277 break; 5278 default: 5279 return m_entry_point_address; 5280 } 5281 // Haven't found the GPR flavor yet, skip over the data for this 5282 // flavor: 5283 if (done) 5284 break; 5285 offset += count * 4; 5286 } 5287 } break; 5288 case LC_MAIN: { 5289 uint64_t entryoffset = m_data.GetU64(&offset); 5290 SectionSP text_segment_sp = 5291 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 5292 if (text_segment_sp) { 5293 done = true; 5294 start_address = text_segment_sp->GetFileAddress() + entryoffset; 5295 } 5296 } break; 5297 5298 default: 5299 break; 5300 } 5301 if (done) 5302 break; 5303 5304 // Go to the next load command: 5305 offset = cmd_offset + load_cmd.cmdsize; 5306 } 5307 5308 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) { 5309 if (GetSymtab()) { 5310 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType( 5311 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode, 5312 Symtab::eDebugAny, Symtab::eVisibilityAny); 5313 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) { 5314 start_address = dyld_start_sym->GetAddress().GetFileAddress(); 5315 } 5316 } 5317 } 5318 5319 if (start_address != LLDB_INVALID_ADDRESS) { 5320 // We got the start address from the load commands, so now resolve that 5321 // address in the sections of this ObjectFile: 5322 if (!m_entry_point_address.ResolveAddressUsingFileSections( 5323 start_address, GetSectionList())) { 5324 m_entry_point_address.Clear(); 5325 } 5326 } else { 5327 // We couldn't read the UnixThread load command - maybe it wasn't there. 
5328 // As a fallback look for the "start" symbol in the main executable. 5329 5330 ModuleSP module_sp(GetModule()); 5331 5332 if (module_sp) { 5333 SymbolContextList contexts; 5334 SymbolContext context; 5335 module_sp->FindSymbolsWithNameAndType(ConstString("start"), 5336 eSymbolTypeCode, contexts); 5337 if (contexts.GetSize()) { 5338 if (contexts.GetContextAtIndex(0, context)) 5339 m_entry_point_address = context.symbol->GetAddress(); 5340 } 5341 } 5342 } 5343 } 5344 5345 return m_entry_point_address; 5346 } 5347 5348 lldb_private::Address ObjectFileMachO::GetBaseAddress() { 5349 lldb_private::Address header_addr; 5350 SectionList *section_list = GetSectionList(); 5351 if (section_list) { 5352 SectionSP text_segment_sp( 5353 section_list->FindSectionByName(GetSegmentNameTEXT())); 5354 if (text_segment_sp) { 5355 header_addr.SetSection(text_segment_sp); 5356 header_addr.SetOffset(0); 5357 } 5358 } 5359 return header_addr; 5360 } 5361 5362 uint32_t ObjectFileMachO::GetNumThreadContexts() { 5363 ModuleSP module_sp(GetModule()); 5364 if (module_sp) { 5365 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5366 if (!m_thread_context_offsets_valid) { 5367 m_thread_context_offsets_valid = true; 5368 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5369 FileRangeArray::Entry file_range; 5370 llvm::MachO::thread_command thread_cmd; 5371 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5372 const uint32_t cmd_offset = offset; 5373 if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr) 5374 break; 5375 5376 if (thread_cmd.cmd == LC_THREAD) { 5377 file_range.SetRangeBase(offset); 5378 file_range.SetByteSize(thread_cmd.cmdsize - 8); 5379 m_thread_context_offsets.Append(file_range); 5380 } 5381 offset = cmd_offset + thread_cmd.cmdsize; 5382 } 5383 } 5384 } 5385 return m_thread_context_offsets.GetSize(); 5386 } 5387 5388 std::vector<std::tuple<offset_t, offset_t>> 5389 ObjectFileMachO::FindLC_NOTEByName(std::string name) { 5390 
std::vector<std::tuple<offset_t, offset_t>> results; 5391 ModuleSP module_sp(GetModule()); 5392 if (module_sp) { 5393 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5394 5395 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5396 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5397 const uint32_t cmd_offset = offset; 5398 llvm::MachO::load_command lc = {}; 5399 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 5400 break; 5401 if (lc.cmd == LC_NOTE) { 5402 char data_owner[17]; 5403 m_data.CopyData(offset, 16, data_owner); 5404 data_owner[16] = '\0'; 5405 offset += 16; 5406 5407 if (name == data_owner) { 5408 offset_t payload_offset = m_data.GetU64_unchecked(&offset); 5409 offset_t payload_size = m_data.GetU64_unchecked(&offset); 5410 results.push_back({payload_offset, payload_size}); 5411 } 5412 } 5413 offset = cmd_offset + lc.cmdsize; 5414 } 5415 } 5416 return results; 5417 } 5418 5419 std::string ObjectFileMachO::GetIdentifierString() { 5420 Log *log( 5421 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5422 ModuleSP module_sp(GetModule()); 5423 if (module_sp) { 5424 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5425 5426 auto lc_notes = FindLC_NOTEByName("kern ver str"); 5427 for (auto lc_note : lc_notes) { 5428 offset_t payload_offset = std::get<0>(lc_note); 5429 offset_t payload_size = std::get<1>(lc_note); 5430 uint32_t version; 5431 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5432 if (version == 1) { 5433 uint32_t strsize = payload_size - sizeof(uint32_t); 5434 std::string result(strsize, '\0'); 5435 m_data.CopyData(payload_offset, strsize, result.data()); 5436 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'", 5437 result.c_str()); 5438 return result; 5439 } 5440 } 5441 } 5442 5443 // Second, make a pass over the load commands looking for an obsolete 5444 // LC_IDENT load command. 
5445 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5446 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5447 const uint32_t cmd_offset = offset; 5448 llvm::MachO::ident_command ident_command; 5449 if (m_data.GetU32(&offset, &ident_command, 2) == nullptr) 5450 break; 5451 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) { 5452 std::string result(ident_command.cmdsize, '\0'); 5453 if (m_data.CopyData(offset, ident_command.cmdsize, result.data()) == 5454 ident_command.cmdsize) { 5455 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str()); 5456 return result; 5457 } 5458 } 5459 offset = cmd_offset + ident_command.cmdsize; 5460 } 5461 } 5462 return {}; 5463 } 5464 5465 AddressableBits ObjectFileMachO::GetAddressableBits() { 5466 AddressableBits addressable_bits; 5467 5468 Log *log(GetLog(LLDBLog::Process)); 5469 ModuleSP module_sp(GetModule()); 5470 if (module_sp) { 5471 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5472 auto lc_notes = FindLC_NOTEByName("addrable bits"); 5473 for (auto lc_note : lc_notes) { 5474 offset_t payload_offset = std::get<0>(lc_note); 5475 uint32_t version; 5476 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5477 if (version == 3) { 5478 uint32_t num_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5479 addressable_bits.SetAddressableBits(num_addr_bits); 5480 LLDB_LOGF(log, 5481 "LC_NOTE 'addrable bits' v3 found, value %d " 5482 "bits", 5483 num_addr_bits); 5484 } 5485 if (version == 4) { 5486 uint32_t lo_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5487 uint32_t hi_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5488 5489 if (lo_addr_bits == hi_addr_bits) 5490 addressable_bits.SetAddressableBits(lo_addr_bits); 5491 else 5492 addressable_bits.SetAddressableBits(lo_addr_bits, hi_addr_bits); 5493 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits", 5494 lo_addr_bits, hi_addr_bits); 5495 } 5496 } 5497 } 5498 } 5499 return 
addressable_bits; 5500 } 5501 5502 bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, 5503 bool &value_is_offset, 5504 UUID &uuid, 5505 ObjectFile::BinaryType &type) { 5506 Log *log( 5507 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5508 value = LLDB_INVALID_ADDRESS; 5509 value_is_offset = false; 5510 uuid.Clear(); 5511 uint32_t log2_pagesize = 0; // not currently passed up to caller 5512 uint32_t platform = 0; // not currently passed up to caller 5513 ModuleSP module_sp(GetModule()); 5514 if (module_sp) { 5515 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5516 5517 auto lc_notes = FindLC_NOTEByName("main bin spec"); 5518 for (auto lc_note : lc_notes) { 5519 offset_t payload_offset = std::get<0>(lc_note); 5520 5521 // struct main_bin_spec 5522 // { 5523 // uint32_t version; // currently 2 5524 // uint32_t type; // 0 == unspecified, 1 == kernel, 5525 // // 2 == user process, 5526 // // 3 == standalone binary 5527 // uint64_t address; // UINT64_MAX if address not specified 5528 // uint64_t slide; // slide, UINT64_MAX if unspecified 5529 // // 0 if no slide needs to be applied to 5530 // // file address 5531 // uuid_t uuid; // all zero's if uuid not specified 5532 // uint32_t log2_pagesize; // process page size in log base 2, 5533 // // e.g. 4k pages are 12. 5534 // // 0 for unspecified 5535 // uint32_t platform; // The Mach-O platform for this corefile. 5536 // // 0 for unspecified. 5537 // // The values are defined in 5538 // // <mach-o/loader.h>, PLATFORM_*. 
5539 // } __attribute((packed)); 5540 5541 // "main bin spec" (main binary specification) data payload is 5542 // formatted: 5543 // uint32_t version [currently 1] 5544 // uint32_t type [0 == unspecified, 1 == kernel, 5545 // 2 == user process, 3 == firmware ] 5546 // uint64_t address [ UINT64_MAX if address not specified ] 5547 // uuid_t uuid [ all zero's if uuid not specified ] 5548 // uint32_t log2_pagesize [ process page size in log base 5549 // 2, e.g. 4k pages are 12. 5550 // 0 for unspecified ] 5551 // uint32_t unused [ for alignment ] 5552 5553 uint32_t version; 5554 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr && 5555 version <= 2) { 5556 uint32_t binspec_type = 0; 5557 uuid_t raw_uuid; 5558 memset(raw_uuid, 0, sizeof(uuid_t)); 5559 5560 if (!m_data.GetU32(&payload_offset, &binspec_type, 1)) 5561 return false; 5562 if (!m_data.GetU64(&payload_offset, &value, 1)) 5563 return false; 5564 uint64_t slide = LLDB_INVALID_ADDRESS; 5565 if (version > 1 && !m_data.GetU64(&payload_offset, &slide, 1)) 5566 return false; 5567 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) { 5568 value = slide; 5569 value_is_offset = true; 5570 } 5571 5572 if (m_data.CopyData(payload_offset, sizeof(uuid_t), raw_uuid) != 0) { 5573 uuid = UUID(raw_uuid, sizeof(uuid_t)); 5574 // convert the "main bin spec" type into our 5575 // ObjectFile::BinaryType enum 5576 const char *typestr = "unrecognized type"; 5577 switch (binspec_type) { 5578 case 0: 5579 type = eBinaryTypeUnknown; 5580 typestr = "uknown"; 5581 break; 5582 case 1: 5583 type = eBinaryTypeKernel; 5584 typestr = "xnu kernel"; 5585 break; 5586 case 2: 5587 type = eBinaryTypeUser; 5588 typestr = "userland dyld"; 5589 break; 5590 case 3: 5591 type = eBinaryTypeStandalone; 5592 typestr = "standalone"; 5593 break; 5594 } 5595 LLDB_LOGF(log, 5596 "LC_NOTE 'main bin spec' found, version %d type %d " 5597 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s", 5598 version, type, typestr, value, 5599 
value_is_offset ? "true" : "false", 5600 uuid.GetAsString().c_str()); 5601 if (!m_data.GetU32(&payload_offset, &log2_pagesize, 1)) 5602 return false; 5603 if (version > 1 && !m_data.GetU32(&payload_offset, &platform, 1)) 5604 return false; 5605 return true; 5606 } 5607 } 5608 } 5609 } 5610 return false; 5611 } 5612 5613 bool ObjectFileMachO::GetCorefileThreadExtraInfos( 5614 std::vector<lldb::tid_t> &tids) { 5615 tids.clear(); 5616 ModuleSP module_sp(GetModule()); 5617 if (module_sp) { 5618 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5619 5620 Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread)); 5621 auto lc_notes = FindLC_NOTEByName("process metadata"); 5622 for (auto lc_note : lc_notes) { 5623 offset_t payload_offset = std::get<0>(lc_note); 5624 offset_t strsize = std::get<1>(lc_note); 5625 std::string buf(strsize, '\0'); 5626 if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) { 5627 LLDB_LOGF(log, 5628 "Unable to read %" PRIu64 5629 " bytes of 'process metadata' LC_NOTE JSON contents", 5630 strsize); 5631 return false; 5632 } 5633 while (buf.back() == '\0') 5634 buf.resize(buf.size() - 1); 5635 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf); 5636 StructuredData::Dictionary *dict = object_sp->GetAsDictionary(); 5637 if (!dict) { 5638 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not " 5639 "get a dictionary."); 5640 return false; 5641 } 5642 StructuredData::Array *threads; 5643 if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) { 5644 LLDB_LOGF(log, 5645 "'process metadata' LC_NOTE does not have a 'threads' key"); 5646 return false; 5647 } 5648 if (threads->GetSize() != GetNumThreadContexts()) { 5649 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of " 5650 "threads does not match number of LC_THREADS."); 5651 return false; 5652 } 5653 const size_t num_threads = threads->GetSize(); 5654 for (size_t i = 0; i < num_threads; i++) { 5655 
std::optional<StructuredData::Dictionary *> maybe_thread = 5656 threads->GetItemAtIndexAsDictionary(i); 5657 if (!maybe_thread) { 5658 LLDB_LOGF(log, 5659 "Unable to read 'process metadata' LC_NOTE, threads " 5660 "array does not have a dictionary at index %zu.", 5661 i); 5662 return false; 5663 } 5664 StructuredData::Dictionary *thread = *maybe_thread; 5665 lldb::tid_t tid = LLDB_INVALID_THREAD_ID; 5666 if (thread->GetValueForKeyAsInteger<lldb::tid_t>("thread_id", tid)) 5667 if (tid == 0) 5668 tid = LLDB_INVALID_THREAD_ID; 5669 tids.push_back(tid); 5670 } 5671 5672 if (log) { 5673 StreamString logmsg; 5674 logmsg.Printf("LC_NOTE 'process metadata' found: "); 5675 dict->Dump(logmsg, /* pretty_print */ false); 5676 LLDB_LOGF(log, "%s", logmsg.GetData()); 5677 } 5678 return true; 5679 } 5680 } 5681 return false; 5682 } 5683 5684 lldb::RegisterContextSP 5685 ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx, 5686 lldb_private::Thread &thread) { 5687 lldb::RegisterContextSP reg_ctx_sp; 5688 5689 ModuleSP module_sp(GetModule()); 5690 if (module_sp) { 5691 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5692 if (!m_thread_context_offsets_valid) 5693 GetNumThreadContexts(); 5694 5695 const FileRangeArray::Entry *thread_context_file_range = 5696 m_thread_context_offsets.GetEntryAtIndex(idx); 5697 if (thread_context_file_range) { 5698 5699 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(), 5700 thread_context_file_range->GetByteSize()); 5701 5702 switch (m_header.cputype) { 5703 case llvm::MachO::CPU_TYPE_ARM64: 5704 case llvm::MachO::CPU_TYPE_ARM64_32: 5705 reg_ctx_sp = 5706 std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data); 5707 break; 5708 5709 case llvm::MachO::CPU_TYPE_ARM: 5710 reg_ctx_sp = 5711 std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data); 5712 break; 5713 5714 case llvm::MachO::CPU_TYPE_I386: 5715 reg_ctx_sp = 5716 std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data); 5717 break; 
5718 5719 case llvm::MachO::CPU_TYPE_X86_64: 5720 reg_ctx_sp = 5721 std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data); 5722 break; 5723 } 5724 } 5725 } 5726 return reg_ctx_sp; 5727 } 5728 5729 ObjectFile::Type ObjectFileMachO::CalculateType() { 5730 switch (m_header.filetype) { 5731 case MH_OBJECT: // 0x1u 5732 if (GetAddressByteSize() == 4) { 5733 // 32 bit kexts are just object files, but they do have a valid 5734 // UUID load command. 5735 if (GetUUID()) { 5736 // this checking for the UUID load command is not enough we could 5737 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5738 // this is required of kexts 5739 if (m_strata == eStrataInvalid) 5740 m_strata = eStrataKernel; 5741 return eTypeSharedLibrary; 5742 } 5743 } 5744 return eTypeObjectFile; 5745 5746 case MH_EXECUTE: 5747 return eTypeExecutable; // 0x2u 5748 case MH_FVMLIB: 5749 return eTypeSharedLibrary; // 0x3u 5750 case MH_CORE: 5751 return eTypeCoreFile; // 0x4u 5752 case MH_PRELOAD: 5753 return eTypeSharedLibrary; // 0x5u 5754 case MH_DYLIB: 5755 return eTypeSharedLibrary; // 0x6u 5756 case MH_DYLINKER: 5757 return eTypeDynamicLinker; // 0x7u 5758 case MH_BUNDLE: 5759 return eTypeSharedLibrary; // 0x8u 5760 case MH_DYLIB_STUB: 5761 return eTypeStubLibrary; // 0x9u 5762 case MH_DSYM: 5763 return eTypeDebugInfo; // 0xAu 5764 case MH_KEXT_BUNDLE: 5765 return eTypeSharedLibrary; // 0xBu 5766 default: 5767 break; 5768 } 5769 return eTypeUnknown; 5770 } 5771 5772 ObjectFile::Strata ObjectFileMachO::CalculateStrata() { 5773 switch (m_header.filetype) { 5774 case MH_OBJECT: // 0x1u 5775 { 5776 // 32 bit kexts are just object files, but they do have a valid 5777 // UUID load command. 
5778 if (GetUUID()) { 5779 // this checking for the UUID load command is not enough we could 5780 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5781 // this is required of kexts 5782 if (m_type == eTypeInvalid) 5783 m_type = eTypeSharedLibrary; 5784 5785 return eStrataKernel; 5786 } 5787 } 5788 return eStrataUnknown; 5789 5790 case MH_EXECUTE: // 0x2u 5791 // Check for the MH_DYLDLINK bit in the flags 5792 if (m_header.flags & MH_DYLDLINK) { 5793 return eStrataUser; 5794 } else { 5795 SectionList *section_list = GetSectionList(); 5796 if (section_list) { 5797 static ConstString g_kld_section_name("__KLD"); 5798 if (section_list->FindSectionByName(g_kld_section_name)) 5799 return eStrataKernel; 5800 } 5801 } 5802 return eStrataRawImage; 5803 5804 case MH_FVMLIB: 5805 return eStrataUser; // 0x3u 5806 case MH_CORE: 5807 return eStrataUnknown; // 0x4u 5808 case MH_PRELOAD: 5809 return eStrataRawImage; // 0x5u 5810 case MH_DYLIB: 5811 return eStrataUser; // 0x6u 5812 case MH_DYLINKER: 5813 return eStrataUser; // 0x7u 5814 case MH_BUNDLE: 5815 return eStrataUser; // 0x8u 5816 case MH_DYLIB_STUB: 5817 return eStrataUser; // 0x9u 5818 case MH_DSYM: 5819 return eStrataUnknown; // 0xAu 5820 case MH_KEXT_BUNDLE: 5821 return eStrataKernel; // 0xBu 5822 default: 5823 break; 5824 } 5825 return eStrataUnknown; 5826 } 5827 5828 llvm::VersionTuple ObjectFileMachO::GetVersion() { 5829 ModuleSP module_sp(GetModule()); 5830 if (module_sp) { 5831 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5832 llvm::MachO::dylib_command load_cmd; 5833 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5834 uint32_t version_cmd = 0; 5835 uint64_t version = 0; 5836 uint32_t i; 5837 for (i = 0; i < m_header.ncmds; ++i) { 5838 const lldb::offset_t cmd_offset = offset; 5839 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5840 break; 5841 5842 if (load_cmd.cmd == LC_ID_DYLIB) { 5843 if (version_cmd == 0) { 5844 version_cmd = load_cmd.cmd; 
5845 if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr) 5846 break; 5847 version = load_cmd.dylib.current_version; 5848 } 5849 break; // Break for now unless there is another more complete version 5850 // number load command in the future. 5851 } 5852 offset = cmd_offset + load_cmd.cmdsize; 5853 } 5854 5855 if (version_cmd == LC_ID_DYLIB) { 5856 unsigned major = (version & 0xFFFF0000ull) >> 16; 5857 unsigned minor = (version & 0x0000FF00ull) >> 8; 5858 unsigned subminor = (version & 0x000000FFull); 5859 return llvm::VersionTuple(major, minor, subminor); 5860 } 5861 } 5862 return llvm::VersionTuple(); 5863 } 5864 5865 ArchSpec ObjectFileMachO::GetArchitecture() { 5866 ModuleSP module_sp(GetModule()); 5867 ArchSpec arch; 5868 if (module_sp) { 5869 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5870 5871 return GetArchitecture(module_sp, m_header, m_data, 5872 MachHeaderSizeFromMagic(m_header.magic)); 5873 } 5874 return arch; 5875 } 5876 5877 void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process, 5878 addr_t &base_addr, UUID &uuid) { 5879 uuid.Clear(); 5880 base_addr = LLDB_INVALID_ADDRESS; 5881 if (process && process->GetDynamicLoader()) { 5882 DynamicLoader *dl = process->GetDynamicLoader(); 5883 LazyBool using_shared_cache; 5884 LazyBool private_shared_cache; 5885 dl->GetSharedCacheInformation(base_addr, uuid, using_shared_cache, 5886 private_shared_cache); 5887 } 5888 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5889 LLDB_LOGF( 5890 log, 5891 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64, 5892 uuid.GetAsString().c_str(), base_addr); 5893 } 5894 5895 // From dyld SPI header dyld_process_info.h 5896 typedef void *dyld_process_info; 5897 struct lldb_copy__dyld_process_cache_info { 5898 uuid_t cacheUUID; // UUID of cache used by process 5899 uint64_t cacheBaseAddress; // load address of dyld shared cache 5900 bool noCache; // process is running without a dyld cache 5901 bool privateCache; // 
process is using a private copy of its dyld cache 5902 }; 5903 5904 // #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with 5905 // llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile 5906 // errors. So we need to use the actual underlying types of task_t and 5907 // kern_return_t below. 5908 extern "C" unsigned int /*task_t*/ mach_task_self(); 5909 5910 void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) { 5911 uuid.Clear(); 5912 base_addr = LLDB_INVALID_ADDRESS; 5913 5914 #if defined(__APPLE__) 5915 uint8_t *(*dyld_get_all_image_infos)(void); 5916 dyld_get_all_image_infos = 5917 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos"); 5918 if (dyld_get_all_image_infos) { 5919 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos(); 5920 if (dyld_all_image_infos_address) { 5921 uint32_t *version = (uint32_t *) 5922 dyld_all_image_infos_address; // version <mach-o/dyld_images.h> 5923 if (*version >= 13) { 5924 uuid_t *sharedCacheUUID_address = 0; 5925 int wordsize = sizeof(uint8_t *); 5926 if (wordsize == 8) { 5927 sharedCacheUUID_address = 5928 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 5929 160); // sharedCacheUUID <mach-o/dyld_images.h> 5930 if (*version >= 15) 5931 base_addr = 5932 *(uint64_t 5933 *)((uint8_t *)dyld_all_image_infos_address + 5934 176); // sharedCacheBaseAddress <mach-o/dyld_images.h> 5935 } else { 5936 sharedCacheUUID_address = 5937 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 5938 84); // sharedCacheUUID <mach-o/dyld_images.h> 5939 if (*version >= 15) { 5940 base_addr = 0; 5941 base_addr = 5942 *(uint32_t 5943 *)((uint8_t *)dyld_all_image_infos_address + 5944 100); // sharedCacheBaseAddress <mach-o/dyld_images.h> 5945 } 5946 } 5947 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t)); 5948 } 5949 } 5950 } else { 5951 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI 5952 dyld_process_info (*dyld_process_info_create)( 
5953 unsigned int /* task_t */ task, uint64_t timestamp, 5954 unsigned int /*kern_return_t*/ *kernelError); 5955 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo); 5956 void (*dyld_process_info_release)(dyld_process_info info); 5957 5958 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t, 5959 unsigned int /*kern_return_t*/ *)) 5960 dlsym(RTLD_DEFAULT, "_dyld_process_info_create"); 5961 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym( 5962 RTLD_DEFAULT, "_dyld_process_info_get_cache"); 5963 dyld_process_info_release = 5964 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release"); 5965 5966 if (dyld_process_info_create && dyld_process_info_get_cache) { 5967 unsigned int /*kern_return_t */ kern_ret; 5968 dyld_process_info process_info = 5969 dyld_process_info_create(::mach_task_self(), 0, &kern_ret); 5970 if (process_info) { 5971 struct lldb_copy__dyld_process_cache_info sc_info; 5972 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info)); 5973 dyld_process_info_get_cache(process_info, &sc_info); 5974 if (sc_info.cacheBaseAddress != 0) { 5975 base_addr = sc_info.cacheBaseAddress; 5976 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t)); 5977 } 5978 dyld_process_info_release(process_info); 5979 } 5980 } 5981 } 5982 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5983 if (log && uuid.IsValid()) 5984 LLDB_LOGF(log, 5985 "lldb's in-memory shared cache has a UUID of %s base address of " 5986 "0x%" PRIx64, 5987 uuid.GetAsString().c_str(), base_addr); 5988 #endif 5989 } 5990 5991 static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data, 5992 lldb::offset_t offset, 5993 size_t ncmds) { 5994 for (size_t i = 0; i < ncmds; i++) { 5995 const lldb::offset_t load_cmd_offset = offset; 5996 llvm::MachO::load_command lc = {}; 5997 if (data.GetU32(&offset, &lc.cmd, 2) == nullptr) 5998 break; 5999 6000 uint32_t version = 0; 6001 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX || 6002 lc.cmd == 
llvm::MachO::LC_VERSION_MIN_IPHONEOS || 6003 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS || 6004 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) { 6005 // struct version_min_command { 6006 // uint32_t cmd; // LC_VERSION_MIN_* 6007 // uint32_t cmdsize; 6008 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz 6009 // uint32_t sdk; 6010 // }; 6011 // We want to read version. 6012 version = data.GetU32(&offset); 6013 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) { 6014 // struct build_version_command { 6015 // uint32_t cmd; // LC_BUILD_VERSION 6016 // uint32_t cmdsize; 6017 // uint32_t platform; 6018 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz 6019 // uint32_t sdk; 6020 // uint32_t ntools; 6021 // }; 6022 // We want to read minos. 6023 offset += sizeof(uint32_t); // Skip over platform 6024 version = data.GetU32(&offset); // Extract minos 6025 } 6026 6027 if (version) { 6028 const uint32_t xxxx = version >> 16; 6029 const uint32_t yy = (version >> 8) & 0xffu; 6030 const uint32_t zz = version & 0xffu; 6031 if (xxxx) 6032 return llvm::VersionTuple(xxxx, yy, zz); 6033 } 6034 offset = load_cmd_offset + lc.cmdsize; 6035 } 6036 return llvm::VersionTuple(); 6037 } 6038 6039 llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() { 6040 if (!m_min_os_version) 6041 m_min_os_version = FindMinimumVersionInfo( 6042 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6043 return *m_min_os_version; 6044 } 6045 6046 llvm::VersionTuple ObjectFileMachO::GetSDKVersion() { 6047 if (!m_sdk_versions) 6048 m_sdk_versions = FindMinimumVersionInfo( 6049 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6050 return *m_sdk_versions; 6051 } 6052 6053 bool ObjectFileMachO::GetIsDynamicLinkEditor() { 6054 return m_header.filetype == llvm::MachO::MH_DYLINKER; 6055 } 6056 6057 bool ObjectFileMachO::CanTrustAddressRanges() { 6058 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can 6059 // be trusted by LLDB. 
6060 return m_header.filetype == llvm::MachO::MH_DSYM; 6061 } 6062 6063 bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() { 6064 return m_allow_assembly_emulation_unwind_plans; 6065 } 6066 6067 Section *ObjectFileMachO::GetMachHeaderSection() { 6068 // Find the first address of the mach header which is the first non-zero file 6069 // sized section whose file offset is zero. This is the base file address of 6070 // the mach-o file which can be subtracted from the vmaddr of the other 6071 // segments found in memory and added to the load address 6072 ModuleSP module_sp = GetModule(); 6073 if (!module_sp) 6074 return nullptr; 6075 SectionList *section_list = GetSectionList(); 6076 if (!section_list) 6077 return nullptr; 6078 6079 // Some binaries can have a TEXT segment with a non-zero file offset. 6080 // Binaries in the shared cache are one example. Some hand-generated 6081 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order 6082 // in the file, even though they're laid out correctly in vmaddr terms. 6083 SectionSP text_segment_sp = 6084 section_list->FindSectionByName(GetSegmentNameTEXT()); 6085 if (text_segment_sp.get() && SectionIsLoadable(text_segment_sp.get())) 6086 return text_segment_sp.get(); 6087 6088 const size_t num_sections = section_list->GetSize(); 6089 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6090 Section *section = section_list->GetSectionAtIndex(sect_idx).get(); 6091 if (section->GetFileOffset() == 0 && SectionIsLoadable(section)) 6092 return section; 6093 } 6094 6095 return nullptr; 6096 } 6097 6098 bool ObjectFileMachO::SectionIsLoadable(const Section *section) { 6099 if (!section) 6100 return false; 6101 if (section->IsThreadSpecific()) 6102 return false; 6103 if (GetModule().get() != section->GetModule().get()) 6104 return false; 6105 // firmware style binaries with llvm gcov segment do 6106 // not have that segment mapped into memory. 
6107 if (section->GetName() == GetSegmentNameLLVM_COV()) { 6108 const Strata strata = GetStrata(); 6109 if (strata == eStrataKernel || strata == eStrataRawImage) 6110 return false; 6111 } 6112 // Be careful with __LINKEDIT and __DWARF segments 6113 if (section->GetName() == GetSegmentNameLINKEDIT() || 6114 section->GetName() == GetSegmentNameDWARF()) { 6115 // Only map __LINKEDIT and __DWARF if we have an in memory image and 6116 // this isn't a kernel binary like a kext or mach_kernel. 6117 const bool is_memory_image = (bool)m_process_wp.lock(); 6118 const Strata strata = GetStrata(); 6119 if (is_memory_image == false || strata == eStrataKernel) 6120 return false; 6121 } 6122 return true; 6123 } 6124 6125 lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage( 6126 lldb::addr_t header_load_address, const Section *header_section, 6127 const Section *section) { 6128 ModuleSP module_sp = GetModule(); 6129 if (module_sp && header_section && section && 6130 header_load_address != LLDB_INVALID_ADDRESS) { 6131 lldb::addr_t file_addr = header_section->GetFileAddress(); 6132 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section)) 6133 return section->GetFileAddress() - file_addr + header_load_address; 6134 } 6135 return LLDB_INVALID_ADDRESS; 6136 } 6137 6138 bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value, 6139 bool value_is_offset) { 6140 Log *log(GetLog(LLDBLog::DynamicLoader)); 6141 ModuleSP module_sp = GetModule(); 6142 if (!module_sp) 6143 return false; 6144 6145 SectionList *section_list = GetSectionList(); 6146 if (!section_list) 6147 return false; 6148 6149 size_t num_loaded_sections = 0; 6150 const size_t num_sections = section_list->GetSize(); 6151 6152 // Warn if some top-level segments map to the same address. The binary may be 6153 // malformed. 
6154 const bool warn_multiple = true; 6155 6156 if (log) { 6157 StreamString logmsg; 6158 logmsg << "ObjectFileMachO::SetLoadAddress "; 6159 if (GetFileSpec()) 6160 logmsg << "path='" << GetFileSpec().GetPath() << "' "; 6161 if (GetUUID()) { 6162 logmsg << "uuid=" << GetUUID().GetAsString(); 6163 } 6164 LLDB_LOGF(log, "%s", logmsg.GetData()); 6165 } 6166 if (value_is_offset) { 6167 // "value" is an offset to apply to each top level segment 6168 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6169 // Iterate through the object file sections to find all of the 6170 // sections that size on disk (to avoid __PAGEZERO) and load them 6171 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6172 if (SectionIsLoadable(section_sp.get())) { 6173 LLDB_LOGF(log, 6174 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6175 "0x%" PRIx64, 6176 section_sp->GetName().AsCString(), 6177 section_sp->GetFileAddress() + value); 6178 if (target.GetSectionLoadList().SetSectionLoadAddress( 6179 section_sp, section_sp->GetFileAddress() + value, 6180 warn_multiple)) 6181 ++num_loaded_sections; 6182 } 6183 } 6184 } else { 6185 // "value" is the new base address of the mach_header, adjust each 6186 // section accordingly 6187 6188 Section *mach_header_section = GetMachHeaderSection(); 6189 if (mach_header_section) { 6190 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6191 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6192 6193 lldb::addr_t section_load_addr = 6194 CalculateSectionLoadAddressForMemoryImage( 6195 value, mach_header_section, section_sp.get()); 6196 if (section_load_addr != LLDB_INVALID_ADDRESS) { 6197 LLDB_LOGF(log, 6198 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6199 "0x%" PRIx64, 6200 section_sp->GetName().AsCString(), section_load_addr); 6201 if (target.GetSectionLoadList().SetSectionLoadAddress( 6202 section_sp, section_load_addr, warn_multiple)) 6203 ++num_loaded_sections; 6204 } 
6205 } 6206 } 6207 } 6208 return num_loaded_sections > 0; 6209 } 6210 6211 struct all_image_infos_header { 6212 uint32_t version; // currently 1 6213 uint32_t imgcount; // number of binary images 6214 uint64_t entries_fileoff; // file offset in the corefile of where the array of 6215 // struct entry's begin. 6216 uint32_t entries_size; // size of 'struct entry'. 6217 uint32_t unused; 6218 }; 6219 6220 struct image_entry { 6221 uint64_t filepath_offset; // offset in corefile to c-string of the file path, 6222 // UINT64_MAX if unavailable. 6223 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if 6224 // uuid is unknown. 6225 uint64_t load_address; // UINT64_MAX if unknown. 6226 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's. 6227 uint32_t segment_count; // The number of segments for this binary. 6228 uint32_t unused; 6229 6230 image_entry() { 6231 filepath_offset = UINT64_MAX; 6232 memset(&uuid, 0, sizeof(uuid_t)); 6233 segment_count = 0; 6234 load_address = UINT64_MAX; 6235 seg_addrs_offset = UINT64_MAX; 6236 unused = 0; 6237 } 6238 image_entry(const image_entry &rhs) { 6239 filepath_offset = rhs.filepath_offset; 6240 memcpy(&uuid, &rhs.uuid, sizeof(uuid_t)); 6241 segment_count = rhs.segment_count; 6242 seg_addrs_offset = rhs.seg_addrs_offset; 6243 load_address = rhs.load_address; 6244 unused = rhs.unused; 6245 } 6246 }; 6247 6248 struct segment_vmaddr { 6249 char segname[16]; 6250 uint64_t vmaddr; 6251 uint64_t unused; 6252 6253 segment_vmaddr() { 6254 memset(&segname, 0, 16); 6255 vmaddr = UINT64_MAX; 6256 unused = 0; 6257 } 6258 segment_vmaddr(const segment_vmaddr &rhs) { 6259 memcpy(&segname, &rhs.segname, 16); 6260 vmaddr = rhs.vmaddr; 6261 unused = rhs.unused; 6262 } 6263 }; 6264 6265 // Write the payload for the "all image infos" LC_NOTE into 6266 // the supplied all_image_infos_payload, assuming that this 6267 // will be written into the corefile starting at 6268 // initial_file_offset. 
6269 // 6270 // The placement of this payload is a little tricky. We're 6271 // laying this out as 6272 // 6273 // 1. header (struct all_image_info_header) 6274 // 2. Array of fixed-size (struct image_entry)'s, one 6275 // per binary image present in the process. 6276 // 3. Arrays of (struct segment_vmaddr)'s, a varying number 6277 // for each binary image. 6278 // 4. Variable length c-strings of binary image filepaths, 6279 // one per binary. 6280 // 6281 // To compute where everything will be laid out in the 6282 // payload, we need to iterate over the images and calculate 6283 // how many segment_vmaddr structures each image will need, 6284 // and how long each image's filepath c-string is. There 6285 // are some multiple passes over the image list while calculating 6286 // everything. 6287 6288 static offset_t 6289 CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp, 6290 offset_t initial_file_offset, 6291 StreamString &all_image_infos_payload, 6292 lldb_private::SaveCoreOptions &options) { 6293 Target &target = process_sp->GetTarget(); 6294 ModuleList modules = target.GetImages(); 6295 6296 // stack-only corefiles have no reason to include binaries that 6297 // are not executing; we're trying to make the smallest corefile 6298 // we can, so leave the rest out. 
6299 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly) 6300 modules.Clear(); 6301 6302 std::set<std::string> executing_uuids; 6303 std::vector<ThreadSP> thread_list = 6304 process_sp->CalculateCoreFileThreadList(options); 6305 for (const ThreadSP &thread_sp : thread_list) { 6306 uint32_t stack_frame_count = thread_sp->GetStackFrameCount(); 6307 for (uint32_t j = 0; j < stack_frame_count; j++) { 6308 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(j); 6309 Address pc = stack_frame_sp->GetFrameCodeAddress(); 6310 ModuleSP module_sp = pc.GetModule(); 6311 if (module_sp) { 6312 UUID uuid = module_sp->GetUUID(); 6313 if (uuid.IsValid()) { 6314 executing_uuids.insert(uuid.GetAsString()); 6315 modules.AppendIfNeeded(module_sp); 6316 } 6317 } 6318 } 6319 } 6320 size_t modules_count = modules.GetSize(); 6321 6322 struct all_image_infos_header infos; 6323 infos.version = 1; 6324 infos.imgcount = modules_count; 6325 infos.entries_size = sizeof(image_entry); 6326 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header); 6327 infos.unused = 0; 6328 6329 all_image_infos_payload.PutHex32(infos.version); 6330 all_image_infos_payload.PutHex32(infos.imgcount); 6331 all_image_infos_payload.PutHex64(infos.entries_fileoff); 6332 all_image_infos_payload.PutHex32(infos.entries_size); 6333 all_image_infos_payload.PutHex32(infos.unused); 6334 6335 // First create the structures for all of the segment name+vmaddr vectors 6336 // for each module, so we will know the size of them as we add the 6337 // module entries. 
6338 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs; 6339 for (size_t i = 0; i < modules_count; i++) { 6340 ModuleSP module = modules.GetModuleAtIndex(i); 6341 6342 SectionList *sections = module->GetSectionList(); 6343 size_t sections_count = sections->GetSize(); 6344 std::vector<segment_vmaddr> segment_vmaddrs; 6345 for (size_t j = 0; j < sections_count; j++) { 6346 SectionSP section = sections->GetSectionAtIndex(j); 6347 if (!section->GetParent().get()) { 6348 addr_t vmaddr = section->GetLoadBaseAddress(&target); 6349 if (vmaddr == LLDB_INVALID_ADDRESS) 6350 continue; 6351 ConstString name = section->GetName(); 6352 segment_vmaddr seg_vmaddr; 6353 // This is the uncommon case where strncpy is exactly 6354 // the right one, doesn't need to be nul terminated. 6355 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and 6356 // is not guaranteed to be nul-terminated if all 16 characters are 6357 // used. 6358 // coverity[buffer_size_warning] 6359 strncpy(seg_vmaddr.segname, name.AsCString(), 6360 sizeof(seg_vmaddr.segname)); 6361 seg_vmaddr.vmaddr = vmaddr; 6362 seg_vmaddr.unused = 0; 6363 segment_vmaddrs.push_back(seg_vmaddr); 6364 } 6365 } 6366 modules_segment_vmaddrs.push_back(segment_vmaddrs); 6367 } 6368 6369 offset_t size_of_vmaddr_structs = 0; 6370 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6371 size_of_vmaddr_structs += 6372 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr); 6373 } 6374 6375 offset_t size_of_filepath_cstrings = 0; 6376 for (size_t i = 0; i < modules_count; i++) { 6377 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6378 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1; 6379 } 6380 6381 // Calculate the file offsets of our "all image infos" payload in the 6382 // corefile. initial_file_offset the original value passed in to this method. 
6383 6384 offset_t start_of_entries = 6385 initial_file_offset + sizeof(all_image_infos_header); 6386 offset_t start_of_seg_vmaddrs = 6387 start_of_entries + sizeof(image_entry) * modules_count; 6388 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs; 6389 6390 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings; 6391 6392 // Now write the one-per-module 'struct image_entry' into the 6393 // StringStream; keep track of where the struct segment_vmaddr 6394 // entries for each module will end up in the corefile. 6395 6396 offset_t current_string_offset = start_of_filenames; 6397 offset_t current_segaddrs_offset = start_of_seg_vmaddrs; 6398 std::vector<struct image_entry> image_entries; 6399 for (size_t i = 0; i < modules_count; i++) { 6400 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6401 6402 struct image_entry ent; 6403 memcpy(&ent.uuid, module_sp->GetUUID().GetBytes().data(), sizeof(ent.uuid)); 6404 if (modules_segment_vmaddrs[i].size() > 0) { 6405 ent.segment_count = modules_segment_vmaddrs[i].size(); 6406 ent.seg_addrs_offset = current_segaddrs_offset; 6407 } 6408 ent.filepath_offset = current_string_offset; 6409 ObjectFile *objfile = module_sp->GetObjectFile(); 6410 if (objfile) { 6411 Address base_addr(objfile->GetBaseAddress()); 6412 if (base_addr.IsValid()) { 6413 ent.load_address = base_addr.GetLoadAddress(&target); 6414 } 6415 } 6416 6417 all_image_infos_payload.PutHex64(ent.filepath_offset); 6418 all_image_infos_payload.PutRawBytes(ent.uuid, sizeof(ent.uuid)); 6419 all_image_infos_payload.PutHex64(ent.load_address); 6420 all_image_infos_payload.PutHex64(ent.seg_addrs_offset); 6421 all_image_infos_payload.PutHex32(ent.segment_count); 6422 6423 if (executing_uuids.find(module_sp->GetUUID().GetAsString()) != 6424 executing_uuids.end()) 6425 all_image_infos_payload.PutHex32(1); 6426 else 6427 all_image_infos_payload.PutHex32(0); 6428 6429 current_segaddrs_offset += ent.segment_count * 
sizeof(segment_vmaddr); 6430 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1; 6431 } 6432 6433 // Now write the struct segment_vmaddr entries into the StringStream. 6434 6435 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6436 if (modules_segment_vmaddrs[i].size() == 0) 6437 continue; 6438 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) { 6439 all_image_infos_payload.PutRawBytes(segvm.segname, sizeof(segvm.segname)); 6440 all_image_infos_payload.PutHex64(segvm.vmaddr); 6441 all_image_infos_payload.PutHex64(segvm.unused); 6442 } 6443 } 6444 6445 for (size_t i = 0; i < modules_count; i++) { 6446 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6447 std::string filepath = module_sp->GetFileSpec().GetPath(); 6448 all_image_infos_payload.PutRawBytes(filepath.data(), filepath.size() + 1); 6449 } 6450 6451 return final_file_offset; 6452 } 6453 6454 // Temp struct used to combine contiguous memory regions with 6455 // identical permissions. 6456 struct page_object { 6457 addr_t addr; 6458 addr_t size; 6459 uint32_t prot; 6460 }; 6461 6462 bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp, 6463 lldb_private::SaveCoreOptions &options, 6464 Status &error) { 6465 // The FileSpec and Process are already checked in PluginManager::SaveCore. 
6466 assert(options.GetOutputFile().has_value()); 6467 assert(process_sp); 6468 const FileSpec outfile = options.GetOutputFile().value(); 6469 6470 // MachO defaults to dirty pages 6471 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified) 6472 options.SetStyle(eSaveCoreDirtyOnly); 6473 6474 Target &target = process_sp->GetTarget(); 6475 const ArchSpec target_arch = target.GetArchitecture(); 6476 const llvm::Triple &target_triple = target_arch.GetTriple(); 6477 if (target_triple.getVendor() == llvm::Triple::Apple && 6478 (target_triple.getOS() == llvm::Triple::MacOSX || 6479 target_triple.getOS() == llvm::Triple::IOS || 6480 target_triple.getOS() == llvm::Triple::WatchOS || 6481 target_triple.getOS() == llvm::Triple::TvOS || 6482 target_triple.getOS() == llvm::Triple::XROS)) { 6483 // NEED_BRIDGEOS_TRIPLE target_triple.getOS() == llvm::Triple::BridgeOS)) 6484 // { 6485 bool make_core = false; 6486 switch (target_arch.GetMachine()) { 6487 case llvm::Triple::aarch64: 6488 case llvm::Triple::aarch64_32: 6489 case llvm::Triple::arm: 6490 case llvm::Triple::thumb: 6491 case llvm::Triple::x86: 6492 case llvm::Triple::x86_64: 6493 make_core = true; 6494 break; 6495 default: 6496 error = Status::FromErrorStringWithFormat( 6497 "unsupported core architecture: %s", target_triple.str().c_str()); 6498 break; 6499 } 6500 6501 if (make_core) { 6502 CoreFileMemoryRanges core_ranges; 6503 error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges); 6504 if (error.Success()) { 6505 const uint32_t addr_byte_size = target_arch.GetAddressByteSize(); 6506 const ByteOrder byte_order = target_arch.GetByteOrder(); 6507 std::vector<llvm::MachO::segment_command_64> segment_load_commands; 6508 for (const auto &core_range_info : core_ranges) { 6509 // TODO: Refactor RangeDataVector to have a data iterator. 
6510 const auto &core_range = core_range_info.data; 6511 uint32_t cmd_type = LC_SEGMENT_64; 6512 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64); 6513 if (addr_byte_size == 4) { 6514 cmd_type = LC_SEGMENT; 6515 segment_size = sizeof(llvm::MachO::segment_command); 6516 } 6517 // Skip any ranges with no read/write/execute permissions and empty 6518 // ranges. 6519 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0) 6520 continue; 6521 uint32_t vm_prot = 0; 6522 if (core_range.lldb_permissions & ePermissionsReadable) 6523 vm_prot |= VM_PROT_READ; 6524 if (core_range.lldb_permissions & ePermissionsWritable) 6525 vm_prot |= VM_PROT_WRITE; 6526 if (core_range.lldb_permissions & ePermissionsExecutable) 6527 vm_prot |= VM_PROT_EXECUTE; 6528 const addr_t vm_addr = core_range.range.start(); 6529 const addr_t vm_size = core_range.range.size(); 6530 llvm::MachO::segment_command_64 segment = { 6531 cmd_type, // uint32_t cmd; 6532 segment_size, // uint32_t cmdsize; 6533 {0}, // char segname[16]; 6534 vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O 6535 vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O 6536 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O 6537 vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O 6538 vm_prot, // uint32_t maxprot; 6539 vm_prot, // uint32_t initprot; 6540 0, // uint32_t nsects; 6541 0}; // uint32_t flags; 6542 segment_load_commands.push_back(segment); 6543 } 6544 6545 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order); 6546 6547 llvm::MachO::mach_header_64 mach_header; 6548 mach_header.magic = addr_byte_size == 8 ? 
MH_MAGIC_64 : MH_MAGIC; 6549 mach_header.cputype = target_arch.GetMachOCPUType(); 6550 mach_header.cpusubtype = target_arch.GetMachOCPUSubType(); 6551 mach_header.filetype = MH_CORE; 6552 mach_header.ncmds = segment_load_commands.size(); 6553 mach_header.flags = 0; 6554 mach_header.reserved = 0; 6555 ThreadList &thread_list = process_sp->GetThreadList(); 6556 const uint32_t num_threads = thread_list.GetSize(); 6557 6558 // Make an array of LC_THREAD data items. Each one contains the 6559 // contents of the LC_THREAD load command. The data doesn't contain 6560 // the load command + load command size, we will add the load command 6561 // and load command size as we emit the data. 6562 std::vector<StreamString> LC_THREAD_datas(num_threads); 6563 for (auto &LC_THREAD_data : LC_THREAD_datas) { 6564 LC_THREAD_data.GetFlags().Set(Stream::eBinary); 6565 LC_THREAD_data.SetAddressByteSize(addr_byte_size); 6566 LC_THREAD_data.SetByteOrder(byte_order); 6567 } 6568 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { 6569 ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); 6570 if (thread_sp) { 6571 switch (mach_header.cputype) { 6572 case llvm::MachO::CPU_TYPE_ARM64: 6573 case llvm::MachO::CPU_TYPE_ARM64_32: 6574 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD( 6575 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6576 break; 6577 6578 case llvm::MachO::CPU_TYPE_ARM: 6579 RegisterContextDarwin_arm_Mach::Create_LC_THREAD( 6580 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6581 break; 6582 6583 case llvm::MachO::CPU_TYPE_I386: 6584 RegisterContextDarwin_i386_Mach::Create_LC_THREAD( 6585 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6586 break; 6587 6588 case llvm::MachO::CPU_TYPE_X86_64: 6589 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD( 6590 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6591 break; 6592 } 6593 } 6594 } 6595 6596 // The size of the load command is the size of the segments... 
6597 if (addr_byte_size == 8) { 6598 mach_header.sizeofcmds = segment_load_commands.size() * 6599 sizeof(llvm::MachO::segment_command_64); 6600 } else { 6601 mach_header.sizeofcmds = segment_load_commands.size() * 6602 sizeof(llvm::MachO::segment_command); 6603 } 6604 6605 // and the size of all LC_THREAD load command 6606 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6607 ++mach_header.ncmds; 6608 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize(); 6609 } 6610 6611 // Bits will be set to indicate which bits are NOT used in 6612 // addressing in this process or 0 for unknown. 6613 uint64_t address_mask = process_sp->GetCodeAddressMask(); 6614 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6615 // LC_NOTE "addrable bits" 6616 mach_header.ncmds++; 6617 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6618 } 6619 6620 // LC_NOTE "process metadata" 6621 mach_header.ncmds++; 6622 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6623 6624 // LC_NOTE "all image infos" 6625 mach_header.ncmds++; 6626 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6627 6628 // Write the mach header 6629 buffer.PutHex32(mach_header.magic); 6630 buffer.PutHex32(mach_header.cputype); 6631 buffer.PutHex32(mach_header.cpusubtype); 6632 buffer.PutHex32(mach_header.filetype); 6633 buffer.PutHex32(mach_header.ncmds); 6634 buffer.PutHex32(mach_header.sizeofcmds); 6635 buffer.PutHex32(mach_header.flags); 6636 if (addr_byte_size == 8) { 6637 buffer.PutHex32(mach_header.reserved); 6638 } 6639 6640 // Skip the mach header and all load commands and align to the next 6641 // 0x1000 byte boundary 6642 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds; 6643 6644 file_offset = llvm::alignTo(file_offset, 16); 6645 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes; 6646 6647 // Add "addrable bits" LC_NOTE when an address mask is available 6648 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6649 std::unique_ptr<LCNoteEntry> 
addrable_bits_lcnote_up( 6650 new LCNoteEntry(addr_byte_size, byte_order)); 6651 addrable_bits_lcnote_up->name = "addrable bits"; 6652 addrable_bits_lcnote_up->payload_file_offset = file_offset; 6653 int bits = std::bitset<64>(~address_mask).count(); 6654 addrable_bits_lcnote_up->payload.PutHex32(4); // version 6655 addrable_bits_lcnote_up->payload.PutHex32( 6656 bits); // # of bits used for low addresses 6657 addrable_bits_lcnote_up->payload.PutHex32( 6658 bits); // # of bits used for high addresses 6659 addrable_bits_lcnote_up->payload.PutHex32(0); // reserved 6660 6661 file_offset += addrable_bits_lcnote_up->payload.GetSize(); 6662 6663 lc_notes.push_back(std::move(addrable_bits_lcnote_up)); 6664 } 6665 6666 // Add "process metadata" LC_NOTE 6667 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up( 6668 new LCNoteEntry(addr_byte_size, byte_order)); 6669 thread_extrainfo_lcnote_up->name = "process metadata"; 6670 thread_extrainfo_lcnote_up->payload_file_offset = file_offset; 6671 6672 StructuredData::DictionarySP dict( 6673 std::make_shared<StructuredData::Dictionary>()); 6674 StructuredData::ArraySP threads( 6675 std::make_shared<StructuredData::Array>()); 6676 for (const ThreadSP &thread_sp : 6677 process_sp->CalculateCoreFileThreadList(options)) { 6678 StructuredData::DictionarySP thread( 6679 std::make_shared<StructuredData::Dictionary>()); 6680 thread->AddIntegerItem("thread_id", thread_sp->GetID()); 6681 threads->AddItem(thread); 6682 } 6683 dict->AddItem("threads", threads); 6684 StreamString strm; 6685 dict->Dump(strm, /* pretty */ false); 6686 thread_extrainfo_lcnote_up->payload.PutRawBytes(strm.GetData(), 6687 strm.GetSize()); 6688 6689 file_offset += thread_extrainfo_lcnote_up->payload.GetSize(); 6690 file_offset = llvm::alignTo(file_offset, 16); 6691 lc_notes.push_back(std::move(thread_extrainfo_lcnote_up)); 6692 6693 // Add "all image infos" LC_NOTE 6694 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up( 6695 new LCNoteEntry(addr_byte_size, 
byte_order)); 6696 all_image_infos_lcnote_up->name = "all image infos"; 6697 all_image_infos_lcnote_up->payload_file_offset = file_offset; 6698 file_offset = CreateAllImageInfosPayload( 6699 process_sp, file_offset, all_image_infos_lcnote_up->payload, 6700 options); 6701 lc_notes.push_back(std::move(all_image_infos_lcnote_up)); 6702 6703 // Add LC_NOTE load commands 6704 for (auto &lcnote : lc_notes) { 6705 // Add the LC_NOTE load command to the file. 6706 buffer.PutHex32(LC_NOTE); 6707 buffer.PutHex32(sizeof(llvm::MachO::note_command)); 6708 char namebuf[16]; 6709 memset(namebuf, 0, sizeof(namebuf)); 6710 // This is the uncommon case where strncpy is exactly 6711 // the right one, doesn't need to be nul terminated. 6712 // LC_NOTE name field is char[16] and is not guaranteed to be 6713 // nul-terminated. 6714 // coverity[buffer_size_warning] 6715 strncpy(namebuf, lcnote->name.c_str(), sizeof(namebuf)); 6716 buffer.PutRawBytes(namebuf, sizeof(namebuf)); 6717 buffer.PutHex64(lcnote->payload_file_offset); 6718 buffer.PutHex64(lcnote->payload.GetSize()); 6719 } 6720 6721 // Align to 4096-byte page boundary for the LC_SEGMENTs. 
6722 file_offset = llvm::alignTo(file_offset, 4096); 6723 6724 for (auto &segment : segment_load_commands) { 6725 segment.fileoff = file_offset; 6726 file_offset += segment.filesize; 6727 } 6728 6729 // Write out all of the LC_THREAD load commands 6730 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6731 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize(); 6732 buffer.PutHex32(LC_THREAD); 6733 buffer.PutHex32(8 + LC_THREAD_data_size); // cmd + cmdsize + data 6734 buffer.Write(LC_THREAD_data.GetString().data(), LC_THREAD_data_size); 6735 } 6736 6737 // Write out all of the segment load commands 6738 for (const auto &segment : segment_load_commands) { 6739 buffer.PutHex32(segment.cmd); 6740 buffer.PutHex32(segment.cmdsize); 6741 buffer.PutRawBytes(segment.segname, sizeof(segment.segname)); 6742 if (addr_byte_size == 8) { 6743 buffer.PutHex64(segment.vmaddr); 6744 buffer.PutHex64(segment.vmsize); 6745 buffer.PutHex64(segment.fileoff); 6746 buffer.PutHex64(segment.filesize); 6747 } else { 6748 buffer.PutHex32(static_cast<uint32_t>(segment.vmaddr)); 6749 buffer.PutHex32(static_cast<uint32_t>(segment.vmsize)); 6750 buffer.PutHex32(static_cast<uint32_t>(segment.fileoff)); 6751 buffer.PutHex32(static_cast<uint32_t>(segment.filesize)); 6752 } 6753 buffer.PutHex32(segment.maxprot); 6754 buffer.PutHex32(segment.initprot); 6755 buffer.PutHex32(segment.nsects); 6756 buffer.PutHex32(segment.flags); 6757 } 6758 6759 std::string core_file_path(outfile.GetPath()); 6760 auto core_file = FileSystem::Instance().Open( 6761 outfile, File::eOpenOptionWriteOnly | File::eOpenOptionTruncate | 6762 File::eOpenOptionCanCreate); 6763 if (!core_file) { 6764 error = Status::FromError(core_file.takeError()); 6765 } else { 6766 // Read 1 page at a time 6767 uint8_t bytes[0x1000]; 6768 // Write the mach header and load commands out to the core file 6769 size_t bytes_written = buffer.GetString().size(); 6770 error = 6771 core_file.get()->Write(buffer.GetString().data(), bytes_written); 
6772 if (error.Success()) { 6773 6774 for (auto &lcnote : lc_notes) { 6775 if (core_file.get()->SeekFromStart(lcnote->payload_file_offset) == 6776 -1) { 6777 error = Status::FromErrorStringWithFormat( 6778 "Unable to seek to corefile pos " 6779 "to write '%s' LC_NOTE payload", 6780 lcnote->name.c_str()); 6781 return false; 6782 } 6783 bytes_written = lcnote->payload.GetSize(); 6784 error = core_file.get()->Write(lcnote->payload.GetData(), 6785 bytes_written); 6786 if (!error.Success()) 6787 return false; 6788 } 6789 6790 // Now write the file data for all memory segments in the process 6791 for (const auto &segment : segment_load_commands) { 6792 if (core_file.get()->SeekFromStart(segment.fileoff) == -1) { 6793 error = Status::FromErrorStringWithFormat( 6794 "unable to seek to offset 0x%" PRIx64 " in '%s'", 6795 segment.fileoff, core_file_path.c_str()); 6796 break; 6797 } 6798 6799 target.GetDebugger().GetAsyncOutputStream()->Printf( 6800 "Saving %" PRId64 6801 " bytes of data for memory region at 0x%" PRIx64 "\n", 6802 segment.vmsize, segment.vmaddr); 6803 addr_t bytes_left = segment.vmsize; 6804 addr_t addr = segment.vmaddr; 6805 Status memory_read_error; 6806 while (bytes_left > 0 && error.Success()) { 6807 const size_t bytes_to_read = 6808 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left; 6809 6810 // In a savecore setting, we don't really care about caching, 6811 // as the data is dumped and very likely never read again, 6812 // so we call ReadMemoryFromInferior to bypass it. 
6813 const size_t bytes_read = process_sp->ReadMemoryFromInferior( 6814 addr, bytes, bytes_to_read, memory_read_error); 6815 6816 if (bytes_read == bytes_to_read) { 6817 size_t bytes_written = bytes_read; 6818 error = core_file.get()->Write(bytes, bytes_written); 6819 bytes_left -= bytes_read; 6820 addr += bytes_read; 6821 } else { 6822 // Some pages within regions are not readable, those should 6823 // be zero filled 6824 memset(bytes, 0, bytes_to_read); 6825 size_t bytes_written = bytes_to_read; 6826 error = core_file.get()->Write(bytes, bytes_written); 6827 bytes_left -= bytes_to_read; 6828 addr += bytes_to_read; 6829 } 6830 } 6831 } 6832 } 6833 } 6834 } 6835 } 6836 return true; // This is the right plug to handle saving core files for 6837 // this process 6838 } 6839 return false; 6840 } 6841 6842 ObjectFileMachO::MachOCorefileAllImageInfos 6843 ObjectFileMachO::GetCorefileAllImageInfos() { 6844 MachOCorefileAllImageInfos image_infos; 6845 Log *log(GetLog(LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process | 6846 LLDBLog::DynamicLoader)); 6847 6848 auto lc_notes = FindLC_NOTEByName("all image infos"); 6849 for (auto lc_note : lc_notes) { 6850 offset_t payload_offset = std::get<0>(lc_note); 6851 // Read the struct all_image_infos_header. 6852 uint32_t version = m_data.GetU32(&payload_offset); 6853 if (version != 1) { 6854 return image_infos; 6855 } 6856 uint32_t imgcount = m_data.GetU32(&payload_offset); 6857 uint64_t entries_fileoff = m_data.GetU64(&payload_offset); 6858 // 'entries_size' is not used, nor is the 'unused' entry. 6859 // offset += 4; // uint32_t entries_size; 6860 // offset += 4; // uint32_t unused; 6861 6862 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images", 6863 version, imgcount); 6864 payload_offset = entries_fileoff; 6865 for (uint32_t i = 0; i < imgcount; i++) { 6866 // Read the struct image_entry. 
6867 offset_t filepath_offset = m_data.GetU64(&payload_offset); 6868 uuid_t uuid; 6869 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6870 sizeof(uuid_t)); 6871 uint64_t load_address = m_data.GetU64(&payload_offset); 6872 offset_t seg_addrs_offset = m_data.GetU64(&payload_offset); 6873 uint32_t segment_count = m_data.GetU32(&payload_offset); 6874 uint32_t currently_executing = m_data.GetU32(&payload_offset); 6875 6876 MachOCorefileImageEntry image_entry; 6877 image_entry.filename = (const char *)m_data.GetCStr(&filepath_offset); 6878 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6879 image_entry.load_address = load_address; 6880 image_entry.currently_executing = currently_executing; 6881 6882 offset_t seg_vmaddrs_offset = seg_addrs_offset; 6883 for (uint32_t j = 0; j < segment_count; j++) { 6884 char segname[17]; 6885 m_data.CopyData(seg_vmaddrs_offset, 16, segname); 6886 segname[16] = '\0'; 6887 seg_vmaddrs_offset += 16; 6888 uint64_t vmaddr = m_data.GetU64(&seg_vmaddrs_offset); 6889 seg_vmaddrs_offset += 8; /* unused */ 6890 6891 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr}; 6892 image_entry.segment_load_addresses.push_back(new_seg); 6893 } 6894 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s", 6895 image_entry.filename.c_str(), 6896 image_entry.uuid.GetAsString().c_str(), 6897 image_entry.load_address, 6898 image_entry.currently_executing ? 
"currently executing" 6899 : "not currently executing"); 6900 image_infos.all_image_infos.push_back(image_entry); 6901 } 6902 } 6903 6904 lc_notes = FindLC_NOTEByName("load binary"); 6905 for (auto lc_note : lc_notes) { 6906 offset_t payload_offset = std::get<0>(lc_note); 6907 uint32_t version = m_data.GetU32(&payload_offset); 6908 if (version == 1) { 6909 uuid_t uuid; 6910 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6911 sizeof(uuid_t)); 6912 uint64_t load_address = m_data.GetU64(&payload_offset); 6913 uint64_t slide = m_data.GetU64(&payload_offset); 6914 std::string filename = m_data.GetCStr(&payload_offset); 6915 6916 MachOCorefileImageEntry image_entry; 6917 image_entry.filename = filename; 6918 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6919 image_entry.load_address = load_address; 6920 image_entry.slide = slide; 6921 image_entry.currently_executing = true; 6922 image_infos.all_image_infos.push_back(image_entry); 6923 LLDB_LOGF(log, 6924 "LC_NOTE 'load binary' found, filename %s uuid %s load " 6925 "address 0x%" PRIx64 " slide 0x%" PRIx64, 6926 filename.c_str(), 6927 image_entry.uuid.IsValid() 6928 ? image_entry.uuid.GetAsString().c_str() 6929 : "00000000-0000-0000-0000-000000000000", 6930 load_address, slide); 6931 } 6932 } 6933 6934 return image_infos; 6935 } 6936 6937 bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { 6938 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos(); 6939 Log *log = GetLog(LLDBLog::Object | LLDBLog::DynamicLoader); 6940 Status error; 6941 6942 bool found_platform_binary = false; 6943 ModuleList added_modules; 6944 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) { 6945 ModuleSP module_sp, local_filesystem_module_sp; 6946 6947 // If this is a platform binary, it has been loaded (or registered with 6948 // the DynamicLoader to be loaded), we don't need to do any further 6949 // processing. 
We're not going to call ModulesDidLoad on this in this 6950 // method, so notify==true. 6951 if (process.GetTarget() 6952 .GetDebugger() 6953 .GetPlatformList() 6954 .LoadPlatformBinaryAndSetup(&process, image.load_address, 6955 true /* notify */)) { 6956 LLDB_LOGF(log, 6957 "ObjectFileMachO::%s binary at 0x%" PRIx64 6958 " is a platform binary, has been handled by a Platform plugin.", 6959 __FUNCTION__, image.load_address); 6960 continue; 6961 } 6962 6963 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS; 6964 uint64_t value = value_is_offset ? image.slide : image.load_address; 6965 if (value_is_offset && value == LLDB_INVALID_ADDRESS) { 6966 // We have neither address nor slide; so we will find the binary 6967 // by UUID and load it at slide/offset 0. 6968 value = 0; 6969 } 6970 6971 // We have either a UUID, or we have a load address which 6972 // and can try to read load commands and find a UUID. 6973 if (image.uuid.IsValid() || 6974 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) { 6975 const bool set_load_address = image.segment_load_addresses.size() == 0; 6976 const bool notify = false; 6977 // Userland Darwin binaries will have segment load addresses via 6978 // the `all image infos` LC_NOTE. 6979 const bool allow_memory_image_last_resort = 6980 image.segment_load_addresses.size(); 6981 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress( 6982 &process, image.filename, image.uuid, value, value_is_offset, 6983 image.currently_executing, notify, set_load_address, 6984 allow_memory_image_last_resort); 6985 } 6986 6987 // We have a ModuleSP to load in the Target. Load it at the 6988 // correct address/slide and notify/load scripting resources. 
6989 if (module_sp) { 6990 added_modules.Append(module_sp, false /* notify */); 6991 6992 // We have a list of segment load address 6993 if (image.segment_load_addresses.size() > 0) { 6994 if (log) { 6995 std::string uuidstr = image.uuid.GetAsString(); 6996 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 6997 "UUID %s with section load addresses", 6998 module_sp->GetFileSpec().GetPath().c_str(), 6999 uuidstr.c_str()); 7000 } 7001 for (auto name_vmaddr_tuple : image.segment_load_addresses) { 7002 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList(); 7003 if (sectlist) { 7004 SectionSP sect_sp = 7005 sectlist->FindSectionByName(std::get<0>(name_vmaddr_tuple)); 7006 if (sect_sp) { 7007 process.GetTarget().SetSectionLoadAddress( 7008 sect_sp, std::get<1>(name_vmaddr_tuple)); 7009 } 7010 } 7011 } 7012 } else { 7013 if (log) { 7014 std::string uuidstr = image.uuid.GetAsString(); 7015 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7016 "UUID %s with %s 0x%" PRIx64, 7017 module_sp->GetFileSpec().GetPath().c_str(), 7018 uuidstr.c_str(), 7019 value_is_offset ? "slide" : "load address", value); 7020 } 7021 bool changed; 7022 module_sp->SetLoadAddress(process.GetTarget(), value, value_is_offset, 7023 changed); 7024 } 7025 } 7026 } 7027 if (added_modules.GetSize() > 0) { 7028 process.GetTarget().ModulesDidLoad(added_modules); 7029 process.Flush(); 7030 return true; 7031 } 7032 // Return true if the only binary we found was the platform binary, 7033 // and it was loaded outside the scope of this method. 7034 if (found_platform_binary) 7035 return true; 7036 7037 // No binaries. 7038 return false; 7039 } 7040