1 //===-- ObjectFileMachO.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/ScopeExit.h" 10 #include "llvm/ADT/StringRef.h" 11 12 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h" 13 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h" 14 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h" 15 #include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h" 16 #include "lldb/Core/Debugger.h" 17 #include "lldb/Core/Module.h" 18 #include "lldb/Core/ModuleSpec.h" 19 #include "lldb/Core/PluginManager.h" 20 #include "lldb/Core/Progress.h" 21 #include "lldb/Core/Section.h" 22 #include "lldb/Host/Host.h" 23 #include "lldb/Symbol/DWARFCallFrameInfo.h" 24 #include "lldb/Symbol/ObjectFile.h" 25 #include "lldb/Target/DynamicLoader.h" 26 #include "lldb/Target/MemoryRegionInfo.h" 27 #include "lldb/Target/Platform.h" 28 #include "lldb/Target/Process.h" 29 #include "lldb/Target/SectionLoadList.h" 30 #include "lldb/Target/Target.h" 31 #include "lldb/Target/Thread.h" 32 #include "lldb/Target/ThreadList.h" 33 #include "lldb/Utility/ArchSpec.h" 34 #include "lldb/Utility/DataBuffer.h" 35 #include "lldb/Utility/FileSpec.h" 36 #include "lldb/Utility/FileSpecList.h" 37 #include "lldb/Utility/LLDBLog.h" 38 #include "lldb/Utility/Log.h" 39 #include "lldb/Utility/RangeMap.h" 40 #include "lldb/Utility/RegisterValue.h" 41 #include "lldb/Utility/Status.h" 42 #include "lldb/Utility/StreamString.h" 43 #include "lldb/Utility/Timer.h" 44 #include "lldb/Utility/UUID.h" 45 46 #include "lldb/Host/SafeMachO.h" 47 48 #include "llvm/ADT/DenseSet.h" 49 #include "llvm/Support/FormatVariadic.h" 50 #include "llvm/Support/MemoryBuffer.h" 51 52 #include "ObjectFileMachO.h" 53 54 #if defined(__APPLE__) 55 #include <TargetConditionals.h> 56 // GetLLDBSharedCacheUUID() needs to call dlsym() 57 #include <dlfcn.h> 58 #include <mach/mach_init.h> 59 #include <mach/vm_map.h> 60 #include <lldb/Host/SafeMachO.h> 61 #endif 62 63 #ifndef __APPLE__ 64 #include "lldb/Utility/AppleUuidCompatibility.h" 65 #else 66 #include <uuid/uuid.h> 67 #endif 68 69 #include <bitset> 70 #include <memory> 71 #include <optional> 72 73 // Unfortunately the signpost header pulls in the system MachO header, too. 74 #ifdef CPU_TYPE_ARM 75 #undef CPU_TYPE_ARM 76 #endif 77 #ifdef CPU_TYPE_ARM64 78 #undef CPU_TYPE_ARM64 79 #endif 80 #ifdef CPU_TYPE_ARM64_32 81 #undef CPU_TYPE_ARM64_32 82 #endif 83 #ifdef CPU_TYPE_I386 84 #undef CPU_TYPE_I386 85 #endif 86 #ifdef CPU_TYPE_X86_64 87 #undef CPU_TYPE_X86_64 88 #endif 89 #ifdef MH_DYLINKER 90 #undef MH_DYLINKER 91 #endif 92 #ifdef MH_OBJECT 93 #undef MH_OBJECT 94 #endif 95 #ifdef LC_VERSION_MIN_MACOSX 96 #undef LC_VERSION_MIN_MACOSX 97 #endif 98 #ifdef LC_VERSION_MIN_IPHONEOS 99 #undef LC_VERSION_MIN_IPHONEOS 100 #endif 101 #ifdef LC_VERSION_MIN_TVOS 102 #undef LC_VERSION_MIN_TVOS 103 #endif 104 #ifdef LC_VERSION_MIN_WATCHOS 105 #undef LC_VERSION_MIN_WATCHOS 106 #endif 107 #ifdef LC_BUILD_VERSION 108 #undef LC_BUILD_VERSION 109 #endif 110 #ifdef PLATFORM_MACOS 111 #undef PLATFORM_MACOS 112 #endif 113 #ifdef PLATFORM_MACCATALYST 114 #undef PLATFORM_MACCATALYST 115 #endif 116 #ifdef PLATFORM_IOS 117 #undef PLATFORM_IOS 118 #endif 119 #ifdef PLATFORM_IOSSIMULATOR 120 #undef PLATFORM_IOSSIMULATOR 121 #endif 122 #ifdef PLATFORM_TVOS 123 #undef PLATFORM_TVOS 124 #endif 125 #ifdef PLATFORM_TVOSSIMULATOR 126 #undef PLATFORM_TVOSSIMULATOR 127 #endif 128 #ifdef PLATFORM_WATCHOS 129 #undef PLATFORM_WATCHOS 130 #endif 131 #ifdef PLATFORM_WATCHOSSIMULATOR 132 #undef PLATFORM_WATCHOSSIMULATOR 133 #endif 134 135 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull 136 using namespace lldb; 137 using namespace lldb_private; 138 using namespace llvm::MachO; 139 140 static constexpr llvm::StringLiteral g_loader_path = "@loader_path"; 141 static constexpr llvm::StringLiteral g_executable_path = "@executable_path"; 142 143 LLDB_PLUGIN_DEFINE(ObjectFileMachO) 144 145 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name, 146 const char *alt_name, size_t reg_byte_size, 147 Stream &data) { 148 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name); 149 if (reg_info == nullptr) 150 reg_info = reg_ctx->GetRegisterInfoByName(alt_name); 151 if (reg_info) { 152 lldb_private::RegisterValue reg_value; 153 if (reg_ctx->ReadRegister(reg_info, reg_value)) { 154 if (reg_info->byte_size >= reg_byte_size) 155 data.Write(reg_value.GetBytes(), reg_byte_size); 156 else { 157 data.Write(reg_value.GetBytes(), reg_info->byte_size); 158 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i) 159 data.PutChar(0); 160 } 161 return; 162 } 163 } 164 // Just write zeros if all else fails 165 for (size_t i = 0; i < reg_byte_size; ++i) 166 data.PutChar(0); 167 } 168 169 class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 { 170 public: 171 RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread, 172 const DataExtractor &data) 173 : RegisterContextDarwin_x86_64(thread, 0) { 174 SetRegisterDataFrom_LC_THREAD(data); 175 } 176 177 void InvalidateAllRegisters() override { 178 // Do nothing... registers are always valid... 179 } 180 181 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 182 lldb::offset_t offset = 0; 183 SetError(GPRRegSet, Read, -1); 184 SetError(FPURegSet, Read, -1); 185 SetError(EXCRegSet, Read, -1); 186 bool done = false; 187 188 while (!done) { 189 int flavor = data.GetU32(&offset); 190 if (flavor == 0) 191 done = true; 192 else { 193 uint32_t i; 194 uint32_t count = data.GetU32(&offset); 195 switch (flavor) { 196 case GPRRegSet: 197 for (i = 0; i < count; ++i) 198 (&gpr.rax)[i] = data.GetU64(&offset); 199 SetError(GPRRegSet, Read, 0); 200 done = true; 201 202 break; 203 case FPURegSet: 204 // TODO: fill in FPU regs.... 205 // SetError (FPURegSet, Read, -1); 206 done = true; 207 208 break; 209 case EXCRegSet: 210 exc.trapno = data.GetU32(&offset); 211 exc.err = data.GetU32(&offset); 212 exc.faultvaddr = data.GetU64(&offset); 213 SetError(EXCRegSet, Read, 0); 214 done = true; 215 break; 216 case 7: 217 case 8: 218 case 9: 219 // fancy flavors that encapsulate of the above flavors... 220 break; 221 222 default: 223 done = true; 224 break; 225 } 226 } 227 } 228 } 229 230 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 231 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 232 if (reg_ctx_sp) { 233 RegisterContext *reg_ctx = reg_ctx_sp.get(); 234 235 data.PutHex32(GPRRegSet); // Flavor 236 data.PutHex32(GPRWordCount); 237 PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data); 238 PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data); 239 PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data); 240 PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data); 241 PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data); 242 PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data); 243 PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data); 244 PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data); 245 PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data); 246 PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data); 247 PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data); 248 PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data); 249 PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data); 250 PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data); 251 PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data); 252 PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data); 253 PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data); 254 PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data); 255 PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data); 256 PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data); 257 PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data); 258 259 // // Write out the FPU registers 260 // const size_t fpu_byte_size = sizeof(FPU); 261 // size_t bytes_written = 0; 262 // data.PutHex32 (FPURegSet); 263 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t)); 264 // bytes_written += data.PutHex32(0); // uint32_t pad[0] 265 // bytes_written += data.PutHex32(0); // uint32_t pad[1] 266 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2, 267 // data); // uint16_t fcw; // "fctrl" 268 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2, 269 // data); // uint16_t fsw; // "fstat" 270 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1, 271 // data); // uint8_t ftw; // "ftag" 272 // bytes_written += data.PutHex8 (0); // uint8_t pad1; 273 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2, 274 // data); // uint16_t fop; // "fop" 275 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4, 276 // data); // uint32_t ip; // "fioff" 277 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2, 278 // data); // uint16_t cs; // "fiseg" 279 // bytes_written += data.PutHex16 (0); // uint16_t pad2; 280 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4, 281 // data); // uint32_t dp; // "fooff" 282 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2, 283 // data); // uint16_t ds; // "foseg" 284 // bytes_written += data.PutHex16 (0); // uint16_t pad3; 285 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4, 286 // data); // uint32_t mxcsr; 287 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL, 288 // 4, data);// uint32_t mxcsrmask; 289 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL, 290 // sizeof(MMSReg), data); 291 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL, 292 // sizeof(MMSReg), data); 293 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL, 294 // sizeof(MMSReg), data); 295 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL, 296 // sizeof(MMSReg), data); 297 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL, 298 // sizeof(MMSReg), data); 299 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL, 300 // sizeof(MMSReg), data); 301 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL, 302 // sizeof(MMSReg), data); 303 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL, 304 // sizeof(MMSReg), data); 305 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL, 306 // sizeof(XMMReg), data); 307 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL, 308 // sizeof(XMMReg), data); 309 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL, 310 // sizeof(XMMReg), data); 311 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL, 312 // sizeof(XMMReg), data); 313 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL, 314 // sizeof(XMMReg), data); 315 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL, 316 // sizeof(XMMReg), data); 317 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL, 318 // sizeof(XMMReg), data); 319 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL, 320 // sizeof(XMMReg), data); 321 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL, 322 // sizeof(XMMReg), data); 323 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL, 324 // sizeof(XMMReg), data); 325 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL, 326 // sizeof(XMMReg), data); 327 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL, 328 // sizeof(XMMReg), data); 329 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL, 330 // sizeof(XMMReg), data); 331 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL, 332 // sizeof(XMMReg), data); 333 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL, 334 // sizeof(XMMReg), data); 335 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL, 336 // sizeof(XMMReg), data); 337 // 338 // // Fill rest with zeros 339 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++ 340 // i) 341 // data.PutChar(0); 342 343 // Write out the EXC registers 344 data.PutHex32(EXCRegSet); 345 data.PutHex32(EXCWordCount); 346 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data); 347 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data); 348 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data); 349 return true; 350 } 351 return false; 352 } 353 354 protected: 355 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; } 356 357 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; } 358 359 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; } 360 361 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 362 return 0; 363 } 364 365 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 366 return 0; 367 } 368 369 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 370 return 0; 371 } 372 }; 373 374 class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 { 375 public: 376 RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread, 377 const DataExtractor &data) 378 : RegisterContextDarwin_i386(thread, 0) { 379 SetRegisterDataFrom_LC_THREAD(data); 380 } 381 382 void InvalidateAllRegisters() override { 383 // Do nothing... registers are always valid... 384 } 385 386 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 387 lldb::offset_t offset = 0; 388 SetError(GPRRegSet, Read, -1); 389 SetError(FPURegSet, Read, -1); 390 SetError(EXCRegSet, Read, -1); 391 bool done = false; 392 393 while (!done) { 394 int flavor = data.GetU32(&offset); 395 if (flavor == 0) 396 done = true; 397 else { 398 uint32_t i; 399 uint32_t count = data.GetU32(&offset); 400 switch (flavor) { 401 case GPRRegSet: 402 for (i = 0; i < count; ++i) 403 (&gpr.eax)[i] = data.GetU32(&offset); 404 SetError(GPRRegSet, Read, 0); 405 done = true; 406 407 break; 408 case FPURegSet: 409 // TODO: fill in FPU regs.... 410 // SetError (FPURegSet, Read, -1); 411 done = true; 412 413 break; 414 case EXCRegSet: 415 exc.trapno = data.GetU32(&offset); 416 exc.err = data.GetU32(&offset); 417 exc.faultvaddr = data.GetU32(&offset); 418 SetError(EXCRegSet, Read, 0); 419 done = true; 420 break; 421 case 7: 422 case 8: 423 case 9: 424 // fancy flavors that encapsulate of the above flavors... 425 break; 426 427 default: 428 done = true; 429 break; 430 } 431 } 432 } 433 } 434 435 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 436 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 437 if (reg_ctx_sp) { 438 RegisterContext *reg_ctx = reg_ctx_sp.get(); 439 440 data.PutHex32(GPRRegSet); // Flavor 441 data.PutHex32(GPRWordCount); 442 PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data); 443 PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data); 444 PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data); 445 PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data); 446 PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data); 447 PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data); 448 PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data); 449 PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data); 450 PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data); 451 PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data); 452 PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data); 453 PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data); 454 PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data); 455 PrintRegisterValue(reg_ctx, "es", nullptr, 4, data); 456 PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data); 457 PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data); 458 459 // Write out the EXC registers 460 data.PutHex32(EXCRegSet); 461 data.PutHex32(EXCWordCount); 462 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data); 463 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data); 464 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data); 465 return true; 466 } 467 return false; 468 } 469 470 protected: 471 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; } 472 473 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; } 474 475 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; } 476 477 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 478 return 0; 479 } 480 481 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 482 return 0; 483 } 484 485 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 486 return 0; 487 } 488 }; 489 490 class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm { 491 public: 492 RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread, 493 const DataExtractor &data) 494 : RegisterContextDarwin_arm(thread, 0) { 495 SetRegisterDataFrom_LC_THREAD(data); 496 } 497 498 void InvalidateAllRegisters() override { 499 // Do nothing... registers are always valid... 500 } 501 502 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 503 lldb::offset_t offset = 0; 504 SetError(GPRRegSet, Read, -1); 505 SetError(FPURegSet, Read, -1); 506 SetError(EXCRegSet, Read, -1); 507 bool done = false; 508 509 while (!done) { 510 int flavor = data.GetU32(&offset); 511 uint32_t count = data.GetU32(&offset); 512 lldb::offset_t next_thread_state = offset + (count * 4); 513 switch (flavor) { 514 case GPRAltRegSet: 515 case GPRRegSet: { 516 // r0-r15, plus CPSR 517 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1; 518 if (count == gpr_buf_count) { 519 for (uint32_t i = 0; i < (count - 1); ++i) { 520 gpr.r[i] = data.GetU32(&offset); 521 } 522 gpr.cpsr = data.GetU32(&offset); 523 524 SetError(GPRRegSet, Read, 0); 525 } 526 } 527 offset = next_thread_state; 528 break; 529 530 case FPURegSet: { 531 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats; 532 const int fpu_reg_buf_size = sizeof(fpu.floats); 533 if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle, 534 fpu_reg_buf) == fpu_reg_buf_size) { 535 offset += fpu_reg_buf_size; 536 fpu.fpscr = data.GetU32(&offset); 537 SetError(FPURegSet, Read, 0); 538 } else { 539 done = true; 540 } 541 } 542 offset = next_thread_state; 543 break; 544 545 case EXCRegSet: 546 if (count == 3) { 547 exc.exception = data.GetU32(&offset); 548 exc.fsr = data.GetU32(&offset); 549 exc.far = data.GetU32(&offset); 550 SetError(EXCRegSet, Read, 0); 551 } 552 done = true; 553 offset = next_thread_state; 554 break; 555 556 // Unknown register set flavor, stop trying to parse. 557 default: 558 done = true; 559 } 560 } 561 } 562 563 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 564 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 565 if (reg_ctx_sp) { 566 RegisterContext *reg_ctx = reg_ctx_sp.get(); 567 568 data.PutHex32(GPRRegSet); // Flavor 569 data.PutHex32(GPRWordCount); 570 PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data); 571 PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data); 572 PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data); 573 PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data); 574 PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data); 575 PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data); 576 PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data); 577 PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data); 578 PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data); 579 PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data); 580 PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data); 581 PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data); 582 PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data); 583 PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data); 584 PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data); 585 PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data); 586 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data); 587 588 // Write out the EXC registers 589 // data.PutHex32 (EXCRegSet); 590 // data.PutHex32 (EXCWordCount); 591 // WriteRegister (reg_ctx, "exception", NULL, 4, data); 592 // WriteRegister (reg_ctx, "fsr", NULL, 4, data); 593 // WriteRegister (reg_ctx, "far", NULL, 4, data); 594 return true; 595 } 596 return false; 597 } 598 599 protected: 600 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; } 601 602 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; } 603 604 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; } 605 606 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; } 607 608 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 609 return 0; 610 } 611 612 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 613 return 0; 614 } 615 616 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 617 return 0; 618 } 619 620 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override { 621 return -1; 622 } 623 }; 624 625 class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { 626 public: 627 RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread, 628 const DataExtractor &data) 629 : RegisterContextDarwin_arm64(thread, 0) { 630 SetRegisterDataFrom_LC_THREAD(data); 631 } 632 633 void InvalidateAllRegisters() override { 634 // Do nothing... registers are always valid... 635 } 636 637 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 638 lldb::offset_t offset = 0; 639 SetError(GPRRegSet, Read, -1); 640 SetError(FPURegSet, Read, -1); 641 SetError(EXCRegSet, Read, -1); 642 bool done = false; 643 while (!done) { 644 int flavor = data.GetU32(&offset); 645 uint32_t count = data.GetU32(&offset); 646 lldb::offset_t next_thread_state = offset + (count * 4); 647 switch (flavor) { 648 case GPRRegSet: 649 // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1 650 // 32-bit register) 651 if (count >= (33 * 2) + 1) { 652 for (uint32_t i = 0; i < 29; ++i) 653 gpr.x[i] = data.GetU64(&offset); 654 gpr.fp = data.GetU64(&offset); 655 gpr.lr = data.GetU64(&offset); 656 gpr.sp = data.GetU64(&offset); 657 gpr.pc = data.GetU64(&offset); 658 gpr.cpsr = data.GetU32(&offset); 659 SetError(GPRRegSet, Read, 0); 660 } 661 offset = next_thread_state; 662 break; 663 case FPURegSet: { 664 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0]; 665 const int fpu_reg_buf_size = sizeof(fpu); 666 if (fpu_reg_buf_size == count * sizeof(uint32_t) && 667 data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle, 668 fpu_reg_buf) == fpu_reg_buf_size) { 669 SetError(FPURegSet, Read, 0); 670 } else { 671 done = true; 672 } 673 } 674 offset = next_thread_state; 675 break; 676 case EXCRegSet: 677 if (count == 4) { 678 exc.far = data.GetU64(&offset); 679 exc.esr = data.GetU32(&offset); 680 exc.exception = data.GetU32(&offset); 681 SetError(EXCRegSet, Read, 0); 682 } 683 offset = next_thread_state; 684 break; 685 default: 686 done = true; 687 break; 688 } 689 } 690 } 691 692 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 693 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 694 if (reg_ctx_sp) { 695 RegisterContext *reg_ctx = reg_ctx_sp.get(); 696 697 data.PutHex32(GPRRegSet); // Flavor 698 data.PutHex32(GPRWordCount); 699 PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data); 700 PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data); 701 PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data); 702 PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data); 703 PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data); 704 PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data); 705 PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data); 706 PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data); 707 PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data); 708 PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data); 709 PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data); 710 PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data); 711 PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data); 712 PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data); 713 PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data); 714 PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data); 715 PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data); 716 PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data); 717 PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data); 718 PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data); 719 PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data); 720 PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data); 721 PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data); 722 PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data); 723 PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data); 724 PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data); 725 PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data); 726 PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data); 727 PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data); 728 PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data); 729 PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data); 730 PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data); 731 PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data); 732 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data); 733 data.PutHex32(0); // uint32_t pad at the end 734 735 // Write out the EXC registers 736 data.PutHex32(EXCRegSet); 737 data.PutHex32(EXCWordCount); 738 PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); 739 PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); 740 PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); 741 return true; 742 } 743 return false; 744 } 745 746 protected: 747 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; } 748 749 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; } 750 751 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; } 752 753 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; } 754 755 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 756 return 0; 757 } 758 759 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 760 return 0; 761 } 762 763 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 764 return 0; 765 } 766 767 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override { 768 return -1; 769 } 770 }; 771 772 static uint32_t MachHeaderSizeFromMagic(uint32_t magic) { 773 switch (magic) { 774 case MH_MAGIC: 775 case MH_CIGAM: 776 return sizeof(struct llvm::MachO::mach_header); 777 778 case MH_MAGIC_64: 779 case MH_CIGAM_64: 780 return sizeof(struct llvm::MachO::mach_header_64); 781 break; 782 783 default: 784 break; 785 } 786 return 0; 787 } 788 789 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008 790 791 char ObjectFileMachO::ID; 792 793 void ObjectFileMachO::Initialize() { 794 PluginManager::RegisterPlugin( 795 GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance, 796 CreateMemoryInstance, GetModuleSpecifications, SaveCore); 797 } 798 799 void ObjectFileMachO::Terminate() { 800 PluginManager::UnregisterPlugin(CreateInstance); 801 } 802 803 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp, 804 DataBufferSP data_sp, 805 lldb::offset_t data_offset, 806 const FileSpec *file, 807 lldb::offset_t file_offset, 808 lldb::offset_t length) { 809 if (!data_sp) { 810 data_sp = MapFileData(*file, length, file_offset); 811 if (!data_sp) 812 return nullptr; 813 data_offset = 0; 814 } 815 816 if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length)) 817 return nullptr; 818 819 // Update the data to contain the entire file if it doesn't already 820 if (data_sp->GetByteSize() < length) { 821 data_sp = MapFileData(*file, length, file_offset); 822 if (!data_sp) 823 return nullptr; 824 data_offset = 0; 825 } 826 auto objfile_up = std::make_unique<ObjectFileMachO>( 827 module_sp, data_sp, data_offset, file, file_offset, length); 828 if (!objfile_up || !objfile_up->ParseHeader()) 829 return nullptr; 830 831 return objfile_up.release(); 832 } 833 834 ObjectFile *ObjectFileMachO::CreateMemoryInstance( 835 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp, 836 const ProcessSP &process_sp, lldb::addr_t header_addr) { 837 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) { 838 std::unique_ptr<ObjectFile> objfile_up( 839 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr)); 840 if (objfile_up.get() && objfile_up->ParseHeader()) 841 return objfile_up.release(); 842 } 843 return nullptr; 844 } 845 846 size_t ObjectFileMachO::GetModuleSpecifications( 847 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, 848 lldb::offset_t data_offset, lldb::offset_t file_offset, 849 lldb::offset_t length, lldb_private::ModuleSpecList &specs) { 850 const size_t initial_count = specs.GetSize(); 851 852 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) { 853 DataExtractor data; 854 data.SetData(data_sp); 855 llvm::MachO::mach_header header; 856 if (ParseHeader(data, &data_offset, header)) { 857 size_t header_and_load_cmds = 858 header.sizeofcmds + MachHeaderSizeFromMagic(header.magic); 859 if (header_and_load_cmds >= data_sp->GetByteSize()) { 860 data_sp = MapFileData(file, header_and_load_cmds, file_offset); 861 data.SetData(data_sp); 862 data_offset = MachHeaderSizeFromMagic(header.magic); 863 } 864 if (data_sp) { 865 ModuleSpec base_spec; 866 base_spec.GetFileSpec() = file; 867 base_spec.SetObjectOffset(file_offset); 868 base_spec.SetObjectSize(length); 869 GetAllArchSpecs(header, data, data_offset, base_spec, specs); 870 } 871 } 872 } 873 return specs.GetSize() - initial_count; 874 } 875 876 ConstString ObjectFileMachO::GetSegmentNameTEXT() { 877 static ConstString g_segment_name_TEXT("__TEXT"); 878 return g_segment_name_TEXT; 879 } 880 881 ConstString ObjectFileMachO::GetSegmentNameDATA() { 882 static ConstString g_segment_name_DATA("__DATA"); 883 return g_segment_name_DATA; 884 } 885 886 ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() { 887 static ConstString g_segment_name("__DATA_DIRTY"); 888 return g_segment_name; 889 } 890 891 ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() { 892 static ConstString g_segment_name("__DATA_CONST"); 893 return g_segment_name; 894 } 895 896 ConstString ObjectFileMachO::GetSegmentNameOBJC() { 897 static ConstString g_segment_name_OBJC("__OBJC"); 898 return g_segment_name_OBJC; 899 } 900 901 ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() { 902 static ConstString g_section_name_LINKEDIT("__LINKEDIT"); 903 return g_section_name_LINKEDIT; 904 } 905 906 ConstString ObjectFileMachO::GetSegmentNameDWARF() { 907 static ConstString g_section_name("__DWARF"); 908 return g_section_name; 909 } 910 911 ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() { 912 static ConstString g_section_name("__LLVM_COV"); 913 return g_section_name; 914 } 915 916 ConstString ObjectFileMachO::GetSectionNameEHFrame() { 917 static ConstString g_section_name_eh_frame("__eh_frame"); 918 return g_section_name_eh_frame; 919 } 920 921 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp, 922 lldb::addr_t data_offset, 923 lldb::addr_t data_length) { 924 DataExtractor data; 925 data.SetData(data_sp, data_offset, data_length); 926 lldb::offset_t offset = 0; 927 uint32_t magic = data.GetU32(&offset); 928 929 offset += 4; // cputype 930 offset += 4; // cpusubtype 931 uint32_t filetype = data.GetU32(&offset); 932 933 // A fileset has a Mach-O header but is not an 934 // individual file and must be handled via an 935 // ObjectContainer plugin. 936 if (filetype == llvm::MachO::MH_FILESET) 937 return false; 938 939 return MachHeaderSizeFromMagic(magic) != 0; 940 } 941 942 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp, 943 DataBufferSP data_sp, 944 lldb::offset_t data_offset, 945 const FileSpec *file, 946 lldb::offset_t file_offset, 947 lldb::offset_t length) 948 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset), 949 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(), 950 m_thread_context_offsets_valid(false), m_reexported_dylibs(), 951 m_allow_assembly_emulation_unwind_plans(true) { 952 ::memset(&m_header, 0, sizeof(m_header)); 953 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 954 } 955 956 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp, 957 lldb::WritableDataBufferSP header_data_sp, 958 const lldb::ProcessSP &process_sp, 959 lldb::addr_t header_addr) 960 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 961 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(), 962 m_thread_context_offsets_valid(false), m_reexported_dylibs(), 963 m_allow_assembly_emulation_unwind_plans(true) { 964 ::memset(&m_header, 0, sizeof(m_header)); 965 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 966 } 967 968 bool ObjectFileMachO::ParseHeader(DataExtractor &data, 969 lldb::offset_t *data_offset_ptr, 970 llvm::MachO::mach_header &header) { 971 data.SetByteOrder(endian::InlHostByteOrder()); 972 // Leave magic in the original byte order 973 header.magic = data.GetU32(data_offset_ptr); 974 bool can_parse = false; 975 bool is_64_bit = false; 976 switch (header.magic) { 977 case MH_MAGIC: 978 data.SetByteOrder(endian::InlHostByteOrder()); 979 data.SetAddressByteSize(4); 980 can_parse = true; 981 break; 982 983 case MH_MAGIC_64: 984 data.SetByteOrder(endian::InlHostByteOrder()); 985 data.SetAddressByteSize(8); 986 can_parse = true; 987 is_64_bit = true; 988 break; 989 990 case MH_CIGAM: 991 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 992 ? eByteOrderLittle 993 : eByteOrderBig); 994 data.SetAddressByteSize(4); 995 can_parse = true; 996 break; 997 998 case MH_CIGAM_64: 999 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1000 ? eByteOrderLittle 1001 : eByteOrderBig); 1002 data.SetAddressByteSize(8); 1003 is_64_bit = true; 1004 can_parse = true; 1005 break; 1006 1007 default: 1008 break; 1009 } 1010 1011 if (can_parse) { 1012 data.GetU32(data_offset_ptr, &header.cputype, 6); 1013 if (is_64_bit) 1014 *data_offset_ptr += 4; 1015 return true; 1016 } else { 1017 memset(&header, 0, sizeof(header)); 1018 } 1019 return false; 1020 } 1021 1022 bool ObjectFileMachO::ParseHeader() { 1023 ModuleSP module_sp(GetModule()); 1024 if (!module_sp) 1025 return false; 1026 1027 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 1028 bool can_parse = false; 1029 lldb::offset_t offset = 0; 1030 m_data.SetByteOrder(endian::InlHostByteOrder()); 1031 // Leave magic in the original byte order 1032 m_header.magic = m_data.GetU32(&offset); 1033 switch (m_header.magic) { 1034 case MH_MAGIC: 1035 m_data.SetByteOrder(endian::InlHostByteOrder()); 1036 m_data.SetAddressByteSize(4); 1037 can_parse = true; 1038 break; 1039 1040 case MH_MAGIC_64: 1041 m_data.SetByteOrder(endian::InlHostByteOrder()); 1042 m_data.SetAddressByteSize(8); 1043 can_parse = true; 1044 break; 1045 1046 case MH_CIGAM: 1047 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1048 ? eByteOrderLittle 1049 : eByteOrderBig); 1050 m_data.SetAddressByteSize(4); 1051 can_parse = true; 1052 break; 1053 1054 case MH_CIGAM_64: 1055 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1056 ? eByteOrderLittle 1057 : eByteOrderBig); 1058 m_data.SetAddressByteSize(8); 1059 can_parse = true; 1060 break; 1061 1062 default: 1063 break; 1064 } 1065 1066 if (can_parse) { 1067 m_data.GetU32(&offset, &m_header.cputype, 6); 1068 1069 ModuleSpecList all_specs; 1070 ModuleSpec base_spec; 1071 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 1072 base_spec, all_specs); 1073 1074 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 1075 ArchSpec mach_arch = 1076 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 1077 1078 // Check if the module has a required architecture 1079 const ArchSpec &module_arch = module_sp->GetArchitecture(); 1080 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch)) 1081 continue; 1082 1083 if (SetModulesArchitecture(mach_arch)) { 1084 const size_t header_and_lc_size = 1085 m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic); 1086 if (m_data.GetByteSize() < header_and_lc_size) { 1087 DataBufferSP data_sp; 1088 ProcessSP process_sp(m_process_wp.lock()); 1089 if (process_sp) { 1090 data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size); 1091 } else { 1092 // Read in all only the load command data from the file on disk 1093 data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset); 1094 if (data_sp->GetByteSize() != header_and_lc_size) 1095 continue; 1096 } 1097 if (data_sp) 1098 m_data.SetData(data_sp); 1099 } 1100 } 1101 return true; 1102 } 1103 // None found. 1104 return false; 1105 } else { 1106 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header)); 1107 } 1108 return false; 1109 } 1110 1111 ByteOrder ObjectFileMachO::GetByteOrder() const { 1112 return m_data.GetByteOrder(); 1113 } 1114 1115 bool ObjectFileMachO::IsExecutable() const { 1116 return m_header.filetype == MH_EXECUTE; 1117 } 1118 1119 bool ObjectFileMachO::IsDynamicLoader() const { 1120 return m_header.filetype == MH_DYLINKER; 1121 } 1122 1123 bool ObjectFileMachO::IsSharedCacheBinary() const { 1124 return m_header.flags & MH_DYLIB_IN_CACHE; 1125 } 1126 1127 bool ObjectFileMachO::IsKext() const { 1128 return m_header.filetype == MH_KEXT_BUNDLE; 1129 } 1130 1131 uint32_t ObjectFileMachO::GetAddressByteSize() const { 1132 return m_data.GetAddressByteSize(); 1133 } 1134 1135 AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { 1136 Symtab *symtab = GetSymtab(); 1137 if (!symtab) 1138 return AddressClass::eUnknown; 1139 1140 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr); 1141 if (symbol) { 1142 if (symbol->ValueIsAddress()) { 1143 SectionSP section_sp(symbol->GetAddressRef().GetSection()); 1144 if (section_sp) { 1145 const lldb::SectionType section_type = section_sp->GetType(); 1146 switch (section_type) { 1147 case eSectionTypeInvalid: 1148 return AddressClass::eUnknown; 1149 1150 case eSectionTypeCode: 1151 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1152 // For ARM we have a bit in the n_desc field of the symbol that 1153 // tells us ARM/Thumb which is bit 0x0008. 1154 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB) 1155 return AddressClass::eCodeAlternateISA; 1156 } 1157 return AddressClass::eCode; 1158 1159 case eSectionTypeContainer: 1160 return AddressClass::eUnknown; 1161 1162 case eSectionTypeData: 1163 case eSectionTypeDataCString: 1164 case eSectionTypeDataCStringPointers: 1165 case eSectionTypeDataSymbolAddress: 1166 case eSectionTypeData4: 1167 case eSectionTypeData8: 1168 case eSectionTypeData16: 1169 case eSectionTypeDataPointers: 1170 case eSectionTypeZeroFill: 1171 case eSectionTypeDataObjCMessageRefs: 1172 case eSectionTypeDataObjCCFStrings: 1173 case eSectionTypeGoSymtab: 1174 return AddressClass::eData; 1175 1176 case eSectionTypeDebug: 1177 case eSectionTypeDWARFDebugAbbrev: 1178 case eSectionTypeDWARFDebugAbbrevDwo: 1179 case eSectionTypeDWARFDebugAddr: 1180 case eSectionTypeDWARFDebugAranges: 1181 case eSectionTypeDWARFDebugCuIndex: 1182 case eSectionTypeDWARFDebugFrame: 1183 case eSectionTypeDWARFDebugInfo: 1184 case eSectionTypeDWARFDebugInfoDwo: 1185 case eSectionTypeDWARFDebugLine: 1186 case eSectionTypeDWARFDebugLineStr: 1187 case eSectionTypeDWARFDebugLoc: 1188 case eSectionTypeDWARFDebugLocDwo: 1189 case eSectionTypeDWARFDebugLocLists: 1190 case eSectionTypeDWARFDebugLocListsDwo: 1191 case eSectionTypeDWARFDebugMacInfo: 1192 case eSectionTypeDWARFDebugMacro: 1193 case eSectionTypeDWARFDebugNames: 1194 case eSectionTypeDWARFDebugPubNames: 1195 case eSectionTypeDWARFDebugPubTypes: 1196 case eSectionTypeDWARFDebugRanges: 1197 case eSectionTypeDWARFDebugRngLists: 1198 case eSectionTypeDWARFDebugRngListsDwo: 1199 case eSectionTypeDWARFDebugStr: 1200 case eSectionTypeDWARFDebugStrDwo: 1201 case eSectionTypeDWARFDebugStrOffsets: 1202 case eSectionTypeDWARFDebugStrOffsetsDwo: 1203 case eSectionTypeDWARFDebugTuIndex: 1204 case eSectionTypeDWARFDebugTypes: 1205 case eSectionTypeDWARFDebugTypesDwo: 1206 case eSectionTypeDWARFAppleNames: 1207 case eSectionTypeDWARFAppleTypes: 1208 case eSectionTypeDWARFAppleNamespaces: 1209 case eSectionTypeDWARFAppleObjC: 1210 case eSectionTypeDWARFGNUDebugAltLink: 1211 case eSectionTypeCTF: 1212 case eSectionTypeSwiftModules: 1213 return AddressClass::eDebug; 1214 1215 case eSectionTypeEHFrame: 1216 case eSectionTypeARMexidx: 1217 case eSectionTypeARMextab: 1218 case eSectionTypeCompactUnwind: 1219 return AddressClass::eRuntime; 1220 1221 case eSectionTypeAbsoluteAddress: 1222 case eSectionTypeELFSymbolTable: 1223 case eSectionTypeELFDynamicSymbols: 1224 case eSectionTypeELFRelocationEntries: 1225 case eSectionTypeELFDynamicLinkInfo: 1226 case eSectionTypeOther: 1227 return AddressClass::eUnknown; 1228 } 1229 } 1230 } 1231 1232 const SymbolType symbol_type = symbol->GetType(); 1233 switch (symbol_type) { 1234 case eSymbolTypeAny: 1235 return AddressClass::eUnknown; 1236 case eSymbolTypeAbsolute: 1237 return AddressClass::eUnknown; 1238 1239 case eSymbolTypeCode: 1240 case eSymbolTypeTrampoline: 1241 case eSymbolTypeResolver: 1242 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1243 // For ARM we have a bit in the n_desc field of the symbol that tells 1244 // us ARM/Thumb which is bit 0x0008. 1245 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB) 1246 return AddressClass::eCodeAlternateISA; 1247 } 1248 return AddressClass::eCode; 1249 1250 case eSymbolTypeData: 1251 return AddressClass::eData; 1252 case eSymbolTypeRuntime: 1253 return AddressClass::eRuntime; 1254 case eSymbolTypeException: 1255 return AddressClass::eRuntime; 1256 case eSymbolTypeSourceFile: 1257 return AddressClass::eDebug; 1258 case eSymbolTypeHeaderFile: 1259 return AddressClass::eDebug; 1260 case eSymbolTypeObjectFile: 1261 return AddressClass::eDebug; 1262 case eSymbolTypeCommonBlock: 1263 return AddressClass::eDebug; 1264 case eSymbolTypeBlock: 1265 return AddressClass::eDebug; 1266 case eSymbolTypeLocal: 1267 return AddressClass::eData; 1268 case eSymbolTypeParam: 1269 return AddressClass::eData; 1270 case eSymbolTypeVariable: 1271 return AddressClass::eData; 1272 case eSymbolTypeVariableType: 1273 return AddressClass::eDebug; 1274 case eSymbolTypeLineEntry: 1275 return AddressClass::eDebug; 1276 case eSymbolTypeLineHeader: 1277 return AddressClass::eDebug; 1278 case eSymbolTypeScopeBegin: 1279 return AddressClass::eDebug; 1280 case eSymbolTypeScopeEnd: 1281 return AddressClass::eDebug; 1282 case eSymbolTypeAdditional: 1283 return AddressClass::eUnknown; 1284 case eSymbolTypeCompiler: 1285 return AddressClass::eDebug; 1286 case eSymbolTypeInstrumentation: 1287 return AddressClass::eDebug; 1288 case eSymbolTypeUndefined: 1289 return AddressClass::eUnknown; 1290 case eSymbolTypeObjCClass: 1291 return AddressClass::eRuntime; 1292 case eSymbolTypeObjCMetaClass: 1293 return AddressClass::eRuntime; 1294 case eSymbolTypeObjCIVar: 1295 return AddressClass::eRuntime; 1296 case eSymbolTypeReExported: 1297 return AddressClass::eRuntime; 1298 } 1299 } 1300 return AddressClass::eUnknown; 1301 } 1302 1303 bool ObjectFileMachO::IsStripped() { 1304 if (m_dysymtab.cmd == 0) { 1305 ModuleSP module_sp(GetModule()); 1306 if (module_sp) { 1307 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1308 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1309 const lldb::offset_t load_cmd_offset = offset; 1310 1311 llvm::MachO::load_command lc = {}; 1312 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 1313 break; 1314 if (lc.cmd == LC_DYSYMTAB) { 1315 m_dysymtab.cmd = lc.cmd; 1316 m_dysymtab.cmdsize = lc.cmdsize; 1317 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1318 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) == 1319 nullptr) { 1320 // Clear m_dysymtab if we were unable to read all items from the 1321 // load command 1322 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 1323 } 1324 } 1325 offset = load_cmd_offset + lc.cmdsize; 1326 } 1327 } 1328 } 1329 if (m_dysymtab.cmd) 1330 return m_dysymtab.nlocalsym <= 1; 1331 return false; 1332 } 1333 1334 ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() { 1335 EncryptedFileRanges result; 1336 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1337 1338 llvm::MachO::encryption_info_command encryption_cmd; 1339 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1340 const lldb::offset_t load_cmd_offset = offset; 1341 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr) 1342 break; 1343 1344 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the 1345 // 3 fields we care about, so treat them the same. 1346 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO || 1347 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) { 1348 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) { 1349 if (encryption_cmd.cryptid != 0) { 1350 EncryptedFileRanges::Entry entry; 1351 entry.SetRangeBase(encryption_cmd.cryptoff); 1352 entry.SetByteSize(encryption_cmd.cryptsize); 1353 result.Append(entry); 1354 } 1355 } 1356 } 1357 offset = load_cmd_offset + encryption_cmd.cmdsize; 1358 } 1359 1360 return result; 1361 } 1362 1363 void ObjectFileMachO::SanitizeSegmentCommand( 1364 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) { 1365 if (m_length == 0 || seg_cmd.filesize == 0) 1366 return; 1367 1368 if (IsSharedCacheBinary() && !IsInMemory()) { 1369 // In shared cache images, the load commands are relative to the 1370 // shared cache file, and not the specific image we are 1371 // examining. Let's fix this up so that it looks like a normal 1372 // image. 1373 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(), 1374 sizeof(seg_cmd.segname)) == 0) 1375 m_text_address = seg_cmd.vmaddr; 1376 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(), 1377 sizeof(seg_cmd.segname)) == 0) 1378 m_linkedit_original_offset = seg_cmd.fileoff; 1379 1380 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address; 1381 } 1382 1383 if (seg_cmd.fileoff > m_length) { 1384 // We have a load command that says it extends past the end of the file. 1385 // This is likely a corrupt file. We don't have any way to return an error 1386 // condition here (this method was likely invoked from something like 1387 // ObjectFile::GetSectionList()), so we just null out the section contents, 1388 // and dump a message to stdout. The most common case here is core file 1389 // debugging with a truncated file. 1390 const char *lc_segment_name = 1391 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1392 GetModule()->ReportWarning( 1393 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond " 1394 "the end of the file ({3:x16}), ignoring this section", 1395 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length); 1396 1397 seg_cmd.fileoff = 0; 1398 seg_cmd.filesize = 0; 1399 } 1400 1401 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) { 1402 // We have a load command that says it extends past the end of the file. 1403 // This is likely a corrupt file. We don't have any way to return an error 1404 // condition here (this method was likely invoked from something like 1405 // ObjectFile::GetSectionList()), so we just null out the section contents, 1406 // and dump a message to stdout. The most common case here is core file 1407 // debugging with a truncated file. 1408 const char *lc_segment_name = 1409 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1410 GetModule()->ReportWarning( 1411 "load command {0} {1} has a fileoff + filesize ({2:x16}) that " 1412 "extends beyond the end of the file ({3:x16}), the segment will be " 1413 "truncated to match", 1414 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length); 1415 1416 // Truncate the length 1417 seg_cmd.filesize = m_length - seg_cmd.fileoff; 1418 } 1419 } 1420 1421 static uint32_t 1422 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) { 1423 uint32_t result = 0; 1424 if (seg_cmd.initprot & VM_PROT_READ) 1425 result |= ePermissionsReadable; 1426 if (seg_cmd.initprot & VM_PROT_WRITE) 1427 result |= ePermissionsWritable; 1428 if (seg_cmd.initprot & VM_PROT_EXECUTE) 1429 result |= ePermissionsExecutable; 1430 return result; 1431 } 1432 1433 static lldb::SectionType GetSectionType(uint32_t flags, 1434 ConstString section_name) { 1435 1436 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS)) 1437 return eSectionTypeCode; 1438 1439 uint32_t mach_sect_type = flags & SECTION_TYPE; 1440 static ConstString g_sect_name_objc_data("__objc_data"); 1441 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs"); 1442 static ConstString g_sect_name_objc_selrefs("__objc_selrefs"); 1443 static ConstString g_sect_name_objc_classrefs("__objc_classrefs"); 1444 static ConstString g_sect_name_objc_superrefs("__objc_superrefs"); 1445 static ConstString g_sect_name_objc_const("__objc_const"); 1446 static ConstString g_sect_name_objc_classlist("__objc_classlist"); 1447 static ConstString g_sect_name_cfstring("__cfstring"); 1448 1449 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev"); 1450 static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo"); 1451 static ConstString g_sect_name_dwarf_debug_addr("__debug_addr"); 1452 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges"); 1453 static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index"); 1454 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame"); 1455 static ConstString g_sect_name_dwarf_debug_info("__debug_info"); 1456 static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo"); 1457 static ConstString g_sect_name_dwarf_debug_line("__debug_line"); 1458 static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo"); 1459 static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str"); 1460 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc"); 1461 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists"); 1462 static ConstString g_sect_name_dwarf_debug_loclists_dwo("__debug_loclists.dwo"); 1463 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo"); 1464 static ConstString g_sect_name_dwarf_debug_macro("__debug_macro"); 1465 static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo"); 1466 static ConstString g_sect_name_dwarf_debug_names("__debug_names"); 1467 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames"); 1468 static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes"); 1469 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges"); 1470 static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists"); 1471 static ConstString g_sect_name_dwarf_debug_str("__debug_str"); 1472 static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo"); 1473 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs"); 1474 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo"); 1475 static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index"); 1476 static ConstString g_sect_name_dwarf_debug_types("__debug_types"); 1477 static ConstString g_sect_name_dwarf_apple_names("__apple_names"); 1478 static ConstString g_sect_name_dwarf_apple_types("__apple_types"); 1479 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac"); 1480 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc"); 1481 static ConstString g_sect_name_eh_frame("__eh_frame"); 1482 static ConstString g_sect_name_compact_unwind("__unwind_info"); 1483 static ConstString g_sect_name_text("__text"); 1484 static ConstString g_sect_name_data("__data"); 1485 static ConstString g_sect_name_go_symtab("__gosymtab"); 1486 static ConstString g_sect_name_ctf("__ctf"); 1487 static ConstString g_sect_name_swift_ast("__swift_ast"); 1488 1489 if (section_name == g_sect_name_dwarf_debug_abbrev) 1490 return eSectionTypeDWARFDebugAbbrev; 1491 if (section_name == g_sect_name_dwarf_debug_abbrev_dwo) 1492 return eSectionTypeDWARFDebugAbbrevDwo; 1493 if (section_name == g_sect_name_dwarf_debug_addr) 1494 return eSectionTypeDWARFDebugAddr; 1495 if (section_name == g_sect_name_dwarf_debug_aranges) 1496 return eSectionTypeDWARFDebugAranges; 1497 if (section_name == g_sect_name_dwarf_debug_cu_index) 1498 return eSectionTypeDWARFDebugCuIndex; 1499 if (section_name == g_sect_name_dwarf_debug_frame) 1500 return eSectionTypeDWARFDebugFrame; 1501 if (section_name == g_sect_name_dwarf_debug_info) 1502 return eSectionTypeDWARFDebugInfo; 1503 if (section_name == g_sect_name_dwarf_debug_info_dwo) 1504 return eSectionTypeDWARFDebugInfoDwo; 1505 if (section_name == g_sect_name_dwarf_debug_line) 1506 return eSectionTypeDWARFDebugLine; 1507 if (section_name == g_sect_name_dwarf_debug_line_dwo) 1508 return eSectionTypeDWARFDebugLine; // Same as debug_line. 1509 if (section_name == g_sect_name_dwarf_debug_line_str) 1510 return eSectionTypeDWARFDebugLineStr; 1511 if (section_name == g_sect_name_dwarf_debug_loc) 1512 return eSectionTypeDWARFDebugLoc; 1513 if (section_name == g_sect_name_dwarf_debug_loclists) 1514 return eSectionTypeDWARFDebugLocLists; 1515 if (section_name == g_sect_name_dwarf_debug_loclists_dwo) 1516 return eSectionTypeDWARFDebugLocListsDwo; 1517 if (section_name == g_sect_name_dwarf_debug_macinfo) 1518 return eSectionTypeDWARFDebugMacInfo; 1519 if (section_name == g_sect_name_dwarf_debug_macro) 1520 return eSectionTypeDWARFDebugMacro; 1521 if (section_name == g_sect_name_dwarf_debug_macro_dwo) 1522 return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro. 1523 if (section_name == g_sect_name_dwarf_debug_names) 1524 return eSectionTypeDWARFDebugNames; 1525 if (section_name == g_sect_name_dwarf_debug_pubnames) 1526 return eSectionTypeDWARFDebugPubNames; 1527 if (section_name == g_sect_name_dwarf_debug_pubtypes) 1528 return eSectionTypeDWARFDebugPubTypes; 1529 if (section_name == g_sect_name_dwarf_debug_ranges) 1530 return eSectionTypeDWARFDebugRanges; 1531 if (section_name == g_sect_name_dwarf_debug_rnglists) 1532 return eSectionTypeDWARFDebugRngLists; 1533 if (section_name == g_sect_name_dwarf_debug_str) 1534 return eSectionTypeDWARFDebugStr; 1535 if (section_name == g_sect_name_dwarf_debug_str_dwo) 1536 return eSectionTypeDWARFDebugStrDwo; 1537 if (section_name == g_sect_name_dwarf_debug_str_offs) 1538 return eSectionTypeDWARFDebugStrOffsets; 1539 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo) 1540 return eSectionTypeDWARFDebugStrOffsetsDwo; 1541 if (section_name == g_sect_name_dwarf_debug_tu_index) 1542 return eSectionTypeDWARFDebugTuIndex; 1543 if (section_name == g_sect_name_dwarf_debug_types) 1544 return eSectionTypeDWARFDebugTypes; 1545 if (section_name == g_sect_name_dwarf_apple_names) 1546 return eSectionTypeDWARFAppleNames; 1547 if (section_name == g_sect_name_dwarf_apple_types) 1548 return eSectionTypeDWARFAppleTypes; 1549 if (section_name == g_sect_name_dwarf_apple_namespaces) 1550 return eSectionTypeDWARFAppleNamespaces; 1551 if (section_name == g_sect_name_dwarf_apple_objc) 1552 return eSectionTypeDWARFAppleObjC; 1553 if (section_name == g_sect_name_objc_selrefs) 1554 return eSectionTypeDataCStringPointers; 1555 if (section_name == g_sect_name_objc_msgrefs) 1556 return eSectionTypeDataObjCMessageRefs; 1557 if (section_name == g_sect_name_eh_frame) 1558 return eSectionTypeEHFrame; 1559 if (section_name == g_sect_name_compact_unwind) 1560 return eSectionTypeCompactUnwind; 1561 if (section_name == g_sect_name_cfstring) 1562 return eSectionTypeDataObjCCFStrings; 1563 if (section_name == g_sect_name_go_symtab) 1564 return eSectionTypeGoSymtab; 1565 if (section_name == g_sect_name_ctf) 1566 return eSectionTypeCTF; 1567 if (section_name == g_sect_name_swift_ast) 1568 return eSectionTypeSwiftModules; 1569 if (section_name == g_sect_name_objc_data || 1570 section_name == g_sect_name_objc_classrefs || 1571 section_name == g_sect_name_objc_superrefs || 1572 section_name == g_sect_name_objc_const || 1573 section_name == g_sect_name_objc_classlist) { 1574 return eSectionTypeDataPointers; 1575 } 1576 1577 switch (mach_sect_type) { 1578 // TODO: categorize sections by other flags for regular sections 1579 case S_REGULAR: 1580 if (section_name == g_sect_name_text) 1581 return eSectionTypeCode; 1582 if (section_name == g_sect_name_data) 1583 return eSectionTypeData; 1584 return eSectionTypeOther; 1585 case S_ZEROFILL: 1586 return eSectionTypeZeroFill; 1587 case S_CSTRING_LITERALS: // section with only literal C strings 1588 return eSectionTypeDataCString; 1589 case S_4BYTE_LITERALS: // section with only 4 byte literals 1590 return eSectionTypeData4; 1591 case S_8BYTE_LITERALS: // section with only 8 byte literals 1592 return eSectionTypeData8; 1593 case S_LITERAL_POINTERS: // section with only pointers to literals 1594 return eSectionTypeDataPointers; 1595 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers 1596 return eSectionTypeDataPointers; 1597 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers 1598 return eSectionTypeDataPointers; 1599 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in 1600 // the reserved2 field 1601 return eSectionTypeCode; 1602 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for 1603 // initialization 1604 return eSectionTypeDataPointers; 1605 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for 1606 // termination 1607 return eSectionTypeDataPointers; 1608 case S_COALESCED: 1609 return eSectionTypeOther; 1610 case S_GB_ZEROFILL: 1611 return eSectionTypeZeroFill; 1612 case S_INTERPOSING: // section with only pairs of function pointers for 1613 // interposing 1614 return eSectionTypeCode; 1615 case S_16BYTE_LITERALS: // section with only 16 byte literals 1616 return eSectionTypeData16; 1617 case S_DTRACE_DOF: 1618 return eSectionTypeDebug; 1619 case S_LAZY_DYLIB_SYMBOL_POINTERS: 1620 return eSectionTypeDataPointers; 1621 default: 1622 return eSectionTypeOther; 1623 } 1624 } 1625 1626 struct ObjectFileMachO::SegmentParsingContext { 1627 const EncryptedFileRanges EncryptedRanges; 1628 lldb_private::SectionList &UnifiedList; 1629 uint32_t NextSegmentIdx = 0; 1630 uint32_t NextSectionIdx = 0; 1631 bool FileAddressesChanged = false; 1632 1633 SegmentParsingContext(EncryptedFileRanges EncryptedRanges, 1634 lldb_private::SectionList &UnifiedList) 1635 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {} 1636 }; 1637 1638 void ObjectFileMachO::ProcessSegmentCommand( 1639 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset, 1640 uint32_t cmd_idx, SegmentParsingContext &context) { 1641 llvm::MachO::segment_command_64 load_cmd; 1642 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_)); 1643 1644 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16)) 1645 return; 1646 1647 ModuleSP module_sp = GetModule(); 1648 const bool is_core = GetType() == eTypeCoreFile; 1649 const bool is_dsym = (m_header.filetype == MH_DSYM); 1650 bool add_section = true; 1651 bool add_to_unified = true; 1652 ConstString const_segname( 1653 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname))); 1654 1655 SectionSP unified_section_sp( 1656 context.UnifiedList.FindSectionByName(const_segname)); 1657 if (is_dsym && unified_section_sp) { 1658 if (const_segname == GetSegmentNameLINKEDIT()) { 1659 // We need to keep the __LINKEDIT segment private to this object file 1660 // only 1661 add_to_unified = false; 1662 } else { 1663 // This is the dSYM file and this section has already been created by the 1664 // object file, no need to create it. 1665 add_section = false; 1666 } 1667 } 1668 load_cmd.vmaddr = m_data.GetAddress(&offset); 1669 load_cmd.vmsize = m_data.GetAddress(&offset); 1670 load_cmd.fileoff = m_data.GetAddress(&offset); 1671 load_cmd.filesize = m_data.GetAddress(&offset); 1672 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4)) 1673 return; 1674 1675 SanitizeSegmentCommand(load_cmd, cmd_idx); 1676 1677 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd); 1678 const bool segment_is_encrypted = 1679 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0; 1680 1681 // Use a segment ID of the segment index shifted left by 8 so they never 1682 // conflict with any of the sections. 1683 SectionSP segment_sp; 1684 if (add_section && (const_segname || is_core)) { 1685 segment_sp = std::make_shared<Section>( 1686 module_sp, // Module to which this section belongs 1687 this, // Object file to which this sections belongs 1688 ++context.NextSegmentIdx 1689 << 8, // Section ID is the 1 based segment index 1690 // shifted right by 8 bits as not to collide with any of the 256 1691 // section IDs that are possible 1692 const_segname, // Name of this section 1693 eSectionTypeContainer, // This section is a container of other 1694 // sections. 1695 load_cmd.vmaddr, // File VM address == addresses as they are 1696 // found in the object file 1697 load_cmd.vmsize, // VM size in bytes of this section 1698 load_cmd.fileoff, // Offset to the data for this section in 1699 // the file 1700 load_cmd.filesize, // Size in bytes of this section as found 1701 // in the file 1702 0, // Segments have no alignment information 1703 load_cmd.flags); // Flags for this section 1704 1705 segment_sp->SetIsEncrypted(segment_is_encrypted); 1706 m_sections_up->AddSection(segment_sp); 1707 segment_sp->SetPermissions(segment_permissions); 1708 if (add_to_unified) 1709 context.UnifiedList.AddSection(segment_sp); 1710 } else if (unified_section_sp) { 1711 // If this is a dSYM and the file addresses in the dSYM differ from the 1712 // file addresses in the ObjectFile, we must use the file base address for 1713 // the Section from the dSYM for the DWARF to resolve correctly. 1714 // This only happens with binaries in the shared cache in practice; 1715 // normally a mismatch like this would give a binary & dSYM that do not 1716 // match UUIDs. When a binary is included in the shared cache, its 1717 // segments are rearranged to optimize the shared cache, so its file 1718 // addresses will differ from what the ObjectFile had originally, 1719 // and what the dSYM has. 1720 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) { 1721 Log *log = GetLog(LLDBLog::Symbols); 1722 if (log) { 1723 log->Printf( 1724 "Installing dSYM's %s segment file address over ObjectFile's " 1725 "so symbol table/debug info resolves correctly for %s", 1726 const_segname.AsCString(), 1727 module_sp->GetFileSpec().GetFilename().AsCString()); 1728 } 1729 1730 // Make sure we've parsed the symbol table from the ObjectFile before 1731 // we go around changing its Sections. 1732 module_sp->GetObjectFile()->GetSymtab(); 1733 // eh_frame would present the same problems but we parse that on a per- 1734 // function basis as-needed so it's more difficult to remove its use of 1735 // the Sections. Realistically, the environments where this code path 1736 // will be taken will not have eh_frame sections. 1737 1738 unified_section_sp->SetFileAddress(load_cmd.vmaddr); 1739 1740 // Notify the module that the section addresses have been changed once 1741 // we're done so any file-address caches can be updated. 1742 context.FileAddressesChanged = true; 1743 } 1744 m_sections_up->AddSection(unified_section_sp); 1745 } 1746 1747 llvm::MachO::section_64 sect64; 1748 ::memset(§64, 0, sizeof(sect64)); 1749 // Push a section into our mach sections for the section at index zero 1750 // (NO_SECT) if we don't have any mach sections yet... 1751 if (m_mach_sections.empty()) 1752 m_mach_sections.push_back(sect64); 1753 uint32_t segment_sect_idx; 1754 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; 1755 1756 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; 1757 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; 1758 ++segment_sect_idx) { 1759 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname, 1760 sizeof(sect64.sectname)) == nullptr) 1761 break; 1762 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname, 1763 sizeof(sect64.segname)) == nullptr) 1764 break; 1765 sect64.addr = m_data.GetAddress(&offset); 1766 sect64.size = m_data.GetAddress(&offset); 1767 1768 if (m_data.GetU32(&offset, §64.offset, num_u32s) == nullptr) 1769 break; 1770 1771 if (IsSharedCacheBinary() && !IsInMemory()) { 1772 sect64.offset = sect64.addr - m_text_address; 1773 } 1774 1775 // Keep a list of mach sections around in case we need to get at data that 1776 // isn't stored in the abstracted Sections. 1777 m_mach_sections.push_back(sect64); 1778 1779 if (add_section) { 1780 ConstString section_name( 1781 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); 1782 if (!const_segname) { 1783 // We have a segment with no name so we need to conjure up segments 1784 // that correspond to the section's segname if there isn't already such 1785 // a section. If there is such a section, we resize the section so that 1786 // it spans all sections. We also mark these sections as fake so 1787 // address matches don't hit if they land in the gaps between the child 1788 // sections. 1789 const_segname.SetTrimmedCStringWithLength(sect64.segname, 1790 sizeof(sect64.segname)); 1791 segment_sp = context.UnifiedList.FindSectionByName(const_segname); 1792 if (segment_sp.get()) { 1793 Section *segment = segment_sp.get(); 1794 // Grow the section size as needed. 1795 const lldb::addr_t sect64_min_addr = sect64.addr; 1796 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size; 1797 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize(); 1798 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress(); 1799 const lldb::addr_t curr_seg_max_addr = 1800 curr_seg_min_addr + curr_seg_byte_size; 1801 if (sect64_min_addr >= curr_seg_min_addr) { 1802 const lldb::addr_t new_seg_byte_size = 1803 sect64_max_addr - curr_seg_min_addr; 1804 // Only grow the section size if needed 1805 if (new_seg_byte_size > curr_seg_byte_size) 1806 segment->SetByteSize(new_seg_byte_size); 1807 } else { 1808 // We need to change the base address of the segment and adjust the 1809 // child section offsets for all existing children. 1810 const lldb::addr_t slide_amount = 1811 sect64_min_addr - curr_seg_min_addr; 1812 segment->Slide(slide_amount, false); 1813 segment->GetChildren().Slide(-slide_amount, false); 1814 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr); 1815 } 1816 1817 // Grow the section size as needed. 1818 if (sect64.offset) { 1819 const lldb::addr_t segment_min_file_offset = 1820 segment->GetFileOffset(); 1821 const lldb::addr_t segment_max_file_offset = 1822 segment_min_file_offset + segment->GetFileSize(); 1823 1824 const lldb::addr_t section_min_file_offset = sect64.offset; 1825 const lldb::addr_t section_max_file_offset = 1826 section_min_file_offset + sect64.size; 1827 const lldb::addr_t new_file_offset = 1828 std::min(section_min_file_offset, segment_min_file_offset); 1829 const lldb::addr_t new_file_size = 1830 std::max(section_max_file_offset, segment_max_file_offset) - 1831 new_file_offset; 1832 segment->SetFileOffset(new_file_offset); 1833 segment->SetFileSize(new_file_size); 1834 } 1835 } else { 1836 // Create a fake section for the section's named segment 1837 segment_sp = std::make_shared<Section>( 1838 segment_sp, // Parent section 1839 module_sp, // Module to which this section belongs 1840 this, // Object file to which this section belongs 1841 ++context.NextSegmentIdx 1842 << 8, // Section ID is the 1 based segment index 1843 // shifted right by 8 bits as not to 1844 // collide with any of the 256 section IDs 1845 // that are possible 1846 const_segname, // Name of this section 1847 eSectionTypeContainer, // This section is a container of 1848 // other sections. 1849 sect64.addr, // File VM address == addresses as they are 1850 // found in the object file 1851 sect64.size, // VM size in bytes of this section 1852 sect64.offset, // Offset to the data for this section in 1853 // the file 1854 sect64.offset ? sect64.size : 0, // Size in bytes of 1855 // this section as 1856 // found in the file 1857 sect64.align, 1858 load_cmd.flags); // Flags for this section 1859 segment_sp->SetIsFake(true); 1860 segment_sp->SetPermissions(segment_permissions); 1861 m_sections_up->AddSection(segment_sp); 1862 if (add_to_unified) 1863 context.UnifiedList.AddSection(segment_sp); 1864 segment_sp->SetIsEncrypted(segment_is_encrypted); 1865 } 1866 } 1867 assert(segment_sp.get()); 1868 1869 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name); 1870 1871 SectionSP section_sp(new Section( 1872 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, 1873 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, 1874 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, 1875 sect64.flags)); 1876 // Set the section to be encrypted to match the segment 1877 1878 bool section_is_encrypted = false; 1879 if (!segment_is_encrypted && load_cmd.filesize != 0) 1880 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( 1881 sect64.offset) != nullptr; 1882 1883 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); 1884 section_sp->SetPermissions(segment_permissions); 1885 segment_sp->GetChildren().AddSection(section_sp); 1886 1887 if (segment_sp->IsFake()) { 1888 segment_sp.reset(); 1889 const_segname.Clear(); 1890 } 1891 } 1892 } 1893 if (segment_sp && is_dsym) { 1894 if (first_segment_sectID <= context.NextSectionIdx) { 1895 lldb::user_id_t sect_uid; 1896 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx; 1897 ++sect_uid) { 1898 SectionSP curr_section_sp( 1899 segment_sp->GetChildren().FindSectionByID(sect_uid)); 1900 SectionSP next_section_sp; 1901 if (sect_uid + 1 <= context.NextSectionIdx) 1902 next_section_sp = 1903 segment_sp->GetChildren().FindSectionByID(sect_uid + 1); 1904 1905 if (curr_section_sp.get()) { 1906 if (curr_section_sp->GetByteSize() == 0) { 1907 if (next_section_sp.get() != nullptr) 1908 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() - 1909 curr_section_sp->GetFileAddress()); 1910 else 1911 curr_section_sp->SetByteSize(load_cmd.vmsize); 1912 } 1913 } 1914 } 1915 } 1916 } 1917 } 1918 1919 void ObjectFileMachO::ProcessDysymtabCommand( 1920 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) { 1921 m_dysymtab.cmd = load_cmd.cmd; 1922 m_dysymtab.cmdsize = load_cmd.cmdsize; 1923 m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1924 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2); 1925 } 1926 1927 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) { 1928 if (m_sections_up) 1929 return; 1930 1931 m_sections_up = std::make_unique<SectionList>(); 1932 1933 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1934 // bool dump_sections = false; 1935 ModuleSP module_sp(GetModule()); 1936 1937 offset = MachHeaderSizeFromMagic(m_header.magic); 1938 1939 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list); 1940 llvm::MachO::load_command load_cmd; 1941 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1942 const lldb::offset_t load_cmd_offset = offset; 1943 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 1944 break; 1945 1946 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64) 1947 ProcessSegmentCommand(load_cmd, offset, i, context); 1948 else if (load_cmd.cmd == LC_DYSYMTAB) 1949 ProcessDysymtabCommand(load_cmd, offset); 1950 1951 offset = load_cmd_offset + load_cmd.cmdsize; 1952 } 1953 1954 if (context.FileAddressesChanged && module_sp) 1955 module_sp->SectionFileAddressesChanged(); 1956 } 1957 1958 class MachSymtabSectionInfo { 1959 public: 1960 MachSymtabSectionInfo(SectionList *section_list) 1961 : m_section_list(section_list), m_section_infos() { 1962 // Get the number of sections down to a depth of 1 to include all segments 1963 // and their sections, but no other sections that may be added for debug 1964 // map or 1965 m_section_infos.resize(section_list->GetNumSections(1)); 1966 } 1967 1968 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) { 1969 if (n_sect == 0) 1970 return SectionSP(); 1971 if (n_sect < m_section_infos.size()) { 1972 if (!m_section_infos[n_sect].section_sp) { 1973 SectionSP section_sp(m_section_list->FindSectionByID(n_sect)); 1974 m_section_infos[n_sect].section_sp = section_sp; 1975 if (section_sp) { 1976 m_section_infos[n_sect].vm_range.SetBaseAddress( 1977 section_sp->GetFileAddress()); 1978 m_section_infos[n_sect].vm_range.SetByteSize( 1979 section_sp->GetByteSize()); 1980 } else { 1981 std::string filename = "<unknown>"; 1982 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0)); 1983 if (first_section_sp) 1984 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath(); 1985 1986 Debugger::ReportError( 1987 llvm::formatv("unable to find section {0} for a symbol in " 1988 "{1}, corrupt file?", 1989 n_sect, filename)); 1990 } 1991 } 1992 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) { 1993 // Symbol is in section. 1994 return m_section_infos[n_sect].section_sp; 1995 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 && 1996 m_section_infos[n_sect].vm_range.GetBaseAddress() == 1997 file_addr) { 1998 // Symbol is in section with zero size, but has the same start address 1999 // as the section. This can happen with linker symbols (symbols that 2000 // start with the letter 'l' or 'L'. 2001 return m_section_infos[n_sect].section_sp; 2002 } 2003 } 2004 return m_section_list->FindSectionContainingFileAddress(file_addr); 2005 } 2006 2007 protected: 2008 struct SectionInfo { 2009 SectionInfo() : vm_range(), section_sp() {} 2010 2011 VMRange vm_range; 2012 SectionSP section_sp; 2013 }; 2014 SectionList *m_section_list; 2015 std::vector<SectionInfo> m_section_infos; 2016 }; 2017 2018 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63) 2019 struct TrieEntry { 2020 void Dump() const { 2021 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", 2022 static_cast<unsigned long long>(address), 2023 static_cast<unsigned long long>(flags), 2024 static_cast<unsigned long long>(other), name.GetCString()); 2025 if (import_name) 2026 printf(" -> \"%s\"\n", import_name.GetCString()); 2027 else 2028 printf("\n"); 2029 } 2030 ConstString name; 2031 uint64_t address = LLDB_INVALID_ADDRESS; 2032 uint64_t flags = 2033 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, 2034 // TRIE_SYMBOL_IS_THUMB 2035 uint64_t other = 0; 2036 ConstString import_name; 2037 }; 2038 2039 struct TrieEntryWithOffset { 2040 lldb::offset_t nodeOffset; 2041 TrieEntry entry; 2042 2043 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {} 2044 2045 void Dump(uint32_t idx) const { 2046 printf("[%3u] 0x%16.16llx: ", idx, 2047 static_cast<unsigned long long>(nodeOffset)); 2048 entry.Dump(); 2049 } 2050 2051 bool operator<(const TrieEntryWithOffset &other) const { 2052 return (nodeOffset < other.nodeOffset); 2053 } 2054 }; 2055 2056 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset, 2057 const bool is_arm, addr_t text_seg_base_addr, 2058 std::vector<llvm::StringRef> &nameSlices, 2059 std::set<lldb::addr_t> &resolver_addresses, 2060 std::vector<TrieEntryWithOffset> &reexports, 2061 std::vector<TrieEntryWithOffset> &ext_symbols) { 2062 if (!data.ValidOffset(offset)) 2063 return true; 2064 2065 // Terminal node -- end of a branch, possibly add this to 2066 // the symbol table or resolver table. 2067 const uint64_t terminalSize = data.GetULEB128(&offset); 2068 lldb::offset_t children_offset = offset + terminalSize; 2069 if (terminalSize != 0) { 2070 TrieEntryWithOffset e(offset); 2071 e.entry.flags = data.GetULEB128(&offset); 2072 const char *import_name = nullptr; 2073 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 2074 e.entry.address = 0; 2075 e.entry.other = data.GetULEB128(&offset); // dylib ordinal 2076 import_name = data.GetCStr(&offset); 2077 } else { 2078 e.entry.address = data.GetULEB128(&offset); 2079 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2080 e.entry.address += text_seg_base_addr; 2081 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 2082 e.entry.other = data.GetULEB128(&offset); 2083 uint64_t resolver_addr = e.entry.other; 2084 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2085 resolver_addr += text_seg_base_addr; 2086 if (is_arm) 2087 resolver_addr &= THUMB_ADDRESS_BIT_MASK; 2088 resolver_addresses.insert(resolver_addr); 2089 } else 2090 e.entry.other = 0; 2091 } 2092 bool add_this_entry = false; 2093 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && 2094 import_name && import_name[0]) { 2095 // add symbols that are reexport symbols with a valid import name. 2096 add_this_entry = true; 2097 } else if (e.entry.flags == 0 && 2098 (import_name == nullptr || import_name[0] == '\0')) { 2099 // add externally visible symbols, in case the nlist record has 2100 // been stripped/omitted. 2101 add_this_entry = true; 2102 } 2103 if (add_this_entry) { 2104 std::string name; 2105 if (!nameSlices.empty()) { 2106 for (auto name_slice : nameSlices) 2107 name.append(name_slice.data(), name_slice.size()); 2108 } 2109 if (name.size() > 1) { 2110 // Skip the leading '_' 2111 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1); 2112 } 2113 if (import_name) { 2114 // Skip the leading '_' 2115 e.entry.import_name.SetCString(import_name + 1); 2116 } 2117 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { 2118 reexports.push_back(e); 2119 } else { 2120 if (is_arm && (e.entry.address & 1)) { 2121 e.entry.flags |= TRIE_SYMBOL_IS_THUMB; 2122 e.entry.address &= THUMB_ADDRESS_BIT_MASK; 2123 } 2124 ext_symbols.push_back(e); 2125 } 2126 } 2127 } 2128 2129 const uint8_t childrenCount = data.GetU8(&children_offset); 2130 for (uint8_t i = 0; i < childrenCount; ++i) { 2131 const char *cstr = data.GetCStr(&children_offset); 2132 if (cstr) 2133 nameSlices.push_back(llvm::StringRef(cstr)); 2134 else 2135 return false; // Corrupt data 2136 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); 2137 if (childNodeOffset) { 2138 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr, 2139 nameSlices, resolver_addresses, reexports, 2140 ext_symbols)) { 2141 return false; 2142 } 2143 } 2144 nameSlices.pop_back(); 2145 } 2146 return true; 2147 } 2148 2149 static SymbolType GetSymbolType(const char *&symbol_name, 2150 bool &demangled_is_synthesized, 2151 const SectionSP &text_section_sp, 2152 const SectionSP &data_section_sp, 2153 const SectionSP &data_dirty_section_sp, 2154 const SectionSP &data_const_section_sp, 2155 const SectionSP &symbol_section) { 2156 SymbolType type = eSymbolTypeInvalid; 2157 2158 const char *symbol_sect_name = symbol_section->GetName().AsCString(); 2159 if (symbol_section->IsDescendant(text_section_sp.get())) { 2160 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 2161 S_ATTR_SELF_MODIFYING_CODE | 2162 S_ATTR_SOME_INSTRUCTIONS)) 2163 type = eSymbolTypeData; 2164 else 2165 type = eSymbolTypeCode; 2166 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 2167 symbol_section->IsDescendant(data_dirty_section_sp.get()) || 2168 symbol_section->IsDescendant(data_const_section_sp.get())) { 2169 if (symbol_sect_name && 2170 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 2171 type = eSymbolTypeRuntime; 2172 2173 if (symbol_name) { 2174 llvm::StringRef symbol_name_ref(symbol_name); 2175 if (symbol_name_ref.starts_with("OBJC_")) { 2176 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_"); 2177 static const llvm::StringRef g_objc_v2_prefix_metaclass( 2178 "OBJC_METACLASS_$_"); 2179 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_"); 2180 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 2181 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 2182 type = eSymbolTypeObjCClass; 2183 demangled_is_synthesized = true; 2184 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_metaclass)) { 2185 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 2186 type = eSymbolTypeObjCMetaClass; 2187 demangled_is_synthesized = true; 2188 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 2189 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 2190 type = eSymbolTypeObjCIVar; 2191 demangled_is_synthesized = true; 2192 } 2193 } 2194 } 2195 } else if (symbol_sect_name && 2196 ::strstr(symbol_sect_name, "__gcc_except_tab") == 2197 symbol_sect_name) { 2198 type = eSymbolTypeException; 2199 } else { 2200 type = eSymbolTypeData; 2201 } 2202 } else if (symbol_sect_name && 2203 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) { 2204 type = eSymbolTypeTrampoline; 2205 } 2206 return type; 2207 } 2208 2209 static std::optional<struct nlist_64> 2210 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, 2211 size_t nlist_byte_size) { 2212 struct nlist_64 nlist; 2213 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) 2214 return {}; 2215 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); 2216 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); 2217 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); 2218 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); 2219 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); 2220 return nlist; 2221 } 2222 2223 enum { DebugSymbols = true, NonDebugSymbols = false }; 2224 2225 void ObjectFileMachO::ParseSymtab(Symtab &symtab) { 2226 ModuleSP module_sp(GetModule()); 2227 if (!module_sp) 2228 return; 2229 2230 Log *log = GetLog(LLDBLog::Symbols); 2231 2232 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec(); 2233 const char *file_name = file.GetFilename().AsCString("<Unknown>"); 2234 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name); 2235 LLDB_LOG(log, "Parsing symbol table for {0}", file_name); 2236 Progress progress("Parsing symbol table", file_name); 2237 2238 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; 2239 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0}; 2240 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 2241 llvm::MachO::dysymtab_command dysymtab = m_dysymtab; 2242 SymtabCommandLargeOffsets symtab_load_command; 2243 // The data element of type bool indicates that this entry is thumb 2244 // code. 2245 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts; 2246 2247 // Record the address of every function/data that we add to the symtab. 2248 // We add symbols to the table in the order of most information (nlist 2249 // records) to least (function starts), and avoid duplicating symbols 2250 // via this set. 2251 llvm::DenseSet<addr_t> symbols_added; 2252 2253 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we 2254 // do not add the tombstone or empty keys to the set. 2255 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) { 2256 // Don't add the tombstone or empty keys. 2257 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1) 2258 return; 2259 symbols_added.insert(file_addr); 2260 }; 2261 FunctionStarts function_starts; 2262 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 2263 uint32_t i; 2264 FileSpecList dylib_files; 2265 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_"); 2266 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_"); 2267 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 2268 UUID image_uuid; 2269 2270 for (i = 0; i < m_header.ncmds; ++i) { 2271 const lldb::offset_t cmd_offset = offset; 2272 // Read in the load command and load command size 2273 llvm::MachO::load_command lc; 2274 if (m_data.GetU32(&offset, &lc, 2) == nullptr) 2275 break; 2276 // Watch for the symbol table load command 2277 switch (lc.cmd) { 2278 case LC_SYMTAB: 2279 // struct symtab_command { 2280 // uint32_t cmd; /* LC_SYMTAB */ 2281 // uint32_t cmdsize; /* sizeof(struct symtab_command) */ 2282 // uint32_t symoff; /* symbol table offset */ 2283 // uint32_t nsyms; /* number of symbol table entries */ 2284 // uint32_t stroff; /* string table offset */ 2285 // uint32_t strsize; /* string table size in bytes */ 2286 // }; 2287 symtab_load_command.cmd = lc.cmd; 2288 symtab_load_command.cmdsize = lc.cmdsize; 2289 symtab_load_command.symoff = m_data.GetU32(&offset); 2290 symtab_load_command.nsyms = m_data.GetU32(&offset); 2291 symtab_load_command.stroff = m_data.GetU32(&offset); 2292 symtab_load_command.strsize = m_data.GetU32(&offset); 2293 break; 2294 2295 case LC_DYLD_INFO: 2296 case LC_DYLD_INFO_ONLY: 2297 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) { 2298 dyld_info.cmd = lc.cmd; 2299 dyld_info.cmdsize = lc.cmdsize; 2300 } else { 2301 memset(&dyld_info, 0, sizeof(dyld_info)); 2302 } 2303 break; 2304 2305 case LC_LOAD_DYLIB: 2306 case LC_LOAD_WEAK_DYLIB: 2307 case LC_REEXPORT_DYLIB: 2308 case LC_LOADFVMLIB: 2309 case LC_LOAD_UPWARD_DYLIB: { 2310 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 2311 const char *path = m_data.PeekCStr(name_offset); 2312 if (path) { 2313 FileSpec file_spec(path); 2314 // Strip the path if there is @rpath, @executable, etc so we just use 2315 // the basename 2316 if (path[0] == '@') 2317 file_spec.ClearDirectory(); 2318 2319 if (lc.cmd == LC_REEXPORT_DYLIB) { 2320 m_reexported_dylibs.AppendIfUnique(file_spec); 2321 } 2322 2323 dylib_files.Append(file_spec); 2324 } 2325 } break; 2326 2327 case LC_DYLD_EXPORTS_TRIE: 2328 exports_trie_load_command.cmd = lc.cmd; 2329 exports_trie_load_command.cmdsize = lc.cmdsize; 2330 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) == 2331 nullptr) // fill in offset and size fields 2332 memset(&exports_trie_load_command, 0, 2333 sizeof(exports_trie_load_command)); 2334 break; 2335 case LC_FUNCTION_STARTS: 2336 function_starts_load_command.cmd = lc.cmd; 2337 function_starts_load_command.cmdsize = lc.cmdsize; 2338 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) == 2339 nullptr) // fill in data offset and size fields 2340 memset(&function_starts_load_command, 0, 2341 sizeof(function_starts_load_command)); 2342 break; 2343 2344 case LC_UUID: { 2345 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); 2346 2347 if (uuid_bytes) 2348 image_uuid = UUID(uuid_bytes, 16); 2349 break; 2350 } 2351 2352 default: 2353 break; 2354 } 2355 offset = cmd_offset + lc.cmdsize; 2356 } 2357 2358 if (!symtab_load_command.cmd) 2359 return; 2360 2361 SectionList *section_list = GetSectionList(); 2362 if (section_list == nullptr) 2363 return; 2364 2365 const uint32_t addr_byte_size = m_data.GetAddressByteSize(); 2366 const ByteOrder byte_order = m_data.GetByteOrder(); 2367 bool bit_width_32 = addr_byte_size == 4; 2368 const size_t nlist_byte_size = 2369 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64); 2370 2371 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size); 2372 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size); 2373 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size); 2374 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order, 2375 addr_byte_size); 2376 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size); 2377 2378 const addr_t nlist_data_byte_size = 2379 symtab_load_command.nsyms * nlist_byte_size; 2380 const addr_t strtab_data_byte_size = symtab_load_command.strsize; 2381 addr_t strtab_addr = LLDB_INVALID_ADDRESS; 2382 2383 ProcessSP process_sp(m_process_wp.lock()); 2384 Process *process = process_sp.get(); 2385 2386 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete; 2387 bool is_shared_cache_image = IsSharedCacheBinary(); 2388 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory(); 2389 SectionSP linkedit_section_sp( 2390 section_list->FindSectionByName(GetSegmentNameLINKEDIT())); 2391 2392 if (process && m_header.filetype != llvm::MachO::MH_OBJECT && 2393 !is_local_shared_cache_image) { 2394 Target &target = process->GetTarget(); 2395 2396 memory_module_load_level = target.GetMemoryModuleLoadLevel(); 2397 2398 // Reading mach file from memory in a process or core file... 2399 2400 if (linkedit_section_sp) { 2401 addr_t linkedit_load_addr = 2402 linkedit_section_sp->GetLoadBaseAddress(&target); 2403 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) { 2404 // We might be trying to access the symbol table before the 2405 // __LINKEDIT's load address has been set in the target. We can't 2406 // fail to read the symbol table, so calculate the right address 2407 // manually 2408 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage( 2409 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get()); 2410 } 2411 2412 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset(); 2413 const addr_t symoff_addr = linkedit_load_addr + 2414 symtab_load_command.symoff - 2415 linkedit_file_offset; 2416 strtab_addr = linkedit_load_addr + symtab_load_command.stroff - 2417 linkedit_file_offset; 2418 2419 // Always load dyld - the dynamic linker - from memory if we didn't 2420 // find a binary anywhere else. lldb will not register 2421 // dylib/framework/bundle loads/unloads if we don't have the dyld 2422 // symbols, we force dyld to load from memory despite the user's 2423 // target.memory-module-load-level setting. 2424 if (memory_module_load_level == eMemoryModuleLoadLevelComplete || 2425 m_header.filetype == llvm::MachO::MH_DYLINKER) { 2426 DataBufferSP nlist_data_sp( 2427 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size)); 2428 if (nlist_data_sp) 2429 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize()); 2430 if (dysymtab.nindirectsyms != 0) { 2431 const addr_t indirect_syms_addr = linkedit_load_addr + 2432 dysymtab.indirectsymoff - 2433 linkedit_file_offset; 2434 DataBufferSP indirect_syms_data_sp(ReadMemory( 2435 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4)); 2436 if (indirect_syms_data_sp) 2437 indirect_symbol_index_data.SetData( 2438 indirect_syms_data_sp, 0, indirect_syms_data_sp->GetByteSize()); 2439 // If this binary is outside the shared cache, 2440 // cache the string table. 2441 // Binaries in the shared cache all share a giant string table, 2442 // and we can't share the string tables across multiple 2443 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab 2444 // for every binary in the shared cache - it would be a big perf 2445 // problem. For binaries outside the shared cache, it's faster to 2446 // read the entire strtab at once instead of piece-by-piece as we 2447 // process the nlist records. 2448 if (!is_shared_cache_image) { 2449 DataBufferSP strtab_data_sp( 2450 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size)); 2451 if (strtab_data_sp) { 2452 strtab_data.SetData(strtab_data_sp, 0, 2453 strtab_data_sp->GetByteSize()); 2454 } 2455 } 2456 } 2457 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) { 2458 if (function_starts_load_command.cmd) { 2459 const addr_t func_start_addr = 2460 linkedit_load_addr + function_starts_load_command.dataoff - 2461 linkedit_file_offset; 2462 DataBufferSP func_start_data_sp( 2463 ReadMemory(process_sp, func_start_addr, 2464 function_starts_load_command.datasize)); 2465 if (func_start_data_sp) 2466 function_starts_data.SetData(func_start_data_sp, 0, 2467 func_start_data_sp->GetByteSize()); 2468 } 2469 } 2470 } 2471 } 2472 } else { 2473 if (is_local_shared_cache_image) { 2474 // The load commands in shared cache images are relative to the 2475 // beginning of the shared cache, not the library image. The 2476 // data we get handed when creating the ObjectFileMachO starts 2477 // at the beginning of a specific library and spans to the end 2478 // of the cache to be able to reach the shared LINKEDIT 2479 // segments. We need to convert the load command offsets to be 2480 // relative to the beginning of our specific image. 2481 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset(); 2482 lldb::offset_t linkedit_slide = 2483 linkedit_offset - m_linkedit_original_offset; 2484 symtab_load_command.symoff += linkedit_slide; 2485 symtab_load_command.stroff += linkedit_slide; 2486 dyld_info.export_off += linkedit_slide; 2487 dysymtab.indirectsymoff += linkedit_slide; 2488 function_starts_load_command.dataoff += linkedit_slide; 2489 exports_trie_load_command.dataoff += linkedit_slide; 2490 } 2491 2492 nlist_data.SetData(m_data, symtab_load_command.symoff, 2493 nlist_data_byte_size); 2494 strtab_data.SetData(m_data, symtab_load_command.stroff, 2495 strtab_data_byte_size); 2496 2497 // We shouldn't have exports data from both the LC_DYLD_INFO command 2498 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary: 2499 lldbassert(!((dyld_info.export_size > 0) 2500 && (exports_trie_load_command.datasize > 0))); 2501 if (dyld_info.export_size > 0) { 2502 dyld_trie_data.SetData(m_data, dyld_info.export_off, 2503 dyld_info.export_size); 2504 } else if (exports_trie_load_command.datasize > 0) { 2505 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff, 2506 exports_trie_load_command.datasize); 2507 } 2508 2509 if (dysymtab.nindirectsyms != 0) { 2510 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff, 2511 dysymtab.nindirectsyms * 4); 2512 } 2513 if (function_starts_load_command.cmd) { 2514 function_starts_data.SetData(m_data, function_starts_load_command.dataoff, 2515 function_starts_load_command.datasize); 2516 } 2517 } 2518 2519 const bool have_strtab_data = strtab_data.GetByteSize() > 0; 2520 2521 ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); 2522 ConstString g_segment_name_DATA = GetSegmentNameDATA(); 2523 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY(); 2524 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST(); 2525 ConstString g_segment_name_OBJC = GetSegmentNameOBJC(); 2526 ConstString g_section_name_eh_frame = GetSectionNameEHFrame(); 2527 SectionSP text_section_sp( 2528 section_list->FindSectionByName(g_segment_name_TEXT)); 2529 SectionSP data_section_sp( 2530 section_list->FindSectionByName(g_segment_name_DATA)); 2531 SectionSP data_dirty_section_sp( 2532 section_list->FindSectionByName(g_segment_name_DATA_DIRTY)); 2533 SectionSP data_const_section_sp( 2534 section_list->FindSectionByName(g_segment_name_DATA_CONST)); 2535 SectionSP objc_section_sp( 2536 section_list->FindSectionByName(g_segment_name_OBJC)); 2537 SectionSP eh_frame_section_sp; 2538 if (text_section_sp.get()) 2539 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName( 2540 g_section_name_eh_frame); 2541 else 2542 eh_frame_section_sp = 2543 section_list->FindSectionByName(g_section_name_eh_frame); 2544 2545 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM); 2546 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions(); 2547 2548 // lldb works best if it knows the start address of all functions in a 2549 // module. Linker symbols or debug info are normally the best source of 2550 // information for start addr / size but they may be stripped in a released 2551 // binary. Two additional sources of information exist in Mach-O binaries: 2552 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each 2553 // function's start address in the 2554 // binary, relative to the text section. 2555 // eh_frame - the eh_frame FDEs have the start addr & size of 2556 // each function 2557 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on 2558 // all modern binaries. 2559 // Binaries built to run on older releases may need to use eh_frame 2560 // information. 2561 2562 if (text_section_sp && function_starts_data.GetByteSize()) { 2563 FunctionStarts::Entry function_start_entry; 2564 function_start_entry.data = false; 2565 lldb::offset_t function_start_offset = 0; 2566 function_start_entry.addr = text_section_sp->GetFileAddress(); 2567 uint64_t delta; 2568 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) > 2569 0) { 2570 // Now append the current entry 2571 function_start_entry.addr += delta; 2572 if (is_arm) { 2573 if (function_start_entry.addr & 1) { 2574 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2575 function_start_entry.data = true; 2576 } else if (always_thumb) { 2577 function_start_entry.data = true; 2578 } 2579 } 2580 function_starts.Append(function_start_entry); 2581 } 2582 } else { 2583 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the 2584 // load command claiming an eh_frame but it doesn't actually have the 2585 // eh_frame content. And if we have a dSYM, we don't need to do any of 2586 // this fill-in-the-missing-symbols works anyway - the debug info should 2587 // give us all the functions in the module. 2588 if (text_section_sp.get() && eh_frame_section_sp.get() && 2589 m_type != eTypeDebugInfo) { 2590 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp, 2591 DWARFCallFrameInfo::EH); 2592 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions; 2593 eh_frame.GetFunctionAddressAndSizeVector(functions); 2594 addr_t text_base_addr = text_section_sp->GetFileAddress(); 2595 size_t count = functions.GetSize(); 2596 for (size_t i = 0; i < count; ++i) { 2597 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func = 2598 functions.GetEntryAtIndex(i); 2599 if (func) { 2600 FunctionStarts::Entry function_start_entry; 2601 function_start_entry.addr = func->base - text_base_addr; 2602 if (is_arm) { 2603 if (function_start_entry.addr & 1) { 2604 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2605 function_start_entry.data = true; 2606 } else if (always_thumb) { 2607 function_start_entry.data = true; 2608 } 2609 } 2610 function_starts.Append(function_start_entry); 2611 } 2612 } 2613 } 2614 } 2615 2616 const size_t function_starts_count = function_starts.GetSize(); 2617 2618 // For user process binaries (executables, dylibs, frameworks, bundles), if 2619 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're 2620 // going to assume the binary has been stripped. Don't allow assembly 2621 // language instruction emulation because we don't know proper function 2622 // start boundaries. 2623 // 2624 // For all other types of binaries (kernels, stand-alone bare board 2625 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame 2626 // sections - we should not make any assumptions about them based on that. 2627 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) { 2628 m_allow_assembly_emulation_unwind_plans = false; 2629 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind)); 2630 2631 if (unwind_or_symbol_log) 2632 module_sp->LogMessage( 2633 unwind_or_symbol_log, 2634 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds"); 2635 } 2636 2637 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get() 2638 ? eh_frame_section_sp->GetID() 2639 : static_cast<user_id_t>(NO_SECT); 2640 2641 uint32_t N_SO_index = UINT32_MAX; 2642 2643 MachSymtabSectionInfo section_info(section_list); 2644 std::vector<uint32_t> N_FUN_indexes; 2645 std::vector<uint32_t> N_NSYM_indexes; 2646 std::vector<uint32_t> N_INCL_indexes; 2647 std::vector<uint32_t> N_BRAC_indexes; 2648 std::vector<uint32_t> N_COMM_indexes; 2649 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap; 2650 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap; 2651 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap; 2652 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx; 2653 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx; 2654 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx; 2655 // Any symbols that get merged into another will get an entry in this map 2656 // so we know 2657 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx; 2658 uint32_t nlist_idx = 0; 2659 Symbol *symbol_ptr = nullptr; 2660 2661 uint32_t sym_idx = 0; 2662 Symbol *sym = nullptr; 2663 size_t num_syms = 0; 2664 std::string memory_symbol_name; 2665 uint32_t unmapped_local_symbols_found = 0; 2666 2667 std::vector<TrieEntryWithOffset> reexport_trie_entries; 2668 std::vector<TrieEntryWithOffset> external_sym_trie_entries; 2669 std::set<lldb::addr_t> resolver_addresses; 2670 2671 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize(); 2672 if (dyld_trie_data_size > 0) { 2673 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size); 2674 SectionSP text_segment_sp = 2675 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 2676 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS; 2677 if (text_segment_sp) 2678 text_segment_file_addr = text_segment_sp->GetFileAddress(); 2679 std::vector<llvm::StringRef> nameSlices; 2680 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr, 2681 nameSlices, resolver_addresses, reexport_trie_entries, 2682 external_sym_trie_entries); 2683 } 2684 2685 typedef std::set<ConstString> IndirectSymbols; 2686 IndirectSymbols indirect_symbol_names; 2687 2688 #if TARGET_OS_IPHONE 2689 2690 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been 2691 // optimized by moving LOCAL symbols out of the memory mapped portion of 2692 // the DSC. The symbol information has all been retained, but it isn't 2693 // available in the normal nlist data. However, there *are* duplicate 2694 // entries of *some* 2695 // LOCAL symbols in the normal nlist data. To handle this situation 2696 // correctly, we must first attempt 2697 // to parse any DSC unmapped symbol information. If we find any, we set a 2698 // flag that tells the normal nlist parser to ignore all LOCAL symbols. 2699 2700 if (IsSharedCacheBinary()) { 2701 // Before we can start mapping the DSC, we need to make certain the 2702 // target process is actually using the cache we can find. 2703 2704 // Next we need to determine the correct path for the dyld shared cache. 2705 2706 ArchSpec header_arch = GetArchitecture(); 2707 2708 UUID dsc_uuid; 2709 UUID process_shared_cache_uuid; 2710 addr_t process_shared_cache_base_addr; 2711 2712 if (process) { 2713 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr, 2714 process_shared_cache_uuid); 2715 } 2716 2717 __block bool found_image = false; 2718 __block void *nlist_buffer = nullptr; 2719 __block unsigned nlist_count = 0; 2720 __block char *string_table = nullptr; 2721 __block vm_offset_t vm_nlist_memory = 0; 2722 __block mach_msg_type_number_t vm_nlist_bytes_read = 0; 2723 __block vm_offset_t vm_string_memory = 0; 2724 __block mach_msg_type_number_t vm_string_bytes_read = 0; 2725 2726 auto _ = llvm::make_scope_exit(^{ 2727 if (vm_nlist_memory) 2728 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read); 2729 if (vm_string_memory) 2730 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read); 2731 }); 2732 2733 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 2734 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 2735 UndefinedNameToDescMap undefined_name_to_desc; 2736 SymbolIndexToName reexport_shlib_needs_fixup; 2737 2738 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) { 2739 uuid_t cache_uuid; 2740 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid); 2741 if (found_image) 2742 return; 2743 2744 if (process_shared_cache_uuid.IsValid() && 2745 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16)) 2746 return; 2747 2748 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) { 2749 uuid_t dsc_image_uuid; 2750 if (found_image) 2751 return; 2752 2753 dyld_image_copy_uuid(image, &dsc_image_uuid); 2754 if (image_uuid != UUID::fromData(dsc_image_uuid, 16)) 2755 return; 2756 2757 found_image = true; 2758 2759 // Compute the size of the string table. We need to ask dyld for a 2760 // new SPI to avoid this step. 2761 dyld_image_local_nlist_content_4Symbolication( 2762 image, ^(const void *nlistStart, uint64_t nlistCount, 2763 const char *stringTable) { 2764 if (!nlistStart || !nlistCount) 2765 return; 2766 2767 // The buffers passed here are valid only inside the block. 2768 // Use vm_read to make a cheap copy of them available for our 2769 // processing later. 2770 kern_return_t ret = 2771 vm_read(mach_task_self(), (vm_address_t)nlistStart, 2772 nlist_byte_size * nlistCount, &vm_nlist_memory, 2773 &vm_nlist_bytes_read); 2774 if (ret != KERN_SUCCESS) 2775 return; 2776 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount); 2777 2778 // We don't know the size of the string table. It's cheaper 2779 // to map the whole VM region than to determine the size by 2780 // parsing all the nlist entries. 2781 vm_address_t string_address = (vm_address_t)stringTable; 2782 vm_size_t region_size; 2783 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; 2784 vm_region_basic_info_data_t info; 2785 memory_object_name_t object; 2786 ret = vm_region_64(mach_task_self(), &string_address, 2787 ®ion_size, VM_REGION_BASIC_INFO_64, 2788 (vm_region_info_t)&info, &info_count, &object); 2789 if (ret != KERN_SUCCESS) 2790 return; 2791 2792 ret = vm_read(mach_task_self(), (vm_address_t)stringTable, 2793 region_size - 2794 ((vm_address_t)stringTable - string_address), 2795 &vm_string_memory, &vm_string_bytes_read); 2796 if (ret != KERN_SUCCESS) 2797 return; 2798 2799 nlist_buffer = (void *)vm_nlist_memory; 2800 string_table = (char *)vm_string_memory; 2801 nlist_count = nlistCount; 2802 }); 2803 }); 2804 }); 2805 if (nlist_buffer) { 2806 DataExtractor dsc_local_symbols_data(nlist_buffer, 2807 nlist_count * nlist_byte_size, 2808 byte_order, addr_byte_size); 2809 unmapped_local_symbols_found = nlist_count; 2810 2811 // The normal nlist code cannot correctly size the Symbols 2812 // array, we need to allocate it here. 2813 sym = symtab.Resize( 2814 symtab_load_command.nsyms + m_dysymtab.nindirectsyms + 2815 unmapped_local_symbols_found - m_dysymtab.nlocalsym); 2816 num_syms = symtab.GetNumSymbols(); 2817 2818 lldb::offset_t nlist_data_offset = 0; 2819 2820 for (uint32_t nlist_index = 0; 2821 nlist_index < nlist_count; 2822 nlist_index++) { 2823 ///////////////////////////// 2824 { 2825 std::optional<struct nlist_64> nlist_maybe = 2826 ParseNList(dsc_local_symbols_data, nlist_data_offset, 2827 nlist_byte_size); 2828 if (!nlist_maybe) 2829 break; 2830 struct nlist_64 nlist = *nlist_maybe; 2831 2832 SymbolType type = eSymbolTypeInvalid; 2833 const char *symbol_name = string_table + nlist.n_strx; 2834 2835 if (symbol_name == NULL) { 2836 // No symbol should be NULL, even the symbols with no 2837 // string values should have an offset zero which 2838 // points to an empty C-string 2839 Debugger::ReportError(llvm::formatv( 2840 "DSC unmapped local symbol[{0}] has invalid " 2841 "string table offset {1:x} in {2}, ignoring symbol", 2842 nlist_index, nlist.n_strx, 2843 module_sp->GetFileSpec().GetPath()); 2844 continue; 2845 } 2846 if (symbol_name[0] == '\0') 2847 symbol_name = NULL; 2848 2849 const char *symbol_name_non_abi_mangled = NULL; 2850 2851 SectionSP symbol_section; 2852 uint32_t symbol_byte_size = 0; 2853 bool add_nlist = true; 2854 bool is_debug = ((nlist.n_type & N_STAB) != 0); 2855 bool demangled_is_synthesized = false; 2856 bool is_gsym = false; 2857 bool set_value = true; 2858 2859 assert(sym_idx < num_syms); 2860 2861 sym[sym_idx].SetDebug(is_debug); 2862 2863 if (is_debug) { 2864 switch (nlist.n_type) { 2865 case N_GSYM: 2866 // global symbol: name,,NO_SECT,type,0 2867 // Sometimes the N_GSYM value contains the address. 2868 2869 // FIXME: In the .o files, we have a GSYM and a debug 2870 // symbol for all the ObjC data. They 2871 // have the same address, but we want to ensure that 2872 // we always find only the real symbol, 'cause we 2873 // don't currently correctly attribute the 2874 // GSYM one to the ObjCClass/Ivar/MetaClass 2875 // symbol type. This is a temporary hack to make 2876 // sure the ObjectiveC symbols get treated correctly. 2877 // To do this right, we should coalesce all the GSYM 2878 // & global symbols that have the same address. 2879 2880 is_gsym = true; 2881 sym[sym_idx].SetExternal(true); 2882 2883 if (symbol_name && symbol_name[0] == '_' && 2884 symbol_name[1] == 'O') { 2885 llvm::StringRef symbol_name_ref(symbol_name); 2886 if (symbol_name_ref.starts_with( 2887 g_objc_v2_prefix_class)) { 2888 symbol_name_non_abi_mangled = symbol_name + 1; 2889 symbol_name = 2890 symbol_name + g_objc_v2_prefix_class.size(); 2891 type = eSymbolTypeObjCClass; 2892 demangled_is_synthesized = true; 2893 2894 } else if (symbol_name_ref.starts_with( 2895 g_objc_v2_prefix_metaclass)) { 2896 symbol_name_non_abi_mangled = symbol_name + 1; 2897 symbol_name = 2898 symbol_name + g_objc_v2_prefix_metaclass.size(); 2899 type = eSymbolTypeObjCMetaClass; 2900 demangled_is_synthesized = true; 2901 } else if (symbol_name_ref.starts_with( 2902 g_objc_v2_prefix_ivar)) { 2903 symbol_name_non_abi_mangled = symbol_name + 1; 2904 symbol_name = 2905 symbol_name + g_objc_v2_prefix_ivar.size(); 2906 type = eSymbolTypeObjCIVar; 2907 demangled_is_synthesized = true; 2908 } 2909 } else { 2910 if (nlist.n_value != 0) 2911 symbol_section = section_info.GetSection( 2912 nlist.n_sect, nlist.n_value); 2913 type = eSymbolTypeData; 2914 } 2915 break; 2916 2917 case N_FNAME: 2918 // procedure name (f77 kludge): name,,NO_SECT,0,0 2919 type = eSymbolTypeCompiler; 2920 break; 2921 2922 case N_FUN: 2923 // procedure: name,,n_sect,linenumber,address 2924 if (symbol_name) { 2925 type = eSymbolTypeCode; 2926 symbol_section = section_info.GetSection( 2927 nlist.n_sect, nlist.n_value); 2928 2929 N_FUN_addr_to_sym_idx.insert( 2930 std::make_pair(nlist.n_value, sym_idx)); 2931 // We use the current number of symbols in the 2932 // symbol table in lieu of using nlist_idx in case 2933 // we ever start trimming entries out 2934 N_FUN_indexes.push_back(sym_idx); 2935 } else { 2936 type = eSymbolTypeCompiler; 2937 2938 if (!N_FUN_indexes.empty()) { 2939 // Copy the size of the function into the 2940 // original 2941 // STAB entry so we don't have 2942 // to hunt for it later 2943 symtab.SymbolAtIndex(N_FUN_indexes.back()) 2944 ->SetByteSize(nlist.n_value); 2945 N_FUN_indexes.pop_back(); 2946 // We don't really need the end function STAB as 2947 // it contains the size which we already placed 2948 // with the original symbol, so don't add it if 2949 // we want a minimal symbol table 2950 add_nlist = false; 2951 } 2952 } 2953 break; 2954 2955 case N_STSYM: 2956 // static symbol: name,,n_sect,type,address 2957 N_STSYM_addr_to_sym_idx.insert( 2958 std::make_pair(nlist.n_value, sym_idx)); 2959 symbol_section = section_info.GetSection(nlist.n_sect, 2960 nlist.n_value); 2961 if (symbol_name && symbol_name[0]) { 2962 type = ObjectFile::GetSymbolTypeFromName( 2963 symbol_name + 1, eSymbolTypeData); 2964 } 2965 break; 2966 2967 case N_LCSYM: 2968 // .lcomm symbol: name,,n_sect,type,address 2969 symbol_section = section_info.GetSection(nlist.n_sect, 2970 nlist.n_value); 2971 type = eSymbolTypeCommonBlock; 2972 break; 2973 2974 case N_BNSYM: 2975 // We use the current number of symbols in the symbol 2976 // table in lieu of using nlist_idx in case we ever 2977 // start trimming entries out Skip these if we want 2978 // minimal symbol tables 2979 add_nlist = false; 2980 break; 2981 2982 case N_ENSYM: 2983 // Set the size of the N_BNSYM to the terminating 2984 // index of this N_ENSYM so that we can always skip 2985 // the entire symbol if we need to navigate more 2986 // quickly at the source level when parsing STABS 2987 // Skip these if we want minimal symbol tables 2988 add_nlist = false; 2989 break; 2990 2991 case N_OPT: 2992 // emitted with gcc2_compiled and in gcc source 2993 type = eSymbolTypeCompiler; 2994 break; 2995 2996 case N_RSYM: 2997 // register sym: name,,NO_SECT,type,register 2998 type = eSymbolTypeVariable; 2999 break; 3000 3001 case N_SLINE: 3002 // src line: 0,,n_sect,linenumber,address 3003 symbol_section = section_info.GetSection(nlist.n_sect, 3004 nlist.n_value); 3005 type = eSymbolTypeLineEntry; 3006 break; 3007 3008 case N_SSYM: 3009 // structure elt: name,,NO_SECT,type,struct_offset 3010 type = eSymbolTypeVariableType; 3011 break; 3012 3013 case N_SO: 3014 // source file name 3015 type = eSymbolTypeSourceFile; 3016 if (symbol_name == NULL) { 3017 add_nlist = false; 3018 if (N_SO_index != UINT32_MAX) { 3019 // Set the size of the N_SO to the terminating 3020 // index of this N_SO so that we can always skip 3021 // the entire N_SO if we need to navigate more 3022 // quickly at the source level when parsing STABS 3023 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3024 symbol_ptr->SetByteSize(sym_idx); 3025 symbol_ptr->SetSizeIsSibling(true); 3026 } 3027 N_NSYM_indexes.clear(); 3028 N_INCL_indexes.clear(); 3029 N_BRAC_indexes.clear(); 3030 N_COMM_indexes.clear(); 3031 N_FUN_indexes.clear(); 3032 N_SO_index = UINT32_MAX; 3033 } else { 3034 // We use the current number of symbols in the 3035 // symbol table in lieu of using nlist_idx in case 3036 // we ever start trimming entries out 3037 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3038 if (N_SO_has_full_path) { 3039 if ((N_SO_index == sym_idx - 1) && 3040 ((sym_idx - 1) < num_syms)) { 3041 // We have two consecutive N_SO entries where 3042 // the first contains a directory and the 3043 // second contains a full path. 3044 sym[sym_idx - 1].GetMangled().SetValue( 3045 ConstString(symbol_name)); 3046 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3047 add_nlist = false; 3048 } else { 3049 // This is the first entry in a N_SO that 3050 // contains a directory or 3051 // a full path to the source file 3052 N_SO_index = sym_idx; 3053 } 3054 } else if ((N_SO_index == sym_idx - 1) && 3055 ((sym_idx - 1) < num_syms)) { 3056 // This is usually the second N_SO entry that 3057 // contains just the filename, so here we combine 3058 // it with the first one if we are minimizing the 3059 // symbol table 3060 const char *so_path = sym[sym_idx - 1] 3061 .GetMangled() 3062 .GetDemangledName() 3063 .AsCString(); 3064 if (so_path && so_path[0]) { 3065 std::string full_so_path(so_path); 3066 const size_t double_slash_pos = 3067 full_so_path.find("//"); 3068 if (double_slash_pos != std::string::npos) { 3069 // The linker has been generating bad N_SO 3070 // entries with doubled up paths 3071 // in the format "%s%s" where the first 3072 // string in the DW_AT_comp_dir, and the 3073 // second is the directory for the source 3074 // file so you end up with a path that looks 3075 // like "/tmp/src//tmp/src/" 3076 FileSpec so_dir(so_path); 3077 if (!FileSystem::Instance().Exists(so_dir)) { 3078 so_dir.SetFile( 3079 &full_so_path[double_slash_pos + 1], 3080 FileSpec::Style::native); 3081 if (FileSystem::Instance().Exists(so_dir)) { 3082 // Trim off the incorrect path 3083 full_so_path.erase(0, double_slash_pos + 1); 3084 } 3085 } 3086 } 3087 if (*full_so_path.rbegin() != '/') 3088 full_so_path += '/'; 3089 full_so_path += symbol_name; 3090 sym[sym_idx - 1].GetMangled().SetValue( 3091 ConstString(full_so_path.c_str())); 3092 add_nlist = false; 3093 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3094 } 3095 } else { 3096 // This could be a relative path to a N_SO 3097 N_SO_index = sym_idx; 3098 } 3099 } 3100 break; 3101 3102 case N_OSO: 3103 // object file name: name,,0,0,st_mtime 3104 type = eSymbolTypeObjectFile; 3105 break; 3106 3107 case N_LSYM: 3108 // local sym: name,,NO_SECT,type,offset 3109 type = eSymbolTypeLocal; 3110 break; 3111 3112 // INCL scopes 3113 case N_BINCL: 3114 // include file beginning: name,,NO_SECT,0,sum We use 3115 // the current number of symbols in the symbol table 3116 // in lieu of using nlist_idx in case we ever start 3117 // trimming entries out 3118 N_INCL_indexes.push_back(sym_idx); 3119 type = eSymbolTypeScopeBegin; 3120 break; 3121 3122 case N_EINCL: 3123 // include file end: name,,NO_SECT,0,0 3124 // Set the size of the N_BINCL to the terminating 3125 // index of this N_EINCL so that we can always skip 3126 // the entire symbol if we need to navigate more 3127 // quickly at the source level when parsing STABS 3128 if (!N_INCL_indexes.empty()) { 3129 symbol_ptr = 3130 symtab.SymbolAtIndex(N_INCL_indexes.back()); 3131 symbol_ptr->SetByteSize(sym_idx + 1); 3132 symbol_ptr->SetSizeIsSibling(true); 3133 N_INCL_indexes.pop_back(); 3134 } 3135 type = eSymbolTypeScopeEnd; 3136 break; 3137 3138 case N_SOL: 3139 // #included file name: name,,n_sect,0,address 3140 type = eSymbolTypeHeaderFile; 3141 3142 // We currently don't use the header files on darwin 3143 add_nlist = false; 3144 break; 3145 3146 case N_PARAMS: 3147 // compiler parameters: name,,NO_SECT,0,0 3148 type = eSymbolTypeCompiler; 3149 break; 3150 3151 case N_VERSION: 3152 // compiler version: name,,NO_SECT,0,0 3153 type = eSymbolTypeCompiler; 3154 break; 3155 3156 case N_OLEVEL: 3157 // compiler -O level: name,,NO_SECT,0,0 3158 type = eSymbolTypeCompiler; 3159 break; 3160 3161 case N_PSYM: 3162 // parameter: name,,NO_SECT,type,offset 3163 type = eSymbolTypeVariable; 3164 break; 3165 3166 case N_ENTRY: 3167 // alternate entry: name,,n_sect,linenumber,address 3168 symbol_section = section_info.GetSection(nlist.n_sect, 3169 nlist.n_value); 3170 type = eSymbolTypeLineEntry; 3171 break; 3172 3173 // Left and Right Braces 3174 case N_LBRAC: 3175 // left bracket: 0,,NO_SECT,nesting level,address We 3176 // use the current number of symbols in the symbol 3177 // table in lieu of using nlist_idx in case we ever 3178 // start trimming entries out 3179 symbol_section = section_info.GetSection(nlist.n_sect, 3180 nlist.n_value); 3181 N_BRAC_indexes.push_back(sym_idx); 3182 type = eSymbolTypeScopeBegin; 3183 break; 3184 3185 case N_RBRAC: 3186 // right bracket: 0,,NO_SECT,nesting level,address 3187 // Set the size of the N_LBRAC to the terminating 3188 // index of this N_RBRAC so that we can always skip 3189 // the entire symbol if we need to navigate more 3190 // quickly at the source level when parsing STABS 3191 symbol_section = section_info.GetSection(nlist.n_sect, 3192 nlist.n_value); 3193 if (!N_BRAC_indexes.empty()) { 3194 symbol_ptr = 3195 symtab.SymbolAtIndex(N_BRAC_indexes.back()); 3196 symbol_ptr->SetByteSize(sym_idx + 1); 3197 symbol_ptr->SetSizeIsSibling(true); 3198 N_BRAC_indexes.pop_back(); 3199 } 3200 type = eSymbolTypeScopeEnd; 3201 break; 3202 3203 case N_EXCL: 3204 // deleted include file: name,,NO_SECT,0,sum 3205 type = eSymbolTypeHeaderFile; 3206 break; 3207 3208 // COMM scopes 3209 case N_BCOMM: 3210 // begin common: name,,NO_SECT,0,0 3211 // We use the current number of symbols in the symbol 3212 // table in lieu of using nlist_idx in case we ever 3213 // start trimming entries out 3214 type = eSymbolTypeScopeBegin; 3215 N_COMM_indexes.push_back(sym_idx); 3216 break; 3217 3218 case N_ECOML: 3219 // end common (local name): 0,,n_sect,0,address 3220 symbol_section = section_info.GetSection(nlist.n_sect, 3221 nlist.n_value); 3222 // Fall through 3223 3224 case N_ECOMM: 3225 // end common: name,,n_sect,0,0 3226 // Set the size of the N_BCOMM to the terminating 3227 // index of this N_ECOMM/N_ECOML so that we can 3228 // always skip the entire symbol if we need to 3229 // navigate more quickly at the source level when 3230 // parsing STABS 3231 if (!N_COMM_indexes.empty()) { 3232 symbol_ptr = 3233 symtab.SymbolAtIndex(N_COMM_indexes.back()); 3234 symbol_ptr->SetByteSize(sym_idx + 1); 3235 symbol_ptr->SetSizeIsSibling(true); 3236 N_COMM_indexes.pop_back(); 3237 } 3238 type = eSymbolTypeScopeEnd; 3239 break; 3240 3241 case N_LENG: 3242 // second stab entry with length information 3243 type = eSymbolTypeAdditional; 3244 break; 3245 3246 default: 3247 break; 3248 } 3249 } else { 3250 // uint8_t n_pext = N_PEXT & nlist.n_type; 3251 uint8_t n_type = N_TYPE & nlist.n_type; 3252 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 3253 3254 switch (n_type) { 3255 case N_INDR: { 3256 const char *reexport_name_cstr = 3257 strtab_data.PeekCStr(nlist.n_value); 3258 if (reexport_name_cstr && reexport_name_cstr[0]) { 3259 type = eSymbolTypeReExported; 3260 ConstString reexport_name( 3261 reexport_name_cstr + 3262 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 3263 sym[sym_idx].SetReExportedSymbolName(reexport_name); 3264 set_value = false; 3265 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 3266 indirect_symbol_names.insert(ConstString( 3267 symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 3268 } else 3269 type = eSymbolTypeUndefined; 3270 } break; 3271 3272 case N_UNDF: 3273 if (symbol_name && symbol_name[0]) { 3274 ConstString undefined_name( 3275 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)); 3276 undefined_name_to_desc[undefined_name] = nlist.n_desc; 3277 } 3278 // Fall through 3279 case N_PBUD: 3280 type = eSymbolTypeUndefined; 3281 break; 3282 3283 case N_ABS: 3284 type = eSymbolTypeAbsolute; 3285 break; 3286 3287 case N_SECT: { 3288 symbol_section = section_info.GetSection(nlist.n_sect, 3289 nlist.n_value); 3290 3291 if (symbol_section == NULL) { 3292 // TODO: warn about this? 3293 add_nlist = false; 3294 break; 3295 } 3296 3297 if (TEXT_eh_frame_sectID == nlist.n_sect) { 3298 type = eSymbolTypeException; 3299 } else { 3300 uint32_t section_type = 3301 symbol_section->Get() & SECTION_TYPE; 3302 3303 switch (section_type) { 3304 case S_CSTRING_LITERALS: 3305 type = eSymbolTypeData; 3306 break; // section with only literal C strings 3307 case S_4BYTE_LITERALS: 3308 type = eSymbolTypeData; 3309 break; // section with only 4 byte literals 3310 case S_8BYTE_LITERALS: 3311 type = eSymbolTypeData; 3312 break; // section with only 8 byte literals 3313 case S_LITERAL_POINTERS: 3314 type = eSymbolTypeTrampoline; 3315 break; // section with only pointers to literals 3316 case S_NON_LAZY_SYMBOL_POINTERS: 3317 type = eSymbolTypeTrampoline; 3318 break; // section with only non-lazy symbol 3319 // pointers 3320 case S_LAZY_SYMBOL_POINTERS: 3321 type = eSymbolTypeTrampoline; 3322 break; // section with only lazy symbol pointers 3323 case S_SYMBOL_STUBS: 3324 type = eSymbolTypeTrampoline; 3325 break; // section with only symbol stubs, byte 3326 // size of stub in the reserved2 field 3327 case S_MOD_INIT_FUNC_POINTERS: 3328 type = eSymbolTypeCode; 3329 break; // section with only function pointers for 3330 // initialization 3331 case S_MOD_TERM_FUNC_POINTERS: 3332 type = eSymbolTypeCode; 3333 break; // section with only function pointers for 3334 // termination 3335 case S_INTERPOSING: 3336 type = eSymbolTypeTrampoline; 3337 break; // section with only pairs of function 3338 // pointers for interposing 3339 case S_16BYTE_LITERALS: 3340 type = eSymbolTypeData; 3341 break; // section with only 16 byte literals 3342 case S_DTRACE_DOF: 3343 type = eSymbolTypeInstrumentation; 3344 break; 3345 case S_LAZY_DYLIB_SYMBOL_POINTERS: 3346 type = eSymbolTypeTrampoline; 3347 break; 3348 default: 3349 switch (symbol_section->GetType()) { 3350 case lldb::eSectionTypeCode: 3351 type = eSymbolTypeCode; 3352 break; 3353 case eSectionTypeData: 3354 case eSectionTypeDataCString: // Inlined C string 3355 // data 3356 case eSectionTypeDataCStringPointers: // Pointers 3357 // to C 3358 // string 3359 // data 3360 case eSectionTypeDataSymbolAddress: // Address of 3361 // a symbol in 3362 // the symbol 3363 // table 3364 case eSectionTypeData4: 3365 case eSectionTypeData8: 3366 case eSectionTypeData16: 3367 type = eSymbolTypeData; 3368 break; 3369 default: 3370 break; 3371 } 3372 break; 3373 } 3374 3375 if (type == eSymbolTypeInvalid) { 3376 const char *symbol_sect_name = 3377 symbol_section->GetName().AsCString(); 3378 if (symbol_section->IsDescendant( 3379 text_section_sp.get())) { 3380 if (symbol_section->IsClear( 3381 S_ATTR_PURE_INSTRUCTIONS | 3382 S_ATTR_SELF_MODIFYING_CODE | 3383 S_ATTR_SOME_INSTRUCTIONS)) 3384 type = eSymbolTypeData; 3385 else 3386 type = eSymbolTypeCode; 3387 } else if (symbol_section->IsDescendant( 3388 data_section_sp.get()) || 3389 symbol_section->IsDescendant( 3390 data_dirty_section_sp.get()) || 3391 symbol_section->IsDescendant( 3392 data_const_section_sp.get())) { 3393 if (symbol_sect_name && 3394 ::strstr(symbol_sect_name, "__objc") == 3395 symbol_sect_name) { 3396 type = eSymbolTypeRuntime; 3397 3398 if (symbol_name) { 3399 llvm::StringRef symbol_name_ref(symbol_name); 3400 if (symbol_name_ref.starts_with("_OBJC_")) { 3401 llvm::StringRef 3402 g_objc_v2_prefix_class( 3403 "_OBJC_CLASS_$_"); 3404 llvm::StringRef 3405 g_objc_v2_prefix_metaclass( 3406 "_OBJC_METACLASS_$_"); 3407 llvm::StringRef 3408 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 3409 if (symbol_name_ref.starts_with( 3410 g_objc_v2_prefix_class)) { 3411 symbol_name_non_abi_mangled = 3412 symbol_name + 1; 3413 symbol_name = 3414 symbol_name + 3415 g_objc_v2_prefix_class.size(); 3416 type = eSymbolTypeObjCClass; 3417 demangled_is_synthesized = true; 3418 } else if ( 3419 symbol_name_ref.starts_with( 3420 g_objc_v2_prefix_metaclass)) { 3421 symbol_name_non_abi_mangled = 3422 symbol_name + 1; 3423 symbol_name = 3424 symbol_name + 3425 g_objc_v2_prefix_metaclass.size(); 3426 type = eSymbolTypeObjCMetaClass; 3427 demangled_is_synthesized = true; 3428 } else if (symbol_name_ref.starts_with( 3429 g_objc_v2_prefix_ivar)) { 3430 symbol_name_non_abi_mangled = 3431 symbol_name + 1; 3432 symbol_name = 3433 symbol_name + 3434 g_objc_v2_prefix_ivar.size(); 3435 type = eSymbolTypeObjCIVar; 3436 demangled_is_synthesized = true; 3437 } 3438 } 3439 } 3440 } else if (symbol_sect_name && 3441 ::strstr(symbol_sect_name, 3442 "__gcc_except_tab") == 3443 symbol_sect_name) { 3444 type = eSymbolTypeException; 3445 } else { 3446 type = eSymbolTypeData; 3447 } 3448 } else if (symbol_sect_name && 3449 ::strstr(symbol_sect_name, "__IMPORT") == 3450 symbol_sect_name) { 3451 type = eSymbolTypeTrampoline; 3452 } else if (symbol_section->IsDescendant( 3453 objc_section_sp.get())) { 3454 type = eSymbolTypeRuntime; 3455 if (symbol_name && symbol_name[0] == '.') { 3456 llvm::StringRef symbol_name_ref(symbol_name); 3457 llvm::StringRef 3458 g_objc_v1_prefix_class(".objc_class_name_"); 3459 if (symbol_name_ref.starts_with( 3460 g_objc_v1_prefix_class)) { 3461 symbol_name_non_abi_mangled = symbol_name; 3462 symbol_name = symbol_name + 3463 g_objc_v1_prefix_class.size(); 3464 type = eSymbolTypeObjCClass; 3465 demangled_is_synthesized = true; 3466 } 3467 } 3468 } 3469 } 3470 } 3471 } break; 3472 } 3473 } 3474 3475 if (add_nlist) { 3476 uint64_t symbol_value = nlist.n_value; 3477 if (symbol_name_non_abi_mangled) { 3478 sym[sym_idx].GetMangled().SetMangledName( 3479 ConstString(symbol_name_non_abi_mangled)); 3480 sym[sym_idx].GetMangled().SetDemangledName( 3481 ConstString(symbol_name)); 3482 } else { 3483 if (symbol_name && symbol_name[0] == '_') { 3484 symbol_name++; // Skip the leading underscore 3485 } 3486 3487 if (symbol_name) { 3488 ConstString const_symbol_name(symbol_name); 3489 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 3490 if (is_gsym && is_debug) { 3491 const char *gsym_name = 3492 sym[sym_idx] 3493 .GetMangled() 3494 .GetName(Mangled::ePreferMangled) 3495 .GetCString(); 3496 if (gsym_name) 3497 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 3498 } 3499 } 3500 } 3501 if (symbol_section) { 3502 const addr_t section_file_addr = 3503 symbol_section->GetFileAddress(); 3504 if (symbol_byte_size == 0 && 3505 function_starts_count > 0) { 3506 addr_t symbol_lookup_file_addr = nlist.n_value; 3507 // Do an exact address match for non-ARM addresses, 3508 // else get the closest since the symbol might be a 3509 // thumb symbol which has an address with bit zero 3510 // set 3511 FunctionStarts::Entry *func_start_entry = 3512 function_starts.FindEntry(symbol_lookup_file_addr, 3513 !is_arm); 3514 if (is_arm && func_start_entry) { 3515 // Verify that the function start address is the 3516 // symbol address (ARM) or the symbol address + 1 3517 // (thumb) 3518 if (func_start_entry->addr != 3519 symbol_lookup_file_addr && 3520 func_start_entry->addr != 3521 (symbol_lookup_file_addr + 1)) { 3522 // Not the right entry, NULL it out... 3523 func_start_entry = NULL; 3524 } 3525 } 3526 if (func_start_entry) { 3527 func_start_entry->data = true; 3528 3529 addr_t symbol_file_addr = func_start_entry->addr; 3530 uint32_t symbol_flags = 0; 3531 if (is_arm) { 3532 if (symbol_file_addr & 1) 3533 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 3534 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3535 } 3536 3537 const FunctionStarts::Entry *next_func_start_entry = 3538 function_starts.FindNextEntry(func_start_entry); 3539 const addr_t section_end_file_addr = 3540 section_file_addr + 3541 symbol_section->GetByteSize(); 3542 if (next_func_start_entry) { 3543 addr_t next_symbol_file_addr = 3544 next_func_start_entry->addr; 3545 // Be sure the clear the Thumb address bit when 3546 // we calculate the size from the current and 3547 // next address 3548 if (is_arm) 3549 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3550 symbol_byte_size = std::min<lldb::addr_t>( 3551 next_symbol_file_addr - symbol_file_addr, 3552 section_end_file_addr - symbol_file_addr); 3553 } else { 3554 symbol_byte_size = 3555 section_end_file_addr - symbol_file_addr; 3556 } 3557 } 3558 } 3559 symbol_value -= section_file_addr; 3560 } 3561 3562 if (is_debug == false) { 3563 if (type == eSymbolTypeCode) { 3564 // See if we can find a N_FUN entry for any code 3565 // symbols. If we do find a match, and the name 3566 // matches, then we can merge the two into just the 3567 // function symbol to avoid duplicate entries in 3568 // the symbol table 3569 auto range = 3570 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 3571 if (range.first != range.second) { 3572 bool found_it = false; 3573 for (auto pos = range.first; pos != range.second; 3574 ++pos) { 3575 if (sym[sym_idx].GetMangled().GetName( 3576 Mangled::ePreferMangled) == 3577 sym[pos->second].GetMangled().GetName( 3578 Mangled::ePreferMangled)) { 3579 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3580 // We just need the flags from the linker 3581 // symbol, so put these flags 3582 // into the N_FUN flags to avoid duplicate 3583 // symbols in the symbol table 3584 sym[pos->second].SetExternal( 3585 sym[sym_idx].IsExternal()); 3586 sym[pos->second].SetFlags(nlist.n_type << 16 | 3587 nlist.n_desc); 3588 if (resolver_addresses.find(nlist.n_value) != 3589 resolver_addresses.end()) 3590 sym[pos->second].SetType(eSymbolTypeResolver); 3591 sym[sym_idx].Clear(); 3592 found_it = true; 3593 break; 3594 } 3595 } 3596 if (found_it) 3597 continue; 3598 } else { 3599 if (resolver_addresses.find(nlist.n_value) != 3600 resolver_addresses.end()) 3601 type = eSymbolTypeResolver; 3602 } 3603 } else if (type == eSymbolTypeData || 3604 type == eSymbolTypeObjCClass || 3605 type == eSymbolTypeObjCMetaClass || 3606 type == eSymbolTypeObjCIVar) { 3607 // See if we can find a N_STSYM entry for any data 3608 // symbols. If we do find a match, and the name 3609 // matches, then we can merge the two into just the 3610 // Static symbol to avoid duplicate entries in the 3611 // symbol table 3612 auto range = N_STSYM_addr_to_sym_idx.equal_range( 3613 nlist.n_value); 3614 if (range.first != range.second) { 3615 bool found_it = false; 3616 for (auto pos = range.first; pos != range.second; 3617 ++pos) { 3618 if (sym[sym_idx].GetMangled().GetName( 3619 Mangled::ePreferMangled) == 3620 sym[pos->second].GetMangled().GetName( 3621 Mangled::ePreferMangled)) { 3622 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3623 // We just need the flags from the linker 3624 // symbol, so put these flags 3625 // into the N_STSYM flags to avoid duplicate 3626 // symbols in the symbol table 3627 sym[pos->second].SetExternal( 3628 sym[sym_idx].IsExternal()); 3629 sym[pos->second].SetFlags(nlist.n_type << 16 | 3630 nlist.n_desc); 3631 sym[sym_idx].Clear(); 3632 found_it = true; 3633 break; 3634 } 3635 } 3636 if (found_it) 3637 continue; 3638 } else { 3639 const char *gsym_name = 3640 sym[sym_idx] 3641 .GetMangled() 3642 .GetName(Mangled::ePreferMangled) 3643 .GetCString(); 3644 if (gsym_name) { 3645 // Combine N_GSYM stab entries with the non 3646 // stab symbol 3647 ConstNameToSymbolIndexMap::const_iterator pos = 3648 N_GSYM_name_to_sym_idx.find(gsym_name); 3649 if (pos != N_GSYM_name_to_sym_idx.end()) { 3650 const uint32_t GSYM_sym_idx = pos->second; 3651 m_nlist_idx_to_sym_idx[nlist_idx] = 3652 GSYM_sym_idx; 3653 // Copy the address, because often the N_GSYM 3654 // address has an invalid address of zero 3655 // when the global is a common symbol 3656 sym[GSYM_sym_idx].GetAddressRef().SetSection( 3657 symbol_section); 3658 sym[GSYM_sym_idx].GetAddressRef().SetOffset( 3659 symbol_value); 3660 add_symbol_addr(sym[GSYM_sym_idx] 3661 .GetAddress() 3662 .GetFileAddress()); 3663 // We just need the flags from the linker 3664 // symbol, so put these flags 3665 // into the N_GSYM flags to avoid duplicate 3666 // symbols in the symbol table 3667 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | 3668 nlist.n_desc); 3669 sym[sym_idx].Clear(); 3670 continue; 3671 } 3672 } 3673 } 3674 } 3675 } 3676 3677 sym[sym_idx].SetID(nlist_idx); 3678 sym[sym_idx].SetType(type); 3679 if (set_value) { 3680 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 3681 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 3682 add_symbol_addr( 3683 sym[sym_idx].GetAddress().GetFileAddress()); 3684 } 3685 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 3686 3687 if (symbol_byte_size > 0) 3688 sym[sym_idx].SetByteSize(symbol_byte_size); 3689 3690 if (demangled_is_synthesized) 3691 sym[sym_idx].SetDemangledNameIsSynthesized(true); 3692 ++sym_idx; 3693 } else { 3694 sym[sym_idx].Clear(); 3695 } 3696 } 3697 ///////////////////////////// 3698 } 3699 } 3700 3701 for (const auto &pos : reexport_shlib_needs_fixup) { 3702 const auto undef_pos = undefined_name_to_desc.find(pos.second); 3703 if (undef_pos != undefined_name_to_desc.end()) { 3704 const uint8_t dylib_ordinal = 3705 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 3706 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 3707 sym[pos.first].SetReExportedSymbolSharedLibrary( 3708 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 3709 } 3710 } 3711 } 3712 3713 #endif 3714 lldb::offset_t nlist_data_offset = 0; 3715 3716 if (nlist_data.GetByteSize() > 0) { 3717 3718 // If the sym array was not created while parsing the DSC unmapped 3719 // symbols, create it now. 3720 if (sym == nullptr) { 3721 sym = 3722 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms); 3723 num_syms = symtab.GetNumSymbols(); 3724 } 3725 3726 if (unmapped_local_symbols_found) { 3727 assert(m_dysymtab.ilocalsym == 0); 3728 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size); 3729 nlist_idx = m_dysymtab.nlocalsym; 3730 } else { 3731 nlist_idx = 0; 3732 } 3733 3734 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 3735 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 3736 UndefinedNameToDescMap undefined_name_to_desc; 3737 SymbolIndexToName reexport_shlib_needs_fixup; 3738 3739 // Symtab parsing is a huge mess. Everything is entangled and the code 3740 // requires access to a ridiculous amount of variables. LLDB depends 3741 // heavily on the proper merging of symbols and to get that right we need 3742 // to make sure we have parsed all the debug symbols first. Therefore we 3743 // invoke the lambda twice, once to parse only the debug symbols and then 3744 // once more to parse the remaining symbols. 3745 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, 3746 bool debug_only) { 3747 const bool is_debug = ((nlist.n_type & N_STAB) != 0); 3748 if (is_debug != debug_only) 3749 return true; 3750 3751 const char *symbol_name_non_abi_mangled = nullptr; 3752 const char *symbol_name = nullptr; 3753 3754 if (have_strtab_data) { 3755 symbol_name = strtab_data.PeekCStr(nlist.n_strx); 3756 3757 if (symbol_name == nullptr) { 3758 // No symbol should be NULL, even the symbols with no string values 3759 // should have an offset zero which points to an empty C-string 3760 Debugger::ReportError(llvm::formatv( 3761 "symbol[{0}] has invalid string table offset {1:x} in {2}, " 3762 "ignoring symbol", 3763 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath())); 3764 return true; 3765 } 3766 if (symbol_name[0] == '\0') 3767 symbol_name = nullptr; 3768 } else { 3769 const addr_t str_addr = strtab_addr + nlist.n_strx; 3770 Status str_error; 3771 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name, 3772 str_error)) 3773 symbol_name = memory_symbol_name.c_str(); 3774 } 3775 3776 SymbolType type = eSymbolTypeInvalid; 3777 SectionSP symbol_section; 3778 bool add_nlist = true; 3779 bool is_gsym = false; 3780 bool demangled_is_synthesized = false; 3781 bool set_value = true; 3782 3783 assert(sym_idx < num_syms); 3784 sym[sym_idx].SetDebug(is_debug); 3785 3786 if (is_debug) { 3787 switch (nlist.n_type) { 3788 case N_GSYM: 3789 // global symbol: name,,NO_SECT,type,0 3790 // Sometimes the N_GSYM value contains the address. 3791 3792 // FIXME: In the .o files, we have a GSYM and a debug symbol for all 3793 // the ObjC data. They 3794 // have the same address, but we want to ensure that we always find 3795 // only the real symbol, 'cause we don't currently correctly 3796 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol 3797 // type. This is a temporary hack to make sure the ObjectiveC 3798 // symbols get treated correctly. To do this right, we should 3799 // coalesce all the GSYM & global symbols that have the same 3800 // address. 3801 is_gsym = true; 3802 sym[sym_idx].SetExternal(true); 3803 3804 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') { 3805 llvm::StringRef symbol_name_ref(symbol_name); 3806 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 3807 symbol_name_non_abi_mangled = symbol_name + 1; 3808 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 3809 type = eSymbolTypeObjCClass; 3810 demangled_is_synthesized = true; 3811 3812 } else if (symbol_name_ref.starts_with( 3813 g_objc_v2_prefix_metaclass)) { 3814 symbol_name_non_abi_mangled = symbol_name + 1; 3815 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 3816 type = eSymbolTypeObjCMetaClass; 3817 demangled_is_synthesized = true; 3818 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 3819 symbol_name_non_abi_mangled = symbol_name + 1; 3820 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 3821 type = eSymbolTypeObjCIVar; 3822 demangled_is_synthesized = true; 3823 } 3824 } else { 3825 if (nlist.n_value != 0) 3826 symbol_section = 3827 section_info.GetSection(nlist.n_sect, nlist.n_value); 3828 type = eSymbolTypeData; 3829 } 3830 break; 3831 3832 case N_FNAME: 3833 // procedure name (f77 kludge): name,,NO_SECT,0,0 3834 type = eSymbolTypeCompiler; 3835 break; 3836 3837 case N_FUN: 3838 // procedure: name,,n_sect,linenumber,address 3839 if (symbol_name) { 3840 type = eSymbolTypeCode; 3841 symbol_section = 3842 section_info.GetSection(nlist.n_sect, nlist.n_value); 3843 3844 N_FUN_addr_to_sym_idx.insert( 3845 std::make_pair(nlist.n_value, sym_idx)); 3846 // We use the current number of symbols in the symbol table in 3847 // lieu of using nlist_idx in case we ever start trimming entries 3848 // out 3849 N_FUN_indexes.push_back(sym_idx); 3850 } else { 3851 type = eSymbolTypeCompiler; 3852 3853 if (!N_FUN_indexes.empty()) { 3854 // Copy the size of the function into the original STAB entry 3855 // so we don't have to hunt for it later 3856 symtab.SymbolAtIndex(N_FUN_indexes.back()) 3857 ->SetByteSize(nlist.n_value); 3858 N_FUN_indexes.pop_back(); 3859 // We don't really need the end function STAB as it contains 3860 // the size which we already placed with the original symbol, 3861 // so don't add it if we want a minimal symbol table 3862 add_nlist = false; 3863 } 3864 } 3865 break; 3866 3867 case N_STSYM: 3868 // static symbol: name,,n_sect,type,address 3869 N_STSYM_addr_to_sym_idx.insert( 3870 std::make_pair(nlist.n_value, sym_idx)); 3871 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3872 if (symbol_name && symbol_name[0]) { 3873 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1, 3874 eSymbolTypeData); 3875 } 3876 break; 3877 3878 case N_LCSYM: 3879 // .lcomm symbol: name,,n_sect,type,address 3880 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3881 type = eSymbolTypeCommonBlock; 3882 break; 3883 3884 case N_BNSYM: 3885 // We use the current number of symbols in the symbol table in lieu 3886 // of using nlist_idx in case we ever start trimming entries out 3887 // Skip these if we want minimal symbol tables 3888 add_nlist = false; 3889 break; 3890 3891 case N_ENSYM: 3892 // Set the size of the N_BNSYM to the terminating index of this 3893 // N_ENSYM so that we can always skip the entire symbol if we need 3894 // to navigate more quickly at the source level when parsing STABS 3895 // Skip these if we want minimal symbol tables 3896 add_nlist = false; 3897 break; 3898 3899 case N_OPT: 3900 // emitted with gcc2_compiled and in gcc source 3901 type = eSymbolTypeCompiler; 3902 break; 3903 3904 case N_RSYM: 3905 // register sym: name,,NO_SECT,type,register 3906 type = eSymbolTypeVariable; 3907 break; 3908 3909 case N_SLINE: 3910 // src line: 0,,n_sect,linenumber,address 3911 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3912 type = eSymbolTypeLineEntry; 3913 break; 3914 3915 case N_SSYM: 3916 // structure elt: name,,NO_SECT,type,struct_offset 3917 type = eSymbolTypeVariableType; 3918 break; 3919 3920 case N_SO: 3921 // source file name 3922 type = eSymbolTypeSourceFile; 3923 if (symbol_name == nullptr) { 3924 add_nlist = false; 3925 if (N_SO_index != UINT32_MAX) { 3926 // Set the size of the N_SO to the terminating index of this 3927 // N_SO so that we can always skip the entire N_SO if we need 3928 // to navigate more quickly at the source level when parsing 3929 // STABS 3930 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3931 symbol_ptr->SetByteSize(sym_idx); 3932 symbol_ptr->SetSizeIsSibling(true); 3933 } 3934 N_NSYM_indexes.clear(); 3935 N_INCL_indexes.clear(); 3936 N_BRAC_indexes.clear(); 3937 N_COMM_indexes.clear(); 3938 N_FUN_indexes.clear(); 3939 N_SO_index = UINT32_MAX; 3940 } else { 3941 // We use the current number of symbols in the symbol table in 3942 // lieu of using nlist_idx in case we ever start trimming entries 3943 // out 3944 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3945 if (N_SO_has_full_path) { 3946 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) { 3947 // We have two consecutive N_SO entries where the first 3948 // contains a directory and the second contains a full path. 3949 sym[sym_idx - 1].GetMangled().SetValue( 3950 ConstString(symbol_name)); 3951 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3952 add_nlist = false; 3953 } else { 3954 // This is the first entry in a N_SO that contains a 3955 // directory or a full path to the source file 3956 N_SO_index = sym_idx; 3957 } 3958 } else if ((N_SO_index == sym_idx - 1) && 3959 ((sym_idx - 1) < num_syms)) { 3960 // This is usually the second N_SO entry that contains just the 3961 // filename, so here we combine it with the first one if we are 3962 // minimizing the symbol table 3963 const char *so_path = 3964 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString(); 3965 if (so_path && so_path[0]) { 3966 std::string full_so_path(so_path); 3967 const size_t double_slash_pos = full_so_path.find("//"); 3968 if (double_slash_pos != std::string::npos) { 3969 // The linker has been generating bad N_SO entries with 3970 // doubled up paths in the format "%s%s" where the first 3971 // string in the DW_AT_comp_dir, and the second is the 3972 // directory for the source file so you end up with a path 3973 // that looks like "/tmp/src//tmp/src/" 3974 FileSpec so_dir(so_path); 3975 if (!FileSystem::Instance().Exists(so_dir)) { 3976 so_dir.SetFile(&full_so_path[double_slash_pos + 1], 3977 FileSpec::Style::native); 3978 if (FileSystem::Instance().Exists(so_dir)) { 3979 // Trim off the incorrect path 3980 full_so_path.erase(0, double_slash_pos + 1); 3981 } 3982 } 3983 } 3984 if (*full_so_path.rbegin() != '/') 3985 full_so_path += '/'; 3986 full_so_path += symbol_name; 3987 sym[sym_idx - 1].GetMangled().SetValue( 3988 ConstString(full_so_path.c_str())); 3989 add_nlist = false; 3990 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3991 } 3992 } else { 3993 // This could be a relative path to a N_SO 3994 N_SO_index = sym_idx; 3995 } 3996 } 3997 break; 3998 3999 case N_OSO: 4000 // object file name: name,,0,0,st_mtime 4001 type = eSymbolTypeObjectFile; 4002 break; 4003 4004 case N_LSYM: 4005 // local sym: name,,NO_SECT,type,offset 4006 type = eSymbolTypeLocal; 4007 break; 4008 4009 // INCL scopes 4010 case N_BINCL: 4011 // include file beginning: name,,NO_SECT,0,sum We use the current 4012 // number of symbols in the symbol table in lieu of using nlist_idx 4013 // in case we ever start trimming entries out 4014 N_INCL_indexes.push_back(sym_idx); 4015 type = eSymbolTypeScopeBegin; 4016 break; 4017 4018 case N_EINCL: 4019 // include file end: name,,NO_SECT,0,0 4020 // Set the size of the N_BINCL to the terminating index of this 4021 // N_EINCL so that we can always skip the entire symbol if we need 4022 // to navigate more quickly at the source level when parsing STABS 4023 if (!N_INCL_indexes.empty()) { 4024 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back()); 4025 symbol_ptr->SetByteSize(sym_idx + 1); 4026 symbol_ptr->SetSizeIsSibling(true); 4027 N_INCL_indexes.pop_back(); 4028 } 4029 type = eSymbolTypeScopeEnd; 4030 break; 4031 4032 case N_SOL: 4033 // #included file name: name,,n_sect,0,address 4034 type = eSymbolTypeHeaderFile; 4035 4036 // We currently don't use the header files on darwin 4037 add_nlist = false; 4038 break; 4039 4040 case N_PARAMS: 4041 // compiler parameters: name,,NO_SECT,0,0 4042 type = eSymbolTypeCompiler; 4043 break; 4044 4045 case N_VERSION: 4046 // compiler version: name,,NO_SECT,0,0 4047 type = eSymbolTypeCompiler; 4048 break; 4049 4050 case N_OLEVEL: 4051 // compiler -O level: name,,NO_SECT,0,0 4052 type = eSymbolTypeCompiler; 4053 break; 4054 4055 case N_PSYM: 4056 // parameter: name,,NO_SECT,type,offset 4057 type = eSymbolTypeVariable; 4058 break; 4059 4060 case N_ENTRY: 4061 // alternate entry: name,,n_sect,linenumber,address 4062 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4063 type = eSymbolTypeLineEntry; 4064 break; 4065 4066 // Left and Right Braces 4067 case N_LBRAC: 4068 // left bracket: 0,,NO_SECT,nesting level,address We use the 4069 // current number of symbols in the symbol table in lieu of using 4070 // nlist_idx in case we ever start trimming entries out 4071 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4072 N_BRAC_indexes.push_back(sym_idx); 4073 type = eSymbolTypeScopeBegin; 4074 break; 4075 4076 case N_RBRAC: 4077 // right bracket: 0,,NO_SECT,nesting level,address Set the size of 4078 // the N_LBRAC to the terminating index of this N_RBRAC so that we 4079 // can always skip the entire symbol if we need to navigate more 4080 // quickly at the source level when parsing STABS 4081 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4082 if (!N_BRAC_indexes.empty()) { 4083 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back()); 4084 symbol_ptr->SetByteSize(sym_idx + 1); 4085 symbol_ptr->SetSizeIsSibling(true); 4086 N_BRAC_indexes.pop_back(); 4087 } 4088 type = eSymbolTypeScopeEnd; 4089 break; 4090 4091 case N_EXCL: 4092 // deleted include file: name,,NO_SECT,0,sum 4093 type = eSymbolTypeHeaderFile; 4094 break; 4095 4096 // COMM scopes 4097 case N_BCOMM: 4098 // begin common: name,,NO_SECT,0,0 4099 // We use the current number of symbols in the symbol table in lieu 4100 // of using nlist_idx in case we ever start trimming entries out 4101 type = eSymbolTypeScopeBegin; 4102 N_COMM_indexes.push_back(sym_idx); 4103 break; 4104 4105 case N_ECOML: 4106 // end common (local name): 0,,n_sect,0,address 4107 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4108 [[fallthrough]]; 4109 4110 case N_ECOMM: 4111 // end common: name,,n_sect,0,0 4112 // Set the size of the N_BCOMM to the terminating index of this 4113 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if 4114 // we need to navigate more quickly at the source level when 4115 // parsing STABS 4116 if (!N_COMM_indexes.empty()) { 4117 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back()); 4118 symbol_ptr->SetByteSize(sym_idx + 1); 4119 symbol_ptr->SetSizeIsSibling(true); 4120 N_COMM_indexes.pop_back(); 4121 } 4122 type = eSymbolTypeScopeEnd; 4123 break; 4124 4125 case N_LENG: 4126 // second stab entry with length information 4127 type = eSymbolTypeAdditional; 4128 break; 4129 4130 default: 4131 break; 4132 } 4133 } else { 4134 uint8_t n_type = N_TYPE & nlist.n_type; 4135 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 4136 4137 switch (n_type) { 4138 case N_INDR: { 4139 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value); 4140 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) { 4141 type = eSymbolTypeReExported; 4142 ConstString reexport_name(reexport_name_cstr + 4143 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 4144 sym[sym_idx].SetReExportedSymbolName(reexport_name); 4145 set_value = false; 4146 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 4147 indirect_symbol_names.insert( 4148 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 4149 } else 4150 type = eSymbolTypeUndefined; 4151 } break; 4152 4153 case N_UNDF: 4154 if (symbol_name && symbol_name[0]) { 4155 ConstString undefined_name(symbol_name + 4156 ((symbol_name[0] == '_') ? 1 : 0)); 4157 undefined_name_to_desc[undefined_name] = nlist.n_desc; 4158 } 4159 [[fallthrough]]; 4160 4161 case N_PBUD: 4162 type = eSymbolTypeUndefined; 4163 break; 4164 4165 case N_ABS: 4166 type = eSymbolTypeAbsolute; 4167 break; 4168 4169 case N_SECT: { 4170 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4171 4172 if (!symbol_section) { 4173 // TODO: warn about this? 4174 add_nlist = false; 4175 break; 4176 } 4177 4178 if (TEXT_eh_frame_sectID == nlist.n_sect) { 4179 type = eSymbolTypeException; 4180 } else { 4181 uint32_t section_type = symbol_section->Get() & SECTION_TYPE; 4182 4183 switch (section_type) { 4184 case S_CSTRING_LITERALS: 4185 type = eSymbolTypeData; 4186 break; // section with only literal C strings 4187 case S_4BYTE_LITERALS: 4188 type = eSymbolTypeData; 4189 break; // section with only 4 byte literals 4190 case S_8BYTE_LITERALS: 4191 type = eSymbolTypeData; 4192 break; // section with only 8 byte literals 4193 case S_LITERAL_POINTERS: 4194 type = eSymbolTypeTrampoline; 4195 break; // section with only pointers to literals 4196 case S_NON_LAZY_SYMBOL_POINTERS: 4197 type = eSymbolTypeTrampoline; 4198 break; // section with only non-lazy symbol pointers 4199 case S_LAZY_SYMBOL_POINTERS: 4200 type = eSymbolTypeTrampoline; 4201 break; // section with only lazy symbol pointers 4202 case S_SYMBOL_STUBS: 4203 type = eSymbolTypeTrampoline; 4204 break; // section with only symbol stubs, byte size of stub in 4205 // the reserved2 field 4206 case S_MOD_INIT_FUNC_POINTERS: 4207 type = eSymbolTypeCode; 4208 break; // section with only function pointers for initialization 4209 case S_MOD_TERM_FUNC_POINTERS: 4210 type = eSymbolTypeCode; 4211 break; // section with only function pointers for termination 4212 case S_INTERPOSING: 4213 type = eSymbolTypeTrampoline; 4214 break; // section with only pairs of function pointers for 4215 // interposing 4216 case S_16BYTE_LITERALS: 4217 type = eSymbolTypeData; 4218 break; // section with only 16 byte literals 4219 case S_DTRACE_DOF: 4220 type = eSymbolTypeInstrumentation; 4221 break; 4222 case S_LAZY_DYLIB_SYMBOL_POINTERS: 4223 type = eSymbolTypeTrampoline; 4224 break; 4225 default: 4226 switch (symbol_section->GetType()) { 4227 case lldb::eSectionTypeCode: 4228 type = eSymbolTypeCode; 4229 break; 4230 case eSectionTypeData: 4231 case eSectionTypeDataCString: // Inlined C string data 4232 case eSectionTypeDataCStringPointers: // Pointers to C string 4233 // data 4234 case eSectionTypeDataSymbolAddress: // Address of a symbol in 4235 // the symbol table 4236 case eSectionTypeData4: 4237 case eSectionTypeData8: 4238 case eSectionTypeData16: 4239 type = eSymbolTypeData; 4240 break; 4241 default: 4242 break; 4243 } 4244 break; 4245 } 4246 4247 if (type == eSymbolTypeInvalid) { 4248 const char *symbol_sect_name = 4249 symbol_section->GetName().AsCString(); 4250 if (symbol_section->IsDescendant(text_section_sp.get())) { 4251 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 4252 S_ATTR_SELF_MODIFYING_CODE | 4253 S_ATTR_SOME_INSTRUCTIONS)) 4254 type = eSymbolTypeData; 4255 else 4256 type = eSymbolTypeCode; 4257 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 4258 symbol_section->IsDescendant( 4259 data_dirty_section_sp.get()) || 4260 symbol_section->IsDescendant( 4261 data_const_section_sp.get())) { 4262 if (symbol_sect_name && 4263 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 4264 type = eSymbolTypeRuntime; 4265 4266 if (symbol_name) { 4267 llvm::StringRef symbol_name_ref(symbol_name); 4268 if (symbol_name_ref.starts_with("_OBJC_")) { 4269 llvm::StringRef g_objc_v2_prefix_class( 4270 "_OBJC_CLASS_$_"); 4271 llvm::StringRef g_objc_v2_prefix_metaclass( 4272 "_OBJC_METACLASS_$_"); 4273 llvm::StringRef g_objc_v2_prefix_ivar( 4274 "_OBJC_IVAR_$_"); 4275 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 4276 symbol_name_non_abi_mangled = symbol_name + 1; 4277 symbol_name = 4278 symbol_name + g_objc_v2_prefix_class.size(); 4279 type = eSymbolTypeObjCClass; 4280 demangled_is_synthesized = true; 4281 } else if (symbol_name_ref.starts_with( 4282 g_objc_v2_prefix_metaclass)) { 4283 symbol_name_non_abi_mangled = symbol_name + 1; 4284 symbol_name = 4285 symbol_name + g_objc_v2_prefix_metaclass.size(); 4286 type = eSymbolTypeObjCMetaClass; 4287 demangled_is_synthesized = true; 4288 } else if (symbol_name_ref.starts_with( 4289 g_objc_v2_prefix_ivar)) { 4290 symbol_name_non_abi_mangled = symbol_name + 1; 4291 symbol_name = 4292 symbol_name + g_objc_v2_prefix_ivar.size(); 4293 type = eSymbolTypeObjCIVar; 4294 demangled_is_synthesized = true; 4295 } 4296 } 4297 } 4298 } else if (symbol_sect_name && 4299 ::strstr(symbol_sect_name, "__gcc_except_tab") == 4300 symbol_sect_name) { 4301 type = eSymbolTypeException; 4302 } else { 4303 type = eSymbolTypeData; 4304 } 4305 } else if (symbol_sect_name && 4306 ::strstr(symbol_sect_name, "__IMPORT") == 4307 symbol_sect_name) { 4308 type = eSymbolTypeTrampoline; 4309 } else if (symbol_section->IsDescendant(objc_section_sp.get())) { 4310 type = eSymbolTypeRuntime; 4311 if (symbol_name && symbol_name[0] == '.') { 4312 llvm::StringRef symbol_name_ref(symbol_name); 4313 llvm::StringRef g_objc_v1_prefix_class( 4314 ".objc_class_name_"); 4315 if (symbol_name_ref.starts_with(g_objc_v1_prefix_class)) { 4316 symbol_name_non_abi_mangled = symbol_name; 4317 symbol_name = symbol_name + g_objc_v1_prefix_class.size(); 4318 type = eSymbolTypeObjCClass; 4319 demangled_is_synthesized = true; 4320 } 4321 } 4322 } 4323 } 4324 } 4325 } break; 4326 } 4327 } 4328 4329 if (!add_nlist) { 4330 sym[sym_idx].Clear(); 4331 return true; 4332 } 4333 4334 uint64_t symbol_value = nlist.n_value; 4335 4336 if (symbol_name_non_abi_mangled) { 4337 sym[sym_idx].GetMangled().SetMangledName( 4338 ConstString(symbol_name_non_abi_mangled)); 4339 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); 4340 } else { 4341 4342 if (symbol_name && symbol_name[0] == '_') { 4343 symbol_name++; // Skip the leading underscore 4344 } 4345 4346 if (symbol_name) { 4347 ConstString const_symbol_name(symbol_name); 4348 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 4349 } 4350 } 4351 4352 if (is_gsym) { 4353 const char *gsym_name = sym[sym_idx] 4354 .GetMangled() 4355 .GetName(Mangled::ePreferMangled) 4356 .GetCString(); 4357 if (gsym_name) 4358 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 4359 } 4360 4361 if (symbol_section) { 4362 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4363 symbol_value -= section_file_addr; 4364 } 4365 4366 if (!is_debug) { 4367 if (type == eSymbolTypeCode) { 4368 // See if we can find a N_FUN entry for any code symbols. If we do 4369 // find a match, and the name matches, then we can merge the two into 4370 // just the function symbol to avoid duplicate entries in the symbol 4371 // table. 4372 std::pair<ValueToSymbolIndexMap::const_iterator, 4373 ValueToSymbolIndexMap::const_iterator> 4374 range; 4375 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 4376 if (range.first != range.second) { 4377 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4378 pos != range.second; ++pos) { 4379 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4380 sym[pos->second].GetMangled().GetName( 4381 Mangled::ePreferMangled)) { 4382 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4383 // We just need the flags from the linker symbol, so put these 4384 // flags into the N_FUN flags to avoid duplicate symbols in the 4385 // symbol table. 4386 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4387 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4388 if (resolver_addresses.find(nlist.n_value) != 4389 resolver_addresses.end()) 4390 sym[pos->second].SetType(eSymbolTypeResolver); 4391 sym[sym_idx].Clear(); 4392 return true; 4393 } 4394 } 4395 } else { 4396 if (resolver_addresses.find(nlist.n_value) != 4397 resolver_addresses.end()) 4398 type = eSymbolTypeResolver; 4399 } 4400 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || 4401 type == eSymbolTypeObjCMetaClass || 4402 type == eSymbolTypeObjCIVar) { 4403 // See if we can find a N_STSYM entry for any data symbols. If we do 4404 // find a match, and the name matches, then we can merge the two into 4405 // just the Static symbol to avoid duplicate entries in the symbol 4406 // table. 4407 std::pair<ValueToSymbolIndexMap::const_iterator, 4408 ValueToSymbolIndexMap::const_iterator> 4409 range; 4410 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); 4411 if (range.first != range.second) { 4412 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4413 pos != range.second; ++pos) { 4414 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4415 sym[pos->second].GetMangled().GetName( 4416 Mangled::ePreferMangled)) { 4417 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4418 // We just need the flags from the linker symbol, so put these 4419 // flags into the N_STSYM flags to avoid duplicate symbols in 4420 // the symbol table. 4421 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4422 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4423 sym[sym_idx].Clear(); 4424 return true; 4425 } 4426 } 4427 } else { 4428 // Combine N_GSYM stab entries with the non stab symbol. 4429 const char *gsym_name = sym[sym_idx] 4430 .GetMangled() 4431 .GetName(Mangled::ePreferMangled) 4432 .GetCString(); 4433 if (gsym_name) { 4434 ConstNameToSymbolIndexMap::const_iterator pos = 4435 N_GSYM_name_to_sym_idx.find(gsym_name); 4436 if (pos != N_GSYM_name_to_sym_idx.end()) { 4437 const uint32_t GSYM_sym_idx = pos->second; 4438 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; 4439 // Copy the address, because often the N_GSYM address has an 4440 // invalid address of zero when the global is a common symbol. 4441 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); 4442 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); 4443 add_symbol_addr( 4444 sym[GSYM_sym_idx].GetAddress().GetFileAddress()); 4445 // We just need the flags from the linker symbol, so put these 4446 // flags into the N_GSYM flags to avoid duplicate symbols in 4447 // the symbol table. 4448 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4449 sym[sym_idx].Clear(); 4450 return true; 4451 } 4452 } 4453 } 4454 } 4455 } 4456 4457 sym[sym_idx].SetID(nlist_idx); 4458 sym[sym_idx].SetType(type); 4459 if (set_value) { 4460 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 4461 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 4462 if (symbol_section) 4463 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress()); 4464 } 4465 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4466 if (nlist.n_desc & N_WEAK_REF) 4467 sym[sym_idx].SetIsWeak(true); 4468 4469 if (demangled_is_synthesized) 4470 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4471 4472 ++sym_idx; 4473 return true; 4474 }; 4475 4476 // First parse all the nlists but don't process them yet. See the next 4477 // comment for an explanation why. 4478 std::vector<struct nlist_64> nlists; 4479 nlists.reserve(symtab_load_command.nsyms); 4480 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { 4481 if (auto nlist = 4482 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) 4483 nlists.push_back(*nlist); 4484 else 4485 break; 4486 } 4487 4488 // Now parse all the debug symbols. This is needed to merge non-debug 4489 // symbols in the next step. Non-debug symbols are always coalesced into 4490 // the debug symbol. Doing this in one step would mean that some symbols 4491 // won't be merged. 4492 nlist_idx = 0; 4493 for (auto &nlist : nlists) { 4494 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) 4495 break; 4496 } 4497 4498 // Finally parse all the non debug symbols. 4499 nlist_idx = 0; 4500 for (auto &nlist : nlists) { 4501 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) 4502 break; 4503 } 4504 4505 for (const auto &pos : reexport_shlib_needs_fixup) { 4506 const auto undef_pos = undefined_name_to_desc.find(pos.second); 4507 if (undef_pos != undefined_name_to_desc.end()) { 4508 const uint8_t dylib_ordinal = 4509 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 4510 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 4511 sym[pos.first].SetReExportedSymbolSharedLibrary( 4512 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 4513 } 4514 } 4515 } 4516 4517 // Count how many trie symbols we'll add to the symbol table 4518 int trie_symbol_table_augment_count = 0; 4519 for (auto &e : external_sym_trie_entries) { 4520 if (!symbols_added.contains(e.entry.address)) 4521 trie_symbol_table_augment_count++; 4522 } 4523 4524 if (num_syms < sym_idx + trie_symbol_table_augment_count) { 4525 num_syms = sym_idx + trie_symbol_table_augment_count; 4526 sym = symtab.Resize(num_syms); 4527 } 4528 uint32_t synthetic_sym_id = symtab_load_command.nsyms; 4529 4530 // Add symbols from the trie to the symbol table. 4531 for (auto &e : external_sym_trie_entries) { 4532 if (symbols_added.contains(e.entry.address)) 4533 continue; 4534 4535 // Find the section that this trie address is in, use that to annotate 4536 // symbol type as we add the trie address and name to the symbol table. 4537 Address symbol_addr; 4538 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { 4539 SectionSP symbol_section(symbol_addr.GetSection()); 4540 const char *symbol_name = e.entry.name.GetCString(); 4541 bool demangled_is_synthesized = false; 4542 SymbolType type = 4543 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp, 4544 data_section_sp, data_dirty_section_sp, 4545 data_const_section_sp, symbol_section); 4546 4547 sym[sym_idx].SetType(type); 4548 if (symbol_section) { 4549 sym[sym_idx].SetID(synthetic_sym_id++); 4550 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name)); 4551 if (demangled_is_synthesized) 4552 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4553 sym[sym_idx].SetIsSynthetic(true); 4554 sym[sym_idx].SetExternal(true); 4555 sym[sym_idx].GetAddressRef() = symbol_addr; 4556 add_symbol_addr(symbol_addr.GetFileAddress()); 4557 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) 4558 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); 4559 ++sym_idx; 4560 } 4561 } 4562 } 4563 4564 if (function_starts_count > 0) { 4565 uint32_t num_synthetic_function_symbols = 0; 4566 for (i = 0; i < function_starts_count; ++i) { 4567 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr)) 4568 ++num_synthetic_function_symbols; 4569 } 4570 4571 if (num_synthetic_function_symbols > 0) { 4572 if (num_syms < sym_idx + num_synthetic_function_symbols) { 4573 num_syms = sym_idx + num_synthetic_function_symbols; 4574 sym = symtab.Resize(num_syms); 4575 } 4576 for (i = 0; i < function_starts_count; ++i) { 4577 const FunctionStarts::Entry *func_start_entry = 4578 function_starts.GetEntryAtIndex(i); 4579 if (!symbols_added.contains(func_start_entry->addr)) { 4580 addr_t symbol_file_addr = func_start_entry->addr; 4581 uint32_t symbol_flags = 0; 4582 if (func_start_entry->data) 4583 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 4584 Address symbol_addr; 4585 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { 4586 SectionSP symbol_section(symbol_addr.GetSection()); 4587 if (symbol_section) { 4588 sym[sym_idx].SetID(synthetic_sym_id++); 4589 // Don't set the name for any synthetic symbols, the Symbol 4590 // object will generate one if needed when the name is accessed 4591 // via accessors. 4592 sym[sym_idx].GetMangled().SetDemangledName(ConstString()); 4593 sym[sym_idx].SetType(eSymbolTypeCode); 4594 sym[sym_idx].SetIsSynthetic(true); 4595 sym[sym_idx].GetAddressRef() = symbol_addr; 4596 add_symbol_addr(symbol_addr.GetFileAddress()); 4597 if (symbol_flags) 4598 sym[sym_idx].SetFlags(symbol_flags); 4599 ++sym_idx; 4600 } 4601 } 4602 } 4603 } 4604 } 4605 } 4606 4607 // Trim our symbols down to just what we ended up with after removing any 4608 // symbols. 4609 if (sym_idx < num_syms) { 4610 num_syms = sym_idx; 4611 sym = symtab.Resize(num_syms); 4612 } 4613 4614 // Now synthesize indirect symbols 4615 if (m_dysymtab.nindirectsyms != 0) { 4616 if (indirect_symbol_index_data.GetByteSize()) { 4617 NListIndexToSymbolIndexMap::const_iterator end_index_pos = 4618 m_nlist_idx_to_sym_idx.end(); 4619 4620 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size(); 4621 ++sect_idx) { 4622 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) == 4623 S_SYMBOL_STUBS) { 4624 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2; 4625 if (symbol_stub_byte_size == 0) 4626 continue; 4627 4628 const uint32_t num_symbol_stubs = 4629 m_mach_sections[sect_idx].size / symbol_stub_byte_size; 4630 4631 if (num_symbol_stubs == 0) 4632 continue; 4633 4634 const uint32_t symbol_stub_index_offset = 4635 m_mach_sections[sect_idx].reserved1; 4636 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) { 4637 const uint32_t symbol_stub_index = 4638 symbol_stub_index_offset + stub_idx; 4639 const lldb::addr_t symbol_stub_addr = 4640 m_mach_sections[sect_idx].addr + 4641 (stub_idx * symbol_stub_byte_size); 4642 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4; 4643 if (indirect_symbol_index_data.ValidOffsetForDataOfSize( 4644 symbol_stub_offset, 4)) { 4645 const uint32_t stub_sym_id = 4646 indirect_symbol_index_data.GetU32(&symbol_stub_offset); 4647 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL)) 4648 continue; 4649 4650 NListIndexToSymbolIndexMap::const_iterator index_pos = 4651 m_nlist_idx_to_sym_idx.find(stub_sym_id); 4652 Symbol *stub_symbol = nullptr; 4653 if (index_pos != end_index_pos) { 4654 // We have a remapping from the original nlist index to a 4655 // current symbol index, so just look this up by index 4656 stub_symbol = symtab.SymbolAtIndex(index_pos->second); 4657 } else { 4658 // We need to lookup a symbol using the original nlist symbol 4659 // index since this index is coming from the S_SYMBOL_STUBS 4660 stub_symbol = symtab.FindSymbolByID(stub_sym_id); 4661 } 4662 4663 if (stub_symbol) { 4664 Address so_addr(symbol_stub_addr, section_list); 4665 4666 if (stub_symbol->GetType() == eSymbolTypeUndefined) { 4667 // Change the external symbol into a trampoline that makes 4668 // sense These symbols were N_UNDF N_EXT, and are useless 4669 // to us, so we can re-use them so we don't have to make up 4670 // a synthetic symbol for no good reason. 4671 if (resolver_addresses.find(symbol_stub_addr) == 4672 resolver_addresses.end()) 4673 stub_symbol->SetType(eSymbolTypeTrampoline); 4674 else 4675 stub_symbol->SetType(eSymbolTypeResolver); 4676 stub_symbol->SetExternal(false); 4677 stub_symbol->GetAddressRef() = so_addr; 4678 stub_symbol->SetByteSize(symbol_stub_byte_size); 4679 } else { 4680 // Make a synthetic symbol to describe the trampoline stub 4681 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled()); 4682 if (sym_idx >= num_syms) { 4683 sym = symtab.Resize(++num_syms); 4684 stub_symbol = nullptr; // this pointer no longer valid 4685 } 4686 sym[sym_idx].SetID(synthetic_sym_id++); 4687 sym[sym_idx].GetMangled() = stub_symbol_mangled_name; 4688 if (resolver_addresses.find(symbol_stub_addr) == 4689 resolver_addresses.end()) 4690 sym[sym_idx].SetType(eSymbolTypeTrampoline); 4691 else 4692 sym[sym_idx].SetType(eSymbolTypeResolver); 4693 sym[sym_idx].SetIsSynthetic(true); 4694 sym[sym_idx].GetAddressRef() = so_addr; 4695 add_symbol_addr(so_addr.GetFileAddress()); 4696 sym[sym_idx].SetByteSize(symbol_stub_byte_size); 4697 ++sym_idx; 4698 } 4699 } else { 4700 if (log) 4701 log->Warning("symbol stub referencing symbol table symbol " 4702 "%u that isn't in our minimal symbol table, " 4703 "fix this!!!", 4704 stub_sym_id); 4705 } 4706 } 4707 } 4708 } 4709 } 4710 } 4711 } 4712 4713 if (!reexport_trie_entries.empty()) { 4714 for (const auto &e : reexport_trie_entries) { 4715 if (e.entry.import_name) { 4716 // Only add indirect symbols from the Trie entries if we didn't have 4717 // a N_INDR nlist entry for this already 4718 if (indirect_symbol_names.find(e.entry.name) == 4719 indirect_symbol_names.end()) { 4720 // Make a synthetic symbol to describe re-exported symbol. 4721 if (sym_idx >= num_syms) 4722 sym = symtab.Resize(++num_syms); 4723 sym[sym_idx].SetID(synthetic_sym_id++); 4724 sym[sym_idx].GetMangled() = Mangled(e.entry.name); 4725 sym[sym_idx].SetType(eSymbolTypeReExported); 4726 sym[sym_idx].SetIsSynthetic(true); 4727 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name); 4728 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) { 4729 sym[sym_idx].SetReExportedSymbolSharedLibrary( 4730 dylib_files.GetFileSpecAtIndex(e.entry.other - 1)); 4731 } 4732 ++sym_idx; 4733 } 4734 } 4735 } 4736 } 4737 } 4738 4739 void ObjectFileMachO::Dump(Stream *s) { 4740 ModuleSP module_sp(GetModule()); 4741 if (module_sp) { 4742 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 4743 s->Printf("%p: ", static_cast<void *>(this)); 4744 s->Indent(); 4745 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64) 4746 s->PutCString("ObjectFileMachO64"); 4747 else 4748 s->PutCString("ObjectFileMachO32"); 4749 4750 *s << ", file = '" << m_file; 4751 ModuleSpecList all_specs; 4752 ModuleSpec base_spec; 4753 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 4754 base_spec, all_specs); 4755 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 4756 *s << "', triple"; 4757 if (e) 4758 s->Printf("[%d]", i); 4759 *s << " = "; 4760 *s << all_specs.GetModuleSpecRefAtIndex(i) 4761 .GetArchitecture() 4762 .GetTriple() 4763 .getTriple(); 4764 } 4765 *s << "\n"; 4766 SectionList *sections = GetSectionList(); 4767 if (sections) 4768 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 4769 UINT32_MAX); 4770 4771 if (m_symtab_up) 4772 m_symtab_up->Dump(s, nullptr, eSortOrderNone); 4773 } 4774 } 4775 4776 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header, 4777 const lldb_private::DataExtractor &data, 4778 lldb::offset_t lc_offset) { 4779 uint32_t i; 4780 llvm::MachO::uuid_command load_cmd; 4781 4782 lldb::offset_t offset = lc_offset; 4783 for (i = 0; i < header.ncmds; ++i) { 4784 const lldb::offset_t cmd_offset = offset; 4785 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4786 break; 4787 4788 if (load_cmd.cmd == LC_UUID) { 4789 const uint8_t *uuid_bytes = data.PeekData(offset, 16); 4790 4791 if (uuid_bytes) { 4792 // OpenCL on Mac OS X uses the same UUID for each of its object files. 4793 // We pretend these object files have no UUID to prevent crashing. 4794 4795 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8, 4796 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63, 4797 0xbb, 0x14, 0xf0, 0x0d}; 4798 4799 if (!memcmp(uuid_bytes, opencl_uuid, 16)) 4800 return UUID(); 4801 4802 return UUID(uuid_bytes, 16); 4803 } 4804 return UUID(); 4805 } 4806 offset = cmd_offset + load_cmd.cmdsize; 4807 } 4808 return UUID(); 4809 } 4810 4811 static llvm::StringRef GetOSName(uint32_t cmd) { 4812 switch (cmd) { 4813 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 4814 return llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4815 case llvm::MachO::LC_VERSION_MIN_MACOSX: 4816 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4817 case llvm::MachO::LC_VERSION_MIN_TVOS: 4818 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4819 case llvm::MachO::LC_VERSION_MIN_WATCHOS: 4820 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4821 default: 4822 llvm_unreachable("unexpected LC_VERSION load command"); 4823 } 4824 } 4825 4826 namespace { 4827 struct OSEnv { 4828 llvm::StringRef os_type; 4829 llvm::StringRef environment; 4830 OSEnv(uint32_t cmd) { 4831 switch (cmd) { 4832 case llvm::MachO::PLATFORM_MACOS: 4833 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4834 return; 4835 case llvm::MachO::PLATFORM_IOS: 4836 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4837 return; 4838 case llvm::MachO::PLATFORM_TVOS: 4839 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4840 return; 4841 case llvm::MachO::PLATFORM_WATCHOS: 4842 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4843 return; 4844 case llvm::MachO::PLATFORM_BRIDGEOS: 4845 os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS); 4846 return; 4847 case llvm::MachO::PLATFORM_DRIVERKIT: 4848 os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit); 4849 return; 4850 case llvm::MachO::PLATFORM_MACCATALYST: 4851 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4852 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI); 4853 return; 4854 case llvm::MachO::PLATFORM_IOSSIMULATOR: 4855 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4856 environment = 4857 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4858 return; 4859 case llvm::MachO::PLATFORM_TVOSSIMULATOR: 4860 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4861 environment = 4862 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4863 return; 4864 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR: 4865 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4866 environment = 4867 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4868 return; 4869 case llvm::MachO::PLATFORM_XROS: 4870 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4871 return; 4872 case llvm::MachO::PLATFORM_XROS_SIMULATOR: 4873 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4874 environment = 4875 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4876 return; 4877 default: { 4878 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 4879 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION"); 4880 } 4881 } 4882 } 4883 }; 4884 4885 struct MinOS { 4886 uint32_t major_version, minor_version, patch_version; 4887 MinOS(uint32_t version) 4888 : major_version(version >> 16), minor_version((version >> 8) & 0xffu), 4889 patch_version(version & 0xffu) {} 4890 }; 4891 } // namespace 4892 4893 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, 4894 const lldb_private::DataExtractor &data, 4895 lldb::offset_t lc_offset, 4896 ModuleSpec &base_spec, 4897 lldb_private::ModuleSpecList &all_specs) { 4898 auto &base_arch = base_spec.GetArchitecture(); 4899 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype); 4900 if (!base_arch.IsValid()) 4901 return; 4902 4903 bool found_any = false; 4904 auto add_triple = [&](const llvm::Triple &triple) { 4905 auto spec = base_spec; 4906 spec.GetArchitecture().GetTriple() = triple; 4907 if (spec.GetArchitecture().IsValid()) { 4908 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset); 4909 all_specs.Append(spec); 4910 found_any = true; 4911 } 4912 }; 4913 4914 // Set OS to an unspecified unknown or a "*" so it can match any OS 4915 llvm::Triple base_triple = base_arch.GetTriple(); 4916 base_triple.setOS(llvm::Triple::UnknownOS); 4917 base_triple.setOSName(llvm::StringRef()); 4918 4919 if (header.filetype == MH_PRELOAD) { 4920 if (header.cputype == CPU_TYPE_ARM) { 4921 // If this is a 32-bit arm binary, and it's a standalone binary, force 4922 // the Vendor to Apple so we don't accidentally pick up the generic 4923 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the 4924 // frame pointer register; most other armv7 ABIs use a combination of 4925 // r7 and r11. 4926 base_triple.setVendor(llvm::Triple::Apple); 4927 } else { 4928 // Set vendor to an unspecified unknown or a "*" so it can match any 4929 // vendor This is required for correct behavior of EFI debugging on 4930 // x86_64 4931 base_triple.setVendor(llvm::Triple::UnknownVendor); 4932 base_triple.setVendorName(llvm::StringRef()); 4933 } 4934 return add_triple(base_triple); 4935 } 4936 4937 llvm::MachO::load_command load_cmd; 4938 4939 // See if there is an LC_VERSION_MIN_* load command that can give 4940 // us the OS type. 4941 lldb::offset_t offset = lc_offset; 4942 for (uint32_t i = 0; i < header.ncmds; ++i) { 4943 const lldb::offset_t cmd_offset = offset; 4944 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4945 break; 4946 4947 llvm::MachO::version_min_command version_min; 4948 switch (load_cmd.cmd) { 4949 case llvm::MachO::LC_VERSION_MIN_MACOSX: 4950 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 4951 case llvm::MachO::LC_VERSION_MIN_TVOS: 4952 case llvm::MachO::LC_VERSION_MIN_WATCHOS: { 4953 if (load_cmd.cmdsize != sizeof(version_min)) 4954 break; 4955 if (data.ExtractBytes(cmd_offset, sizeof(version_min), 4956 data.GetByteOrder(), &version_min) == 0) 4957 break; 4958 MinOS min_os(version_min.version); 4959 llvm::SmallString<32> os_name; 4960 llvm::raw_svector_ostream os(os_name); 4961 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.' 4962 << min_os.minor_version << '.' << min_os.patch_version; 4963 4964 auto triple = base_triple; 4965 triple.setOSName(os.str()); 4966 4967 // Disambiguate legacy simulator platforms. 4968 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX && 4969 (base_triple.getArch() == llvm::Triple::x86_64 || 4970 base_triple.getArch() == llvm::Triple::x86)) { 4971 // The combination of legacy LC_VERSION_MIN load command and 4972 // x86 architecture always indicates a simulator environment. 4973 // The combination of LC_VERSION_MIN and arm architecture only 4974 // appears for native binaries. Back-deploying simulator 4975 // binaries on Apple Silicon Macs use the modern unambigous 4976 // LC_BUILD_VERSION load commands; no special handling required. 4977 triple.setEnvironment(llvm::Triple::Simulator); 4978 } 4979 add_triple(triple); 4980 break; 4981 } 4982 default: 4983 break; 4984 } 4985 4986 offset = cmd_offset + load_cmd.cmdsize; 4987 } 4988 4989 // See if there are LC_BUILD_VERSION load commands that can give 4990 // us the OS type. 4991 offset = lc_offset; 4992 for (uint32_t i = 0; i < header.ncmds; ++i) { 4993 const lldb::offset_t cmd_offset = offset; 4994 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4995 break; 4996 4997 do { 4998 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) { 4999 llvm::MachO::build_version_command build_version; 5000 if (load_cmd.cmdsize < sizeof(build_version)) { 5001 // Malformed load command. 5002 break; 5003 } 5004 if (data.ExtractBytes(cmd_offset, sizeof(build_version), 5005 data.GetByteOrder(), &build_version) == 0) 5006 break; 5007 MinOS min_os(build_version.minos); 5008 OSEnv os_env(build_version.platform); 5009 llvm::SmallString<16> os_name; 5010 llvm::raw_svector_ostream os(os_name); 5011 os << os_env.os_type << min_os.major_version << '.' 5012 << min_os.minor_version << '.' << min_os.patch_version; 5013 auto triple = base_triple; 5014 triple.setOSName(os.str()); 5015 os_name.clear(); 5016 if (!os_env.environment.empty()) 5017 triple.setEnvironmentName(os_env.environment); 5018 add_triple(triple); 5019 } 5020 } while (false); 5021 offset = cmd_offset + load_cmd.cmdsize; 5022 } 5023 5024 if (!found_any) { 5025 add_triple(base_triple); 5026 } 5027 } 5028 5029 ArchSpec ObjectFileMachO::GetArchitecture( 5030 ModuleSP module_sp, const llvm::MachO::mach_header &header, 5031 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) { 5032 ModuleSpecList all_specs; 5033 ModuleSpec base_spec; 5034 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic), 5035 base_spec, all_specs); 5036 5037 // If the object file offers multiple alternative load commands, 5038 // pick the one that matches the module. 5039 if (module_sp) { 5040 const ArchSpec &module_arch = module_sp->GetArchitecture(); 5041 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 5042 ArchSpec mach_arch = 5043 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 5044 if (module_arch.IsCompatibleMatch(mach_arch)) 5045 return mach_arch; 5046 } 5047 } 5048 5049 // Return the first arch we found. 5050 if (all_specs.GetSize() == 0) 5051 return {}; 5052 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture(); 5053 } 5054 5055 UUID ObjectFileMachO::GetUUID() { 5056 ModuleSP module_sp(GetModule()); 5057 if (module_sp) { 5058 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5059 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5060 return GetUUID(m_header, m_data, offset); 5061 } 5062 return UUID(); 5063 } 5064 5065 uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) { 5066 ModuleSP module_sp = GetModule(); 5067 if (!module_sp) 5068 return 0; 5069 5070 uint32_t count = 0; 5071 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5072 llvm::MachO::load_command load_cmd; 5073 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5074 std::vector<std::string> rpath_paths; 5075 std::vector<std::string> rpath_relative_paths; 5076 std::vector<std::string> at_exec_relative_paths; 5077 uint32_t i; 5078 for (i = 0; i < m_header.ncmds; ++i) { 5079 const uint32_t cmd_offset = offset; 5080 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5081 break; 5082 5083 switch (load_cmd.cmd) { 5084 case LC_RPATH: 5085 case LC_LOAD_DYLIB: 5086 case LC_LOAD_WEAK_DYLIB: 5087 case LC_REEXPORT_DYLIB: 5088 case LC_LOAD_DYLINKER: 5089 case LC_LOADFVMLIB: 5090 case LC_LOAD_UPWARD_DYLIB: { 5091 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 5092 // For LC_LOAD_DYLIB there is an alternate encoding 5093 // which adds a uint32_t `flags` field for `DYLD_USE_*` 5094 // flags. This can be detected by a timestamp field with 5095 // the `DYLIB_USE_MARKER` constant value. 5096 bool is_delayed_init = false; 5097 uint32_t use_command_marker = m_data.GetU32(&offset); 5098 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) { 5099 offset += 4; /* uint32_t current_version */ 5100 offset += 4; /* uint32_t compat_version */ 5101 uint32_t flags = m_data.GetU32(&offset); 5102 // If this LC_LOAD_DYLIB is marked delay-init, 5103 // don't report it as a dependent library -- it 5104 // may be loaded in the process at some point, 5105 // but will most likely not be load at launch. 5106 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */) 5107 is_delayed_init = true; 5108 } 5109 const char *path = m_data.PeekCStr(name_offset); 5110 if (path && !is_delayed_init) { 5111 if (load_cmd.cmd == LC_RPATH) 5112 rpath_paths.push_back(path); 5113 else { 5114 if (path[0] == '@') { 5115 if (strncmp(path, "@rpath", strlen("@rpath")) == 0) 5116 rpath_relative_paths.push_back(path + strlen("@rpath")); 5117 else if (strncmp(path, "@executable_path", 5118 strlen("@executable_path")) == 0) 5119 at_exec_relative_paths.push_back(path + 5120 strlen("@executable_path")); 5121 } else { 5122 FileSpec file_spec(path); 5123 if (files.AppendIfUnique(file_spec)) 5124 count++; 5125 } 5126 } 5127 } 5128 } break; 5129 5130 default: 5131 break; 5132 } 5133 offset = cmd_offset + load_cmd.cmdsize; 5134 } 5135 5136 FileSpec this_file_spec(m_file); 5137 FileSystem::Instance().Resolve(this_file_spec); 5138 5139 if (!rpath_paths.empty()) { 5140 // Fixup all LC_RPATH values to be absolute paths. 5141 const std::string this_directory = 5142 this_file_spec.GetDirectory().GetString(); 5143 for (auto &rpath : rpath_paths) { 5144 if (llvm::StringRef(rpath).starts_with(g_loader_path)) 5145 rpath = this_directory + rpath.substr(g_loader_path.size()); 5146 else if (llvm::StringRef(rpath).starts_with(g_executable_path)) 5147 rpath = this_directory + rpath.substr(g_executable_path.size()); 5148 } 5149 5150 for (const auto &rpath_relative_path : rpath_relative_paths) { 5151 for (const auto &rpath : rpath_paths) { 5152 std::string path = rpath; 5153 path += rpath_relative_path; 5154 // It is OK to resolve this path because we must find a file on disk 5155 // for us to accept it anyway if it is rpath relative. 5156 FileSpec file_spec(path); 5157 FileSystem::Instance().Resolve(file_spec); 5158 if (FileSystem::Instance().Exists(file_spec) && 5159 files.AppendIfUnique(file_spec)) { 5160 count++; 5161 break; 5162 } 5163 } 5164 } 5165 } 5166 5167 // We may have @executable_paths but no RPATHS. Figure those out here. 5168 // Only do this if this object file is the executable. We have no way to 5169 // get back to the actual executable otherwise, so we won't get the right 5170 // path. 5171 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) { 5172 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent(); 5173 for (const auto &at_exec_relative_path : at_exec_relative_paths) { 5174 FileSpec file_spec = 5175 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path); 5176 if (FileSystem::Instance().Exists(file_spec) && 5177 files.AppendIfUnique(file_spec)) 5178 count++; 5179 } 5180 } 5181 return count; 5182 } 5183 5184 lldb_private::Address ObjectFileMachO::GetEntryPointAddress() { 5185 // If the object file is not an executable it can't hold the entry point. 5186 // m_entry_point_address is initialized to an invalid address, so we can just 5187 // return that. If m_entry_point_address is valid it means we've found it 5188 // already, so return the cached value. 5189 5190 if ((!IsExecutable() && !IsDynamicLoader()) || 5191 m_entry_point_address.IsValid()) { 5192 return m_entry_point_address; 5193 } 5194 5195 // Otherwise, look for the UnixThread or Thread command. The data for the 5196 // Thread command is given in /usr/include/mach-o.h, but it is basically: 5197 // 5198 // uint32_t flavor - this is the flavor argument you would pass to 5199 // thread_get_state 5200 // uint32_t count - this is the count of longs in the thread state data 5201 // struct XXX_thread_state state - this is the structure from 5202 // <machine/thread_status.h> corresponding to the flavor. 5203 // <repeat this trio> 5204 // 5205 // So we just keep reading the various register flavors till we find the GPR 5206 // one, then read the PC out of there. 5207 // FIXME: We will need to have a "RegisterContext data provider" class at some 5208 // point that can get all the registers 5209 // out of data in this form & attach them to a given thread. That should 5210 // underlie the MacOS X User process plugin, and we'll also need it for the 5211 // MacOS X Core File process plugin. When we have that we can also use it 5212 // here. 5213 // 5214 // For now we hard-code the offsets and flavors we need: 5215 // 5216 // 5217 5218 ModuleSP module_sp(GetModule()); 5219 if (module_sp) { 5220 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5221 llvm::MachO::load_command load_cmd; 5222 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5223 uint32_t i; 5224 lldb::addr_t start_address = LLDB_INVALID_ADDRESS; 5225 bool done = false; 5226 5227 for (i = 0; i < m_header.ncmds; ++i) { 5228 const lldb::offset_t cmd_offset = offset; 5229 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5230 break; 5231 5232 switch (load_cmd.cmd) { 5233 case LC_UNIXTHREAD: 5234 case LC_THREAD: { 5235 while (offset < cmd_offset + load_cmd.cmdsize) { 5236 uint32_t flavor = m_data.GetU32(&offset); 5237 uint32_t count = m_data.GetU32(&offset); 5238 if (count == 0) { 5239 // We've gotten off somehow, log and exit; 5240 return m_entry_point_address; 5241 } 5242 5243 switch (m_header.cputype) { 5244 case llvm::MachO::CPU_TYPE_ARM: 5245 if (flavor == 1 || 5246 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32 5247 // from mach/arm/thread_status.h 5248 { 5249 offset += 60; // This is the offset of pc in the GPR thread state 5250 // data structure. 5251 start_address = m_data.GetU32(&offset); 5252 done = true; 5253 } 5254 break; 5255 case llvm::MachO::CPU_TYPE_ARM64: 5256 case llvm::MachO::CPU_TYPE_ARM64_32: 5257 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h 5258 { 5259 offset += 256; // This is the offset of pc in the GPR thread state 5260 // data structure. 5261 start_address = m_data.GetU64(&offset); 5262 done = true; 5263 } 5264 break; 5265 case llvm::MachO::CPU_TYPE_I386: 5266 if (flavor == 5267 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h 5268 { 5269 offset += 40; // This is the offset of eip in the GPR thread state 5270 // data structure. 5271 start_address = m_data.GetU32(&offset); 5272 done = true; 5273 } 5274 break; 5275 case llvm::MachO::CPU_TYPE_X86_64: 5276 if (flavor == 5277 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h 5278 { 5279 offset += 16 * 8; // This is the offset of rip in the GPR thread 5280 // state data structure. 5281 start_address = m_data.GetU64(&offset); 5282 done = true; 5283 } 5284 break; 5285 default: 5286 return m_entry_point_address; 5287 } 5288 // Haven't found the GPR flavor yet, skip over the data for this 5289 // flavor: 5290 if (done) 5291 break; 5292 offset += count * 4; 5293 } 5294 } break; 5295 case LC_MAIN: { 5296 uint64_t entryoffset = m_data.GetU64(&offset); 5297 SectionSP text_segment_sp = 5298 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 5299 if (text_segment_sp) { 5300 done = true; 5301 start_address = text_segment_sp->GetFileAddress() + entryoffset; 5302 } 5303 } break; 5304 5305 default: 5306 break; 5307 } 5308 if (done) 5309 break; 5310 5311 // Go to the next load command: 5312 offset = cmd_offset + load_cmd.cmdsize; 5313 } 5314 5315 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) { 5316 if (GetSymtab()) { 5317 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType( 5318 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode, 5319 Symtab::eDebugAny, Symtab::eVisibilityAny); 5320 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) { 5321 start_address = dyld_start_sym->GetAddress().GetFileAddress(); 5322 } 5323 } 5324 } 5325 5326 if (start_address != LLDB_INVALID_ADDRESS) { 5327 // We got the start address from the load commands, so now resolve that 5328 // address in the sections of this ObjectFile: 5329 if (!m_entry_point_address.ResolveAddressUsingFileSections( 5330 start_address, GetSectionList())) { 5331 m_entry_point_address.Clear(); 5332 } 5333 } else { 5334 // We couldn't read the UnixThread load command - maybe it wasn't there. 5335 // As a fallback look for the "start" symbol in the main executable. 5336 5337 ModuleSP module_sp(GetModule()); 5338 5339 if (module_sp) { 5340 SymbolContextList contexts; 5341 SymbolContext context; 5342 module_sp->FindSymbolsWithNameAndType(ConstString("start"), 5343 eSymbolTypeCode, contexts); 5344 if (contexts.GetSize()) { 5345 if (contexts.GetContextAtIndex(0, context)) 5346 m_entry_point_address = context.symbol->GetAddress(); 5347 } 5348 } 5349 } 5350 } 5351 5352 return m_entry_point_address; 5353 } 5354 5355 lldb_private::Address ObjectFileMachO::GetBaseAddress() { 5356 lldb_private::Address header_addr; 5357 SectionList *section_list = GetSectionList(); 5358 if (section_list) { 5359 SectionSP text_segment_sp( 5360 section_list->FindSectionByName(GetSegmentNameTEXT())); 5361 if (text_segment_sp) { 5362 header_addr.SetSection(text_segment_sp); 5363 header_addr.SetOffset(0); 5364 } 5365 } 5366 return header_addr; 5367 } 5368 5369 uint32_t ObjectFileMachO::GetNumThreadContexts() { 5370 ModuleSP module_sp(GetModule()); 5371 if (module_sp) { 5372 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5373 if (!m_thread_context_offsets_valid) { 5374 m_thread_context_offsets_valid = true; 5375 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5376 FileRangeArray::Entry file_range; 5377 llvm::MachO::thread_command thread_cmd; 5378 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5379 const uint32_t cmd_offset = offset; 5380 if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr) 5381 break; 5382 5383 if (thread_cmd.cmd == LC_THREAD) { 5384 file_range.SetRangeBase(offset); 5385 file_range.SetByteSize(thread_cmd.cmdsize - 8); 5386 m_thread_context_offsets.Append(file_range); 5387 } 5388 offset = cmd_offset + thread_cmd.cmdsize; 5389 } 5390 } 5391 } 5392 return m_thread_context_offsets.GetSize(); 5393 } 5394 5395 std::vector<std::tuple<offset_t, offset_t>> 5396 ObjectFileMachO::FindLC_NOTEByName(std::string name) { 5397 std::vector<std::tuple<offset_t, offset_t>> results; 5398 ModuleSP module_sp(GetModule()); 5399 if (module_sp) { 5400 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5401 5402 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5403 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5404 const uint32_t cmd_offset = offset; 5405 llvm::MachO::load_command lc = {}; 5406 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 5407 break; 5408 if (lc.cmd == LC_NOTE) { 5409 char data_owner[17]; 5410 m_data.CopyData(offset, 16, data_owner); 5411 data_owner[16] = '\0'; 5412 offset += 16; 5413 5414 if (name == data_owner) { 5415 offset_t payload_offset = m_data.GetU64_unchecked(&offset); 5416 offset_t payload_size = m_data.GetU64_unchecked(&offset); 5417 results.push_back({payload_offset, payload_size}); 5418 } 5419 } 5420 offset = cmd_offset + lc.cmdsize; 5421 } 5422 } 5423 return results; 5424 } 5425 5426 std::string ObjectFileMachO::GetIdentifierString() { 5427 Log *log( 5428 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5429 ModuleSP module_sp(GetModule()); 5430 if (module_sp) { 5431 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5432 5433 auto lc_notes = FindLC_NOTEByName("kern ver str"); 5434 for (auto lc_note : lc_notes) { 5435 offset_t payload_offset = std::get<0>(lc_note); 5436 offset_t payload_size = std::get<1>(lc_note); 5437 uint32_t version; 5438 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5439 if (version == 1) { 5440 uint32_t strsize = payload_size - sizeof(uint32_t); 5441 std::string result(strsize, '\0'); 5442 m_data.CopyData(payload_offset, strsize, result.data()); 5443 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'", 5444 result.c_str()); 5445 return result; 5446 } 5447 } 5448 } 5449 5450 // Second, make a pass over the load commands looking for an obsolete 5451 // LC_IDENT load command. 5452 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5453 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5454 const uint32_t cmd_offset = offset; 5455 llvm::MachO::ident_command ident_command; 5456 if (m_data.GetU32(&offset, &ident_command, 2) == nullptr) 5457 break; 5458 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) { 5459 std::string result(ident_command.cmdsize, '\0'); 5460 if (m_data.CopyData(offset, ident_command.cmdsize, result.data()) == 5461 ident_command.cmdsize) { 5462 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str()); 5463 return result; 5464 } 5465 } 5466 offset = cmd_offset + ident_command.cmdsize; 5467 } 5468 } 5469 return {}; 5470 } 5471 5472 AddressableBits ObjectFileMachO::GetAddressableBits() { 5473 AddressableBits addressable_bits; 5474 5475 Log *log(GetLog(LLDBLog::Process)); 5476 ModuleSP module_sp(GetModule()); 5477 if (module_sp) { 5478 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5479 auto lc_notes = FindLC_NOTEByName("addrable bits"); 5480 for (auto lc_note : lc_notes) { 5481 offset_t payload_offset = std::get<0>(lc_note); 5482 uint32_t version; 5483 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5484 if (version == 3) { 5485 uint32_t num_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5486 addressable_bits.SetAddressableBits(num_addr_bits); 5487 LLDB_LOGF(log, 5488 "LC_NOTE 'addrable bits' v3 found, value %d " 5489 "bits", 5490 num_addr_bits); 5491 } 5492 if (version == 4) { 5493 uint32_t lo_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5494 uint32_t hi_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5495 5496 if (lo_addr_bits == hi_addr_bits) 5497 addressable_bits.SetAddressableBits(lo_addr_bits); 5498 else 5499 addressable_bits.SetAddressableBits(lo_addr_bits, hi_addr_bits); 5500 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits", 5501 lo_addr_bits, hi_addr_bits); 5502 } 5503 } 5504 } 5505 } 5506 return addressable_bits; 5507 } 5508 5509 bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, 5510 bool &value_is_offset, 5511 UUID &uuid, 5512 ObjectFile::BinaryType &type) { 5513 Log *log( 5514 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5515 value = LLDB_INVALID_ADDRESS; 5516 value_is_offset = false; 5517 uuid.Clear(); 5518 uint32_t log2_pagesize = 0; // not currently passed up to caller 5519 uint32_t platform = 0; // not currently passed up to caller 5520 ModuleSP module_sp(GetModule()); 5521 if (module_sp) { 5522 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5523 5524 auto lc_notes = FindLC_NOTEByName("main bin spec"); 5525 for (auto lc_note : lc_notes) { 5526 offset_t payload_offset = std::get<0>(lc_note); 5527 5528 // struct main_bin_spec 5529 // { 5530 // uint32_t version; // currently 2 5531 // uint32_t type; // 0 == unspecified, 1 == kernel, 5532 // // 2 == user process, 5533 // // 3 == standalone binary 5534 // uint64_t address; // UINT64_MAX if address not specified 5535 // uint64_t slide; // slide, UINT64_MAX if unspecified 5536 // // 0 if no slide needs to be applied to 5537 // // file address 5538 // uuid_t uuid; // all zero's if uuid not specified 5539 // uint32_t log2_pagesize; // process page size in log base 2, 5540 // // e.g. 4k pages are 12. 5541 // // 0 for unspecified 5542 // uint32_t platform; // The Mach-O platform for this corefile. 5543 // // 0 for unspecified. 5544 // // The values are defined in 5545 // // <mach-o/loader.h>, PLATFORM_*. 5546 // } __attribute((packed)); 5547 5548 // "main bin spec" (main binary specification) data payload is 5549 // formatted: 5550 // uint32_t version [currently 1] 5551 // uint32_t type [0 == unspecified, 1 == kernel, 5552 // 2 == user process, 3 == firmware ] 5553 // uint64_t address [ UINT64_MAX if address not specified ] 5554 // uuid_t uuid [ all zero's if uuid not specified ] 5555 // uint32_t log2_pagesize [ process page size in log base 5556 // 2, e.g. 4k pages are 12. 5557 // 0 for unspecified ] 5558 // uint32_t unused [ for alignment ] 5559 5560 uint32_t version; 5561 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr && 5562 version <= 2) { 5563 uint32_t binspec_type = 0; 5564 uuid_t raw_uuid; 5565 memset(raw_uuid, 0, sizeof(uuid_t)); 5566 5567 if (!m_data.GetU32(&payload_offset, &binspec_type, 1)) 5568 return false; 5569 if (!m_data.GetU64(&payload_offset, &value, 1)) 5570 return false; 5571 uint64_t slide = LLDB_INVALID_ADDRESS; 5572 if (version > 1 && !m_data.GetU64(&payload_offset, &slide, 1)) 5573 return false; 5574 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) { 5575 value = slide; 5576 value_is_offset = true; 5577 } 5578 5579 if (m_data.CopyData(payload_offset, sizeof(uuid_t), raw_uuid) != 0) { 5580 uuid = UUID(raw_uuid, sizeof(uuid_t)); 5581 // convert the "main bin spec" type into our 5582 // ObjectFile::BinaryType enum 5583 const char *typestr = "unrecognized type"; 5584 switch (binspec_type) { 5585 case 0: 5586 type = eBinaryTypeUnknown; 5587 typestr = "uknown"; 5588 break; 5589 case 1: 5590 type = eBinaryTypeKernel; 5591 typestr = "xnu kernel"; 5592 break; 5593 case 2: 5594 type = eBinaryTypeUser; 5595 typestr = "userland dyld"; 5596 break; 5597 case 3: 5598 type = eBinaryTypeStandalone; 5599 typestr = "standalone"; 5600 break; 5601 } 5602 LLDB_LOGF(log, 5603 "LC_NOTE 'main bin spec' found, version %d type %d " 5604 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s", 5605 version, type, typestr, value, 5606 value_is_offset ? "true" : "false", 5607 uuid.GetAsString().c_str()); 5608 if (!m_data.GetU32(&payload_offset, &log2_pagesize, 1)) 5609 return false; 5610 if (version > 1 && !m_data.GetU32(&payload_offset, &platform, 1)) 5611 return false; 5612 return true; 5613 } 5614 } 5615 } 5616 } 5617 return false; 5618 } 5619 5620 bool ObjectFileMachO::GetCorefileThreadExtraInfos( 5621 std::vector<lldb::tid_t> &tids) { 5622 tids.clear(); 5623 ModuleSP module_sp(GetModule()); 5624 if (module_sp) { 5625 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5626 5627 Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread)); 5628 auto lc_notes = FindLC_NOTEByName("process metadata"); 5629 for (auto lc_note : lc_notes) { 5630 offset_t payload_offset = std::get<0>(lc_note); 5631 offset_t strsize = std::get<1>(lc_note); 5632 std::string buf(strsize, '\0'); 5633 if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) { 5634 LLDB_LOGF(log, 5635 "Unable to read %" PRIu64 5636 " bytes of 'process metadata' LC_NOTE JSON contents", 5637 strsize); 5638 return false; 5639 } 5640 while (buf.back() == '\0') 5641 buf.resize(buf.size() - 1); 5642 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf); 5643 StructuredData::Dictionary *dict = object_sp->GetAsDictionary(); 5644 if (!dict) { 5645 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not " 5646 "get a dictionary."); 5647 return false; 5648 } 5649 StructuredData::Array *threads; 5650 if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) { 5651 LLDB_LOGF(log, 5652 "'process metadata' LC_NOTE does not have a 'threads' key"); 5653 return false; 5654 } 5655 if (threads->GetSize() != GetNumThreadContexts()) { 5656 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of " 5657 "threads does not match number of LC_THREADS."); 5658 return false; 5659 } 5660 const size_t num_threads = threads->GetSize(); 5661 for (size_t i = 0; i < num_threads; i++) { 5662 std::optional<StructuredData::Dictionary *> maybe_thread = 5663 threads->GetItemAtIndexAsDictionary(i); 5664 if (!maybe_thread) { 5665 LLDB_LOGF(log, 5666 "Unable to read 'process metadata' LC_NOTE, threads " 5667 "array does not have a dictionary at index %zu.", 5668 i); 5669 return false; 5670 } 5671 StructuredData::Dictionary *thread = *maybe_thread; 5672 lldb::tid_t tid = LLDB_INVALID_THREAD_ID; 5673 if (thread->GetValueForKeyAsInteger<lldb::tid_t>("thread_id", tid)) 5674 if (tid == 0) 5675 tid = LLDB_INVALID_THREAD_ID; 5676 tids.push_back(tid); 5677 } 5678 5679 if (log) { 5680 StreamString logmsg; 5681 logmsg.Printf("LC_NOTE 'process metadata' found: "); 5682 dict->Dump(logmsg, /* pretty_print */ false); 5683 LLDB_LOGF(log, "%s", logmsg.GetData()); 5684 } 5685 return true; 5686 } 5687 } 5688 return false; 5689 } 5690 5691 lldb::RegisterContextSP 5692 ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx, 5693 lldb_private::Thread &thread) { 5694 lldb::RegisterContextSP reg_ctx_sp; 5695 5696 ModuleSP module_sp(GetModule()); 5697 if (module_sp) { 5698 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5699 if (!m_thread_context_offsets_valid) 5700 GetNumThreadContexts(); 5701 5702 const FileRangeArray::Entry *thread_context_file_range = 5703 m_thread_context_offsets.GetEntryAtIndex(idx); 5704 if (thread_context_file_range) { 5705 5706 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(), 5707 thread_context_file_range->GetByteSize()); 5708 5709 switch (m_header.cputype) { 5710 case llvm::MachO::CPU_TYPE_ARM64: 5711 case llvm::MachO::CPU_TYPE_ARM64_32: 5712 reg_ctx_sp = 5713 std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data); 5714 break; 5715 5716 case llvm::MachO::CPU_TYPE_ARM: 5717 reg_ctx_sp = 5718 std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data); 5719 break; 5720 5721 case llvm::MachO::CPU_TYPE_I386: 5722 reg_ctx_sp = 5723 std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data); 5724 break; 5725 5726 case llvm::MachO::CPU_TYPE_X86_64: 5727 reg_ctx_sp = 5728 std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data); 5729 break; 5730 } 5731 } 5732 } 5733 return reg_ctx_sp; 5734 } 5735 5736 ObjectFile::Type ObjectFileMachO::CalculateType() { 5737 switch (m_header.filetype) { 5738 case MH_OBJECT: // 0x1u 5739 if (GetAddressByteSize() == 4) { 5740 // 32 bit kexts are just object files, but they do have a valid 5741 // UUID load command. 5742 if (GetUUID()) { 5743 // this checking for the UUID load command is not enough we could 5744 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5745 // this is required of kexts 5746 if (m_strata == eStrataInvalid) 5747 m_strata = eStrataKernel; 5748 return eTypeSharedLibrary; 5749 } 5750 } 5751 return eTypeObjectFile; 5752 5753 case MH_EXECUTE: 5754 return eTypeExecutable; // 0x2u 5755 case MH_FVMLIB: 5756 return eTypeSharedLibrary; // 0x3u 5757 case MH_CORE: 5758 return eTypeCoreFile; // 0x4u 5759 case MH_PRELOAD: 5760 return eTypeSharedLibrary; // 0x5u 5761 case MH_DYLIB: 5762 return eTypeSharedLibrary; // 0x6u 5763 case MH_DYLINKER: 5764 return eTypeDynamicLinker; // 0x7u 5765 case MH_BUNDLE: 5766 return eTypeSharedLibrary; // 0x8u 5767 case MH_DYLIB_STUB: 5768 return eTypeStubLibrary; // 0x9u 5769 case MH_DSYM: 5770 return eTypeDebugInfo; // 0xAu 5771 case MH_KEXT_BUNDLE: 5772 return eTypeSharedLibrary; // 0xBu 5773 default: 5774 break; 5775 } 5776 return eTypeUnknown; 5777 } 5778 5779 ObjectFile::Strata ObjectFileMachO::CalculateStrata() { 5780 switch (m_header.filetype) { 5781 case MH_OBJECT: // 0x1u 5782 { 5783 // 32 bit kexts are just object files, but they do have a valid 5784 // UUID load command. 5785 if (GetUUID()) { 5786 // this checking for the UUID load command is not enough we could 5787 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5788 // this is required of kexts 5789 if (m_type == eTypeInvalid) 5790 m_type = eTypeSharedLibrary; 5791 5792 return eStrataKernel; 5793 } 5794 } 5795 return eStrataUnknown; 5796 5797 case MH_EXECUTE: // 0x2u 5798 // Check for the MH_DYLDLINK bit in the flags 5799 if (m_header.flags & MH_DYLDLINK) { 5800 return eStrataUser; 5801 } else { 5802 SectionList *section_list = GetSectionList(); 5803 if (section_list) { 5804 static ConstString g_kld_section_name("__KLD"); 5805 if (section_list->FindSectionByName(g_kld_section_name)) 5806 return eStrataKernel; 5807 } 5808 } 5809 return eStrataRawImage; 5810 5811 case MH_FVMLIB: 5812 return eStrataUser; // 0x3u 5813 case MH_CORE: 5814 return eStrataUnknown; // 0x4u 5815 case MH_PRELOAD: 5816 return eStrataRawImage; // 0x5u 5817 case MH_DYLIB: 5818 return eStrataUser; // 0x6u 5819 case MH_DYLINKER: 5820 return eStrataUser; // 0x7u 5821 case MH_BUNDLE: 5822 return eStrataUser; // 0x8u 5823 case MH_DYLIB_STUB: 5824 return eStrataUser; // 0x9u 5825 case MH_DSYM: 5826 return eStrataUnknown; // 0xAu 5827 case MH_KEXT_BUNDLE: 5828 return eStrataKernel; // 0xBu 5829 default: 5830 break; 5831 } 5832 return eStrataUnknown; 5833 } 5834 5835 llvm::VersionTuple ObjectFileMachO::GetVersion() { 5836 ModuleSP module_sp(GetModule()); 5837 if (module_sp) { 5838 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5839 llvm::MachO::dylib_command load_cmd; 5840 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5841 uint32_t version_cmd = 0; 5842 uint64_t version = 0; 5843 uint32_t i; 5844 for (i = 0; i < m_header.ncmds; ++i) { 5845 const lldb::offset_t cmd_offset = offset; 5846 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5847 break; 5848 5849 if (load_cmd.cmd == LC_ID_DYLIB) { 5850 if (version_cmd == 0) { 5851 version_cmd = load_cmd.cmd; 5852 if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr) 5853 break; 5854 version = load_cmd.dylib.current_version; 5855 } 5856 break; // Break for now unless there is another more complete version 5857 // number load command in the future. 5858 } 5859 offset = cmd_offset + load_cmd.cmdsize; 5860 } 5861 5862 if (version_cmd == LC_ID_DYLIB) { 5863 unsigned major = (version & 0xFFFF0000ull) >> 16; 5864 unsigned minor = (version & 0x0000FF00ull) >> 8; 5865 unsigned subminor = (version & 0x000000FFull); 5866 return llvm::VersionTuple(major, minor, subminor); 5867 } 5868 } 5869 return llvm::VersionTuple(); 5870 } 5871 5872 ArchSpec ObjectFileMachO::GetArchitecture() { 5873 ModuleSP module_sp(GetModule()); 5874 ArchSpec arch; 5875 if (module_sp) { 5876 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5877 5878 return GetArchitecture(module_sp, m_header, m_data, 5879 MachHeaderSizeFromMagic(m_header.magic)); 5880 } 5881 return arch; 5882 } 5883 5884 void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process, 5885 addr_t &base_addr, UUID &uuid) { 5886 uuid.Clear(); 5887 base_addr = LLDB_INVALID_ADDRESS; 5888 if (process && process->GetDynamicLoader()) { 5889 DynamicLoader *dl = process->GetDynamicLoader(); 5890 LazyBool using_shared_cache; 5891 LazyBool private_shared_cache; 5892 dl->GetSharedCacheInformation(base_addr, uuid, using_shared_cache, 5893 private_shared_cache); 5894 } 5895 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5896 LLDB_LOGF( 5897 log, 5898 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64, 5899 uuid.GetAsString().c_str(), base_addr); 5900 } 5901 5902 // From dyld SPI header dyld_process_info.h 5903 typedef void *dyld_process_info; 5904 struct lldb_copy__dyld_process_cache_info { 5905 uuid_t cacheUUID; // UUID of cache used by process 5906 uint64_t cacheBaseAddress; // load address of dyld shared cache 5907 bool noCache; // process is running without a dyld cache 5908 bool privateCache; // process is using a private copy of its dyld cache 5909 }; 5910 5911 // #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with 5912 // llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile 5913 // errors. So we need to use the actual underlying types of task_t and 5914 // kern_return_t below. 5915 extern "C" unsigned int /*task_t*/ mach_task_self(); 5916 5917 void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) { 5918 uuid.Clear(); 5919 base_addr = LLDB_INVALID_ADDRESS; 5920 5921 #if defined(__APPLE__) 5922 uint8_t *(*dyld_get_all_image_infos)(void); 5923 dyld_get_all_image_infos = 5924 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos"); 5925 if (dyld_get_all_image_infos) { 5926 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos(); 5927 if (dyld_all_image_infos_address) { 5928 uint32_t *version = (uint32_t *) 5929 dyld_all_image_infos_address; // version <mach-o/dyld_images.h> 5930 if (*version >= 13) { 5931 uuid_t *sharedCacheUUID_address = 0; 5932 int wordsize = sizeof(uint8_t *); 5933 if (wordsize == 8) { 5934 sharedCacheUUID_address = 5935 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 5936 160); // sharedCacheUUID <mach-o/dyld_images.h> 5937 if (*version >= 15) 5938 base_addr = 5939 *(uint64_t 5940 *)((uint8_t *)dyld_all_image_infos_address + 5941 176); // sharedCacheBaseAddress <mach-o/dyld_images.h> 5942 } else { 5943 sharedCacheUUID_address = 5944 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 5945 84); // sharedCacheUUID <mach-o/dyld_images.h> 5946 if (*version >= 15) { 5947 base_addr = 0; 5948 base_addr = 5949 *(uint32_t 5950 *)((uint8_t *)dyld_all_image_infos_address + 5951 100); // sharedCacheBaseAddress <mach-o/dyld_images.h> 5952 } 5953 } 5954 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t)); 5955 } 5956 } 5957 } else { 5958 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI 5959 dyld_process_info (*dyld_process_info_create)( 5960 unsigned int /* task_t */ task, uint64_t timestamp, 5961 unsigned int /*kern_return_t*/ *kernelError); 5962 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo); 5963 void (*dyld_process_info_release)(dyld_process_info info); 5964 5965 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t, 5966 unsigned int /*kern_return_t*/ *)) 5967 dlsym(RTLD_DEFAULT, "_dyld_process_info_create"); 5968 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym( 5969 RTLD_DEFAULT, "_dyld_process_info_get_cache"); 5970 dyld_process_info_release = 5971 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release"); 5972 5973 if (dyld_process_info_create && dyld_process_info_get_cache) { 5974 unsigned int /*kern_return_t */ kern_ret; 5975 dyld_process_info process_info = 5976 dyld_process_info_create(::mach_task_self(), 0, &kern_ret); 5977 if (process_info) { 5978 struct lldb_copy__dyld_process_cache_info sc_info; 5979 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info)); 5980 dyld_process_info_get_cache(process_info, &sc_info); 5981 if (sc_info.cacheBaseAddress != 0) { 5982 base_addr = sc_info.cacheBaseAddress; 5983 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t)); 5984 } 5985 dyld_process_info_release(process_info); 5986 } 5987 } 5988 } 5989 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5990 if (log && uuid.IsValid()) 5991 LLDB_LOGF(log, 5992 "lldb's in-memory shared cache has a UUID of %s base address of " 5993 "0x%" PRIx64, 5994 uuid.GetAsString().c_str(), base_addr); 5995 #endif 5996 } 5997 5998 static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data, 5999 lldb::offset_t offset, 6000 size_t ncmds) { 6001 for (size_t i = 0; i < ncmds; i++) { 6002 const lldb::offset_t load_cmd_offset = offset; 6003 llvm::MachO::load_command lc = {}; 6004 if (data.GetU32(&offset, &lc.cmd, 2) == nullptr) 6005 break; 6006 6007 uint32_t version = 0; 6008 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX || 6009 lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS || 6010 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS || 6011 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) { 6012 // struct version_min_command { 6013 // uint32_t cmd; // LC_VERSION_MIN_* 6014 // uint32_t cmdsize; 6015 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz 6016 // uint32_t sdk; 6017 // }; 6018 // We want to read version. 6019 version = data.GetU32(&offset); 6020 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) { 6021 // struct build_version_command { 6022 // uint32_t cmd; // LC_BUILD_VERSION 6023 // uint32_t cmdsize; 6024 // uint32_t platform; 6025 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz 6026 // uint32_t sdk; 6027 // uint32_t ntools; 6028 // }; 6029 // We want to read minos. 6030 offset += sizeof(uint32_t); // Skip over platform 6031 version = data.GetU32(&offset); // Extract minos 6032 } 6033 6034 if (version) { 6035 const uint32_t xxxx = version >> 16; 6036 const uint32_t yy = (version >> 8) & 0xffu; 6037 const uint32_t zz = version & 0xffu; 6038 if (xxxx) 6039 return llvm::VersionTuple(xxxx, yy, zz); 6040 } 6041 offset = load_cmd_offset + lc.cmdsize; 6042 } 6043 return llvm::VersionTuple(); 6044 } 6045 6046 llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() { 6047 if (!m_min_os_version) 6048 m_min_os_version = FindMinimumVersionInfo( 6049 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6050 return *m_min_os_version; 6051 } 6052 6053 llvm::VersionTuple ObjectFileMachO::GetSDKVersion() { 6054 if (!m_sdk_versions) 6055 m_sdk_versions = FindMinimumVersionInfo( 6056 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6057 return *m_sdk_versions; 6058 } 6059 6060 bool ObjectFileMachO::GetIsDynamicLinkEditor() { 6061 return m_header.filetype == llvm::MachO::MH_DYLINKER; 6062 } 6063 6064 bool ObjectFileMachO::CanTrustAddressRanges() { 6065 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can 6066 // be trusted by LLDB. 6067 return m_header.filetype == llvm::MachO::MH_DSYM; 6068 } 6069 6070 bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() { 6071 return m_allow_assembly_emulation_unwind_plans; 6072 } 6073 6074 Section *ObjectFileMachO::GetMachHeaderSection() { 6075 // Find the first address of the mach header which is the first non-zero file 6076 // sized section whose file offset is zero. This is the base file address of 6077 // the mach-o file which can be subtracted from the vmaddr of the other 6078 // segments found in memory and added to the load address 6079 ModuleSP module_sp = GetModule(); 6080 if (!module_sp) 6081 return nullptr; 6082 SectionList *section_list = GetSectionList(); 6083 if (!section_list) 6084 return nullptr; 6085 6086 // Some binaries can have a TEXT segment with a non-zero file offset. 6087 // Binaries in the shared cache are one example. Some hand-generated 6088 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order 6089 // in the file, even though they're laid out correctly in vmaddr terms. 6090 SectionSP text_segment_sp = 6091 section_list->FindSectionByName(GetSegmentNameTEXT()); 6092 if (text_segment_sp.get() && SectionIsLoadable(text_segment_sp.get())) 6093 return text_segment_sp.get(); 6094 6095 const size_t num_sections = section_list->GetSize(); 6096 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6097 Section *section = section_list->GetSectionAtIndex(sect_idx).get(); 6098 if (section->GetFileOffset() == 0 && SectionIsLoadable(section)) 6099 return section; 6100 } 6101 6102 return nullptr; 6103 } 6104 6105 bool ObjectFileMachO::SectionIsLoadable(const Section *section) { 6106 if (!section) 6107 return false; 6108 if (section->IsThreadSpecific()) 6109 return false; 6110 if (GetModule().get() != section->GetModule().get()) 6111 return false; 6112 // firmware style binaries with llvm gcov segment do 6113 // not have that segment mapped into memory. 6114 if (section->GetName() == GetSegmentNameLLVM_COV()) { 6115 const Strata strata = GetStrata(); 6116 if (strata == eStrataKernel || strata == eStrataRawImage) 6117 return false; 6118 } 6119 // Be careful with __LINKEDIT and __DWARF segments 6120 if (section->GetName() == GetSegmentNameLINKEDIT() || 6121 section->GetName() == GetSegmentNameDWARF()) { 6122 // Only map __LINKEDIT and __DWARF if we have an in memory image and 6123 // this isn't a kernel binary like a kext or mach_kernel. 6124 const bool is_memory_image = (bool)m_process_wp.lock(); 6125 const Strata strata = GetStrata(); 6126 if (is_memory_image == false || strata == eStrataKernel) 6127 return false; 6128 } 6129 return true; 6130 } 6131 6132 lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage( 6133 lldb::addr_t header_load_address, const Section *header_section, 6134 const Section *section) { 6135 ModuleSP module_sp = GetModule(); 6136 if (module_sp && header_section && section && 6137 header_load_address != LLDB_INVALID_ADDRESS) { 6138 lldb::addr_t file_addr = header_section->GetFileAddress(); 6139 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section)) 6140 return section->GetFileAddress() - file_addr + header_load_address; 6141 } 6142 return LLDB_INVALID_ADDRESS; 6143 } 6144 6145 bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value, 6146 bool value_is_offset) { 6147 Log *log(GetLog(LLDBLog::DynamicLoader)); 6148 ModuleSP module_sp = GetModule(); 6149 if (!module_sp) 6150 return false; 6151 6152 SectionList *section_list = GetSectionList(); 6153 if (!section_list) 6154 return false; 6155 6156 size_t num_loaded_sections = 0; 6157 const size_t num_sections = section_list->GetSize(); 6158 6159 // Warn if some top-level segments map to the same address. The binary may be 6160 // malformed. 6161 const bool warn_multiple = true; 6162 6163 if (log) { 6164 StreamString logmsg; 6165 logmsg << "ObjectFileMachO::SetLoadAddress "; 6166 if (GetFileSpec()) 6167 logmsg << "path='" << GetFileSpec().GetPath() << "' "; 6168 if (GetUUID()) { 6169 logmsg << "uuid=" << GetUUID().GetAsString(); 6170 } 6171 LLDB_LOGF(log, "%s", logmsg.GetData()); 6172 } 6173 if (value_is_offset) { 6174 // "value" is an offset to apply to each top level segment 6175 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6176 // Iterate through the object file sections to find all of the 6177 // sections that size on disk (to avoid __PAGEZERO) and load them 6178 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6179 if (SectionIsLoadable(section_sp.get())) { 6180 LLDB_LOGF(log, 6181 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6182 "0x%" PRIx64, 6183 section_sp->GetName().AsCString(), 6184 section_sp->GetFileAddress() + value); 6185 if (target.GetSectionLoadList().SetSectionLoadAddress( 6186 section_sp, section_sp->GetFileAddress() + value, 6187 warn_multiple)) 6188 ++num_loaded_sections; 6189 } 6190 } 6191 } else { 6192 // "value" is the new base address of the mach_header, adjust each 6193 // section accordingly 6194 6195 Section *mach_header_section = GetMachHeaderSection(); 6196 if (mach_header_section) { 6197 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6198 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6199 6200 lldb::addr_t section_load_addr = 6201 CalculateSectionLoadAddressForMemoryImage( 6202 value, mach_header_section, section_sp.get()); 6203 if (section_load_addr != LLDB_INVALID_ADDRESS) { 6204 LLDB_LOGF(log, 6205 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6206 "0x%" PRIx64, 6207 section_sp->GetName().AsCString(), section_load_addr); 6208 if (target.GetSectionLoadList().SetSectionLoadAddress( 6209 section_sp, section_load_addr, warn_multiple)) 6210 ++num_loaded_sections; 6211 } 6212 } 6213 } 6214 } 6215 return num_loaded_sections > 0; 6216 } 6217 6218 struct all_image_infos_header { 6219 uint32_t version; // currently 1 6220 uint32_t imgcount; // number of binary images 6221 uint64_t entries_fileoff; // file offset in the corefile of where the array of 6222 // struct entry's begin. 6223 uint32_t entries_size; // size of 'struct entry'. 6224 uint32_t unused; 6225 }; 6226 6227 struct image_entry { 6228 uint64_t filepath_offset; // offset in corefile to c-string of the file path, 6229 // UINT64_MAX if unavailable. 6230 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if 6231 // uuid is unknown. 6232 uint64_t load_address; // UINT64_MAX if unknown. 6233 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's. 6234 uint32_t segment_count; // The number of segments for this binary. 6235 uint32_t unused; 6236 6237 image_entry() { 6238 filepath_offset = UINT64_MAX; 6239 memset(&uuid, 0, sizeof(uuid_t)); 6240 segment_count = 0; 6241 load_address = UINT64_MAX; 6242 seg_addrs_offset = UINT64_MAX; 6243 unused = 0; 6244 } 6245 image_entry(const image_entry &rhs) { 6246 filepath_offset = rhs.filepath_offset; 6247 memcpy(&uuid, &rhs.uuid, sizeof(uuid_t)); 6248 segment_count = rhs.segment_count; 6249 seg_addrs_offset = rhs.seg_addrs_offset; 6250 load_address = rhs.load_address; 6251 unused = rhs.unused; 6252 } 6253 }; 6254 6255 struct segment_vmaddr { 6256 char segname[16]; 6257 uint64_t vmaddr; 6258 uint64_t unused; 6259 6260 segment_vmaddr() { 6261 memset(&segname, 0, 16); 6262 vmaddr = UINT64_MAX; 6263 unused = 0; 6264 } 6265 segment_vmaddr(const segment_vmaddr &rhs) { 6266 memcpy(&segname, &rhs.segname, 16); 6267 vmaddr = rhs.vmaddr; 6268 unused = rhs.unused; 6269 } 6270 }; 6271 6272 // Write the payload for the "all image infos" LC_NOTE into 6273 // the supplied all_image_infos_payload, assuming that this 6274 // will be written into the corefile starting at 6275 // initial_file_offset. 6276 // 6277 // The placement of this payload is a little tricky. We're 6278 // laying this out as 6279 // 6280 // 1. header (struct all_image_info_header) 6281 // 2. Array of fixed-size (struct image_entry)'s, one 6282 // per binary image present in the process. 6283 // 3. Arrays of (struct segment_vmaddr)'s, a varying number 6284 // for each binary image. 6285 // 4. Variable length c-strings of binary image filepaths, 6286 // one per binary. 6287 // 6288 // To compute where everything will be laid out in the 6289 // payload, we need to iterate over the images and calculate 6290 // how many segment_vmaddr structures each image will need, 6291 // and how long each image's filepath c-string is. There 6292 // are some multiple passes over the image list while calculating 6293 // everything. 6294 6295 static offset_t 6296 CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp, 6297 offset_t initial_file_offset, 6298 StreamString &all_image_infos_payload, 6299 lldb_private::SaveCoreOptions &options) { 6300 Target &target = process_sp->GetTarget(); 6301 ModuleList modules = target.GetImages(); 6302 6303 // stack-only corefiles have no reason to include binaries that 6304 // are not executing; we're trying to make the smallest corefile 6305 // we can, so leave the rest out. 6306 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly) 6307 modules.Clear(); 6308 6309 std::set<std::string> executing_uuids; 6310 std::vector<ThreadSP> thread_list = 6311 process_sp->CalculateCoreFileThreadList(options); 6312 for (const ThreadSP &thread_sp : thread_list) { 6313 uint32_t stack_frame_count = thread_sp->GetStackFrameCount(); 6314 for (uint32_t j = 0; j < stack_frame_count; j++) { 6315 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(j); 6316 Address pc = stack_frame_sp->GetFrameCodeAddress(); 6317 ModuleSP module_sp = pc.GetModule(); 6318 if (module_sp) { 6319 UUID uuid = module_sp->GetUUID(); 6320 if (uuid.IsValid()) { 6321 executing_uuids.insert(uuid.GetAsString()); 6322 modules.AppendIfNeeded(module_sp); 6323 } 6324 } 6325 } 6326 } 6327 size_t modules_count = modules.GetSize(); 6328 6329 struct all_image_infos_header infos; 6330 infos.version = 1; 6331 infos.imgcount = modules_count; 6332 infos.entries_size = sizeof(image_entry); 6333 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header); 6334 infos.unused = 0; 6335 6336 all_image_infos_payload.PutHex32(infos.version); 6337 all_image_infos_payload.PutHex32(infos.imgcount); 6338 all_image_infos_payload.PutHex64(infos.entries_fileoff); 6339 all_image_infos_payload.PutHex32(infos.entries_size); 6340 all_image_infos_payload.PutHex32(infos.unused); 6341 6342 // First create the structures for all of the segment name+vmaddr vectors 6343 // for each module, so we will know the size of them as we add the 6344 // module entries. 6345 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs; 6346 for (size_t i = 0; i < modules_count; i++) { 6347 ModuleSP module = modules.GetModuleAtIndex(i); 6348 6349 SectionList *sections = module->GetSectionList(); 6350 size_t sections_count = sections->GetSize(); 6351 std::vector<segment_vmaddr> segment_vmaddrs; 6352 for (size_t j = 0; j < sections_count; j++) { 6353 SectionSP section = sections->GetSectionAtIndex(j); 6354 if (!section->GetParent().get()) { 6355 addr_t vmaddr = section->GetLoadBaseAddress(&target); 6356 if (vmaddr == LLDB_INVALID_ADDRESS) 6357 continue; 6358 ConstString name = section->GetName(); 6359 segment_vmaddr seg_vmaddr; 6360 // This is the uncommon case where strncpy is exactly 6361 // the right one, doesn't need to be nul terminated. 6362 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and 6363 // is not guaranteed to be nul-terminated if all 16 characters are 6364 // used. 6365 // coverity[buffer_size_warning] 6366 strncpy(seg_vmaddr.segname, name.AsCString(), 6367 sizeof(seg_vmaddr.segname)); 6368 seg_vmaddr.vmaddr = vmaddr; 6369 seg_vmaddr.unused = 0; 6370 segment_vmaddrs.push_back(seg_vmaddr); 6371 } 6372 } 6373 modules_segment_vmaddrs.push_back(segment_vmaddrs); 6374 } 6375 6376 offset_t size_of_vmaddr_structs = 0; 6377 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6378 size_of_vmaddr_structs += 6379 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr); 6380 } 6381 6382 offset_t size_of_filepath_cstrings = 0; 6383 for (size_t i = 0; i < modules_count; i++) { 6384 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6385 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1; 6386 } 6387 6388 // Calculate the file offsets of our "all image infos" payload in the 6389 // corefile. initial_file_offset the original value passed in to this method. 6390 6391 offset_t start_of_entries = 6392 initial_file_offset + sizeof(all_image_infos_header); 6393 offset_t start_of_seg_vmaddrs = 6394 start_of_entries + sizeof(image_entry) * modules_count; 6395 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs; 6396 6397 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings; 6398 6399 // Now write the one-per-module 'struct image_entry' into the 6400 // StringStream; keep track of where the struct segment_vmaddr 6401 // entries for each module will end up in the corefile. 6402 6403 offset_t current_string_offset = start_of_filenames; 6404 offset_t current_segaddrs_offset = start_of_seg_vmaddrs; 6405 std::vector<struct image_entry> image_entries; 6406 for (size_t i = 0; i < modules_count; i++) { 6407 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6408 6409 struct image_entry ent; 6410 memcpy(&ent.uuid, module_sp->GetUUID().GetBytes().data(), sizeof(ent.uuid)); 6411 if (modules_segment_vmaddrs[i].size() > 0) { 6412 ent.segment_count = modules_segment_vmaddrs[i].size(); 6413 ent.seg_addrs_offset = current_segaddrs_offset; 6414 } 6415 ent.filepath_offset = current_string_offset; 6416 ObjectFile *objfile = module_sp->GetObjectFile(); 6417 if (objfile) { 6418 Address base_addr(objfile->GetBaseAddress()); 6419 if (base_addr.IsValid()) { 6420 ent.load_address = base_addr.GetLoadAddress(&target); 6421 } 6422 } 6423 6424 all_image_infos_payload.PutHex64(ent.filepath_offset); 6425 all_image_infos_payload.PutRawBytes(ent.uuid, sizeof(ent.uuid)); 6426 all_image_infos_payload.PutHex64(ent.load_address); 6427 all_image_infos_payload.PutHex64(ent.seg_addrs_offset); 6428 all_image_infos_payload.PutHex32(ent.segment_count); 6429 6430 if (executing_uuids.find(module_sp->GetUUID().GetAsString()) != 6431 executing_uuids.end()) 6432 all_image_infos_payload.PutHex32(1); 6433 else 6434 all_image_infos_payload.PutHex32(0); 6435 6436 current_segaddrs_offset += ent.segment_count * sizeof(segment_vmaddr); 6437 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1; 6438 } 6439 6440 // Now write the struct segment_vmaddr entries into the StringStream. 6441 6442 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6443 if (modules_segment_vmaddrs[i].size() == 0) 6444 continue; 6445 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) { 6446 all_image_infos_payload.PutRawBytes(segvm.segname, sizeof(segvm.segname)); 6447 all_image_infos_payload.PutHex64(segvm.vmaddr); 6448 all_image_infos_payload.PutHex64(segvm.unused); 6449 } 6450 } 6451 6452 for (size_t i = 0; i < modules_count; i++) { 6453 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6454 std::string filepath = module_sp->GetFileSpec().GetPath(); 6455 all_image_infos_payload.PutRawBytes(filepath.data(), filepath.size() + 1); 6456 } 6457 6458 return final_file_offset; 6459 } 6460 6461 // Temp struct used to combine contiguous memory regions with 6462 // identical permissions. 6463 struct page_object { 6464 addr_t addr; 6465 addr_t size; 6466 uint32_t prot; 6467 }; 6468 6469 bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp, 6470 lldb_private::SaveCoreOptions &options, 6471 Status &error) { 6472 // The FileSpec and Process are already checked in PluginManager::SaveCore. 6473 assert(options.GetOutputFile().has_value()); 6474 assert(process_sp); 6475 const FileSpec outfile = options.GetOutputFile().value(); 6476 6477 // MachO defaults to dirty pages 6478 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified) 6479 options.SetStyle(eSaveCoreDirtyOnly); 6480 6481 Target &target = process_sp->GetTarget(); 6482 const ArchSpec target_arch = target.GetArchitecture(); 6483 const llvm::Triple &target_triple = target_arch.GetTriple(); 6484 if (target_triple.getVendor() == llvm::Triple::Apple && 6485 (target_triple.getOS() == llvm::Triple::MacOSX || 6486 target_triple.getOS() == llvm::Triple::IOS || 6487 target_triple.getOS() == llvm::Triple::WatchOS || 6488 target_triple.getOS() == llvm::Triple::TvOS || 6489 target_triple.getOS() == llvm::Triple::XROS)) { 6490 // NEED_BRIDGEOS_TRIPLE target_triple.getOS() == llvm::Triple::BridgeOS)) 6491 // { 6492 bool make_core = false; 6493 switch (target_arch.GetMachine()) { 6494 case llvm::Triple::aarch64: 6495 case llvm::Triple::aarch64_32: 6496 case llvm::Triple::arm: 6497 case llvm::Triple::thumb: 6498 case llvm::Triple::x86: 6499 case llvm::Triple::x86_64: 6500 make_core = true; 6501 break; 6502 default: 6503 error = Status::FromErrorStringWithFormat( 6504 "unsupported core architecture: %s", target_triple.str().c_str()); 6505 break; 6506 } 6507 6508 if (make_core) { 6509 CoreFileMemoryRanges core_ranges; 6510 error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges); 6511 if (error.Success()) { 6512 const uint32_t addr_byte_size = target_arch.GetAddressByteSize(); 6513 const ByteOrder byte_order = target_arch.GetByteOrder(); 6514 std::vector<llvm::MachO::segment_command_64> segment_load_commands; 6515 for (const auto &core_range_info : core_ranges) { 6516 // TODO: Refactor RangeDataVector to have a data iterator. 6517 const auto &core_range = core_range_info.data; 6518 uint32_t cmd_type = LC_SEGMENT_64; 6519 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64); 6520 if (addr_byte_size == 4) { 6521 cmd_type = LC_SEGMENT; 6522 segment_size = sizeof(llvm::MachO::segment_command); 6523 } 6524 // Skip any ranges with no read/write/execute permissions and empty 6525 // ranges. 6526 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0) 6527 continue; 6528 uint32_t vm_prot = 0; 6529 if (core_range.lldb_permissions & ePermissionsReadable) 6530 vm_prot |= VM_PROT_READ; 6531 if (core_range.lldb_permissions & ePermissionsWritable) 6532 vm_prot |= VM_PROT_WRITE; 6533 if (core_range.lldb_permissions & ePermissionsExecutable) 6534 vm_prot |= VM_PROT_EXECUTE; 6535 const addr_t vm_addr = core_range.range.start(); 6536 const addr_t vm_size = core_range.range.size(); 6537 llvm::MachO::segment_command_64 segment = { 6538 cmd_type, // uint32_t cmd; 6539 segment_size, // uint32_t cmdsize; 6540 {0}, // char segname[16]; 6541 vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O 6542 vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O 6543 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O 6544 vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O 6545 vm_prot, // uint32_t maxprot; 6546 vm_prot, // uint32_t initprot; 6547 0, // uint32_t nsects; 6548 0}; // uint32_t flags; 6549 segment_load_commands.push_back(segment); 6550 } 6551 6552 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order); 6553 6554 llvm::MachO::mach_header_64 mach_header; 6555 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC; 6556 mach_header.cputype = target_arch.GetMachOCPUType(); 6557 mach_header.cpusubtype = target_arch.GetMachOCPUSubType(); 6558 mach_header.filetype = MH_CORE; 6559 mach_header.ncmds = segment_load_commands.size(); 6560 mach_header.flags = 0; 6561 mach_header.reserved = 0; 6562 ThreadList &thread_list = process_sp->GetThreadList(); 6563 const uint32_t num_threads = thread_list.GetSize(); 6564 6565 // Make an array of LC_THREAD data items. Each one contains the 6566 // contents of the LC_THREAD load command. The data doesn't contain 6567 // the load command + load command size, we will add the load command 6568 // and load command size as we emit the data. 6569 std::vector<StreamString> LC_THREAD_datas(num_threads); 6570 for (auto &LC_THREAD_data : LC_THREAD_datas) { 6571 LC_THREAD_data.GetFlags().Set(Stream::eBinary); 6572 LC_THREAD_data.SetAddressByteSize(addr_byte_size); 6573 LC_THREAD_data.SetByteOrder(byte_order); 6574 } 6575 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { 6576 ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); 6577 if (thread_sp) { 6578 switch (mach_header.cputype) { 6579 case llvm::MachO::CPU_TYPE_ARM64: 6580 case llvm::MachO::CPU_TYPE_ARM64_32: 6581 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD( 6582 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6583 break; 6584 6585 case llvm::MachO::CPU_TYPE_ARM: 6586 RegisterContextDarwin_arm_Mach::Create_LC_THREAD( 6587 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6588 break; 6589 6590 case llvm::MachO::CPU_TYPE_I386: 6591 RegisterContextDarwin_i386_Mach::Create_LC_THREAD( 6592 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6593 break; 6594 6595 case llvm::MachO::CPU_TYPE_X86_64: 6596 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD( 6597 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6598 break; 6599 } 6600 } 6601 } 6602 6603 // The size of the load command is the size of the segments... 6604 if (addr_byte_size == 8) { 6605 mach_header.sizeofcmds = segment_load_commands.size() * 6606 sizeof(llvm::MachO::segment_command_64); 6607 } else { 6608 mach_header.sizeofcmds = segment_load_commands.size() * 6609 sizeof(llvm::MachO::segment_command); 6610 } 6611 6612 // and the size of all LC_THREAD load command 6613 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6614 ++mach_header.ncmds; 6615 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize(); 6616 } 6617 6618 // Bits will be set to indicate which bits are NOT used in 6619 // addressing in this process or 0 for unknown. 6620 uint64_t address_mask = process_sp->GetCodeAddressMask(); 6621 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6622 // LC_NOTE "addrable bits" 6623 mach_header.ncmds++; 6624 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6625 } 6626 6627 // LC_NOTE "process metadata" 6628 mach_header.ncmds++; 6629 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6630 6631 // LC_NOTE "all image infos" 6632 mach_header.ncmds++; 6633 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6634 6635 // Write the mach header 6636 buffer.PutHex32(mach_header.magic); 6637 buffer.PutHex32(mach_header.cputype); 6638 buffer.PutHex32(mach_header.cpusubtype); 6639 buffer.PutHex32(mach_header.filetype); 6640 buffer.PutHex32(mach_header.ncmds); 6641 buffer.PutHex32(mach_header.sizeofcmds); 6642 buffer.PutHex32(mach_header.flags); 6643 if (addr_byte_size == 8) { 6644 buffer.PutHex32(mach_header.reserved); 6645 } 6646 6647 // Skip the mach header and all load commands and align to the next 6648 // 0x1000 byte boundary 6649 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds; 6650 6651 file_offset = llvm::alignTo(file_offset, 16); 6652 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes; 6653 6654 // Add "addrable bits" LC_NOTE when an address mask is available 6655 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6656 std::unique_ptr<LCNoteEntry> addrable_bits_lcnote_up( 6657 new LCNoteEntry(addr_byte_size, byte_order)); 6658 addrable_bits_lcnote_up->name = "addrable bits"; 6659 addrable_bits_lcnote_up->payload_file_offset = file_offset; 6660 int bits = std::bitset<64>(~address_mask).count(); 6661 addrable_bits_lcnote_up->payload.PutHex32(4); // version 6662 addrable_bits_lcnote_up->payload.PutHex32( 6663 bits); // # of bits used for low addresses 6664 addrable_bits_lcnote_up->payload.PutHex32( 6665 bits); // # of bits used for high addresses 6666 addrable_bits_lcnote_up->payload.PutHex32(0); // reserved 6667 6668 file_offset += addrable_bits_lcnote_up->payload.GetSize(); 6669 6670 lc_notes.push_back(std::move(addrable_bits_lcnote_up)); 6671 } 6672 6673 // Add "process metadata" LC_NOTE 6674 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up( 6675 new LCNoteEntry(addr_byte_size, byte_order)); 6676 thread_extrainfo_lcnote_up->name = "process metadata"; 6677 thread_extrainfo_lcnote_up->payload_file_offset = file_offset; 6678 6679 StructuredData::DictionarySP dict( 6680 std::make_shared<StructuredData::Dictionary>()); 6681 StructuredData::ArraySP threads( 6682 std::make_shared<StructuredData::Array>()); 6683 for (const ThreadSP &thread_sp : 6684 process_sp->CalculateCoreFileThreadList(options)) { 6685 StructuredData::DictionarySP thread( 6686 std::make_shared<StructuredData::Dictionary>()); 6687 thread->AddIntegerItem("thread_id", thread_sp->GetID()); 6688 threads->AddItem(thread); 6689 } 6690 dict->AddItem("threads", threads); 6691 StreamString strm; 6692 dict->Dump(strm, /* pretty */ false); 6693 thread_extrainfo_lcnote_up->payload.PutRawBytes(strm.GetData(), 6694 strm.GetSize()); 6695 6696 file_offset += thread_extrainfo_lcnote_up->payload.GetSize(); 6697 file_offset = llvm::alignTo(file_offset, 16); 6698 lc_notes.push_back(std::move(thread_extrainfo_lcnote_up)); 6699 6700 // Add "all image infos" LC_NOTE 6701 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up( 6702 new LCNoteEntry(addr_byte_size, byte_order)); 6703 all_image_infos_lcnote_up->name = "all image infos"; 6704 all_image_infos_lcnote_up->payload_file_offset = file_offset; 6705 file_offset = CreateAllImageInfosPayload( 6706 process_sp, file_offset, all_image_infos_lcnote_up->payload, 6707 options); 6708 lc_notes.push_back(std::move(all_image_infos_lcnote_up)); 6709 6710 // Add LC_NOTE load commands 6711 for (auto &lcnote : lc_notes) { 6712 // Add the LC_NOTE load command to the file. 6713 buffer.PutHex32(LC_NOTE); 6714 buffer.PutHex32(sizeof(llvm::MachO::note_command)); 6715 char namebuf[16]; 6716 memset(namebuf, 0, sizeof(namebuf)); 6717 // This is the uncommon case where strncpy is exactly 6718 // the right one, doesn't need to be nul terminated. 6719 // LC_NOTE name field is char[16] and is not guaranteed to be 6720 // nul-terminated. 6721 // coverity[buffer_size_warning] 6722 strncpy(namebuf, lcnote->name.c_str(), sizeof(namebuf)); 6723 buffer.PutRawBytes(namebuf, sizeof(namebuf)); 6724 buffer.PutHex64(lcnote->payload_file_offset); 6725 buffer.PutHex64(lcnote->payload.GetSize()); 6726 } 6727 6728 // Align to 4096-byte page boundary for the LC_SEGMENTs. 6729 file_offset = llvm::alignTo(file_offset, 4096); 6730 6731 for (auto &segment : segment_load_commands) { 6732 segment.fileoff = file_offset; 6733 file_offset += segment.filesize; 6734 } 6735 6736 // Write out all of the LC_THREAD load commands 6737 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6738 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize(); 6739 buffer.PutHex32(LC_THREAD); 6740 buffer.PutHex32(8 + LC_THREAD_data_size); // cmd + cmdsize + data 6741 buffer.Write(LC_THREAD_data.GetString().data(), LC_THREAD_data_size); 6742 } 6743 6744 // Write out all of the segment load commands 6745 for (const auto &segment : segment_load_commands) { 6746 buffer.PutHex32(segment.cmd); 6747 buffer.PutHex32(segment.cmdsize); 6748 buffer.PutRawBytes(segment.segname, sizeof(segment.segname)); 6749 if (addr_byte_size == 8) { 6750 buffer.PutHex64(segment.vmaddr); 6751 buffer.PutHex64(segment.vmsize); 6752 buffer.PutHex64(segment.fileoff); 6753 buffer.PutHex64(segment.filesize); 6754 } else { 6755 buffer.PutHex32(static_cast<uint32_t>(segment.vmaddr)); 6756 buffer.PutHex32(static_cast<uint32_t>(segment.vmsize)); 6757 buffer.PutHex32(static_cast<uint32_t>(segment.fileoff)); 6758 buffer.PutHex32(static_cast<uint32_t>(segment.filesize)); 6759 } 6760 buffer.PutHex32(segment.maxprot); 6761 buffer.PutHex32(segment.initprot); 6762 buffer.PutHex32(segment.nsects); 6763 buffer.PutHex32(segment.flags); 6764 } 6765 6766 std::string core_file_path(outfile.GetPath()); 6767 auto core_file = FileSystem::Instance().Open( 6768 outfile, File::eOpenOptionWriteOnly | File::eOpenOptionTruncate | 6769 File::eOpenOptionCanCreate); 6770 if (!core_file) { 6771 error = Status::FromError(core_file.takeError()); 6772 } else { 6773 // Read 1 page at a time 6774 uint8_t bytes[0x1000]; 6775 // Write the mach header and load commands out to the core file 6776 size_t bytes_written = buffer.GetString().size(); 6777 error = 6778 core_file.get()->Write(buffer.GetString().data(), bytes_written); 6779 if (error.Success()) { 6780 6781 for (auto &lcnote : lc_notes) { 6782 if (core_file.get()->SeekFromStart(lcnote->payload_file_offset) == 6783 -1) { 6784 error = Status::FromErrorStringWithFormat( 6785 "Unable to seek to corefile pos " 6786 "to write '%s' LC_NOTE payload", 6787 lcnote->name.c_str()); 6788 return false; 6789 } 6790 bytes_written = lcnote->payload.GetSize(); 6791 error = core_file.get()->Write(lcnote->payload.GetData(), 6792 bytes_written); 6793 if (!error.Success()) 6794 return false; 6795 } 6796 6797 // Now write the file data for all memory segments in the process 6798 for (const auto &segment : segment_load_commands) { 6799 if (core_file.get()->SeekFromStart(segment.fileoff) == -1) { 6800 error = Status::FromErrorStringWithFormat( 6801 "unable to seek to offset 0x%" PRIx64 " in '%s'", 6802 segment.fileoff, core_file_path.c_str()); 6803 break; 6804 } 6805 6806 target.GetDebugger().GetAsyncOutputStream()->Printf( 6807 "Saving %" PRId64 6808 " bytes of data for memory region at 0x%" PRIx64 "\n", 6809 segment.vmsize, segment.vmaddr); 6810 addr_t bytes_left = segment.vmsize; 6811 addr_t addr = segment.vmaddr; 6812 Status memory_read_error; 6813 while (bytes_left > 0 && error.Success()) { 6814 const size_t bytes_to_read = 6815 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left; 6816 6817 // In a savecore setting, we don't really care about caching, 6818 // as the data is dumped and very likely never read again, 6819 // so we call ReadMemoryFromInferior to bypass it. 6820 const size_t bytes_read = process_sp->ReadMemoryFromInferior( 6821 addr, bytes, bytes_to_read, memory_read_error); 6822 6823 if (bytes_read == bytes_to_read) { 6824 size_t bytes_written = bytes_read; 6825 error = core_file.get()->Write(bytes, bytes_written); 6826 bytes_left -= bytes_read; 6827 addr += bytes_read; 6828 } else { 6829 // Some pages within regions are not readable, those should 6830 // be zero filled 6831 memset(bytes, 0, bytes_to_read); 6832 size_t bytes_written = bytes_to_read; 6833 error = core_file.get()->Write(bytes, bytes_written); 6834 bytes_left -= bytes_to_read; 6835 addr += bytes_to_read; 6836 } 6837 } 6838 } 6839 } 6840 } 6841 } 6842 } 6843 return true; // This is the right plug to handle saving core files for 6844 // this process 6845 } 6846 return false; 6847 } 6848 6849 ObjectFileMachO::MachOCorefileAllImageInfos 6850 ObjectFileMachO::GetCorefileAllImageInfos() { 6851 MachOCorefileAllImageInfos image_infos; 6852 Log *log(GetLog(LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process | 6853 LLDBLog::DynamicLoader)); 6854 6855 auto lc_notes = FindLC_NOTEByName("all image infos"); 6856 for (auto lc_note : lc_notes) { 6857 offset_t payload_offset = std::get<0>(lc_note); 6858 // Read the struct all_image_infos_header. 6859 uint32_t version = m_data.GetU32(&payload_offset); 6860 if (version != 1) { 6861 return image_infos; 6862 } 6863 uint32_t imgcount = m_data.GetU32(&payload_offset); 6864 uint64_t entries_fileoff = m_data.GetU64(&payload_offset); 6865 // 'entries_size' is not used, nor is the 'unused' entry. 6866 // offset += 4; // uint32_t entries_size; 6867 // offset += 4; // uint32_t unused; 6868 6869 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images", 6870 version, imgcount); 6871 payload_offset = entries_fileoff; 6872 for (uint32_t i = 0; i < imgcount; i++) { 6873 // Read the struct image_entry. 6874 offset_t filepath_offset = m_data.GetU64(&payload_offset); 6875 uuid_t uuid; 6876 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6877 sizeof(uuid_t)); 6878 uint64_t load_address = m_data.GetU64(&payload_offset); 6879 offset_t seg_addrs_offset = m_data.GetU64(&payload_offset); 6880 uint32_t segment_count = m_data.GetU32(&payload_offset); 6881 uint32_t currently_executing = m_data.GetU32(&payload_offset); 6882 6883 MachOCorefileImageEntry image_entry; 6884 image_entry.filename = (const char *)m_data.GetCStr(&filepath_offset); 6885 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6886 image_entry.load_address = load_address; 6887 image_entry.currently_executing = currently_executing; 6888 6889 offset_t seg_vmaddrs_offset = seg_addrs_offset; 6890 for (uint32_t j = 0; j < segment_count; j++) { 6891 char segname[17]; 6892 m_data.CopyData(seg_vmaddrs_offset, 16, segname); 6893 segname[16] = '\0'; 6894 seg_vmaddrs_offset += 16; 6895 uint64_t vmaddr = m_data.GetU64(&seg_vmaddrs_offset); 6896 seg_vmaddrs_offset += 8; /* unused */ 6897 6898 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr}; 6899 image_entry.segment_load_addresses.push_back(new_seg); 6900 } 6901 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s", 6902 image_entry.filename.c_str(), 6903 image_entry.uuid.GetAsString().c_str(), 6904 image_entry.load_address, 6905 image_entry.currently_executing ? "currently executing" 6906 : "not currently executing"); 6907 image_infos.all_image_infos.push_back(image_entry); 6908 } 6909 } 6910 6911 lc_notes = FindLC_NOTEByName("load binary"); 6912 for (auto lc_note : lc_notes) { 6913 offset_t payload_offset = std::get<0>(lc_note); 6914 uint32_t version = m_data.GetU32(&payload_offset); 6915 if (version == 1) { 6916 uuid_t uuid; 6917 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6918 sizeof(uuid_t)); 6919 uint64_t load_address = m_data.GetU64(&payload_offset); 6920 uint64_t slide = m_data.GetU64(&payload_offset); 6921 std::string filename = m_data.GetCStr(&payload_offset); 6922 6923 MachOCorefileImageEntry image_entry; 6924 image_entry.filename = filename; 6925 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6926 image_entry.load_address = load_address; 6927 image_entry.slide = slide; 6928 image_entry.currently_executing = true; 6929 image_infos.all_image_infos.push_back(image_entry); 6930 LLDB_LOGF(log, 6931 "LC_NOTE 'load binary' found, filename %s uuid %s load " 6932 "address 0x%" PRIx64 " slide 0x%" PRIx64, 6933 filename.c_str(), 6934 image_entry.uuid.IsValid() 6935 ? image_entry.uuid.GetAsString().c_str() 6936 : "00000000-0000-0000-0000-000000000000", 6937 load_address, slide); 6938 } 6939 } 6940 6941 return image_infos; 6942 } 6943 6944 bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { 6945 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos(); 6946 Log *log = GetLog(LLDBLog::Object | LLDBLog::DynamicLoader); 6947 Status error; 6948 6949 bool found_platform_binary = false; 6950 ModuleList added_modules; 6951 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) { 6952 ModuleSP module_sp, local_filesystem_module_sp; 6953 6954 // If this is a platform binary, it has been loaded (or registered with 6955 // the DynamicLoader to be loaded), we don't need to do any further 6956 // processing. We're not going to call ModulesDidLoad on this in this 6957 // method, so notify==true. 6958 if (process.GetTarget() 6959 .GetDebugger() 6960 .GetPlatformList() 6961 .LoadPlatformBinaryAndSetup(&process, image.load_address, 6962 true /* notify */)) { 6963 LLDB_LOGF(log, 6964 "ObjectFileMachO::%s binary at 0x%" PRIx64 6965 " is a platform binary, has been handled by a Platform plugin.", 6966 __FUNCTION__, image.load_address); 6967 continue; 6968 } 6969 6970 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS; 6971 uint64_t value = value_is_offset ? image.slide : image.load_address; 6972 if (value_is_offset && value == LLDB_INVALID_ADDRESS) { 6973 // We have neither address nor slide; so we will find the binary 6974 // by UUID and load it at slide/offset 0. 6975 value = 0; 6976 } 6977 6978 // We have either a UUID, or we have a load address which 6979 // and can try to read load commands and find a UUID. 6980 if (image.uuid.IsValid() || 6981 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) { 6982 const bool set_load_address = image.segment_load_addresses.size() == 0; 6983 const bool notify = false; 6984 // Userland Darwin binaries will have segment load addresses via 6985 // the `all image infos` LC_NOTE. 6986 const bool allow_memory_image_last_resort = 6987 image.segment_load_addresses.size(); 6988 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress( 6989 &process, image.filename, image.uuid, value, value_is_offset, 6990 image.currently_executing, notify, set_load_address, 6991 allow_memory_image_last_resort); 6992 } 6993 6994 // We have a ModuleSP to load in the Target. Load it at the 6995 // correct address/slide and notify/load scripting resources. 6996 if (module_sp) { 6997 added_modules.Append(module_sp, false /* notify */); 6998 6999 // We have a list of segment load address 7000 if (image.segment_load_addresses.size() > 0) { 7001 if (log) { 7002 std::string uuidstr = image.uuid.GetAsString(); 7003 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7004 "UUID %s with section load addresses", 7005 module_sp->GetFileSpec().GetPath().c_str(), 7006 uuidstr.c_str()); 7007 } 7008 for (auto name_vmaddr_tuple : image.segment_load_addresses) { 7009 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList(); 7010 if (sectlist) { 7011 SectionSP sect_sp = 7012 sectlist->FindSectionByName(std::get<0>(name_vmaddr_tuple)); 7013 if (sect_sp) { 7014 process.GetTarget().SetSectionLoadAddress( 7015 sect_sp, std::get<1>(name_vmaddr_tuple)); 7016 } 7017 } 7018 } 7019 } else { 7020 if (log) { 7021 std::string uuidstr = image.uuid.GetAsString(); 7022 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7023 "UUID %s with %s 0x%" PRIx64, 7024 module_sp->GetFileSpec().GetPath().c_str(), 7025 uuidstr.c_str(), 7026 value_is_offset ? "slide" : "load address", value); 7027 } 7028 bool changed; 7029 module_sp->SetLoadAddress(process.GetTarget(), value, value_is_offset, 7030 changed); 7031 } 7032 } 7033 } 7034 if (added_modules.GetSize() > 0) { 7035 process.GetTarget().ModulesDidLoad(added_modules); 7036 process.Flush(); 7037 return true; 7038 } 7039 // Return true if the only binary we found was the platform binary, 7040 // and it was loaded outside the scope of this method. 7041 if (found_platform_binary) 7042 return true; 7043 7044 // No binaries. 7045 return false; 7046 } 7047