//===-- ObjectFileMachO.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringRef.h"

#include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
#include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
#include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
#include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/Progress.h"
#include "lldb/Core/Section.h"
#include "lldb/Host/Host.h"
#include "lldb/Symbol/DWARFCallFrameInfo.h"
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Target/DynamicLoader.h"
#include "lldb/Target/MemoryRegionInfo.h"
#include "lldb/Target/Platform.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Target/Thread.h"
#include "lldb/Target/ThreadList.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataBuffer.h"
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/FileSpecList.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/RangeMap.h"
#include "lldb/Utility/RegisterValue.h"
#include "lldb/Utility/Status.h"
#include "lldb/Utility/StreamString.h"
#include "lldb/Utility/Timer.h"
#include "lldb/Utility/UUID.h"

#include "lldb/Host/SafeMachO.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"

#include "ObjectFileMachO.h"

#if defined(__APPLE__)
#include <TargetConditionals.h>
// GetLLDBSharedCacheUUID() needs to call dlsym()
#include <dlfcn.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <lldb/Host/SafeMachO.h>
#endif

#ifndef __APPLE__
#include "lldb/Utility/AppleUuidCompatibility.h"
#else
#include <uuid/uuid.h>
#endif

#include <algorithm>
#include <bitset>
#include <memory>
#include <optional>

// Unfortunately the signpost header pulls in the system MachO header, too.
#ifdef CPU_TYPE_ARM
#undef CPU_TYPE_ARM
#endif
#ifdef CPU_TYPE_ARM64
#undef CPU_TYPE_ARM64
#endif
#ifdef CPU_TYPE_ARM64_32
#undef CPU_TYPE_ARM64_32
#endif
#ifdef CPU_TYPE_I386
#undef CPU_TYPE_I386
#endif
#ifdef CPU_TYPE_X86_64
#undef CPU_TYPE_X86_64
#endif
#ifdef MH_DYLINKER
#undef MH_DYLINKER
#endif
#ifdef MH_OBJECT
#undef MH_OBJECT
#endif
#ifdef LC_VERSION_MIN_MACOSX
#undef LC_VERSION_MIN_MACOSX
#endif
#ifdef LC_VERSION_MIN_IPHONEOS
#undef LC_VERSION_MIN_IPHONEOS
#endif
#ifdef LC_VERSION_MIN_TVOS
#undef LC_VERSION_MIN_TVOS
#endif
#ifdef LC_VERSION_MIN_WATCHOS
#undef LC_VERSION_MIN_WATCHOS
#endif
#ifdef LC_BUILD_VERSION
#undef LC_BUILD_VERSION
#endif
#ifdef PLATFORM_MACOS
#undef PLATFORM_MACOS
#endif
#ifdef PLATFORM_MACCATALYST
#undef PLATFORM_MACCATALYST
#endif
#ifdef PLATFORM_IOS
#undef PLATFORM_IOS
#endif
#ifdef PLATFORM_IOSSIMULATOR
#undef PLATFORM_IOSSIMULATOR
#endif
#ifdef PLATFORM_TVOS
#undef PLATFORM_TVOS
#endif
#ifdef PLATFORM_TVOSSIMULATOR
#undef PLATFORM_TVOSSIMULATOR
#endif
#ifdef PLATFORM_WATCHOS
#undef PLATFORM_WATCHOS
#endif
#ifdef PLATFORM_WATCHOSSIMULATOR
#undef PLATFORM_WATCHOSSIMULATOR
#endif

#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
using namespace lldb;
using namespace lldb_private;
using namespace llvm::MachO;

static constexpr llvm::StringLiteral g_loader_path = "@loader_path";
static constexpr llvm::StringLiteral g_executable_path = "@executable_path";

LLDB_PLUGIN_DEFINE(ObjectFileMachO)

/// Append exactly \a reg_byte_size bytes for one register to \a data.
///
/// The register is looked up by \a name, then by \a alt_name if the first
/// lookup fails. When the register is narrower than \a reg_byte_size the value
/// is followed by zero bytes; when it cannot be found or read, only zero bytes
/// are written, so the output layout is the same either way.
static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
  if (reg_info == nullptr)
    reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
  if (reg_info) {
    lldb_private::RegisterValue reg_value;
    if (reg_ctx->ReadRegister(reg_info, reg_value)) {
      if (reg_info->byte_size >= reg_byte_size)
        data.Write(reg_value.GetBytes(), reg_byte_size);
      else {
        data.Write(reg_value.GetBytes(), reg_info->byte_size);
        for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
          data.PutChar(0);
      }
      return;
    }
  }
  // Just write zeros if all else fails
  for (size_t i = 0; i < reg_byte_size; ++i)
    data.PutChar(0);
}

/// x86_64 register context whose register sets are filled in once, at
/// construction time, from the thread-state data of an LC_THREAD load command.
class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
public:
  RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread,
                                    const DataExtractor &data)
      : RegisterContextDarwin_x86_64(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Walk the (flavor, count, payload) records in \a data and load the GPR or
  /// EXC register set. Every set starts out marked as unread (-1) and is
  /// marked as read (0) only once it has been parsed.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      if (flavor == 0)
        done = true;
      else {
        uint32_t count = data.GetU32(&offset);
        switch (flavor) {
        case GPRRegSet: {
          // `count` comes straight from the file and is expressed in 32-bit
          // words (Create_LC_THREAD writes GPRWordCount), while every GPR is
          // 64 bits wide. Convert to a register count and clamp it to what
          // `gpr` can hold so a bogus count can never write past the struct.
          const uint32_t gpr_capacity = sizeof(gpr) / sizeof(gpr.rax);
          const uint32_t num_regs =
              std::min<uint32_t>(count / 2, gpr_capacity);
          for (uint32_t i = 0; i < num_regs; ++i)
            (&gpr.rax)[i] = data.GetU64(&offset);
          SetError(GPRRegSet, Read, 0);
          done = true;
        } break;
        case FPURegSet:
          // TODO: fill in FPU regs....
          // SetError (FPURegSet, Read, -1);
          done = true;

          break;
        case EXCRegSet:
          exc.trapno = data.GetU32(&offset);
          exc.err = data.GetU32(&offset);
          exc.faultvaddr = data.GetU64(&offset);
          SetError(EXCRegSet, Read, 0);
          done = true;
          break;
        case 7:
        case 8:
        case 9:
          // fancy flavors that encapsulate of the above flavors...
          break;

        default:
          done = true;
          break;
        }
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data as
  /// LC_THREAD thread-state records.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (!reg_ctx_sp)
      return false;
    RegisterContext *reg_ctx = reg_ctx_sp.get();

    // Every general purpose register is emitted as 8 bytes, in this order.
    static const char *const g_gpr_names[] = {
        "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "rbp",
        "rsp", "r8",  "r9",  "r10", "r11", "r12", "r13",
        "r14", "r15", "rip", "rflags", "cs", "fs", "gs"};

    data.PutHex32(GPRRegSet); // Flavor
    data.PutHex32(GPRWordCount);
    for (const char *reg_name : g_gpr_names)
      PrintRegisterValue(reg_ctx, reg_name, nullptr, 8, data);

    //  // Write out the FPU registers
    //  const size_t fpu_byte_size = sizeof(FPU);
    //  size_t bytes_written = 0;
    //  data.PutHex32 (FPURegSet);
    //  data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
    //  bytes_written += data.PutHex32(0); // uint32_t pad[0]
    //  bytes_written += data.PutHex32(0); // uint32_t pad[1]
    //  bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
    //  data);   // uint16_t    fcw;    // "fctrl"
    //  bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
    //  data);  // uint16_t    fsw;    // "fstat"
    //  bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
    //  data);   // uint8_t     ftw;    // "ftag"
    //  bytes_written += data.PutHex8  (0); // uint8_t pad1;
    //  bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
    //  data);     // uint16_t    fop;    // "fop"
    //  bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
    //  data);    // uint32_t    ip;     // "fioff"
    //  bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
    //  data);    // uint16_t    cs;     // "fiseg"
    //  bytes_written += data.PutHex16 (0); // uint16_t    pad2;
    //  bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
    //  data);   // uint32_t    dp;     // "fooff"
    //  bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
    //  data);    // uint16_t    ds;     // "foseg"
    //  bytes_written += data.PutHex16 (0); // uint16_t    pad3;
    //  bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
    //  data);    // uint32_t    mxcsr;
    //  bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
    //  4, data);// uint32_t    mxcsrmask;
    //  bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
    //  sizeof(MMSReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
    //  sizeof(XMMReg), data);
    //  bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
    //  sizeof(XMMReg), data);
    //
    //  // Fill rest with zeros
    //  for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
    //  i)
    //      data.PutChar(0);

    // Write out the EXC registers
    data.PutHex32(EXCRegSet);
    data.PutHex32(EXCWordCount);
    PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
    PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
    PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
    return true;
  }

protected:
  // The register sets are loaded once from the LC_THREAD data, so none of
  // these hooks transfers anything; each just returns a fixed status.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }
};

/// i386 register context whose register sets are filled in once, at
/// construction time, from the thread-state data of an LC_THREAD load command.
class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
public:
  RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread,
                                  const DataExtractor &data)
      : RegisterContextDarwin_i386(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Walk the (flavor, count, payload) records in \a data and load the GPR or
  /// EXC register set. Every set starts out marked as unread (-1) and is
  /// marked as read (0) only once it has been parsed.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      if (flavor == 0)
        done = true;
      else {
        uint32_t count = data.GetU32(&offset);
        switch (flavor) {
        case GPRRegSet: {
          // `count` comes straight from the file; clamp it to the number of
          // 32-bit registers `gpr` holds so it can never index past the
          // struct.
          const uint32_t gpr_capacity = sizeof(gpr) / sizeof(gpr.eax);
          const uint32_t num_regs = std::min(count, gpr_capacity);
          for (uint32_t i = 0; i < num_regs; ++i)
            (&gpr.eax)[i] = data.GetU32(&offset);
          SetError(GPRRegSet, Read, 0);
          done = true;
        } break;
        case FPURegSet:
          // TODO: fill in FPU regs....
          // SetError (FPURegSet, Read, -1);
          done = true;

          break;
        case EXCRegSet:
          exc.trapno = data.GetU32(&offset);
          exc.err = data.GetU32(&offset);
          exc.faultvaddr = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
          done = true;
          break;
        case 7:
        case 8:
        case 9:
          // fancy flavors that encapsulate of the above flavors...
          break;

        default:
          done = true;
          break;
        }
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data as
  /// LC_THREAD thread-state records.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (!reg_ctx_sp)
      return false;
    RegisterContext *reg_ctx = reg_ctx_sp.get();

    // Every general purpose register is emitted as 4 bytes, in this order.
    static const char *const g_gpr_names[] = {
        "eax", "ebx", "ecx",    "edx", "edi", "esi", "ebp", "esp",
        "ss",  "eflags", "eip", "cs",  "ds",  "es",  "fs",  "gs"};

    data.PutHex32(GPRRegSet); // Flavor
    data.PutHex32(GPRWordCount);
    for (const char *reg_name : g_gpr_names)
      PrintRegisterValue(reg_ctx, reg_name, nullptr, 4, data);

    // Write out the EXC registers
    data.PutHex32(EXCRegSet);
    data.PutHex32(EXCWordCount);
    PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
    PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
    PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
    return true;
  }

protected:
  // The register sets are loaded once from the LC_THREAD data, so none of
  // these hooks transfers anything; each just returns a fixed status.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }
};

/// 32-bit ARM register context whose register sets are filled in once, at
/// construction time, from the thread-state data of an LC_THREAD load command.
class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
public:
  RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread,
                                 const DataExtractor &data)
      : RegisterContextDarwin_arm(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Walk the (flavor, count, payload) records in \a data and load the GPR,
  /// FPU and EXC register sets. A set is marked as read (0) only when its
  /// record has the expected size; after each record the cursor jumps to the
  /// start of the next one, computed from the record's own word count.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;

    while (!done) {
      int flavor = data.GetU32(&offset);
      uint32_t count = data.GetU32(&offset);
      // `count` is a number of 32-bit words.
      lldb::offset_t next_thread_state = offset + (count * 4);
      switch (flavor) {
      case GPRAltRegSet:
      case GPRRegSet: {
        // r0-r15, plus CPSR
        uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
        if (count == gpr_buf_count) {
          for (uint32_t i = 0; i < (count - 1); ++i) {
            gpr.r[i] = data.GetU32(&offset);
          }
          gpr.cpsr = data.GetU32(&offset);

          SetError(GPRRegSet, Read, 0);
        }
      }
        offset = next_thread_state;
        break;

      case FPURegSet: {
        uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
        const int fpu_reg_buf_size = sizeof(fpu.floats);
        if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
                              fpu_reg_buf) == fpu_reg_buf_size) {
          offset += fpu_reg_buf_size;
          fpu.fpscr = data.GetU32(&offset);
          SetError(FPURegSet, Read, 0);
        } else {
          done = true;
        }
      }
        offset = next_thread_state;
        break;

      case EXCRegSet:
        if (count == 3) {
          exc.exception = data.GetU32(&offset);
          exc.fsr = data.GetU32(&offset);
          exc.far = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
        }
        done = true;
        offset = next_thread_state;
        break;

      // Unknown register set flavor, stop trying to parse.
      default:
        done = true;
      }
    }
  }

  /// Serialize the GPR register set of \a thread into \a data as an LC_THREAD
  /// thread-state record.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (!reg_ctx_sp)
      return false;
    RegisterContext *reg_ctx = reg_ctx_sp.get();

    // Every general purpose register is emitted as 4 bytes, in this order.
    static const char *const g_gpr_names[] = {
        "r0", "r1",  "r2",  "r3",  "r4", "r5", "r6", "r7",  "r8",
        "r9", "r10", "r11", "r12", "sp", "lr", "pc", "cpsr"};

    data.PutHex32(GPRRegSet); // Flavor
    data.PutHex32(GPRWordCount);
    for (const char *reg_name : g_gpr_names)
      PrintRegisterValue(reg_ctx, reg_name, nullptr, 4, data);

    // Write out the EXC registers
    //            data.PutHex32 (EXCRegSet);
    //            data.PutHex32 (EXCWordCount);
    //            WriteRegister (reg_ctx, "exception", NULL, 4, data);
    //            WriteRegister (reg_ctx, "fsr", NULL, 4, data);
    //            WriteRegister (reg_ctx, "far", NULL, 4, data);
    return true;
  }

protected:
  // The register sets are loaded once from the LC_THREAD data, so none of
  // these hooks transfers anything; each just returns a fixed status.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }

  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }

  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
    return -1;
  }
};

/// arm64 register context whose register sets are filled in once, at
/// construction time, from the thread-state data of an LC_THREAD load command.
class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
public:
  RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread,
                                   const DataExtractor &data)
      : RegisterContextDarwin_arm64(thread, 0) {
    SetRegisterDataFrom_LC_THREAD(data);
  }

  void InvalidateAllRegisters() override {
    // Do nothing... registers are always valid...
  }

  /// Walk the (flavor, count, payload) records in \a data and load the GPR,
  /// FPU and EXC register sets. A set is marked as read (0) only when its
  /// record is large enough; after each record the cursor jumps to the start
  /// of the next one, computed from the record's own word count.
  void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
    lldb::offset_t offset = 0;
    SetError(GPRRegSet, Read, -1);
    SetError(FPURegSet, Read, -1);
    SetError(EXCRegSet, Read, -1);
    bool done = false;
    while (!done) {
      int flavor = data.GetU32(&offset);
      uint32_t count = data.GetU32(&offset);
      // `count` is a number of 32-bit words.
      lldb::offset_t next_thread_state = offset + (count * 4);
      switch (flavor) {
      case GPRRegSet:
        // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
        // 32-bit register)
        if (count >= (33 * 2) + 1) {
          for (uint32_t i = 0; i < 29; ++i)
            gpr.x[i] = data.GetU64(&offset);
          gpr.fp = data.GetU64(&offset);
          gpr.lr = data.GetU64(&offset);
          gpr.sp = data.GetU64(&offset);
          gpr.pc = data.GetU64(&offset);
          gpr.cpsr = data.GetU32(&offset);
          SetError(GPRRegSet, Read, 0);
        }
        offset = next_thread_state;
        break;
      case FPURegSet: {
        uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
        const int fpu_reg_buf_size = sizeof(fpu);
        if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
            data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
                              fpu_reg_buf) == fpu_reg_buf_size) {
          SetError(FPURegSet, Read, 0);
        } else {
          done = true;
        }
      }
        offset = next_thread_state;
        break;
      case EXCRegSet:
        if (count == 4) {
          exc.far = data.GetU64(&offset);
          exc.esr = data.GetU32(&offset);
          exc.exception = data.GetU32(&offset);
          SetError(EXCRegSet, Read, 0);
        }
        offset = next_thread_state;
        break;
      default:
        done = true;
        break;
      }
    }
  }

  /// Serialize the GPR and EXC register sets of \a thread into \a data as
  /// LC_THREAD thread-state records.
  ///
  /// \return false when the thread has no register context, true otherwise.
  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
    RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
    if (!reg_ctx_sp)
      return false;
    RegisterContext *reg_ctx = reg_ctx_sp.get();

    // The 64-bit registers, emitted as 8 bytes each, in this order.
    static const char *const g_gpr64_names[] = {
        "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",
        "x9",  "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17",
        "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26",
        "x27", "x28", "fp",  "lr",  "sp",  "pc"};

    data.PutHex32(GPRRegSet); // Flavor
    data.PutHex32(GPRWordCount);
    for (const char *reg_name : g_gpr64_names)
      PrintRegisterValue(reg_ctx, reg_name, nullptr, 8, data);
    PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
    data.PutHex32(0); // uint32_t pad at the end

    // Write out the EXC registers
    data.PutHex32(EXCRegSet);
    data.PutHex32(EXCWordCount);
    PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
    PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
    PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
    return true;
  }

protected:
  // The register sets are loaded once from the LC_THREAD data, so none of
  // these hooks transfers anything; each just returns a fixed status.
  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }

  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }

  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }

  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }

  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
    return 0;
  }

  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
    return 0;
  }

  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
    return 0;
  }

  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
    return -1;
  }
};

/// Return the size of the Mach-O header identified by \a magic (either byte
/// order), or 0 when \a magic is not a Mach-O magic value.
static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
  switch (magic) {
  case MH_MAGIC:
  case MH_CIGAM:
    return sizeof(struct llvm::MachO::mach_header);

  case MH_MAGIC_64:
  case MH_CIGAM_64:
    return sizeof(struct llvm::MachO::mach_header_64);

  default:
    break;
  }
  return 0;
}

#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008

char ObjectFileMachO::ID;

/// Register this plugin's factory and helper callbacks with the PluginManager.
void ObjectFileMachO::Initialize() {
  PluginManager::RegisterPlugin(
      GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
      CreateMemoryInstance, GetModuleSpecifications, SaveCore);
}

/// Unregister the plugin that Initialize() registered.
void ObjectFileMachO::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}

/// Create an ObjectFileMachO for a file-backed module.
///
/// The file is mapped when no data was supplied, and mapped again when the
/// supplied buffer is shorter than \a length.
///
/// \return the new object file (ownership passes to the caller), or nullptr
/// when the data cannot be mapped, is not Mach-O, or its header fails to
/// parse.
ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
                                            DataBufferSP data_sp,
                                            lldb::offset_t data_offset,
                                            const FileSpec *file,
                                            lldb::offset_t file_offset,
                                            lldb::offset_t length) {
  if (!data_sp) {
    data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    data_offset = 0;
  }

  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
    return nullptr;

  // Update the data to contain the entire file if it doesn't already
  if (data_sp->GetByteSize() < length) {
    data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    data_offset = 0;
  }
  auto objfile_up = std::make_unique<ObjectFileMachO>(
      module_sp, data_sp, data_offset, file, file_offset, length);
  if (!objfile_up || !objfile_up->ParseHeader())
    return nullptr;

  return objfile_up.release();
}

/// Create an ObjectFileMachO for an image whose header was read from process
/// memory at \a header_addr.
///
/// \return the new object file (ownership passes to the caller), or nullptr
/// when there is no data, the data is not Mach-O, or its header fails to
/// parse.
ObjectFile *ObjectFileMachO::CreateMemoryInstance(
    const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
    const ProcessSP &process_sp, lldb::addr_t header_addr) {
  // Without a buffer there is nothing to sniff; bail out instead of
  // dereferencing a null pointer below.
  if (!data_sp)
    return nullptr;

  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
    std::unique_ptr<ObjectFile> objfile_up(
        new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
    if (objfile_up.get() && objfile_up->ParseHeader())
      return objfile_up.release();
  }
  return nullptr;
}

/// Append to \a specs one ModuleSpec per architecture found in the Mach-O
/// image described by \a data_sp, mapping more of \a file when the buffer does
/// not hold the whole header plus load commands.
///
/// \return the number of specs that were added.
size_t ObjectFileMachO::GetModuleSpecifications(
    const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
    lldb::offset_t data_offset, lldb::offset_t file_offset,
    lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
  const size_t initial_count = specs.GetSize();

  // A null buffer cannot match; checking first avoids a null dereference.
  if (data_sp &&
      ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
    DataExtractor data;
    data.SetData(data_sp);
    llvm::MachO::mach_header header;
    if (ParseHeader(data, &data_offset, header)) {
      size_t header_and_load_cmds =
          header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
      if (header_and_load_cmds >= data_sp->GetByteSize()) {
        data_sp = MapFileData(file, header_and_load_cmds, file_offset);
        data.SetData(data_sp);
        data_offset = MachHeaderSizeFromMagic(header.magic);
      }
      if (data_sp) {
        ModuleSpec base_spec;
        base_spec.GetFileSpec() = file;
        base_spec.SetObjectOffset(file_offset);
        base_spec.SetObjectSize(length);
        GetAllArchSpecs(header, data, data_offset, base_spec, specs);
      }
    }
  }
  return specs.GetSize() - initial_count;
}

// Each accessor below returns one well-known Mach-O segment or section name,
// held in a function-local static ConstString.

ConstString ObjectFileMachO::GetSegmentNameTEXT() {
  static ConstString g_segment_name_TEXT("__TEXT");
  return g_segment_name_TEXT;
}

ConstString ObjectFileMachO::GetSegmentNameDATA() {
  static ConstString g_segment_name_DATA("__DATA");
  return g_segment_name_DATA;
}

ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() {
  static ConstString g_segment_name("__DATA_DIRTY");
  return g_segment_name;
}

ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() {
  static ConstString g_segment_name("__DATA_CONST");
  return g_segment_name;
}

ConstString ObjectFileMachO::GetSegmentNameOBJC() {
  static ConstString g_segment_name_OBJC("__OBJC");
  return g_segment_name_OBJC;
}

ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() {
  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
  return g_section_name_LINKEDIT;
}

ConstString ObjectFileMachO::GetSegmentNameDWARF() {
  static ConstString g_section_name("__DWARF");
  return g_section_name;
}

ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() {
  static ConstString g_section_name("__LLVM_COV");
  return g_section_name;
}

ConstString ObjectFileMachO::GetSectionNameEHFrame() {
  static ConstString g_section_name_eh_frame("__eh_frame");
  return g_section_name_eh_frame;
}

/// Report whether the bytes at \a data_offset begin with a Mach-O header that
/// this plugin handles: a recognized magic value and a file type other than
/// MH_FILESET.
bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
                                      lldb::addr_t data_offset,
                                      lldb::addr_t data_length) {
  DataExtractor data;
  data.SetData(data_sp, data_offset, data_length);
  lldb::offset_t offset = 0;
  uint32_t magic = data.GetU32(&offset);

  offset += 4; // cputype
  offset += 4; // cpusubtype
  uint32_t filetype = data.GetU32(&offset);

  // A fileset has a Mach-O header but is not an
  // individual file and must be handled via an
  // ObjectContainer plugin.
  if (filetype == llvm::MachO::MH_FILESET)
    return false;

  return MachHeaderSizeFromMagic(magic) != 0;
}

/// Construct an object file backed by file data. The header and dysymtab
/// structures start out zeroed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 DataBufferSP data_sp,
                                 lldb::offset_t data_offset,
                                 const FileSpec *file,
                                 lldb::offset_t file_offset,
                                 lldb::offset_t length)
    : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}

/// Construct an object file backed by header data read from a process at
/// \a header_addr. The header and dysymtab structures start out zeroed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 lldb::WritableDataBufferSP header_data_sp,
                                 const lldb::ProcessSP &process_sp,
                                 lldb::addr_t header_addr)
    : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}

/// Parse a Mach-O header out of \a data starting at \a *data_offset_ptr.
///
/// The magic value selects the byte order and address size that \a data is
/// configured with; the six 32-bit fields from `cputype` onward are then read
/// into \a header, and the offset is advanced by 4 more bytes for a 64-bit
/// header.
///
/// \return true on success; false (with \a header zeroed) when the magic value
/// is not recognized.
bool ObjectFileMachO::ParseHeader(DataExtractor &data,
                                  lldb::offset_t *data_offset_ptr,
                                  llvm::MachO::mach_header &header) {
  data.SetByteOrder(endian::InlHostByteOrder());
  // Leave magic in the original byte order
  header.magic = data.GetU32(data_offset_ptr);
  bool can_parse = false;
  bool is_64_bit = false;
  switch (header.magic) {
  case MH_MAGIC:
    data.SetByteOrder(endian::InlHostByteOrder());
    data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_MAGIC_64:
    data.SetByteOrder(endian::InlHostByteOrder());
    data.SetAddressByteSize(8);
    can_parse = true;
    is_64_bit = true;
    break;

  case MH_CIGAM:
    data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                          ? eByteOrderLittle
                          : eByteOrderBig);
    data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_CIGAM_64:
    data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                          ? eByteOrderLittle
                          : eByteOrderBig);
    data.SetAddressByteSize(8);
    is_64_bit = true;
    can_parse = true;
    break;

  default:
    break;
  }

  if (can_parse) {
    data.GetU32(data_offset_ptr, &header.cputype, 6);
    if (is_64_bit)
      *data_offset_ptr += 4;
    return true;
  } else {
    memset(&header, 0, sizeof(header));
  }
  return false;
}

bool ObjectFileMachO::ParseHeader() {
  ModuleSP module_sp(GetModule());
  if (!module_sp)
    return false;

  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
  bool can_parse = false;
  lldb::offset_t offset = 0;
  m_data.SetByteOrder(endian::InlHostByteOrder());
  // Leave magic in the original byte order
  m_header.magic = m_data.GetU32(&offset);
  switch (m_header.magic) {
  case MH_MAGIC:
    m_data.SetByteOrder(endian::InlHostByteOrder());
    m_data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_MAGIC_64:
    m_data.SetByteOrder(endian::InlHostByteOrder());
    m_data.SetAddressByteSize(8);
    can_parse = true;
    break;

  case MH_CIGAM:
    m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                            ? eByteOrderLittle
                            : eByteOrderBig);
    m_data.SetAddressByteSize(4);
    can_parse = true;
    break;

  case MH_CIGAM_64:
    m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
                            ? eByteOrderLittle
                            : eByteOrderBig);
    m_data.SetAddressByteSize(8);
    can_parse = true;
    break;

  default:
    break;
  }

  if (can_parse) {
    m_data.GetU32(&offset, &m_header.cputype, 6);

    ModuleSpecList all_specs;
    ModuleSpec base_spec;
    GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic),
                    base_spec, all_specs);

    for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
      ArchSpec mach_arch =
          all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();

      // Check if the module has a required architecture
      const ArchSpec &module_arch = module_sp->GetArchitecture();
      if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
        continue;

      if (SetModulesArchitecture(mach_arch)) {
        const size_t header_and_lc_size =
            m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
        if (m_data.GetByteSize() < header_and_lc_size) {
          DataBufferSP data_sp;
          ProcessSP process_sp(m_process_wp.lock());
          if (process_sp) {
            data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
          } else {
            // Read in all only the load command data from the file on disk
            data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
            if (data_sp->GetByteSize() != header_and_lc_size)
              continue;
          }
          if (data_sp)
            m_data.SetData(data_sp);
        }
      }
      return true;
    }
    // None found.
1104 return false; 1105 } else { 1106 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header)); 1107 } 1108 return false; 1109 } 1110 1111 ByteOrder ObjectFileMachO::GetByteOrder() const { 1112 return m_data.GetByteOrder(); 1113 } 1114 1115 bool ObjectFileMachO::IsExecutable() const { 1116 return m_header.filetype == MH_EXECUTE; 1117 } 1118 1119 bool ObjectFileMachO::IsDynamicLoader() const { 1120 return m_header.filetype == MH_DYLINKER; 1121 } 1122 1123 bool ObjectFileMachO::IsSharedCacheBinary() const { 1124 return m_header.flags & MH_DYLIB_IN_CACHE; 1125 } 1126 1127 bool ObjectFileMachO::IsKext() const { 1128 return m_header.filetype == MH_KEXT_BUNDLE; 1129 } 1130 1131 uint32_t ObjectFileMachO::GetAddressByteSize() const { 1132 return m_data.GetAddressByteSize(); 1133 } 1134 1135 AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { 1136 Symtab *symtab = GetSymtab(); 1137 if (!symtab) 1138 return AddressClass::eUnknown; 1139 1140 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr); 1141 if (symbol) { 1142 if (symbol->ValueIsAddress()) { 1143 SectionSP section_sp(symbol->GetAddressRef().GetSection()); 1144 if (section_sp) { 1145 const lldb::SectionType section_type = section_sp->GetType(); 1146 switch (section_type) { 1147 case eSectionTypeInvalid: 1148 return AddressClass::eUnknown; 1149 1150 case eSectionTypeCode: 1151 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1152 // For ARM we have a bit in the n_desc field of the symbol that 1153 // tells us ARM/Thumb which is bit 0x0008. 
if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
              return AddressClass::eCodeAlternateISA;
          }
          return AddressClass::eCode;

        case eSectionTypeContainer:
          return AddressClass::eUnknown;

        // Every flavor of data section maps to the data address class.
        case eSectionTypeData:
        case eSectionTypeDataCString:
        case eSectionTypeDataCStringPointers:
        case eSectionTypeDataSymbolAddress:
        case eSectionTypeData4:
        case eSectionTypeData8:
        case eSectionTypeData16:
        case eSectionTypeDataPointers:
        case eSectionTypeZeroFill:
        case eSectionTypeDataObjCMessageRefs:
        case eSectionTypeDataObjCCFStrings:
        case eSectionTypeGoSymtab:
          return AddressClass::eData;

        // Debug-information sections (DWARF, Apple accelerator tables, CTF,
        // Swift modules) map to the debug address class.
        case eSectionTypeDebug:
        case eSectionTypeDWARFDebugAbbrev:
        case eSectionTypeDWARFDebugAbbrevDwo:
        case eSectionTypeDWARFDebugAddr:
        case eSectionTypeDWARFDebugAranges:
        case eSectionTypeDWARFDebugCuIndex:
        case eSectionTypeDWARFDebugFrame:
        case eSectionTypeDWARFDebugInfo:
        case eSectionTypeDWARFDebugInfoDwo:
        case eSectionTypeDWARFDebugLine:
        case eSectionTypeDWARFDebugLineStr:
        case eSectionTypeDWARFDebugLoc:
        case eSectionTypeDWARFDebugLocDwo:
        case eSectionTypeDWARFDebugLocLists:
        case eSectionTypeDWARFDebugLocListsDwo:
        case eSectionTypeDWARFDebugMacInfo:
        case eSectionTypeDWARFDebugMacro:
        case eSectionTypeDWARFDebugNames:
        case eSectionTypeDWARFDebugPubNames:
        case eSectionTypeDWARFDebugPubTypes:
        case eSectionTypeDWARFDebugRanges:
        case eSectionTypeDWARFDebugRngLists:
        case eSectionTypeDWARFDebugRngListsDwo:
        case eSectionTypeDWARFDebugStr:
        case eSectionTypeDWARFDebugStrDwo:
        case eSectionTypeDWARFDebugStrOffsets:
        case eSectionTypeDWARFDebugStrOffsetsDwo:
        case eSectionTypeDWARFDebugTuIndex:
        case eSectionTypeDWARFDebugTypes:
        case eSectionTypeDWARFDebugTypesDwo:
        case eSectionTypeDWARFAppleNames:
        case eSectionTypeDWARFAppleTypes:
        case eSectionTypeDWARFAppleNamespaces:
        case eSectionTypeDWARFAppleObjC:
        case eSectionTypeDWARFGNUDebugAltLink:
        case eSectionTypeCTF:
        case eSectionTypeSwiftModules:
          return AddressClass::eDebug;

        // Unwind tables map to the runtime address class.
        case eSectionTypeEHFrame:
        case eSectionTypeARMexidx:
        case eSectionTypeARMextab:
        case eSectionTypeCompactUnwind:
          return AddressClass::eRuntime;

        case eSectionTypeAbsoluteAddress:
        case eSectionTypeELFSymbolTable:
        case eSectionTypeELFDynamicSymbols:
        case eSectionTypeELFRelocationEntries:
        case eSectionTypeELFDynamicLinkInfo:
        case eSectionTypeOther:
          return AddressClass::eUnknown;
        }
      }
    }

    // Reached when the symbol's value is not an address or it has no
    // section: classify by the symbol's own type instead.
    const SymbolType symbol_type = symbol->GetType();
    switch (symbol_type) {
    case eSymbolTypeAny:
      return AddressClass::eUnknown;
    case eSymbolTypeAbsolute:
      return AddressClass::eUnknown;

    case eSymbolTypeCode:
    case eSymbolTypeTrampoline:
    case eSymbolTypeResolver:
      if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
        // For ARM we have a bit in the n_desc field of the symbol that tells
        // us ARM/Thumb which is bit 0x0008.
        if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
          return AddressClass::eCodeAlternateISA;
      }
      return AddressClass::eCode;

    case eSymbolTypeData:
      return AddressClass::eData;
    case eSymbolTypeRuntime:
      return AddressClass::eRuntime;
    case eSymbolTypeException:
      return AddressClass::eRuntime;
    case eSymbolTypeSourceFile:
      return AddressClass::eDebug;
    case eSymbolTypeHeaderFile:
      return AddressClass::eDebug;
    case eSymbolTypeObjectFile:
      return AddressClass::eDebug;
    case eSymbolTypeCommonBlock:
      return AddressClass::eDebug;
    case eSymbolTypeBlock:
      return AddressClass::eDebug;
    case eSymbolTypeLocal:
      return AddressClass::eData;
    case eSymbolTypeParam:
      return AddressClass::eData;
    case eSymbolTypeVariable:
      return AddressClass::eData;
    case eSymbolTypeVariableType:
      return AddressClass::eDebug;
    case eSymbolTypeLineEntry:
      return AddressClass::eDebug;
    case eSymbolTypeLineHeader:
      return AddressClass::eDebug;
    case eSymbolTypeScopeBegin:
      return AddressClass::eDebug;
    case eSymbolTypeScopeEnd:
      return AddressClass::eDebug;
    case eSymbolTypeAdditional:
      return AddressClass::eUnknown;
    case eSymbolTypeCompiler:
      return AddressClass::eDebug;
    case eSymbolTypeInstrumentation:
      return AddressClass::eDebug;
    case eSymbolTypeUndefined:
      return AddressClass::eUnknown;
    case eSymbolTypeObjCClass:
      return AddressClass::eRuntime;
    case eSymbolTypeObjCMetaClass:
      return AddressClass::eRuntime;
    case eSymbolTypeObjCIVar:
      return AddressClass::eRuntime;
    case eSymbolTypeReExported:
      return AddressClass::eRuntime;
    }
  }
  // No symbol contains file_addr, or its section/symbol type was not handled
  // by the switches above.
  return AddressClass::eUnknown;
}

// Reports whether the binary looks stripped, judged by the local symbol count
// in its LC_DYSYMTAB load command. The command is located by walking the load
// commands the first time through and is cached in m_dysymtab.
bool ObjectFileMachO::IsStripped() {
  // A zero cmd means LC_DYSYMTAB has not been cached yet.
  if (m_dysymtab.cmd == 0) {
    ModuleSP module_sp(GetModule());
    if (module_sp) {
      lldb::offset_t offset =
MachHeaderSizeFromMagic(m_header.magic); 1308 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1309 const lldb::offset_t load_cmd_offset = offset; 1310 1311 llvm::MachO::load_command lc = {}; 1312 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 1313 break; 1314 if (lc.cmd == LC_DYSYMTAB) { 1315 m_dysymtab.cmd = lc.cmd; 1316 m_dysymtab.cmdsize = lc.cmdsize; 1317 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1318 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) == 1319 nullptr) { 1320 // Clear m_dysymtab if we were unable to read all items from the 1321 // load command 1322 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 1323 } 1324 } 1325 offset = load_cmd_offset + lc.cmdsize; 1326 } 1327 } 1328 } 1329 if (m_dysymtab.cmd) 1330 return m_dysymtab.nlocalsym <= 1; 1331 return false; 1332 } 1333 1334 ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() { 1335 EncryptedFileRanges result; 1336 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1337 1338 llvm::MachO::encryption_info_command encryption_cmd; 1339 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1340 const lldb::offset_t load_cmd_offset = offset; 1341 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr) 1342 break; 1343 1344 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the 1345 // 3 fields we care about, so treat them the same. 
1346 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO || 1347 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) { 1348 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) { 1349 if (encryption_cmd.cryptid != 0) { 1350 EncryptedFileRanges::Entry entry; 1351 entry.SetRangeBase(encryption_cmd.cryptoff); 1352 entry.SetByteSize(encryption_cmd.cryptsize); 1353 result.Append(entry); 1354 } 1355 } 1356 } 1357 offset = load_cmd_offset + encryption_cmd.cmdsize; 1358 } 1359 1360 return result; 1361 } 1362 1363 void ObjectFileMachO::SanitizeSegmentCommand( 1364 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) { 1365 if (m_length == 0 || seg_cmd.filesize == 0) 1366 return; 1367 1368 if (IsSharedCacheBinary() && !IsInMemory()) { 1369 // In shared cache images, the load commands are relative to the 1370 // shared cache file, and not the specific image we are 1371 // examining. Let's fix this up so that it looks like a normal 1372 // image. 1373 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(), 1374 sizeof(seg_cmd.segname)) == 0) 1375 m_text_address = seg_cmd.vmaddr; 1376 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(), 1377 sizeof(seg_cmd.segname)) == 0) 1378 m_linkedit_original_offset = seg_cmd.fileoff; 1379 1380 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address; 1381 } 1382 1383 if (seg_cmd.fileoff > m_length) { 1384 // We have a load command that says it extends past the end of the file. 1385 // This is likely a corrupt file. We don't have any way to return an error 1386 // condition here (this method was likely invoked from something like 1387 // ObjectFile::GetSectionList()), so we just null out the section contents, 1388 // and dump a message to stdout. The most common case here is core file 1389 // debugging with a truncated file. 1390 const char *lc_segment_name = 1391 seg_cmd.cmd == LC_SEGMENT_64 ? 
"LC_SEGMENT_64" : "LC_SEGMENT"; 1392 GetModule()->ReportWarning( 1393 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond " 1394 "the end of the file ({3:x16}), ignoring this section", 1395 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length); 1396 1397 seg_cmd.fileoff = 0; 1398 seg_cmd.filesize = 0; 1399 } 1400 1401 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) { 1402 // We have a load command that says it extends past the end of the file. 1403 // This is likely a corrupt file. We don't have any way to return an error 1404 // condition here (this method was likely invoked from something like 1405 // ObjectFile::GetSectionList()), so we just null out the section contents, 1406 // and dump a message to stdout. The most common case here is core file 1407 // debugging with a truncated file. 1408 const char *lc_segment_name = 1409 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1410 GetModule()->ReportWarning( 1411 "load command {0} {1} has a fileoff + filesize ({2:x16}) that " 1412 "extends beyond the end of the file ({4:x16}), the segment will be " 1413 "truncated to match", 1414 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length); 1415 1416 // Truncate the length 1417 seg_cmd.filesize = m_length - seg_cmd.fileoff; 1418 } 1419 } 1420 1421 static uint32_t 1422 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) { 1423 uint32_t result = 0; 1424 if (seg_cmd.initprot & VM_PROT_READ) 1425 result |= ePermissionsReadable; 1426 if (seg_cmd.initprot & VM_PROT_WRITE) 1427 result |= ePermissionsWritable; 1428 if (seg_cmd.initprot & VM_PROT_EXECUTE) 1429 result |= ePermissionsExecutable; 1430 return result; 1431 } 1432 1433 static lldb::SectionType GetSectionType(uint32_t flags, 1434 ConstString section_name) { 1435 1436 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS)) 1437 return eSectionTypeCode; 1438 1439 uint32_t mach_sect_type = flags & SECTION_TYPE; 1440 static ConstString 
g_sect_name_objc_data("__objc_data");
  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
  static ConstString g_sect_name_objc_const("__objc_const");
  static ConstString g_sect_name_objc_classlist("__objc_classlist");
  static ConstString g_sect_name_cfstring("__cfstring");

  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
  static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo");
  static ConstString g_sect_name_dwarf_debug_addr("__debug_addr");
  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
  static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index");
  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
  static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo");
  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
  static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo");
  static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str");
  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
  static ConstString g_sect_name_dwarf_debug_loclists_dwo(
      "__debug_loclists.dwo");
  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
  static ConstString g_sect_name_dwarf_debug_macro("__debug_macro");
  static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo");
  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
  static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists");
  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
  static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo");
  static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs");
  static ConstString g_sect_name_dwarf_debug_str_offs_dwo(
      "__debug_str_offs.dwo");
  static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index");
  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
  // The literal below is exactly 16 characters long, which is why it reads
  // as a cut-off "__apple_namespaces".
  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
  static ConstString g_sect_name_eh_frame("__eh_frame");
  static ConstString g_sect_name_compact_unwind("__unwind_info");
  static ConstString g_sect_name_text("__text");
  static ConstString g_sect_name_data("__data");
  static ConstString g_sect_name_go_symtab("__gosymtab");
  static ConstString g_sect_name_ctf("__ctf");
  static ConstString g_sect_name_swift_ast("__swift_ast");

  // Well-known section names take priority over the Mach-O section type
  // bits examined in the switch further down.
  if (section_name == g_sect_name_dwarf_debug_abbrev)
    return eSectionTypeDWARFDebugAbbrev;
  if (section_name == g_sect_name_dwarf_debug_abbrev_dwo)
    return eSectionTypeDWARFDebugAbbrevDwo;
  if (section_name == g_sect_name_dwarf_debug_addr)
    return eSectionTypeDWARFDebugAddr;
  if (section_name == g_sect_name_dwarf_debug_aranges)
    return eSectionTypeDWARFDebugAranges;
  if (section_name == g_sect_name_dwarf_debug_cu_index)
    return eSectionTypeDWARFDebugCuIndex;
  if (section_name == g_sect_name_dwarf_debug_frame)
    return eSectionTypeDWARFDebugFrame;
  if (section_name == g_sect_name_dwarf_debug_info)
    return eSectionTypeDWARFDebugInfo;
  if (section_name == g_sect_name_dwarf_debug_info_dwo)
    return eSectionTypeDWARFDebugInfoDwo;
  if (section_name == g_sect_name_dwarf_debug_line)
    return eSectionTypeDWARFDebugLine;
  if (section_name == g_sect_name_dwarf_debug_line_dwo)
    return eSectionTypeDWARFDebugLine; // Same as debug_line.
  if (section_name == g_sect_name_dwarf_debug_line_str)
    return eSectionTypeDWARFDebugLineStr;
  if (section_name == g_sect_name_dwarf_debug_loc)
    return eSectionTypeDWARFDebugLoc;
  if (section_name == g_sect_name_dwarf_debug_loclists)
    return eSectionTypeDWARFDebugLocLists;
  if (section_name == g_sect_name_dwarf_debug_loclists_dwo)
    return eSectionTypeDWARFDebugLocListsDwo;
  if (section_name == g_sect_name_dwarf_debug_macinfo)
    return eSectionTypeDWARFDebugMacInfo;
  if (section_name == g_sect_name_dwarf_debug_macro)
    return eSectionTypeDWARFDebugMacro;
  // NOTE(review): the trailing comment says "same as debug_macro", yet the
  // value returned is the macinfo type rather than
  // eSectionTypeDWARFDebugMacro -- confirm which one is intended.
  if (section_name == g_sect_name_dwarf_debug_macro_dwo)
    return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro.
  if (section_name == g_sect_name_dwarf_debug_names)
    return eSectionTypeDWARFDebugNames;
  if (section_name == g_sect_name_dwarf_debug_pubnames)
    return eSectionTypeDWARFDebugPubNames;
  if (section_name == g_sect_name_dwarf_debug_pubtypes)
    return eSectionTypeDWARFDebugPubTypes;
  if (section_name == g_sect_name_dwarf_debug_ranges)
    return eSectionTypeDWARFDebugRanges;
  if (section_name == g_sect_name_dwarf_debug_rnglists)
    return eSectionTypeDWARFDebugRngLists;
  if (section_name == g_sect_name_dwarf_debug_str)
    return eSectionTypeDWARFDebugStr;
  if (section_name == g_sect_name_dwarf_debug_str_dwo)
    return eSectionTypeDWARFDebugStrDwo;
  if (section_name == g_sect_name_dwarf_debug_str_offs)
    return eSectionTypeDWARFDebugStrOffsets;
  if (section_name == g_sect_name_dwarf_debug_str_offs_dwo)
    return eSectionTypeDWARFDebugStrOffsetsDwo;
  if (section_name == g_sect_name_dwarf_debug_tu_index)
    return eSectionTypeDWARFDebugTuIndex;
  if (section_name == g_sect_name_dwarf_debug_types)
    return eSectionTypeDWARFDebugTypes;
  if (section_name == g_sect_name_dwarf_apple_names)
    return eSectionTypeDWARFAppleNames;
  if (section_name == g_sect_name_dwarf_apple_types)
    return eSectionTypeDWARFAppleTypes;
  if (section_name == g_sect_name_dwarf_apple_namespaces)
    return eSectionTypeDWARFAppleNamespaces;
  if (section_name == g_sect_name_dwarf_apple_objc)
    return eSectionTypeDWARFAppleObjC;
  if (section_name == g_sect_name_objc_selrefs)
    return eSectionTypeDataCStringPointers;
  if (section_name == g_sect_name_objc_msgrefs)
    return eSectionTypeDataObjCMessageRefs;
  if (section_name == g_sect_name_eh_frame)
    return eSectionTypeEHFrame;
  if (section_name == g_sect_name_compact_unwind)
    return eSectionTypeCompactUnwind;
  if (section_name == g_sect_name_cfstring)
    return eSectionTypeDataObjCCFStrings;
  if (section_name == g_sect_name_go_symtab)
    return eSectionTypeGoSymtab;
  if (section_name == g_sect_name_ctf)
    return eSectionTypeCTF;
  if (section_name == g_sect_name_swift_ast)
    return eSectionTypeSwiftModules;
  if (section_name == g_sect_name_objc_data ||
      section_name == g_sect_name_objc_classrefs ||
      section_name == g_sect_name_objc_superrefs ||
      section_name == g_sect_name_objc_const ||
      section_name == g_sect_name_objc_classlist) {
    return eSectionTypeDataPointers;
  }

  // No name matched: classify by the section type bits taken from the flags.
  switch (mach_sect_type) {
  // TODO: categorize sections by other flags for regular sections
  case S_REGULAR:
    if (section_name == g_sect_name_text)
      return eSectionTypeCode;
    if (section_name == g_sect_name_data)
      return eSectionTypeData;
    return eSectionTypeOther;
  case S_ZEROFILL:
    return eSectionTypeZeroFill;
  case S_CSTRING_LITERALS: // section with only literal C strings
    return eSectionTypeDataCString;
  case S_4BYTE_LITERALS: // section with only 4 byte literals
    return eSectionTypeData4;
  case S_8BYTE_LITERALS: // section with only 8 byte literals
    return eSectionTypeData8;
  case S_LITERAL_POINTERS: // section with only pointers to literals
    return eSectionTypeDataPointers;
  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
    return eSectionTypeDataPointers;
  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
    return eSectionTypeDataPointers;
  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
                       // the reserved2 field
    return eSectionTypeCode;
  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
                                 // initialization
    return eSectionTypeDataPointers;
  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
                                 // termination
    return eSectionTypeDataPointers;
  case S_COALESCED:
    return eSectionTypeOther;
  case S_GB_ZEROFILL:
    return eSectionTypeZeroFill;
  case S_INTERPOSING: // section with only pairs of function pointers for
                      // interposing
    return eSectionTypeCode;
  case S_16BYTE_LITERALS: // section with only 16 byte literals
    return eSectionTypeData16;
  case S_DTRACE_DOF:
    return eSectionTypeDebug;
  case S_LAZY_DYLIB_SYMBOL_POINTERS:
    return eSectionTypeDataPointers;
  default:
    return eSectionTypeOther;
  }
}

// State carried across ProcessSegmentCommand() calls while segment load
// commands are being turned into sections.
struct ObjectFileMachO::SegmentParsingContext {
  // Encrypted file ranges supplied at construction.
  const EncryptedFileRanges EncryptedRanges;
  // Section list shared with the caller; held by reference.
  lldb_private::SectionList &UnifiedList;
  // Running counters used to derive segment and section IDs.
  uint32_t NextSegmentIdx = 0;
  uint32_t NextSectionIdx = 0;
  // Set when a section's file address had to be changed during parsing.
  bool FileAddressesChanged = false;

  SegmentParsingContext(EncryptedFileRanges EncryptedRanges,
                        lldb_private::SectionList &UnifiedList)
      : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {}
};

void ObjectFileMachO::ProcessSegmentCommand(
    const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
    uint32_t cmd_idx, SegmentParsingContext &context) {
  llvm::MachO::segment_command_64 load_cmd;
  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));

  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
    return;

  ModuleSP module_sp = GetModule();
  const bool is_core = GetType() == eTypeCoreFile;
  const bool is_dsym = (m_header.filetype == MH_DSYM);
  bool add_section = true;
  bool add_to_unified = true;
  ConstString const_segname(
      load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));

  SectionSP unified_section_sp(
      context.UnifiedList.FindSectionByName(const_segname));
  if (is_dsym && unified_section_sp) {
    if (const_segname == GetSegmentNameLINKEDIT()) {
      // We need to keep the __LINKEDIT segment private to this object file
      // only
      add_to_unified = false;
    } else {
      // This is the dSYM file and this section
has already been created by the 1664 // object file, no need to create it. 1665 add_section = false; 1666 } 1667 } 1668 load_cmd.vmaddr = m_data.GetAddress(&offset); 1669 load_cmd.vmsize = m_data.GetAddress(&offset); 1670 load_cmd.fileoff = m_data.GetAddress(&offset); 1671 load_cmd.filesize = m_data.GetAddress(&offset); 1672 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4)) 1673 return; 1674 1675 SanitizeSegmentCommand(load_cmd, cmd_idx); 1676 1677 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd); 1678 const bool segment_is_encrypted = 1679 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0; 1680 1681 // Use a segment ID of the segment index shifted left by 8 so they never 1682 // conflict with any of the sections. 1683 SectionSP segment_sp; 1684 if (add_section && (const_segname || is_core)) { 1685 segment_sp = std::make_shared<Section>( 1686 module_sp, // Module to which this section belongs 1687 this, // Object file to which this sections belongs 1688 ++context.NextSegmentIdx 1689 << 8, // Section ID is the 1 based segment index 1690 // shifted right by 8 bits as not to collide with any of the 256 1691 // section IDs that are possible 1692 const_segname, // Name of this section 1693 eSectionTypeContainer, // This section is a container of other 1694 // sections. 
1695 load_cmd.vmaddr, // File VM address == addresses as they are 1696 // found in the object file 1697 load_cmd.vmsize, // VM size in bytes of this section 1698 load_cmd.fileoff, // Offset to the data for this section in 1699 // the file 1700 load_cmd.filesize, // Size in bytes of this section as found 1701 // in the file 1702 0, // Segments have no alignment information 1703 load_cmd.flags); // Flags for this section 1704 1705 segment_sp->SetIsEncrypted(segment_is_encrypted); 1706 m_sections_up->AddSection(segment_sp); 1707 segment_sp->SetPermissions(segment_permissions); 1708 if (add_to_unified) 1709 context.UnifiedList.AddSection(segment_sp); 1710 } else if (unified_section_sp) { 1711 // If this is a dSYM and the file addresses in the dSYM differ from the 1712 // file addresses in the ObjectFile, we must use the file base address for 1713 // the Section from the dSYM for the DWARF to resolve correctly. 1714 // This only happens with binaries in the shared cache in practice; 1715 // normally a mismatch like this would give a binary & dSYM that do not 1716 // match UUIDs. When a binary is included in the shared cache, its 1717 // segments are rearranged to optimize the shared cache, so its file 1718 // addresses will differ from what the ObjectFile had originally, 1719 // and what the dSYM has. 1720 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) { 1721 Log *log = GetLog(LLDBLog::Symbols); 1722 if (log) { 1723 log->Printf( 1724 "Installing dSYM's %s segment file address over ObjectFile's " 1725 "so symbol table/debug info resolves correctly for %s", 1726 const_segname.AsCString(), 1727 module_sp->GetFileSpec().GetFilename().AsCString()); 1728 } 1729 1730 // Make sure we've parsed the symbol table from the ObjectFile before 1731 // we go around changing its Sections. 
1732 module_sp->GetObjectFile()->GetSymtab(); 1733 // eh_frame would present the same problems but we parse that on a per- 1734 // function basis as-needed so it's more difficult to remove its use of 1735 // the Sections. Realistically, the environments where this code path 1736 // will be taken will not have eh_frame sections. 1737 1738 unified_section_sp->SetFileAddress(load_cmd.vmaddr); 1739 1740 // Notify the module that the section addresses have been changed once 1741 // we're done so any file-address caches can be updated. 1742 context.FileAddressesChanged = true; 1743 } 1744 m_sections_up->AddSection(unified_section_sp); 1745 } 1746 1747 llvm::MachO::section_64 sect64; 1748 ::memset(§64, 0, sizeof(sect64)); 1749 // Push a section into our mach sections for the section at index zero 1750 // (NO_SECT) if we don't have any mach sections yet... 1751 if (m_mach_sections.empty()) 1752 m_mach_sections.push_back(sect64); 1753 uint32_t segment_sect_idx; 1754 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; 1755 1756 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; 1757 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; 1758 ++segment_sect_idx) { 1759 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname, 1760 sizeof(sect64.sectname)) == nullptr) 1761 break; 1762 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname, 1763 sizeof(sect64.segname)) == nullptr) 1764 break; 1765 sect64.addr = m_data.GetAddress(&offset); 1766 sect64.size = m_data.GetAddress(&offset); 1767 1768 if (m_data.GetU32(&offset, §64.offset, num_u32s) == nullptr) 1769 break; 1770 1771 if (IsSharedCacheBinary() && !IsInMemory()) { 1772 sect64.offset = sect64.addr - m_text_address; 1773 } 1774 1775 // Keep a list of mach sections around in case we need to get at data that 1776 // isn't stored in the abstracted Sections. 
1777 m_mach_sections.push_back(sect64); 1778 1779 if (add_section) { 1780 ConstString section_name( 1781 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); 1782 if (!const_segname) { 1783 // We have a segment with no name so we need to conjure up segments 1784 // that correspond to the section's segname if there isn't already such 1785 // a section. If there is such a section, we resize the section so that 1786 // it spans all sections. We also mark these sections as fake so 1787 // address matches don't hit if they land in the gaps between the child 1788 // sections. 1789 const_segname.SetTrimmedCStringWithLength(sect64.segname, 1790 sizeof(sect64.segname)); 1791 segment_sp = context.UnifiedList.FindSectionByName(const_segname); 1792 if (segment_sp.get()) { 1793 Section *segment = segment_sp.get(); 1794 // Grow the section size as needed. 1795 const lldb::addr_t sect64_min_addr = sect64.addr; 1796 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size; 1797 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize(); 1798 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress(); 1799 const lldb::addr_t curr_seg_max_addr = 1800 curr_seg_min_addr + curr_seg_byte_size; 1801 if (sect64_min_addr >= curr_seg_min_addr) { 1802 const lldb::addr_t new_seg_byte_size = 1803 sect64_max_addr - curr_seg_min_addr; 1804 // Only grow the section size if needed 1805 if (new_seg_byte_size > curr_seg_byte_size) 1806 segment->SetByteSize(new_seg_byte_size); 1807 } else { 1808 // We need to change the base address of the segment and adjust the 1809 // child section offsets for all existing children. 1810 const lldb::addr_t slide_amount = 1811 sect64_min_addr - curr_seg_min_addr; 1812 segment->Slide(slide_amount, false); 1813 segment->GetChildren().Slide(-slide_amount, false); 1814 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr); 1815 } 1816 1817 // Grow the section size as needed. 
1818 if (sect64.offset) { 1819 const lldb::addr_t segment_min_file_offset = 1820 segment->GetFileOffset(); 1821 const lldb::addr_t segment_max_file_offset = 1822 segment_min_file_offset + segment->GetFileSize(); 1823 1824 const lldb::addr_t section_min_file_offset = sect64.offset; 1825 const lldb::addr_t section_max_file_offset = 1826 section_min_file_offset + sect64.size; 1827 const lldb::addr_t new_file_offset = 1828 std::min(section_min_file_offset, segment_min_file_offset); 1829 const lldb::addr_t new_file_size = 1830 std::max(section_max_file_offset, segment_max_file_offset) - 1831 new_file_offset; 1832 segment->SetFileOffset(new_file_offset); 1833 segment->SetFileSize(new_file_size); 1834 } 1835 } else { 1836 // Create a fake section for the section's named segment 1837 segment_sp = std::make_shared<Section>( 1838 segment_sp, // Parent section 1839 module_sp, // Module to which this section belongs 1840 this, // Object file to which this section belongs 1841 ++context.NextSegmentIdx 1842 << 8, // Section ID is the 1 based segment index 1843 // shifted right by 8 bits as not to 1844 // collide with any of the 256 section IDs 1845 // that are possible 1846 const_segname, // Name of this section 1847 eSectionTypeContainer, // This section is a container of 1848 // other sections. 1849 sect64.addr, // File VM address == addresses as they are 1850 // found in the object file 1851 sect64.size, // VM size in bytes of this section 1852 sect64.offset, // Offset to the data for this section in 1853 // the file 1854 sect64.offset ? 
sect64.size : 0, // Size in bytes of 1855 // this section as 1856 // found in the file 1857 sect64.align, 1858 load_cmd.flags); // Flags for this section 1859 segment_sp->SetIsFake(true); 1860 segment_sp->SetPermissions(segment_permissions); 1861 m_sections_up->AddSection(segment_sp); 1862 if (add_to_unified) 1863 context.UnifiedList.AddSection(segment_sp); 1864 segment_sp->SetIsEncrypted(segment_is_encrypted); 1865 } 1866 } 1867 assert(segment_sp.get()); 1868 1869 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name); 1870 1871 SectionSP section_sp(new Section( 1872 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, 1873 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, 1874 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, 1875 sect64.flags)); 1876 // Set the section to be encrypted to match the segment 1877 1878 bool section_is_encrypted = false; 1879 if (!segment_is_encrypted && load_cmd.filesize != 0) 1880 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( 1881 sect64.offset) != nullptr; 1882 1883 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); 1884 section_sp->SetPermissions(segment_permissions); 1885 segment_sp->GetChildren().AddSection(section_sp); 1886 1887 if (segment_sp->IsFake()) { 1888 segment_sp.reset(); 1889 const_segname.Clear(); 1890 } 1891 } 1892 } 1893 if (segment_sp && is_dsym) { 1894 if (first_segment_sectID <= context.NextSectionIdx) { 1895 lldb::user_id_t sect_uid; 1896 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx; 1897 ++sect_uid) { 1898 SectionSP curr_section_sp( 1899 segment_sp->GetChildren().FindSectionByID(sect_uid)); 1900 SectionSP next_section_sp; 1901 if (sect_uid + 1 <= context.NextSectionIdx) 1902 next_section_sp = 1903 segment_sp->GetChildren().FindSectionByID(sect_uid + 1); 1904 1905 if (curr_section_sp.get()) { 1906 if (curr_section_sp->GetByteSize() == 0) { 1907 if 
(next_section_sp.get() != nullptr) 1908 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() - 1909 curr_section_sp->GetFileAddress()); 1910 else 1911 curr_section_sp->SetByteSize(load_cmd.vmsize); 1912 } 1913 } 1914 } 1915 } 1916 } 1917 } 1918 1919 void ObjectFileMachO::ProcessDysymtabCommand( 1920 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) { 1921 m_dysymtab.cmd = load_cmd.cmd; 1922 m_dysymtab.cmdsize = load_cmd.cmdsize; 1923 m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1924 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2); 1925 } 1926 1927 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) { 1928 if (m_sections_up) 1929 return; 1930 1931 m_sections_up = std::make_unique<SectionList>(); 1932 1933 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1934 // bool dump_sections = false; 1935 ModuleSP module_sp(GetModule()); 1936 1937 offset = MachHeaderSizeFromMagic(m_header.magic); 1938 1939 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list); 1940 llvm::MachO::load_command load_cmd; 1941 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1942 const lldb::offset_t load_cmd_offset = offset; 1943 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 1944 break; 1945 1946 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64) 1947 ProcessSegmentCommand(load_cmd, offset, i, context); 1948 else if (load_cmd.cmd == LC_DYSYMTAB) 1949 ProcessDysymtabCommand(load_cmd, offset); 1950 1951 offset = load_cmd_offset + load_cmd.cmdsize; 1952 } 1953 1954 if (context.FileAddressesChanged && module_sp) 1955 module_sp->SectionFileAddressesChanged(); 1956 } 1957 1958 class MachSymtabSectionInfo { 1959 public: 1960 MachSymtabSectionInfo(SectionList *section_list) 1961 : m_section_list(section_list), m_section_infos() { 1962 // Get the number of sections down to a depth of 1 to include all segments 1963 // and their sections, but no other sections that may be added for debug 1964 // map or 1965 
m_section_infos.resize(section_list->GetNumSections(1)); 1966 } 1967 1968 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) { 1969 if (n_sect == 0) 1970 return SectionSP(); 1971 if (n_sect < m_section_infos.size()) { 1972 if (!m_section_infos[n_sect].section_sp) { 1973 SectionSP section_sp(m_section_list->FindSectionByID(n_sect)); 1974 m_section_infos[n_sect].section_sp = section_sp; 1975 if (section_sp) { 1976 m_section_infos[n_sect].vm_range.SetBaseAddress( 1977 section_sp->GetFileAddress()); 1978 m_section_infos[n_sect].vm_range.SetByteSize( 1979 section_sp->GetByteSize()); 1980 } else { 1981 std::string filename = "<unknown>"; 1982 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0)); 1983 if (first_section_sp) 1984 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath(); 1985 1986 Debugger::ReportError( 1987 llvm::formatv("unable to find section {0} for a symbol in " 1988 "{1}, corrupt file?", 1989 n_sect, filename)); 1990 } 1991 } 1992 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) { 1993 // Symbol is in section. 1994 return m_section_infos[n_sect].section_sp; 1995 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 && 1996 m_section_infos[n_sect].vm_range.GetBaseAddress() == 1997 file_addr) { 1998 // Symbol is in section with zero size, but has the same start address 1999 // as the section. This can happen with linker symbols (symbols that 2000 // start with the letter 'l' or 'L'. 
2001 return m_section_infos[n_sect].section_sp; 2002 } 2003 } 2004 return m_section_list->FindSectionContainingFileAddress(file_addr); 2005 } 2006 2007 protected: 2008 struct SectionInfo { 2009 SectionInfo() : vm_range(), section_sp() {} 2010 2011 VMRange vm_range; 2012 SectionSP section_sp; 2013 }; 2014 SectionList *m_section_list; 2015 std::vector<SectionInfo> m_section_infos; 2016 }; 2017 2018 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63) 2019 struct TrieEntry { 2020 void Dump() const { 2021 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", 2022 static_cast<unsigned long long>(address), 2023 static_cast<unsigned long long>(flags), 2024 static_cast<unsigned long long>(other), name.GetCString()); 2025 if (import_name) 2026 printf(" -> \"%s\"\n", import_name.GetCString()); 2027 else 2028 printf("\n"); 2029 } 2030 ConstString name; 2031 uint64_t address = LLDB_INVALID_ADDRESS; 2032 uint64_t flags = 2033 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, 2034 // TRIE_SYMBOL_IS_THUMB 2035 uint64_t other = 0; 2036 ConstString import_name; 2037 }; 2038 2039 struct TrieEntryWithOffset { 2040 lldb::offset_t nodeOffset; 2041 TrieEntry entry; 2042 2043 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {} 2044 2045 void Dump(uint32_t idx) const { 2046 printf("[%3u] 0x%16.16llx: ", idx, 2047 static_cast<unsigned long long>(nodeOffset)); 2048 entry.Dump(); 2049 } 2050 2051 bool operator<(const TrieEntryWithOffset &other) const { 2052 return (nodeOffset < other.nodeOffset); 2053 } 2054 }; 2055 2056 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset, 2057 const bool is_arm, addr_t text_seg_base_addr, 2058 std::vector<llvm::StringRef> &nameSlices, 2059 std::set<lldb::addr_t> &resolver_addresses, 2060 std::vector<TrieEntryWithOffset> &reexports, 2061 std::vector<TrieEntryWithOffset> &ext_symbols) { 2062 if (!data.ValidOffset(offset)) 2063 return true; 2064 2065 // Terminal node -- end of a branch, possibly add this 
to 2066 // the symbol table or resolver table. 2067 const uint64_t terminalSize = data.GetULEB128(&offset); 2068 lldb::offset_t children_offset = offset + terminalSize; 2069 if (terminalSize != 0) { 2070 TrieEntryWithOffset e(offset); 2071 e.entry.flags = data.GetULEB128(&offset); 2072 const char *import_name = nullptr; 2073 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 2074 e.entry.address = 0; 2075 e.entry.other = data.GetULEB128(&offset); // dylib ordinal 2076 import_name = data.GetCStr(&offset); 2077 } else { 2078 e.entry.address = data.GetULEB128(&offset); 2079 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2080 e.entry.address += text_seg_base_addr; 2081 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 2082 e.entry.other = data.GetULEB128(&offset); 2083 uint64_t resolver_addr = e.entry.other; 2084 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2085 resolver_addr += text_seg_base_addr; 2086 if (is_arm) 2087 resolver_addr &= THUMB_ADDRESS_BIT_MASK; 2088 resolver_addresses.insert(resolver_addr); 2089 } else 2090 e.entry.other = 0; 2091 } 2092 bool add_this_entry = false; 2093 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && 2094 import_name && import_name[0]) { 2095 // add symbols that are reexport symbols with a valid import name. 2096 add_this_entry = true; 2097 } else if (e.entry.flags == 0 && 2098 (import_name == nullptr || import_name[0] == '\0')) { 2099 // add externally visible symbols, in case the nlist record has 2100 // been stripped/omitted. 
2101 add_this_entry = true; 2102 } 2103 if (add_this_entry) { 2104 std::string name; 2105 if (!nameSlices.empty()) { 2106 for (auto name_slice : nameSlices) 2107 name.append(name_slice.data(), name_slice.size()); 2108 } 2109 if (name.size() > 1) { 2110 // Skip the leading '_' 2111 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1); 2112 } 2113 if (import_name) { 2114 // Skip the leading '_' 2115 e.entry.import_name.SetCString(import_name + 1); 2116 } 2117 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { 2118 reexports.push_back(e); 2119 } else { 2120 if (is_arm && (e.entry.address & 1)) { 2121 e.entry.flags |= TRIE_SYMBOL_IS_THUMB; 2122 e.entry.address &= THUMB_ADDRESS_BIT_MASK; 2123 } 2124 ext_symbols.push_back(e); 2125 } 2126 } 2127 } 2128 2129 const uint8_t childrenCount = data.GetU8(&children_offset); 2130 for (uint8_t i = 0; i < childrenCount; ++i) { 2131 const char *cstr = data.GetCStr(&children_offset); 2132 if (cstr) 2133 nameSlices.push_back(llvm::StringRef(cstr)); 2134 else 2135 return false; // Corrupt data 2136 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); 2137 if (childNodeOffset) { 2138 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr, 2139 nameSlices, resolver_addresses, reexports, 2140 ext_symbols)) { 2141 return false; 2142 } 2143 } 2144 nameSlices.pop_back(); 2145 } 2146 return true; 2147 } 2148 2149 static SymbolType GetSymbolType(const char *&symbol_name, 2150 bool &demangled_is_synthesized, 2151 const SectionSP &text_section_sp, 2152 const SectionSP &data_section_sp, 2153 const SectionSP &data_dirty_section_sp, 2154 const SectionSP &data_const_section_sp, 2155 const SectionSP &symbol_section) { 2156 SymbolType type = eSymbolTypeInvalid; 2157 2158 const char *symbol_sect_name = symbol_section->GetName().AsCString(); 2159 if (symbol_section->IsDescendant(text_section_sp.get())) { 2160 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 2161 S_ATTR_SELF_MODIFYING_CODE 
| 2162 S_ATTR_SOME_INSTRUCTIONS)) 2163 type = eSymbolTypeData; 2164 else 2165 type = eSymbolTypeCode; 2166 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 2167 symbol_section->IsDescendant(data_dirty_section_sp.get()) || 2168 symbol_section->IsDescendant(data_const_section_sp.get())) { 2169 if (symbol_sect_name && 2170 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 2171 type = eSymbolTypeRuntime; 2172 2173 if (symbol_name) { 2174 llvm::StringRef symbol_name_ref(symbol_name); 2175 if (symbol_name_ref.starts_with("OBJC_")) { 2176 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_"); 2177 static const llvm::StringRef g_objc_v2_prefix_metaclass( 2178 "OBJC_METACLASS_$_"); 2179 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_"); 2180 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 2181 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 2182 type = eSymbolTypeObjCClass; 2183 demangled_is_synthesized = true; 2184 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_metaclass)) { 2185 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 2186 type = eSymbolTypeObjCMetaClass; 2187 demangled_is_synthesized = true; 2188 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 2189 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 2190 type = eSymbolTypeObjCIVar; 2191 demangled_is_synthesized = true; 2192 } 2193 } 2194 } 2195 } else if (symbol_sect_name && 2196 ::strstr(symbol_sect_name, "__gcc_except_tab") == 2197 symbol_sect_name) { 2198 type = eSymbolTypeException; 2199 } else { 2200 type = eSymbolTypeData; 2201 } 2202 } else if (symbol_sect_name && 2203 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) { 2204 type = eSymbolTypeTrampoline; 2205 } 2206 return type; 2207 } 2208 2209 static std::optional<struct nlist_64> 2210 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, 2211 size_t nlist_byte_size) { 2212 struct nlist_64 nlist; 
2213 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) 2214 return {}; 2215 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); 2216 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); 2217 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); 2218 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); 2219 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); 2220 return nlist; 2221 } 2222 2223 enum { DebugSymbols = true, NonDebugSymbols = false }; 2224 2225 void ObjectFileMachO::ParseSymtab(Symtab &symtab) { 2226 ModuleSP module_sp(GetModule()); 2227 if (!module_sp) 2228 return; 2229 2230 Log *log = GetLog(LLDBLog::Symbols); 2231 2232 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec(); 2233 const char *file_name = file.GetFilename().AsCString("<Unknown>"); 2234 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name); 2235 LLDB_LOG(log, "Parsing symbol table for {0}", file_name); 2236 Progress progress("Parsing symbol table", file_name); 2237 2238 llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0}; 2239 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; 2240 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0}; 2241 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 2242 llvm::MachO::dysymtab_command dysymtab = m_dysymtab; 2243 // The data element of type bool indicates that this entry is thumb 2244 // code. 2245 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts; 2246 2247 // Record the address of every function/data that we add to the symtab. 2248 // We add symbols to the table in the order of most information (nlist 2249 // records) to least (function starts), and avoid duplicating symbols 2250 // via this set. 
2251 llvm::DenseSet<addr_t> symbols_added; 2252 2253 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we 2254 // do not add the tombstone or empty keys to the set. 2255 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) { 2256 // Don't add the tombstone or empty keys. 2257 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1) 2258 return; 2259 symbols_added.insert(file_addr); 2260 }; 2261 FunctionStarts function_starts; 2262 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 2263 uint32_t i; 2264 FileSpecList dylib_files; 2265 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_"); 2266 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_"); 2267 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 2268 UUID image_uuid; 2269 2270 for (i = 0; i < m_header.ncmds; ++i) { 2271 const lldb::offset_t cmd_offset = offset; 2272 // Read in the load command and load command size 2273 llvm::MachO::load_command lc; 2274 if (m_data.GetU32(&offset, &lc, 2) == nullptr) 2275 break; 2276 // Watch for the symbol table load command 2277 switch (lc.cmd) { 2278 case LC_SYMTAB: 2279 symtab_load_command.cmd = lc.cmd; 2280 symtab_load_command.cmdsize = lc.cmdsize; 2281 // Read in the rest of the symtab load command 2282 if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) == 2283 nullptr) // fill in symoff, nsyms, stroff, strsize fields 2284 return; 2285 break; 2286 2287 case LC_DYLD_INFO: 2288 case LC_DYLD_INFO_ONLY: 2289 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) { 2290 dyld_info.cmd = lc.cmd; 2291 dyld_info.cmdsize = lc.cmdsize; 2292 } else { 2293 memset(&dyld_info, 0, sizeof(dyld_info)); 2294 } 2295 break; 2296 2297 case LC_LOAD_DYLIB: 2298 case LC_LOAD_WEAK_DYLIB: 2299 case LC_REEXPORT_DYLIB: 2300 case LC_LOADFVMLIB: 2301 case LC_LOAD_UPWARD_DYLIB: { 2302 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 2303 const char *path = m_data.PeekCStr(name_offset); 2304 if (path) { 2305 FileSpec 
file_spec(path); 2306 // Strip the path if there is @rpath, @executable, etc so we just use 2307 // the basename 2308 if (path[0] == '@') 2309 file_spec.ClearDirectory(); 2310 2311 if (lc.cmd == LC_REEXPORT_DYLIB) { 2312 m_reexported_dylibs.AppendIfUnique(file_spec); 2313 } 2314 2315 dylib_files.Append(file_spec); 2316 } 2317 } break; 2318 2319 case LC_DYLD_EXPORTS_TRIE: 2320 exports_trie_load_command.cmd = lc.cmd; 2321 exports_trie_load_command.cmdsize = lc.cmdsize; 2322 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) == 2323 nullptr) // fill in offset and size fields 2324 memset(&exports_trie_load_command, 0, 2325 sizeof(exports_trie_load_command)); 2326 break; 2327 case LC_FUNCTION_STARTS: 2328 function_starts_load_command.cmd = lc.cmd; 2329 function_starts_load_command.cmdsize = lc.cmdsize; 2330 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) == 2331 nullptr) // fill in data offset and size fields 2332 memset(&function_starts_load_command, 0, 2333 sizeof(function_starts_load_command)); 2334 break; 2335 2336 case LC_UUID: { 2337 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); 2338 2339 if (uuid_bytes) 2340 image_uuid = UUID(uuid_bytes, 16); 2341 break; 2342 } 2343 2344 default: 2345 break; 2346 } 2347 offset = cmd_offset + lc.cmdsize; 2348 } 2349 2350 if (!symtab_load_command.cmd) 2351 return; 2352 2353 SectionList *section_list = GetSectionList(); 2354 if (section_list == nullptr) 2355 return; 2356 2357 const uint32_t addr_byte_size = m_data.GetAddressByteSize(); 2358 const ByteOrder byte_order = m_data.GetByteOrder(); 2359 bool bit_width_32 = addr_byte_size == 4; 2360 const size_t nlist_byte_size = 2361 bit_width_32 ? 
sizeof(struct nlist) : sizeof(struct nlist_64); 2362 2363 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size); 2364 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size); 2365 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size); 2366 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order, 2367 addr_byte_size); 2368 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size); 2369 2370 const addr_t nlist_data_byte_size = 2371 symtab_load_command.nsyms * nlist_byte_size; 2372 const addr_t strtab_data_byte_size = symtab_load_command.strsize; 2373 addr_t strtab_addr = LLDB_INVALID_ADDRESS; 2374 2375 ProcessSP process_sp(m_process_wp.lock()); 2376 Process *process = process_sp.get(); 2377 2378 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete; 2379 bool is_shared_cache_image = IsSharedCacheBinary(); 2380 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory(); 2381 SectionSP linkedit_section_sp( 2382 section_list->FindSectionByName(GetSegmentNameLINKEDIT())); 2383 2384 if (process && m_header.filetype != llvm::MachO::MH_OBJECT && 2385 !is_local_shared_cache_image) { 2386 Target &target = process->GetTarget(); 2387 2388 memory_module_load_level = target.GetMemoryModuleLoadLevel(); 2389 2390 // Reading mach file from memory in a process or core file... 2391 2392 if (linkedit_section_sp) { 2393 addr_t linkedit_load_addr = 2394 linkedit_section_sp->GetLoadBaseAddress(&target); 2395 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) { 2396 // We might be trying to access the symbol table before the 2397 // __LINKEDIT's load address has been set in the target. 
We can't 2398 // fail to read the symbol table, so calculate the right address 2399 // manually 2400 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage( 2401 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get()); 2402 } 2403 2404 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset(); 2405 const addr_t symoff_addr = linkedit_load_addr + 2406 symtab_load_command.symoff - 2407 linkedit_file_offset; 2408 strtab_addr = linkedit_load_addr + symtab_load_command.stroff - 2409 linkedit_file_offset; 2410 2411 // Always load dyld - the dynamic linker - from memory if we didn't 2412 // find a binary anywhere else. lldb will not register 2413 // dylib/framework/bundle loads/unloads if we don't have the dyld 2414 // symbols, we force dyld to load from memory despite the user's 2415 // target.memory-module-load-level setting. 2416 if (memory_module_load_level == eMemoryModuleLoadLevelComplete || 2417 m_header.filetype == llvm::MachO::MH_DYLINKER) { 2418 DataBufferSP nlist_data_sp( 2419 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size)); 2420 if (nlist_data_sp) 2421 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize()); 2422 if (dysymtab.nindirectsyms != 0) { 2423 const addr_t indirect_syms_addr = linkedit_load_addr + 2424 dysymtab.indirectsymoff - 2425 linkedit_file_offset; 2426 DataBufferSP indirect_syms_data_sp(ReadMemory( 2427 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4)); 2428 if (indirect_syms_data_sp) 2429 indirect_symbol_index_data.SetData( 2430 indirect_syms_data_sp, 0, 2431 indirect_syms_data_sp->GetByteSize()); 2432 // If this binary is outside the shared cache, 2433 // cache the string table. 2434 // Binaries in the shared cache all share a giant string table, 2435 // and we can't share the string tables across multiple 2436 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab 2437 // for every binary in the shared cache - it would be a big perf 2438 // problem. 
For binaries outside the shared cache, it's faster to 2439 // read the entire strtab at once instead of piece-by-piece as we 2440 // process the nlist records. 2441 if (!is_shared_cache_image) { 2442 DataBufferSP strtab_data_sp( 2443 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size)); 2444 if (strtab_data_sp) { 2445 strtab_data.SetData(strtab_data_sp, 0, 2446 strtab_data_sp->GetByteSize()); 2447 } 2448 } 2449 } 2450 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) { 2451 if (function_starts_load_command.cmd) { 2452 const addr_t func_start_addr = 2453 linkedit_load_addr + function_starts_load_command.dataoff - 2454 linkedit_file_offset; 2455 DataBufferSP func_start_data_sp( 2456 ReadMemory(process_sp, func_start_addr, 2457 function_starts_load_command.datasize)); 2458 if (func_start_data_sp) 2459 function_starts_data.SetData(func_start_data_sp, 0, 2460 func_start_data_sp->GetByteSize()); 2461 } 2462 } 2463 } 2464 } 2465 } else { 2466 if (is_local_shared_cache_image) { 2467 // The load commands in shared cache images are relative to the 2468 // beginning of the shared cache, not the library image. The 2469 // data we get handed when creating the ObjectFileMachO starts 2470 // at the beginning of a specific library and spans to the end 2471 // of the cache to be able to reach the shared LINKEDIT 2472 // segments. We need to convert the load command offsets to be 2473 // relative to the beginning of our specific image. 
2474 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset(); 2475 lldb::offset_t linkedit_slide = 2476 linkedit_offset - m_linkedit_original_offset; 2477 symtab_load_command.symoff += linkedit_slide; 2478 symtab_load_command.stroff += linkedit_slide; 2479 dyld_info.export_off += linkedit_slide; 2480 dysymtab.indirectsymoff += linkedit_slide; 2481 function_starts_load_command.dataoff += linkedit_slide; 2482 exports_trie_load_command.dataoff += linkedit_slide; 2483 } 2484 2485 nlist_data.SetData(m_data, symtab_load_command.symoff, 2486 nlist_data_byte_size); 2487 strtab_data.SetData(m_data, symtab_load_command.stroff, 2488 strtab_data_byte_size); 2489 2490 // We shouldn't have exports data from both the LC_DYLD_INFO command 2491 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary: 2492 lldbassert(!((dyld_info.export_size > 0) 2493 && (exports_trie_load_command.datasize > 0))); 2494 if (dyld_info.export_size > 0) { 2495 dyld_trie_data.SetData(m_data, dyld_info.export_off, 2496 dyld_info.export_size); 2497 } else if (exports_trie_load_command.datasize > 0) { 2498 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff, 2499 exports_trie_load_command.datasize); 2500 } 2501 2502 if (dysymtab.nindirectsyms != 0) { 2503 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff, 2504 dysymtab.nindirectsyms * 4); 2505 } 2506 if (function_starts_load_command.cmd) { 2507 function_starts_data.SetData(m_data, function_starts_load_command.dataoff, 2508 function_starts_load_command.datasize); 2509 } 2510 } 2511 2512 const bool have_strtab_data = strtab_data.GetByteSize() > 0; 2513 2514 ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); 2515 ConstString g_segment_name_DATA = GetSegmentNameDATA(); 2516 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY(); 2517 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST(); 2518 ConstString g_segment_name_OBJC = GetSegmentNameOBJC(); 2519 ConstString g_section_name_eh_frame 
= GetSectionNameEHFrame(); 2520 SectionSP text_section_sp( 2521 section_list->FindSectionByName(g_segment_name_TEXT)); 2522 SectionSP data_section_sp( 2523 section_list->FindSectionByName(g_segment_name_DATA)); 2524 SectionSP data_dirty_section_sp( 2525 section_list->FindSectionByName(g_segment_name_DATA_DIRTY)); 2526 SectionSP data_const_section_sp( 2527 section_list->FindSectionByName(g_segment_name_DATA_CONST)); 2528 SectionSP objc_section_sp( 2529 section_list->FindSectionByName(g_segment_name_OBJC)); 2530 SectionSP eh_frame_section_sp; 2531 if (text_section_sp.get()) 2532 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName( 2533 g_section_name_eh_frame); 2534 else 2535 eh_frame_section_sp = 2536 section_list->FindSectionByName(g_section_name_eh_frame); 2537 2538 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM); 2539 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions(); 2540 2541 // lldb works best if it knows the start address of all functions in a 2542 // module. Linker symbols or debug info are normally the best source of 2543 // information for start addr / size but they may be stripped in a released 2544 // binary. Two additional sources of information exist in Mach-O binaries: 2545 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each 2546 // function's start address in the 2547 // binary, relative to the text section. 2548 // eh_frame - the eh_frame FDEs have the start addr & size of 2549 // each function 2550 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on 2551 // all modern binaries. 2552 // Binaries built to run on older releases may need to use eh_frame 2553 // information. 
2554 2555 if (text_section_sp && function_starts_data.GetByteSize()) { 2556 FunctionStarts::Entry function_start_entry; 2557 function_start_entry.data = false; 2558 lldb::offset_t function_start_offset = 0; 2559 function_start_entry.addr = text_section_sp->GetFileAddress(); 2560 uint64_t delta; 2561 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) > 2562 0) { 2563 // Now append the current entry 2564 function_start_entry.addr += delta; 2565 if (is_arm) { 2566 if (function_start_entry.addr & 1) { 2567 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2568 function_start_entry.data = true; 2569 } else if (always_thumb) { 2570 function_start_entry.data = true; 2571 } 2572 } 2573 function_starts.Append(function_start_entry); 2574 } 2575 } else { 2576 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the 2577 // load command claiming an eh_frame but it doesn't actually have the 2578 // eh_frame content. And if we have a dSYM, we don't need to do any of 2579 // this fill-in-the-missing-symbols works anyway - the debug info should 2580 // give us all the functions in the module. 
2581 if (text_section_sp.get() && eh_frame_section_sp.get() && 2582 m_type != eTypeDebugInfo) { 2583 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp, 2584 DWARFCallFrameInfo::EH); 2585 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions; 2586 eh_frame.GetFunctionAddressAndSizeVector(functions); 2587 addr_t text_base_addr = text_section_sp->GetFileAddress(); 2588 size_t count = functions.GetSize(); 2589 for (size_t i = 0; i < count; ++i) { 2590 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func = 2591 functions.GetEntryAtIndex(i); 2592 if (func) { 2593 FunctionStarts::Entry function_start_entry; 2594 function_start_entry.addr = func->base - text_base_addr; 2595 if (is_arm) { 2596 if (function_start_entry.addr & 1) { 2597 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2598 function_start_entry.data = true; 2599 } else if (always_thumb) { 2600 function_start_entry.data = true; 2601 } 2602 } 2603 function_starts.Append(function_start_entry); 2604 } 2605 } 2606 } 2607 } 2608 2609 const size_t function_starts_count = function_starts.GetSize(); 2610 2611 // For user process binaries (executables, dylibs, frameworks, bundles), if 2612 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're 2613 // going to assume the binary has been stripped. Don't allow assembly 2614 // language instruction emulation because we don't know proper function 2615 // start boundaries. 2616 // 2617 // For all other types of binaries (kernels, stand-alone bare board 2618 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame 2619 // sections - we should not make any assumptions about them based on that. 
2620 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) { 2621 m_allow_assembly_emulation_unwind_plans = false; 2622 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind)); 2623 2624 if (unwind_or_symbol_log) 2625 module_sp->LogMessage( 2626 unwind_or_symbol_log, 2627 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds"); 2628 } 2629 2630 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get() 2631 ? eh_frame_section_sp->GetID() 2632 : static_cast<user_id_t>(NO_SECT); 2633 2634 uint32_t N_SO_index = UINT32_MAX; 2635 2636 MachSymtabSectionInfo section_info(section_list); 2637 std::vector<uint32_t> N_FUN_indexes; 2638 std::vector<uint32_t> N_NSYM_indexes; 2639 std::vector<uint32_t> N_INCL_indexes; 2640 std::vector<uint32_t> N_BRAC_indexes; 2641 std::vector<uint32_t> N_COMM_indexes; 2642 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap; 2643 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap; 2644 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap; 2645 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx; 2646 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx; 2647 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx; 2648 // Any symbols that get merged into another will get an entry in this map 2649 // so we know 2650 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx; 2651 uint32_t nlist_idx = 0; 2652 Symbol *symbol_ptr = nullptr; 2653 2654 uint32_t sym_idx = 0; 2655 Symbol *sym = nullptr; 2656 size_t num_syms = 0; 2657 std::string memory_symbol_name; 2658 uint32_t unmapped_local_symbols_found = 0; 2659 2660 std::vector<TrieEntryWithOffset> reexport_trie_entries; 2661 std::vector<TrieEntryWithOffset> external_sym_trie_entries; 2662 std::set<lldb::addr_t> resolver_addresses; 2663 2664 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize(); 2665 if (dyld_trie_data_size > 0) { 2666 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size); 2667 
SectionSP text_segment_sp = 2668 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 2669 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS; 2670 if (text_segment_sp) 2671 text_segment_file_addr = text_segment_sp->GetFileAddress(); 2672 std::vector<llvm::StringRef> nameSlices; 2673 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr, 2674 nameSlices, resolver_addresses, reexport_trie_entries, 2675 external_sym_trie_entries); 2676 } 2677 2678 typedef std::set<ConstString> IndirectSymbols; 2679 IndirectSymbols indirect_symbol_names; 2680 2681 #if TARGET_OS_IPHONE 2682 2683 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been 2684 // optimized by moving LOCAL symbols out of the memory mapped portion of 2685 // the DSC. The symbol information has all been retained, but it isn't 2686 // available in the normal nlist data. However, there *are* duplicate 2687 // entries of *some* 2688 // LOCAL symbols in the normal nlist data. To handle this situation 2689 // correctly, we must first attempt 2690 // to parse any DSC unmapped symbol information. If we find any, we set a 2691 // flag that tells the normal nlist parser to ignore all LOCAL symbols. 2692 2693 if (IsSharedCacheBinary()) { 2694 // Before we can start mapping the DSC, we need to make certain the 2695 // target process is actually using the cache we can find. 2696 2697 // Next we need to determine the correct path for the dyld shared cache. 
2698 2699 ArchSpec header_arch = GetArchitecture(); 2700 2701 UUID dsc_uuid; 2702 UUID process_shared_cache_uuid; 2703 addr_t process_shared_cache_base_addr; 2704 2705 if (process) { 2706 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr, 2707 process_shared_cache_uuid); 2708 } 2709 2710 __block bool found_image = false; 2711 __block void *nlist_buffer = nullptr; 2712 __block unsigned nlist_count = 0; 2713 __block char *string_table = nullptr; 2714 __block vm_offset_t vm_nlist_memory = 0; 2715 __block mach_msg_type_number_t vm_nlist_bytes_read = 0; 2716 __block vm_offset_t vm_string_memory = 0; 2717 __block mach_msg_type_number_t vm_string_bytes_read = 0; 2718 2719 auto _ = llvm::make_scope_exit(^{ 2720 if (vm_nlist_memory) 2721 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read); 2722 if (vm_string_memory) 2723 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read); 2724 }); 2725 2726 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 2727 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 2728 UndefinedNameToDescMap undefined_name_to_desc; 2729 SymbolIndexToName reexport_shlib_needs_fixup; 2730 2731 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) { 2732 uuid_t cache_uuid; 2733 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid); 2734 if (found_image) 2735 return; 2736 2737 if (process_shared_cache_uuid.IsValid() && 2738 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16)) 2739 return; 2740 2741 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) { 2742 uuid_t dsc_image_uuid; 2743 if (found_image) 2744 return; 2745 2746 dyld_image_copy_uuid(image, &dsc_image_uuid); 2747 if (image_uuid != UUID::fromData(dsc_image_uuid, 16)) 2748 return; 2749 2750 found_image = true; 2751 2752 // Compute the size of the string table. We need to ask dyld for a 2753 // new SPI to avoid this step. 
2754 dyld_image_local_nlist_content_4Symbolication( 2755 image, ^(const void *nlistStart, uint64_t nlistCount, 2756 const char *stringTable) { 2757 if (!nlistStart || !nlistCount) 2758 return; 2759 2760 // The buffers passed here are valid only inside the block. 2761 // Use vm_read to make a cheap copy of them available for our 2762 // processing later. 2763 kern_return_t ret = 2764 vm_read(mach_task_self(), (vm_address_t)nlistStart, 2765 nlist_byte_size * nlistCount, &vm_nlist_memory, 2766 &vm_nlist_bytes_read); 2767 if (ret != KERN_SUCCESS) 2768 return; 2769 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount); 2770 2771 // We don't know the size of the string table. It's cheaper 2772 // to map the whole VM region than to determine the size by 2773 // parsing all the nlist entries. 2774 vm_address_t string_address = (vm_address_t)stringTable; 2775 vm_size_t region_size; 2776 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; 2777 vm_region_basic_info_data_t info; 2778 memory_object_name_t object; 2779 ret = vm_region_64(mach_task_self(), &string_address, 2780 ®ion_size, VM_REGION_BASIC_INFO_64, 2781 (vm_region_info_t)&info, &info_count, &object); 2782 if (ret != KERN_SUCCESS) 2783 return; 2784 2785 ret = vm_read(mach_task_self(), (vm_address_t)stringTable, 2786 region_size - 2787 ((vm_address_t)stringTable - string_address), 2788 &vm_string_memory, &vm_string_bytes_read); 2789 if (ret != KERN_SUCCESS) 2790 return; 2791 2792 nlist_buffer = (void *)vm_nlist_memory; 2793 string_table = (char *)vm_string_memory; 2794 nlist_count = nlistCount; 2795 }); 2796 }); 2797 }); 2798 if (nlist_buffer) { 2799 DataExtractor dsc_local_symbols_data(nlist_buffer, 2800 nlist_count * nlist_byte_size, 2801 byte_order, addr_byte_size); 2802 unmapped_local_symbols_found = nlist_count; 2803 2804 // The normal nlist code cannot correctly size the Symbols 2805 // array, we need to allocate it here. 
2806 sym = symtab.Resize( 2807 symtab_load_command.nsyms + m_dysymtab.nindirectsyms + 2808 unmapped_local_symbols_found - m_dysymtab.nlocalsym); 2809 num_syms = symtab.GetNumSymbols(); 2810 2811 lldb::offset_t nlist_data_offset = 0; 2812 2813 for (uint32_t nlist_index = 0; 2814 nlist_index < nlist_count; 2815 nlist_index++) { 2816 ///////////////////////////// 2817 { 2818 std::optional<struct nlist_64> nlist_maybe = 2819 ParseNList(dsc_local_symbols_data, nlist_data_offset, 2820 nlist_byte_size); 2821 if (!nlist_maybe) 2822 break; 2823 struct nlist_64 nlist = *nlist_maybe; 2824 2825 SymbolType type = eSymbolTypeInvalid; 2826 const char *symbol_name = string_table + nlist.n_strx; 2827 2828 if (symbol_name == NULL) { 2829 // No symbol should be NULL, even the symbols with no 2830 // string values should have an offset zero which 2831 // points to an empty C-string 2832 Debugger::ReportError(llvm::formatv( 2833 "DSC unmapped local symbol[{0}] has invalid " 2834 "string table offset {1:x} in {2}, ignoring symbol", 2835 nlist_index, nlist.n_strx, 2836 module_sp->GetFileSpec().GetPath()); 2837 continue; 2838 } 2839 if (symbol_name[0] == '\0') 2840 symbol_name = NULL; 2841 2842 const char *symbol_name_non_abi_mangled = NULL; 2843 2844 SectionSP symbol_section; 2845 uint32_t symbol_byte_size = 0; 2846 bool add_nlist = true; 2847 bool is_debug = ((nlist.n_type & N_STAB) != 0); 2848 bool demangled_is_synthesized = false; 2849 bool is_gsym = false; 2850 bool set_value = true; 2851 2852 assert(sym_idx < num_syms); 2853 2854 sym[sym_idx].SetDebug(is_debug); 2855 2856 if (is_debug) { 2857 switch (nlist.n_type) { 2858 case N_GSYM: 2859 // global symbol: name,,NO_SECT,type,0 2860 // Sometimes the N_GSYM value contains the address. 2861 2862 // FIXME: In the .o files, we have a GSYM and a debug 2863 // symbol for all the ObjC data. 
They 2864 // have the same address, but we want to ensure that 2865 // we always find only the real symbol, 'cause we 2866 // don't currently correctly attribute the 2867 // GSYM one to the ObjCClass/Ivar/MetaClass 2868 // symbol type. This is a temporary hack to make 2869 // sure the ObjectiveC symbols get treated correctly. 2870 // To do this right, we should coalesce all the GSYM 2871 // & global symbols that have the same address. 2872 2873 is_gsym = true; 2874 sym[sym_idx].SetExternal(true); 2875 2876 if (symbol_name && symbol_name[0] == '_' && 2877 symbol_name[1] == 'O') { 2878 llvm::StringRef symbol_name_ref(symbol_name); 2879 if (symbol_name_ref.starts_with( 2880 g_objc_v2_prefix_class)) { 2881 symbol_name_non_abi_mangled = symbol_name + 1; 2882 symbol_name = 2883 symbol_name + g_objc_v2_prefix_class.size(); 2884 type = eSymbolTypeObjCClass; 2885 demangled_is_synthesized = true; 2886 2887 } else if (symbol_name_ref.starts_with( 2888 g_objc_v2_prefix_metaclass)) { 2889 symbol_name_non_abi_mangled = symbol_name + 1; 2890 symbol_name = 2891 symbol_name + g_objc_v2_prefix_metaclass.size(); 2892 type = eSymbolTypeObjCMetaClass; 2893 demangled_is_synthesized = true; 2894 } else if (symbol_name_ref.starts_with( 2895 g_objc_v2_prefix_ivar)) { 2896 symbol_name_non_abi_mangled = symbol_name + 1; 2897 symbol_name = 2898 symbol_name + g_objc_v2_prefix_ivar.size(); 2899 type = eSymbolTypeObjCIVar; 2900 demangled_is_synthesized = true; 2901 } 2902 } else { 2903 if (nlist.n_value != 0) 2904 symbol_section = section_info.GetSection( 2905 nlist.n_sect, nlist.n_value); 2906 type = eSymbolTypeData; 2907 } 2908 break; 2909 2910 case N_FNAME: 2911 // procedure name (f77 kludge): name,,NO_SECT,0,0 2912 type = eSymbolTypeCompiler; 2913 break; 2914 2915 case N_FUN: 2916 // procedure: name,,n_sect,linenumber,address 2917 if (symbol_name) { 2918 type = eSymbolTypeCode; 2919 symbol_section = section_info.GetSection( 2920 nlist.n_sect, nlist.n_value); 2921 2922 
N_FUN_addr_to_sym_idx.insert( 2923 std::make_pair(nlist.n_value, sym_idx)); 2924 // We use the current number of symbols in the 2925 // symbol table in lieu of using nlist_idx in case 2926 // we ever start trimming entries out 2927 N_FUN_indexes.push_back(sym_idx); 2928 } else { 2929 type = eSymbolTypeCompiler; 2930 2931 if (!N_FUN_indexes.empty()) { 2932 // Copy the size of the function into the 2933 // original 2934 // STAB entry so we don't have 2935 // to hunt for it later 2936 symtab.SymbolAtIndex(N_FUN_indexes.back()) 2937 ->SetByteSize(nlist.n_value); 2938 N_FUN_indexes.pop_back(); 2939 // We don't really need the end function STAB as 2940 // it contains the size which we already placed 2941 // with the original symbol, so don't add it if 2942 // we want a minimal symbol table 2943 add_nlist = false; 2944 } 2945 } 2946 break; 2947 2948 case N_STSYM: 2949 // static symbol: name,,n_sect,type,address 2950 N_STSYM_addr_to_sym_idx.insert( 2951 std::make_pair(nlist.n_value, sym_idx)); 2952 symbol_section = section_info.GetSection(nlist.n_sect, 2953 nlist.n_value); 2954 if (symbol_name && symbol_name[0]) { 2955 type = ObjectFile::GetSymbolTypeFromName( 2956 symbol_name + 1, eSymbolTypeData); 2957 } 2958 break; 2959 2960 case N_LCSYM: 2961 // .lcomm symbol: name,,n_sect,type,address 2962 symbol_section = section_info.GetSection(nlist.n_sect, 2963 nlist.n_value); 2964 type = eSymbolTypeCommonBlock; 2965 break; 2966 2967 case N_BNSYM: 2968 // We use the current number of symbols in the symbol 2969 // table in lieu of using nlist_idx in case we ever 2970 // start trimming entries out Skip these if we want 2971 // minimal symbol tables 2972 add_nlist = false; 2973 break; 2974 2975 case N_ENSYM: 2976 // Set the size of the N_BNSYM to the terminating 2977 // index of this N_ENSYM so that we can always skip 2978 // the entire symbol if we need to navigate more 2979 // quickly at the source level when parsing STABS 2980 // Skip these if we want minimal symbol tables 2981 
add_nlist = false; 2982 break; 2983 2984 case N_OPT: 2985 // emitted with gcc2_compiled and in gcc source 2986 type = eSymbolTypeCompiler; 2987 break; 2988 2989 case N_RSYM: 2990 // register sym: name,,NO_SECT,type,register 2991 type = eSymbolTypeVariable; 2992 break; 2993 2994 case N_SLINE: 2995 // src line: 0,,n_sect,linenumber,address 2996 symbol_section = section_info.GetSection(nlist.n_sect, 2997 nlist.n_value); 2998 type = eSymbolTypeLineEntry; 2999 break; 3000 3001 case N_SSYM: 3002 // structure elt: name,,NO_SECT,type,struct_offset 3003 type = eSymbolTypeVariableType; 3004 break; 3005 3006 case N_SO: 3007 // source file name 3008 type = eSymbolTypeSourceFile; 3009 if (symbol_name == NULL) { 3010 add_nlist = false; 3011 if (N_SO_index != UINT32_MAX) { 3012 // Set the size of the N_SO to the terminating 3013 // index of this N_SO so that we can always skip 3014 // the entire N_SO if we need to navigate more 3015 // quickly at the source level when parsing STABS 3016 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3017 symbol_ptr->SetByteSize(sym_idx); 3018 symbol_ptr->SetSizeIsSibling(true); 3019 } 3020 N_NSYM_indexes.clear(); 3021 N_INCL_indexes.clear(); 3022 N_BRAC_indexes.clear(); 3023 N_COMM_indexes.clear(); 3024 N_FUN_indexes.clear(); 3025 N_SO_index = UINT32_MAX; 3026 } else { 3027 // We use the current number of symbols in the 3028 // symbol table in lieu of using nlist_idx in case 3029 // we ever start trimming entries out 3030 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3031 if (N_SO_has_full_path) { 3032 if ((N_SO_index == sym_idx - 1) && 3033 ((sym_idx - 1) < num_syms)) { 3034 // We have two consecutive N_SO entries where 3035 // the first contains a directory and the 3036 // second contains a full path. 
3037 sym[sym_idx - 1].GetMangled().SetValue( 3038 ConstString(symbol_name)); 3039 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3040 add_nlist = false; 3041 } else { 3042 // This is the first entry in a N_SO that 3043 // contains a directory or 3044 // a full path to the source file 3045 N_SO_index = sym_idx; 3046 } 3047 } else if ((N_SO_index == sym_idx - 1) && 3048 ((sym_idx - 1) < num_syms)) { 3049 // This is usually the second N_SO entry that 3050 // contains just the filename, so here we combine 3051 // it with the first one if we are minimizing the 3052 // symbol table 3053 const char *so_path = sym[sym_idx - 1] 3054 .GetMangled() 3055 .GetDemangledName() 3056 .AsCString(); 3057 if (so_path && so_path[0]) { 3058 std::string full_so_path(so_path); 3059 const size_t double_slash_pos = 3060 full_so_path.find("//"); 3061 if (double_slash_pos != std::string::npos) { 3062 // The linker has been generating bad N_SO 3063 // entries with doubled up paths 3064 // in the format "%s%s" where the first 3065 // string in the DW_AT_comp_dir, and the 3066 // second is the directory for the source 3067 // file so you end up with a path that looks 3068 // like "/tmp/src//tmp/src/" 3069 FileSpec so_dir(so_path); 3070 if (!FileSystem::Instance().Exists(so_dir)) { 3071 so_dir.SetFile( 3072 &full_so_path[double_slash_pos + 1], 3073 FileSpec::Style::native); 3074 if (FileSystem::Instance().Exists(so_dir)) { 3075 // Trim off the incorrect path 3076 full_so_path.erase(0, double_slash_pos + 1); 3077 } 3078 } 3079 } 3080 if (*full_so_path.rbegin() != '/') 3081 full_so_path += '/'; 3082 full_so_path += symbol_name; 3083 sym[sym_idx - 1].GetMangled().SetValue( 3084 ConstString(full_so_path.c_str())); 3085 add_nlist = false; 3086 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3087 } 3088 } else { 3089 // This could be a relative path to a N_SO 3090 N_SO_index = sym_idx; 3091 } 3092 } 3093 break; 3094 3095 case N_OSO: 3096 // object file name: name,,0,0,st_mtime 3097 type = 
eSymbolTypeObjectFile; 3098 break; 3099 3100 case N_LSYM: 3101 // local sym: name,,NO_SECT,type,offset 3102 type = eSymbolTypeLocal; 3103 break; 3104 3105 // INCL scopes 3106 case N_BINCL: 3107 // include file beginning: name,,NO_SECT,0,sum We use 3108 // the current number of symbols in the symbol table 3109 // in lieu of using nlist_idx in case we ever start 3110 // trimming entries out 3111 N_INCL_indexes.push_back(sym_idx); 3112 type = eSymbolTypeScopeBegin; 3113 break; 3114 3115 case N_EINCL: 3116 // include file end: name,,NO_SECT,0,0 3117 // Set the size of the N_BINCL to the terminating 3118 // index of this N_EINCL so that we can always skip 3119 // the entire symbol if we need to navigate more 3120 // quickly at the source level when parsing STABS 3121 if (!N_INCL_indexes.empty()) { 3122 symbol_ptr = 3123 symtab.SymbolAtIndex(N_INCL_indexes.back()); 3124 symbol_ptr->SetByteSize(sym_idx + 1); 3125 symbol_ptr->SetSizeIsSibling(true); 3126 N_INCL_indexes.pop_back(); 3127 } 3128 type = eSymbolTypeScopeEnd; 3129 break; 3130 3131 case N_SOL: 3132 // #included file name: name,,n_sect,0,address 3133 type = eSymbolTypeHeaderFile; 3134 3135 // We currently don't use the header files on darwin 3136 add_nlist = false; 3137 break; 3138 3139 case N_PARAMS: 3140 // compiler parameters: name,,NO_SECT,0,0 3141 type = eSymbolTypeCompiler; 3142 break; 3143 3144 case N_VERSION: 3145 // compiler version: name,,NO_SECT,0,0 3146 type = eSymbolTypeCompiler; 3147 break; 3148 3149 case N_OLEVEL: 3150 // compiler -O level: name,,NO_SECT,0,0 3151 type = eSymbolTypeCompiler; 3152 break; 3153 3154 case N_PSYM: 3155 // parameter: name,,NO_SECT,type,offset 3156 type = eSymbolTypeVariable; 3157 break; 3158 3159 case N_ENTRY: 3160 // alternate entry: name,,n_sect,linenumber,address 3161 symbol_section = section_info.GetSection(nlist.n_sect, 3162 nlist.n_value); 3163 type = eSymbolTypeLineEntry; 3164 break; 3165 3166 // Left and Right Braces 3167 case N_LBRAC: 3168 // left bracket: 
0,,NO_SECT,nesting level,address We 3169 // use the current number of symbols in the symbol 3170 // table in lieu of using nlist_idx in case we ever 3171 // start trimming entries out 3172 symbol_section = section_info.GetSection(nlist.n_sect, 3173 nlist.n_value); 3174 N_BRAC_indexes.push_back(sym_idx); 3175 type = eSymbolTypeScopeBegin; 3176 break; 3177 3178 case N_RBRAC: 3179 // right bracket: 0,,NO_SECT,nesting level,address 3180 // Set the size of the N_LBRAC to the terminating 3181 // index of this N_RBRAC so that we can always skip 3182 // the entire symbol if we need to navigate more 3183 // quickly at the source level when parsing STABS 3184 symbol_section = section_info.GetSection(nlist.n_sect, 3185 nlist.n_value); 3186 if (!N_BRAC_indexes.empty()) { 3187 symbol_ptr = 3188 symtab.SymbolAtIndex(N_BRAC_indexes.back()); 3189 symbol_ptr->SetByteSize(sym_idx + 1); 3190 symbol_ptr->SetSizeIsSibling(true); 3191 N_BRAC_indexes.pop_back(); 3192 } 3193 type = eSymbolTypeScopeEnd; 3194 break; 3195 3196 case N_EXCL: 3197 // deleted include file: name,,NO_SECT,0,sum 3198 type = eSymbolTypeHeaderFile; 3199 break; 3200 3201 // COMM scopes 3202 case N_BCOMM: 3203 // begin common: name,,NO_SECT,0,0 3204 // We use the current number of symbols in the symbol 3205 // table in lieu of using nlist_idx in case we ever 3206 // start trimming entries out 3207 type = eSymbolTypeScopeBegin; 3208 N_COMM_indexes.push_back(sym_idx); 3209 break; 3210 3211 case N_ECOML: 3212 // end common (local name): 0,,n_sect,0,address 3213 symbol_section = section_info.GetSection(nlist.n_sect, 3214 nlist.n_value); 3215 // Fall through 3216 3217 case N_ECOMM: 3218 // end common: name,,n_sect,0,0 3219 // Set the size of the N_BCOMM to the terminating 3220 // index of this N_ECOMM/N_ECOML so that we can 3221 // always skip the entire symbol if we need to 3222 // navigate more quickly at the source level when 3223 // parsing STABS 3224 if (!N_COMM_indexes.empty()) { 3225 symbol_ptr = 3226 
symtab.SymbolAtIndex(N_COMM_indexes.back()); 3227 symbol_ptr->SetByteSize(sym_idx + 1); 3228 symbol_ptr->SetSizeIsSibling(true); 3229 N_COMM_indexes.pop_back(); 3230 } 3231 type = eSymbolTypeScopeEnd; 3232 break; 3233 3234 case N_LENG: 3235 // second stab entry with length information 3236 type = eSymbolTypeAdditional; 3237 break; 3238 3239 default: 3240 break; 3241 } 3242 } else { 3243 // uint8_t n_pext = N_PEXT & nlist.n_type; 3244 uint8_t n_type = N_TYPE & nlist.n_type; 3245 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 3246 3247 switch (n_type) { 3248 case N_INDR: { 3249 const char *reexport_name_cstr = 3250 strtab_data.PeekCStr(nlist.n_value); 3251 if (reexport_name_cstr && reexport_name_cstr[0]) { 3252 type = eSymbolTypeReExported; 3253 ConstString reexport_name( 3254 reexport_name_cstr + 3255 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 3256 sym[sym_idx].SetReExportedSymbolName(reexport_name); 3257 set_value = false; 3258 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 3259 indirect_symbol_names.insert(ConstString( 3260 symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 3261 } else 3262 type = eSymbolTypeUndefined; 3263 } break; 3264 3265 case N_UNDF: 3266 if (symbol_name && symbol_name[0]) { 3267 ConstString undefined_name( 3268 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)); 3269 undefined_name_to_desc[undefined_name] = nlist.n_desc; 3270 } 3271 // Fall through 3272 case N_PBUD: 3273 type = eSymbolTypeUndefined; 3274 break; 3275 3276 case N_ABS: 3277 type = eSymbolTypeAbsolute; 3278 break; 3279 3280 case N_SECT: { 3281 symbol_section = section_info.GetSection(nlist.n_sect, 3282 nlist.n_value); 3283 3284 if (symbol_section == NULL) { 3285 // TODO: warn about this? 
3286 add_nlist = false; 3287 break; 3288 } 3289 3290 if (TEXT_eh_frame_sectID == nlist.n_sect) { 3291 type = eSymbolTypeException; 3292 } else { 3293 uint32_t section_type = 3294 symbol_section->Get() & SECTION_TYPE; 3295 3296 switch (section_type) { 3297 case S_CSTRING_LITERALS: 3298 type = eSymbolTypeData; 3299 break; // section with only literal C strings 3300 case S_4BYTE_LITERALS: 3301 type = eSymbolTypeData; 3302 break; // section with only 4 byte literals 3303 case S_8BYTE_LITERALS: 3304 type = eSymbolTypeData; 3305 break; // section with only 8 byte literals 3306 case S_LITERAL_POINTERS: 3307 type = eSymbolTypeTrampoline; 3308 break; // section with only pointers to literals 3309 case S_NON_LAZY_SYMBOL_POINTERS: 3310 type = eSymbolTypeTrampoline; 3311 break; // section with only non-lazy symbol 3312 // pointers 3313 case S_LAZY_SYMBOL_POINTERS: 3314 type = eSymbolTypeTrampoline; 3315 break; // section with only lazy symbol pointers 3316 case S_SYMBOL_STUBS: 3317 type = eSymbolTypeTrampoline; 3318 break; // section with only symbol stubs, byte 3319 // size of stub in the reserved2 field 3320 case S_MOD_INIT_FUNC_POINTERS: 3321 type = eSymbolTypeCode; 3322 break; // section with only function pointers for 3323 // initialization 3324 case S_MOD_TERM_FUNC_POINTERS: 3325 type = eSymbolTypeCode; 3326 break; // section with only function pointers for 3327 // termination 3328 case S_INTERPOSING: 3329 type = eSymbolTypeTrampoline; 3330 break; // section with only pairs of function 3331 // pointers for interposing 3332 case S_16BYTE_LITERALS: 3333 type = eSymbolTypeData; 3334 break; // section with only 16 byte literals 3335 case S_DTRACE_DOF: 3336 type = eSymbolTypeInstrumentation; 3337 break; 3338 case S_LAZY_DYLIB_SYMBOL_POINTERS: 3339 type = eSymbolTypeTrampoline; 3340 break; 3341 default: 3342 switch (symbol_section->GetType()) { 3343 case lldb::eSectionTypeCode: 3344 type = eSymbolTypeCode; 3345 break; 3346 case eSectionTypeData: 3347 case 
eSectionTypeDataCString: // Inlined C string 3348 // data 3349 case eSectionTypeDataCStringPointers: // Pointers 3350 // to C 3351 // string 3352 // data 3353 case eSectionTypeDataSymbolAddress: // Address of 3354 // a symbol in 3355 // the symbol 3356 // table 3357 case eSectionTypeData4: 3358 case eSectionTypeData8: 3359 case eSectionTypeData16: 3360 type = eSymbolTypeData; 3361 break; 3362 default: 3363 break; 3364 } 3365 break; 3366 } 3367 3368 if (type == eSymbolTypeInvalid) { 3369 const char *symbol_sect_name = 3370 symbol_section->GetName().AsCString(); 3371 if (symbol_section->IsDescendant( 3372 text_section_sp.get())) { 3373 if (symbol_section->IsClear( 3374 S_ATTR_PURE_INSTRUCTIONS | 3375 S_ATTR_SELF_MODIFYING_CODE | 3376 S_ATTR_SOME_INSTRUCTIONS)) 3377 type = eSymbolTypeData; 3378 else 3379 type = eSymbolTypeCode; 3380 } else if (symbol_section->IsDescendant( 3381 data_section_sp.get()) || 3382 symbol_section->IsDescendant( 3383 data_dirty_section_sp.get()) || 3384 symbol_section->IsDescendant( 3385 data_const_section_sp.get())) { 3386 if (symbol_sect_name && 3387 ::strstr(symbol_sect_name, "__objc") == 3388 symbol_sect_name) { 3389 type = eSymbolTypeRuntime; 3390 3391 if (symbol_name) { 3392 llvm::StringRef symbol_name_ref(symbol_name); 3393 if (symbol_name_ref.starts_with("_OBJC_")) { 3394 llvm::StringRef 3395 g_objc_v2_prefix_class( 3396 "_OBJC_CLASS_$_"); 3397 llvm::StringRef 3398 g_objc_v2_prefix_metaclass( 3399 "_OBJC_METACLASS_$_"); 3400 llvm::StringRef 3401 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 3402 if (symbol_name_ref.starts_with( 3403 g_objc_v2_prefix_class)) { 3404 symbol_name_non_abi_mangled = 3405 symbol_name + 1; 3406 symbol_name = 3407 symbol_name + 3408 g_objc_v2_prefix_class.size(); 3409 type = eSymbolTypeObjCClass; 3410 demangled_is_synthesized = true; 3411 } else if ( 3412 symbol_name_ref.starts_with( 3413 g_objc_v2_prefix_metaclass)) { 3414 symbol_name_non_abi_mangled = 3415 symbol_name + 1; 3416 symbol_name = 3417 symbol_name + 
3418 g_objc_v2_prefix_metaclass.size(); 3419 type = eSymbolTypeObjCMetaClass; 3420 demangled_is_synthesized = true; 3421 } else if (symbol_name_ref.starts_with( 3422 g_objc_v2_prefix_ivar)) { 3423 symbol_name_non_abi_mangled = 3424 symbol_name + 1; 3425 symbol_name = 3426 symbol_name + 3427 g_objc_v2_prefix_ivar.size(); 3428 type = eSymbolTypeObjCIVar; 3429 demangled_is_synthesized = true; 3430 } 3431 } 3432 } 3433 } else if (symbol_sect_name && 3434 ::strstr(symbol_sect_name, 3435 "__gcc_except_tab") == 3436 symbol_sect_name) { 3437 type = eSymbolTypeException; 3438 } else { 3439 type = eSymbolTypeData; 3440 } 3441 } else if (symbol_sect_name && 3442 ::strstr(symbol_sect_name, "__IMPORT") == 3443 symbol_sect_name) { 3444 type = eSymbolTypeTrampoline; 3445 } else if (symbol_section->IsDescendant( 3446 objc_section_sp.get())) { 3447 type = eSymbolTypeRuntime; 3448 if (symbol_name && symbol_name[0] == '.') { 3449 llvm::StringRef symbol_name_ref(symbol_name); 3450 llvm::StringRef 3451 g_objc_v1_prefix_class(".objc_class_name_"); 3452 if (symbol_name_ref.starts_with( 3453 g_objc_v1_prefix_class)) { 3454 symbol_name_non_abi_mangled = symbol_name; 3455 symbol_name = symbol_name + 3456 g_objc_v1_prefix_class.size(); 3457 type = eSymbolTypeObjCClass; 3458 demangled_is_synthesized = true; 3459 } 3460 } 3461 } 3462 } 3463 } 3464 } break; 3465 } 3466 } 3467 3468 if (add_nlist) { 3469 uint64_t symbol_value = nlist.n_value; 3470 if (symbol_name_non_abi_mangled) { 3471 sym[sym_idx].GetMangled().SetMangledName( 3472 ConstString(symbol_name_non_abi_mangled)); 3473 sym[sym_idx].GetMangled().SetDemangledName( 3474 ConstString(symbol_name)); 3475 } else { 3476 if (symbol_name && symbol_name[0] == '_') { 3477 symbol_name++; // Skip the leading underscore 3478 } 3479 3480 if (symbol_name) { 3481 ConstString const_symbol_name(symbol_name); 3482 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 3483 if (is_gsym && is_debug) { 3484 const char *gsym_name = 3485 sym[sym_idx] 3486 
.GetMangled() 3487 .GetName(Mangled::ePreferMangled) 3488 .GetCString(); 3489 if (gsym_name) 3490 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 3491 } 3492 } 3493 } 3494 if (symbol_section) { 3495 const addr_t section_file_addr = 3496 symbol_section->GetFileAddress(); 3497 if (symbol_byte_size == 0 && 3498 function_starts_count > 0) { 3499 addr_t symbol_lookup_file_addr = nlist.n_value; 3500 // Do an exact address match for non-ARM addresses, 3501 // else get the closest since the symbol might be a 3502 // thumb symbol which has an address with bit zero 3503 // set 3504 FunctionStarts::Entry *func_start_entry = 3505 function_starts.FindEntry(symbol_lookup_file_addr, 3506 !is_arm); 3507 if (is_arm && func_start_entry) { 3508 // Verify that the function start address is the 3509 // symbol address (ARM) or the symbol address + 1 3510 // (thumb) 3511 if (func_start_entry->addr != 3512 symbol_lookup_file_addr && 3513 func_start_entry->addr != 3514 (symbol_lookup_file_addr + 1)) { 3515 // Not the right entry, NULL it out... 
3516 func_start_entry = NULL; 3517 } 3518 } 3519 if (func_start_entry) { 3520 func_start_entry->data = true; 3521 3522 addr_t symbol_file_addr = func_start_entry->addr; 3523 uint32_t symbol_flags = 0; 3524 if (is_arm) { 3525 if (symbol_file_addr & 1) 3526 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 3527 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3528 } 3529 3530 const FunctionStarts::Entry *next_func_start_entry = 3531 function_starts.FindNextEntry(func_start_entry); 3532 const addr_t section_end_file_addr = 3533 section_file_addr + 3534 symbol_section->GetByteSize(); 3535 if (next_func_start_entry) { 3536 addr_t next_symbol_file_addr = 3537 next_func_start_entry->addr; 3538 // Be sure the clear the Thumb address bit when 3539 // we calculate the size from the current and 3540 // next address 3541 if (is_arm) 3542 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3543 symbol_byte_size = std::min<lldb::addr_t>( 3544 next_symbol_file_addr - symbol_file_addr, 3545 section_end_file_addr - symbol_file_addr); 3546 } else { 3547 symbol_byte_size = 3548 section_end_file_addr - symbol_file_addr; 3549 } 3550 } 3551 } 3552 symbol_value -= section_file_addr; 3553 } 3554 3555 if (is_debug == false) { 3556 if (type == eSymbolTypeCode) { 3557 // See if we can find a N_FUN entry for any code 3558 // symbols. 
If we do find a match, and the name 3559 // matches, then we can merge the two into just the 3560 // function symbol to avoid duplicate entries in 3561 // the symbol table 3562 auto range = 3563 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 3564 if (range.first != range.second) { 3565 bool found_it = false; 3566 for (auto pos = range.first; pos != range.second; 3567 ++pos) { 3568 if (sym[sym_idx].GetMangled().GetName( 3569 Mangled::ePreferMangled) == 3570 sym[pos->second].GetMangled().GetName( 3571 Mangled::ePreferMangled)) { 3572 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3573 // We just need the flags from the linker 3574 // symbol, so put these flags 3575 // into the N_FUN flags to avoid duplicate 3576 // symbols in the symbol table 3577 sym[pos->second].SetExternal( 3578 sym[sym_idx].IsExternal()); 3579 sym[pos->second].SetFlags(nlist.n_type << 16 | 3580 nlist.n_desc); 3581 if (resolver_addresses.find(nlist.n_value) != 3582 resolver_addresses.end()) 3583 sym[pos->second].SetType(eSymbolTypeResolver); 3584 sym[sym_idx].Clear(); 3585 found_it = true; 3586 break; 3587 } 3588 } 3589 if (found_it) 3590 continue; 3591 } else { 3592 if (resolver_addresses.find(nlist.n_value) != 3593 resolver_addresses.end()) 3594 type = eSymbolTypeResolver; 3595 } 3596 } else if (type == eSymbolTypeData || 3597 type == eSymbolTypeObjCClass || 3598 type == eSymbolTypeObjCMetaClass || 3599 type == eSymbolTypeObjCIVar) { 3600 // See if we can find a N_STSYM entry for any data 3601 // symbols. 
If we do find a match, and the name 3602 // matches, then we can merge the two into just the 3603 // Static symbol to avoid duplicate entries in the 3604 // symbol table 3605 auto range = N_STSYM_addr_to_sym_idx.equal_range( 3606 nlist.n_value); 3607 if (range.first != range.second) { 3608 bool found_it = false; 3609 for (auto pos = range.first; pos != range.second; 3610 ++pos) { 3611 if (sym[sym_idx].GetMangled().GetName( 3612 Mangled::ePreferMangled) == 3613 sym[pos->second].GetMangled().GetName( 3614 Mangled::ePreferMangled)) { 3615 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3616 // We just need the flags from the linker 3617 // symbol, so put these flags 3618 // into the N_STSYM flags to avoid duplicate 3619 // symbols in the symbol table 3620 sym[pos->second].SetExternal( 3621 sym[sym_idx].IsExternal()); 3622 sym[pos->second].SetFlags(nlist.n_type << 16 | 3623 nlist.n_desc); 3624 sym[sym_idx].Clear(); 3625 found_it = true; 3626 break; 3627 } 3628 } 3629 if (found_it) 3630 continue; 3631 } else { 3632 const char *gsym_name = 3633 sym[sym_idx] 3634 .GetMangled() 3635 .GetName(Mangled::ePreferMangled) 3636 .GetCString(); 3637 if (gsym_name) { 3638 // Combine N_GSYM stab entries with the non 3639 // stab symbol 3640 ConstNameToSymbolIndexMap::const_iterator pos = 3641 N_GSYM_name_to_sym_idx.find(gsym_name); 3642 if (pos != N_GSYM_name_to_sym_idx.end()) { 3643 const uint32_t GSYM_sym_idx = pos->second; 3644 m_nlist_idx_to_sym_idx[nlist_idx] = 3645 GSYM_sym_idx; 3646 // Copy the address, because often the N_GSYM 3647 // address has an invalid address of zero 3648 // when the global is a common symbol 3649 sym[GSYM_sym_idx].GetAddressRef().SetSection( 3650 symbol_section); 3651 sym[GSYM_sym_idx].GetAddressRef().SetOffset( 3652 symbol_value); 3653 add_symbol_addr(sym[GSYM_sym_idx] 3654 .GetAddress() 3655 .GetFileAddress()); 3656 // We just need the flags from the linker 3657 // symbol, so put these flags 3658 // into the N_GSYM flags to avoid duplicate 3659 // 
symbols in the symbol table 3660 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | 3661 nlist.n_desc); 3662 sym[sym_idx].Clear(); 3663 continue; 3664 } 3665 } 3666 } 3667 } 3668 } 3669 3670 sym[sym_idx].SetID(nlist_idx); 3671 sym[sym_idx].SetType(type); 3672 if (set_value) { 3673 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 3674 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 3675 add_symbol_addr( 3676 sym[sym_idx].GetAddress().GetFileAddress()); 3677 } 3678 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 3679 3680 if (symbol_byte_size > 0) 3681 sym[sym_idx].SetByteSize(symbol_byte_size); 3682 3683 if (demangled_is_synthesized) 3684 sym[sym_idx].SetDemangledNameIsSynthesized(true); 3685 ++sym_idx; 3686 } else { 3687 sym[sym_idx].Clear(); 3688 } 3689 } 3690 ///////////////////////////// 3691 } 3692 } 3693 3694 for (const auto &pos : reexport_shlib_needs_fixup) { 3695 const auto undef_pos = undefined_name_to_desc.find(pos.second); 3696 if (undef_pos != undefined_name_to_desc.end()) { 3697 const uint8_t dylib_ordinal = 3698 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 3699 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 3700 sym[pos.first].SetReExportedSymbolSharedLibrary( 3701 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 3702 } 3703 } 3704 } 3705 3706 #endif 3707 lldb::offset_t nlist_data_offset = 0; 3708 3709 if (nlist_data.GetByteSize() > 0) { 3710 3711 // If the sym array was not created while parsing the DSC unmapped 3712 // symbols, create it now. 
3713 if (sym == nullptr) { 3714 sym = 3715 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms); 3716 num_syms = symtab.GetNumSymbols(); 3717 } 3718 3719 if (unmapped_local_symbols_found) { 3720 assert(m_dysymtab.ilocalsym == 0); 3721 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size); 3722 nlist_idx = m_dysymtab.nlocalsym; 3723 } else { 3724 nlist_idx = 0; 3725 } 3726 3727 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 3728 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 3729 UndefinedNameToDescMap undefined_name_to_desc; 3730 SymbolIndexToName reexport_shlib_needs_fixup; 3731 3732 // Symtab parsing is a huge mess. Everything is entangled and the code 3733 // requires access to a ridiculous amount of variables. LLDB depends 3734 // heavily on the proper merging of symbols and to get that right we need 3735 // to make sure we have parsed all the debug symbols first. Therefore we 3736 // invoke the lambda twice, once to parse only the debug symbols and then 3737 // once more to parse the remaining symbols. 
// ParseSymbolLambda: turn one already-decoded nlist entry into the Symbol
// slot sym[sym_idx].
//
//   nlist      - the nlist entry to process.
//   nlist_idx  - index of that entry; it becomes the symbol ID and is the key
//                written into m_nlist_idx_to_sym_idx when the entry is folded
//                into a symbol that already exists. (This parameter shadows
//                the outer nlist_idx variable.)
//   debug_only - selects the pass: the entry is ignored unless
//                ((n_type & N_STAB) != 0) equals this flag.
//
// The symbol name is taken from strtab_data when have_strtab_data is set and
// is otherwise read from process memory at strtab_addr + n_strx. Debug (stab)
// entries are classified by a switch on n_type, which also maintains the
// N_SO / N_FUN / N_INCL / N_BRAC / N_COMM bookkeeping; non-debug entries are
// classified by a switch on (n_type & N_TYPE) and, for N_SECT, by the section
// the symbol lives in.
//
// An entry that is kept has its name, type, address, flags and size filled in
// and sym_idx is incremented. An entry that is dropped (add_nlist == false) or
// merged into a previously recorded N_FUN / N_STSYM / N_GSYM symbol has
// sym[sym_idx] cleared and sym_idx left unchanged, so the slot is reused.
//
// NOTE(review): every return statement in the body returns true, so the
// "break on false" checks in the loops that drive this lambda never trigger
// as written.
3738 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, 3739 bool debug_only) { 3740 const bool is_debug = ((nlist.n_type & N_STAB) != 0); 3741 if (is_debug != debug_only) 3742 return true; 3743 3744 const char *symbol_name_non_abi_mangled = nullptr; 3745 const char *symbol_name = nullptr; 3746 3747 if (have_strtab_data) { 3748 symbol_name = strtab_data.PeekCStr(nlist.n_strx); 3749 3750 if (symbol_name == nullptr) { 3751 // No symbol should be NULL, even the symbols with no string values 3752 // should have an offset zero which points to an empty C-string 3753 Debugger::ReportError(llvm::formatv( 3754 "symbol[{0}] has invalid string table offset {1:x} in {2}, " 3755 "ignoring symbol", 3756 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath())); 3757 return true; 3758 } 3759 if (symbol_name[0] == '\0') 3760 symbol_name = nullptr; 3761 } else { 3762 const addr_t str_addr = strtab_addr + nlist.n_strx; 3763 Status str_error; 3764 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name, 3765 str_error)) 3766 symbol_name = memory_symbol_name.c_str(); 3767 } 3768 3769 SymbolType type = eSymbolTypeInvalid; 3770 SectionSP symbol_section; 3771 lldb::addr_t symbol_byte_size = 0; 3772 bool add_nlist = true; 3773 bool is_gsym = false; 3774 bool demangled_is_synthesized = false; 3775 bool set_value = true; 3776 3777 assert(sym_idx < num_syms); 3778 sym[sym_idx].SetDebug(is_debug); 3779 3780 if (is_debug) { 3781 switch (nlist.n_type) { 3782 case N_GSYM: 3783 // global symbol: name,,NO_SECT,type,0 3784 // Sometimes the N_GSYM value contains the address. 3785 3786 // FIXME: In the .o files, we have a GSYM and a debug symbol for all 3787 // the ObjC data. They 3788 // have the same address, but we want to ensure that we always find 3789 // only the real symbol, 'cause we don't currently correctly 3790 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol 3791 // type.
This is a temporary hack to make sure the ObjectiveC 3792 // symbols get treated correctly. To do this right, we should 3793 // coalesce all the GSYM & global symbols that have the same 3794 // address. 3795 is_gsym = true; 3796 sym[sym_idx].SetExternal(true); 3797 3798 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') { 3799 llvm::StringRef symbol_name_ref(symbol_name); 3800 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 3801 symbol_name_non_abi_mangled = symbol_name + 1; 3802 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 3803 type = eSymbolTypeObjCClass; 3804 demangled_is_synthesized = true; 3805 3806 } else if (symbol_name_ref.starts_with( 3807 g_objc_v2_prefix_metaclass)) { 3808 symbol_name_non_abi_mangled = symbol_name + 1; 3809 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 3810 type = eSymbolTypeObjCMetaClass; 3811 demangled_is_synthesized = true; 3812 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 3813 symbol_name_non_abi_mangled = symbol_name + 1; 3814 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 3815 type = eSymbolTypeObjCIVar; 3816 demangled_is_synthesized = true; 3817 } 3818 } else { 3819 if (nlist.n_value != 0) 3820 symbol_section = 3821 section_info.GetSection(nlist.n_sect, nlist.n_value); 3822 type = eSymbolTypeData; 3823 } 3824 break; 3825 3826 case N_FNAME: 3827 // procedure name (f77 kludge): name,,NO_SECT,0,0 3828 type = eSymbolTypeCompiler; 3829 break; 3830 3831 case N_FUN: 3832 // procedure: name,,n_sect,linenumber,address 3833 if (symbol_name) { 3834 type = eSymbolTypeCode; 3835 symbol_section = 3836 section_info.GetSection(nlist.n_sect, nlist.n_value); 3837 3838 N_FUN_addr_to_sym_idx.insert( 3839 std::make_pair(nlist.n_value, sym_idx)); 3840 // We use the current number of symbols in the symbol table in 3841 // lieu of using nlist_idx in case we ever start trimming entries 3842 // out 3843 N_FUN_indexes.push_back(sym_idx); 3844 } else { 3845 type = 
eSymbolTypeCompiler; 3846 3847 if (!N_FUN_indexes.empty()) { 3848 // Copy the size of the function into the original STAB entry 3849 // so we don't have to hunt for it later 3850 symtab.SymbolAtIndex(N_FUN_indexes.back()) 3851 ->SetByteSize(nlist.n_value); 3852 N_FUN_indexes.pop_back(); 3853 // We don't really need the end function STAB as it contains 3854 // the size which we already placed with the original symbol, 3855 // so don't add it if we want a minimal symbol table 3856 add_nlist = false; 3857 } 3858 } 3859 break; 3860 3861 case N_STSYM: 3862 // static symbol: name,,n_sect,type,address 3863 N_STSYM_addr_to_sym_idx.insert( 3864 std::make_pair(nlist.n_value, sym_idx)); 3865 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3866 if (symbol_name && symbol_name[0]) { 3867 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1, 3868 eSymbolTypeData); 3869 } 3870 break; 3871 3872 case N_LCSYM: 3873 // .lcomm symbol: name,,n_sect,type,address 3874 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3875 type = eSymbolTypeCommonBlock; 3876 break; 3877 3878 case N_BNSYM: 3879 // We use the current number of symbols in the symbol table in lieu 3880 // of using nlist_idx in case we ever start trimming entries out 3881 // Skip these if we want minimal symbol tables 3882 add_nlist = false; 3883 break; 3884 3885 case N_ENSYM: 3886 // Set the size of the N_BNSYM to the terminating index of this 3887 // N_ENSYM so that we can always skip the entire symbol if we need 3888 // to navigate more quickly at the source level when parsing STABS 3889 // Skip these if we want minimal symbol tables 3890 add_nlist = false; 3891 break; 3892 3893 case N_OPT: 3894 // emitted with gcc2_compiled and in gcc source 3895 type = eSymbolTypeCompiler; 3896 break; 3897 3898 case N_RSYM: 3899 // register sym: name,,NO_SECT,type,register 3900 type = eSymbolTypeVariable; 3901 break; 3902 3903 case N_SLINE: 3904 // src line: 0,,n_sect,linenumber,address 3905 
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3906 type = eSymbolTypeLineEntry; 3907 break; 3908 3909 case N_SSYM: 3910 // structure elt: name,,NO_SECT,type,struct_offset 3911 type = eSymbolTypeVariableType; 3912 break; 3913 3914 case N_SO: 3915 // source file name 3916 type = eSymbolTypeSourceFile; 3917 if (symbol_name == nullptr) { 3918 add_nlist = false; 3919 if (N_SO_index != UINT32_MAX) { 3920 // Set the size of the N_SO to the terminating index of this 3921 // N_SO so that we can always skip the entire N_SO if we need 3922 // to navigate more quickly at the source level when parsing 3923 // STABS 3924 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3925 symbol_ptr->SetByteSize(sym_idx); 3926 symbol_ptr->SetSizeIsSibling(true); 3927 } 3928 N_NSYM_indexes.clear(); 3929 N_INCL_indexes.clear(); 3930 N_BRAC_indexes.clear(); 3931 N_COMM_indexes.clear(); 3932 N_FUN_indexes.clear(); 3933 N_SO_index = UINT32_MAX; 3934 } else { 3935 // We use the current number of symbols in the symbol table in 3936 // lieu of using nlist_idx in case we ever start trimming entries 3937 // out 3938 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3939 if (N_SO_has_full_path) { 3940 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) { 3941 // We have two consecutive N_SO entries where the first 3942 // contains a directory and the second contains a full path.
3943 sym[sym_idx - 1].GetMangled().SetValue( 3944 ConstString(symbol_name)); 3945 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3946 add_nlist = false; 3947 } else { 3948 // This is the first entry in a N_SO that contains a 3949 // directory or a full path to the source file 3950 N_SO_index = sym_idx; 3951 } 3952 } else if ((N_SO_index == sym_idx - 1) && 3953 ((sym_idx - 1) < num_syms)) { 3954 // This is usually the second N_SO entry that contains just the 3955 // filename, so here we combine it with the first one if we are 3956 // minimizing the symbol table 3957 const char *so_path = 3958 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString(); 3959 if (so_path && so_path[0]) { 3960 std::string full_so_path(so_path); 3961 const size_t double_slash_pos = full_so_path.find("//"); 3962 if (double_slash_pos != std::string::npos) { 3963 // The linker has been generating bad N_SO entries with 3964 // doubled up paths in the format "%s%s" where the first 3965 // string in the DW_AT_comp_dir, and the second is the 3966 // directory for the source file so you end up with a path 3967 // that looks like "/tmp/src//tmp/src/" 3968 FileSpec so_dir(so_path); 3969 if (!FileSystem::Instance().Exists(so_dir)) { 3970 so_dir.SetFile(&full_so_path[double_slash_pos + 1], 3971 FileSpec::Style::native); 3972 if (FileSystem::Instance().Exists(so_dir)) { 3973 // Trim off the incorrect path 3974 full_so_path.erase(0, double_slash_pos + 1); 3975 } 3976 } 3977 } 3978 if (*full_so_path.rbegin() != '/') 3979 full_so_path += '/'; 3980 full_so_path += symbol_name; 3981 sym[sym_idx - 1].GetMangled().SetValue( 3982 ConstString(full_so_path.c_str())); 3983 add_nlist = false; 3984 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3985 } 3986 } else { 3987 // This could be a relative path to a N_SO 3988 N_SO_index = sym_idx; 3989 } 3990 } 3991 break; 3992 3993 case N_OSO: 3994 // object file name: name,,0,0,st_mtime 3995 type = eSymbolTypeObjectFile; 3996 break; 3997 3998 case N_LSYM: 3999 // 
local sym: name,,NO_SECT,type,offset 4000 type = eSymbolTypeLocal; 4001 break; 4002 4003 // INCL scopes 4004 case N_BINCL: 4005 // include file beginning: name,,NO_SECT,0,sum We use the current 4006 // number of symbols in the symbol table in lieu of using nlist_idx 4007 // in case we ever start trimming entries out 4008 N_INCL_indexes.push_back(sym_idx); 4009 type = eSymbolTypeScopeBegin; 4010 break; 4011 4012 case N_EINCL: 4013 // include file end: name,,NO_SECT,0,0 4014 // Set the size of the N_BINCL to the terminating index of this 4015 // N_EINCL so that we can always skip the entire symbol if we need 4016 // to navigate more quickly at the source level when parsing STABS 4017 if (!N_INCL_indexes.empty()) { 4018 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back()); 4019 symbol_ptr->SetByteSize(sym_idx + 1); 4020 symbol_ptr->SetSizeIsSibling(true); 4021 N_INCL_indexes.pop_back(); 4022 } 4023 type = eSymbolTypeScopeEnd; 4024 break; 4025 4026 case N_SOL: 4027 // #included file name: name,,n_sect,0,address 4028 type = eSymbolTypeHeaderFile; 4029 4030 // We currently don't use the header files on darwin 4031 add_nlist = false; 4032 break; 4033 4034 case N_PARAMS: 4035 // compiler parameters: name,,NO_SECT,0,0 4036 type = eSymbolTypeCompiler; 4037 break; 4038 4039 case N_VERSION: 4040 // compiler version: name,,NO_SECT,0,0 4041 type = eSymbolTypeCompiler; 4042 break; 4043 4044 case N_OLEVEL: 4045 // compiler -O level: name,,NO_SECT,0,0 4046 type = eSymbolTypeCompiler; 4047 break; 4048 4049 case N_PSYM: 4050 // parameter: name,,NO_SECT,type,offset 4051 type = eSymbolTypeVariable; 4052 break; 4053 4054 case N_ENTRY: 4055 // alternate entry: name,,n_sect,linenumber,address 4056 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4057 type = eSymbolTypeLineEntry; 4058 break; 4059 4060 // Left and Right Braces 4061 case N_LBRAC: 4062 // left bracket: 0,,NO_SECT,nesting level,address We use the 4063 // current number of symbols in the symbol table 
in lieu of using 4064 // nlist_idx in case we ever start trimming entries out 4065 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4066 N_BRAC_indexes.push_back(sym_idx); 4067 type = eSymbolTypeScopeBegin; 4068 break; 4069 4070 case N_RBRAC: 4071 // right bracket: 0,,NO_SECT,nesting level,address Set the size of 4072 // the N_LBRAC to the terminating index of this N_RBRAC so that we 4073 // can always skip the entire symbol if we need to navigate more 4074 // quickly at the source level when parsing STABS 4075 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4076 if (!N_BRAC_indexes.empty()) { 4077 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back()); 4078 symbol_ptr->SetByteSize(sym_idx + 1); 4079 symbol_ptr->SetSizeIsSibling(true); 4080 N_BRAC_indexes.pop_back(); 4081 } 4082 type = eSymbolTypeScopeEnd; 4083 break; 4084 4085 case N_EXCL: 4086 // deleted include file: name,,NO_SECT,0,sum 4087 type = eSymbolTypeHeaderFile; 4088 break; 4089 4090 // COMM scopes 4091 case N_BCOMM: 4092 // begin common: name,,NO_SECT,0,0 4093 // We use the current number of symbols in the symbol table in lieu 4094 // of using nlist_idx in case we ever start trimming entries out 4095 type = eSymbolTypeScopeBegin; 4096 N_COMM_indexes.push_back(sym_idx); 4097 break; 4098 4099 case N_ECOML: 4100 // end common (local name): 0,,n_sect,0,address 4101 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4102 [[fallthrough]]; 4103 4104 case N_ECOMM: 4105 // end common: name,,n_sect,0,0 4106 // Set the size of the N_BCOMM to the terminating index of this 4107 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if 4108 // we need to navigate more quickly at the source level when 4109 // parsing STABS 4110 if (!N_COMM_indexes.empty()) { 4111 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back()); 4112 symbol_ptr->SetByteSize(sym_idx + 1); 4113 symbol_ptr->SetSizeIsSibling(true); 4114 N_COMM_indexes.pop_back(); 4115 } 
4116 type = eSymbolTypeScopeEnd; 4117 break; 4118 4119 case N_LENG: 4120 // second stab entry with length information 4121 type = eSymbolTypeAdditional; 4122 break; 4123 4124 default: 4125 break; 4126 } 4127 } else { 4128 uint8_t n_type = N_TYPE & nlist.n_type; 4129 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 4130 4131 switch (n_type) { 4132 case N_INDR: { 4133 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value); 4134 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) { 4135 type = eSymbolTypeReExported; 4136 ConstString reexport_name(reexport_name_cstr + 4137 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 4138 sym[sym_idx].SetReExportedSymbolName(reexport_name); 4139 set_value = false; 4140 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 4141 indirect_symbol_names.insert( 4142 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 4143 } else 4144 type = eSymbolTypeUndefined; 4145 } break; 4146 4147 case N_UNDF: 4148 if (symbol_name && symbol_name[0]) { 4149 ConstString undefined_name(symbol_name + 4150 ((symbol_name[0] == '_') ? 1 : 0)); 4151 undefined_name_to_desc[undefined_name] = nlist.n_desc; 4152 } 4153 [[fallthrough]]; 4154 4155 case N_PBUD: 4156 type = eSymbolTypeUndefined; 4157 break; 4158 4159 case N_ABS: 4160 type = eSymbolTypeAbsolute; 4161 break; 4162 4163 case N_SECT: { 4164 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4165 4166 if (!symbol_section) { 4167 // TODO: warn about this?
4168 add_nlist = false; 4169 break; 4170 } 4171 4172 if (TEXT_eh_frame_sectID == nlist.n_sect) { 4173 type = eSymbolTypeException; 4174 } else { 4175 uint32_t section_type = symbol_section->Get() & SECTION_TYPE; 4176 4177 switch (section_type) { 4178 case S_CSTRING_LITERALS: 4179 type = eSymbolTypeData; 4180 break; // section with only literal C strings 4181 case S_4BYTE_LITERALS: 4182 type = eSymbolTypeData; 4183 break; // section with only 4 byte literals 4184 case S_8BYTE_LITERALS: 4185 type = eSymbolTypeData; 4186 break; // section with only 8 byte literals 4187 case S_LITERAL_POINTERS: 4188 type = eSymbolTypeTrampoline; 4189 break; // section with only pointers to literals 4190 case S_NON_LAZY_SYMBOL_POINTERS: 4191 type = eSymbolTypeTrampoline; 4192 break; // section with only non-lazy symbol pointers 4193 case S_LAZY_SYMBOL_POINTERS: 4194 type = eSymbolTypeTrampoline; 4195 break; // section with only lazy symbol pointers 4196 case S_SYMBOL_STUBS: 4197 type = eSymbolTypeTrampoline; 4198 break; // section with only symbol stubs, byte size of stub in 4199 // the reserved2 field 4200 case S_MOD_INIT_FUNC_POINTERS: 4201 type = eSymbolTypeCode; 4202 break; // section with only function pointers for initialization 4203 case S_MOD_TERM_FUNC_POINTERS: 4204 type = eSymbolTypeCode; 4205 break; // section with only function pointers for termination 4206 case S_INTERPOSING: 4207 type = eSymbolTypeTrampoline; 4208 break; // section with only pairs of function pointers for 4209 // interposing 4210 case S_16BYTE_LITERALS: 4211 type = eSymbolTypeData; 4212 break; // section with only 16 byte literals 4213 case S_DTRACE_DOF: 4214 type = eSymbolTypeInstrumentation; 4215 break; 4216 case S_LAZY_DYLIB_SYMBOL_POINTERS: 4217 type = eSymbolTypeTrampoline; 4218 break; 4219 default: 4220 switch (symbol_section->GetType()) { 4221 case lldb::eSectionTypeCode: 4222 type = eSymbolTypeCode; 4223 break; 4224 case eSectionTypeData: 4225 case eSectionTypeDataCString: // Inlined C string data 
4226 case eSectionTypeDataCStringPointers: // Pointers to C string 4227 // data 4228 case eSectionTypeDataSymbolAddress: // Address of a symbol in 4229 // the symbol table 4230 case eSectionTypeData4: 4231 case eSectionTypeData8: 4232 case eSectionTypeData16: 4233 type = eSymbolTypeData; 4234 break; 4235 default: 4236 break; 4237 } 4238 break; 4239 } 4240 4241 if (type == eSymbolTypeInvalid) { 4242 const char *symbol_sect_name = 4243 symbol_section->GetName().AsCString(); 4244 if (symbol_section->IsDescendant(text_section_sp.get())) { 4245 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 4246 S_ATTR_SELF_MODIFYING_CODE | 4247 S_ATTR_SOME_INSTRUCTIONS)) 4248 type = eSymbolTypeData; 4249 else 4250 type = eSymbolTypeCode; 4251 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 4252 symbol_section->IsDescendant( 4253 data_dirty_section_sp.get()) || 4254 symbol_section->IsDescendant( 4255 data_const_section_sp.get())) { 4256 if (symbol_sect_name && 4257 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 4258 type = eSymbolTypeRuntime; 4259 4260 if (symbol_name) { 4261 llvm::StringRef symbol_name_ref(symbol_name); 4262 if (symbol_name_ref.starts_with("_OBJC_")) { 4263 llvm::StringRef g_objc_v2_prefix_class( 4264 "_OBJC_CLASS_$_"); 4265 llvm::StringRef g_objc_v2_prefix_metaclass( 4266 "_OBJC_METACLASS_$_"); 4267 llvm::StringRef g_objc_v2_prefix_ivar( 4268 "_OBJC_IVAR_$_"); 4269 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 4270 symbol_name_non_abi_mangled = symbol_name + 1; 4271 symbol_name = 4272 symbol_name + g_objc_v2_prefix_class.size(); 4273 type = eSymbolTypeObjCClass; 4274 demangled_is_synthesized = true; 4275 } else if (symbol_name_ref.starts_with( 4276 g_objc_v2_prefix_metaclass)) { 4277 symbol_name_non_abi_mangled = symbol_name + 1; 4278 symbol_name = 4279 symbol_name + g_objc_v2_prefix_metaclass.size(); 4280 type = eSymbolTypeObjCMetaClass; 4281 demangled_is_synthesized = true; 4282 } else if 
(symbol_name_ref.starts_with( 4283 g_objc_v2_prefix_ivar)) { 4284 symbol_name_non_abi_mangled = symbol_name + 1; 4285 symbol_name = 4286 symbol_name + g_objc_v2_prefix_ivar.size(); 4287 type = eSymbolTypeObjCIVar; 4288 demangled_is_synthesized = true; 4289 } 4290 } 4291 } 4292 } else if (symbol_sect_name && 4293 ::strstr(symbol_sect_name, "__gcc_except_tab") == 4294 symbol_sect_name) { 4295 type = eSymbolTypeException; 4296 } else { 4297 type = eSymbolTypeData; 4298 } 4299 } else if (symbol_sect_name && 4300 ::strstr(symbol_sect_name, "__IMPORT") == 4301 symbol_sect_name) { 4302 type = eSymbolTypeTrampoline; 4303 } else if (symbol_section->IsDescendant(objc_section_sp.get())) { 4304 type = eSymbolTypeRuntime; 4305 if (symbol_name && symbol_name[0] == '.') { 4306 llvm::StringRef symbol_name_ref(symbol_name); 4307 llvm::StringRef g_objc_v1_prefix_class( 4308 ".objc_class_name_"); 4309 if (symbol_name_ref.starts_with(g_objc_v1_prefix_class)) { 4310 symbol_name_non_abi_mangled = symbol_name; 4311 symbol_name = symbol_name + g_objc_v1_prefix_class.size(); 4312 type = eSymbolTypeObjCClass; 4313 demangled_is_synthesized = true; 4314 } 4315 } 4316 } 4317 } 4318 } 4319 } break; 4320 } 4321 } 4322 4323 if (!add_nlist) { 4324 sym[sym_idx].Clear(); 4325 return true; 4326 } 4327 4328 uint64_t symbol_value = nlist.n_value; 4329 4330 if (symbol_name_non_abi_mangled) { 4331 sym[sym_idx].GetMangled().SetMangledName( 4332 ConstString(symbol_name_non_abi_mangled)); 4333 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); 4334 } else { 4335 4336 if (symbol_name && symbol_name[0] == '_') { 4337 symbol_name++; // Skip the leading underscore 4338 } 4339 4340 if (symbol_name) { 4341 ConstString const_symbol_name(symbol_name); 4342 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 4343 } 4344 } 4345 4346 if (is_gsym) { 4347 const char *gsym_name = sym[sym_idx] 4348 .GetMangled() 4349 .GetName(Mangled::ePreferMangled) 4350 .GetCString(); 4351 if (gsym_name) 4352 
N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 4353 } 4354 4355 if (symbol_section) { 4356 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4357 if (symbol_byte_size == 0 && function_starts_count > 0) { 4358 addr_t symbol_lookup_file_addr = nlist.n_value; 4359 // Do an exact address match for non-ARM addresses, else get the 4360 // closest since the symbol might be a thumb symbol which has an 4361 // address with bit zero set. 4362 FunctionStarts::Entry *func_start_entry = 4363 function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); 4364 if (is_arm && func_start_entry) { 4365 // Verify that the function start address is the symbol address 4366 // (ARM) or the symbol address + 1 (thumb). 4367 if (func_start_entry->addr != symbol_lookup_file_addr && 4368 func_start_entry->addr != (symbol_lookup_file_addr + 1)) { 4369 // Not the right entry, NULL it out... 4370 func_start_entry = nullptr; 4371 } 4372 } 4373 if (func_start_entry) { 4374 func_start_entry->data = true; 4375 4376 addr_t symbol_file_addr = func_start_entry->addr; 4377 if (is_arm) 4378 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4379 4380 const FunctionStarts::Entry *next_func_start_entry = 4381 function_starts.FindNextEntry(func_start_entry); 4382 const addr_t section_end_file_addr = 4383 section_file_addr + symbol_section->GetByteSize(); 4384 if (next_func_start_entry) { 4385 addr_t next_symbol_file_addr = next_func_start_entry->addr; 4386 // Be sure the clear the Thumb address bit when we calculate the 4387 // size from the current and next address 4388 if (is_arm) 4389 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4390 symbol_byte_size = std::min<lldb::addr_t>( 4391 next_symbol_file_addr - symbol_file_addr, 4392 section_end_file_addr - symbol_file_addr); 4393 } else { 4394 symbol_byte_size = section_end_file_addr - symbol_file_addr; 4395 } 4396 } 4397 } 4398 symbol_value -= section_file_addr; 4399 } 4400 4401 if (!is_debug) { 4402 if (type == eSymbolTypeCode) { 4403 // See if we 
can find a N_FUN entry for any code symbols. If we do 4404 // find a match, and the name matches, then we can merge the two into 4405 // just the function symbol to avoid duplicate entries in the symbol 4406 // table. 4407 std::pair<ValueToSymbolIndexMap::const_iterator, 4408 ValueToSymbolIndexMap::const_iterator> 4409 range; 4410 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 4411 if (range.first != range.second) { 4412 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4413 pos != range.second; ++pos) { 4414 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4415 sym[pos->second].GetMangled().GetName( 4416 Mangled::ePreferMangled)) { 4417 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4418 // We just need the flags from the linker symbol, so put these 4419 // flags into the N_FUN flags to avoid duplicate symbols in the 4420 // symbol table. 4421 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4422 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4423 if (resolver_addresses.find(nlist.n_value) != 4424 resolver_addresses.end()) 4425 sym[pos->second].SetType(eSymbolTypeResolver); 4426 sym[sym_idx].Clear(); 4427 return true; 4428 } 4429 } 4430 } else { 4431 if (resolver_addresses.find(nlist.n_value) != 4432 resolver_addresses.end()) 4433 type = eSymbolTypeResolver; 4434 } 4435 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || 4436 type == eSymbolTypeObjCMetaClass || 4437 type == eSymbolTypeObjCIVar) { 4438 // See if we can find a N_STSYM entry for any data symbols. If we do 4439 // find a match, and the name matches, then we can merge the two into 4440 // just the Static symbol to avoid duplicate entries in the symbol 4441 // table.
4442 std::pair<ValueToSymbolIndexMap::const_iterator, 4443 ValueToSymbolIndexMap::const_iterator> 4444 range; 4445 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); 4446 if (range.first != range.second) { 4447 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4448 pos != range.second; ++pos) { 4449 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4450 sym[pos->second].GetMangled().GetName( 4451 Mangled::ePreferMangled)) { 4452 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4453 // We just need the flags from the linker symbol, so put these 4454 // flags into the N_STSYM flags to avoid duplicate symbols in 4455 // the symbol table. 4456 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4457 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4458 sym[sym_idx].Clear(); 4459 return true; 4460 } 4461 } 4462 } else { 4463 // Combine N_GSYM stab entries with the non stab symbol. 4464 const char *gsym_name = sym[sym_idx] 4465 .GetMangled() 4466 .GetName(Mangled::ePreferMangled) 4467 .GetCString(); 4468 if (gsym_name) { 4469 ConstNameToSymbolIndexMap::const_iterator pos = 4470 N_GSYM_name_to_sym_idx.find(gsym_name); 4471 if (pos != N_GSYM_name_to_sym_idx.end()) { 4472 const uint32_t GSYM_sym_idx = pos->second; 4473 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; 4474 // Copy the address, because often the N_GSYM address has an 4475 // invalid address of zero when the global is a common symbol. 4476 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); 4477 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); 4478 add_symbol_addr( 4479 sym[GSYM_sym_idx].GetAddress().GetFileAddress()); 4480 // We just need the flags from the linker symbol, so put these 4481 // flags into the N_GSYM flags to avoid duplicate symbols in 4482 // the symbol table.
4483 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4484 sym[sym_idx].Clear(); 4485 return true; 4486 } 4487 } 4488 } 4489 } 4490 } 4491 4492 sym[sym_idx].SetID(nlist_idx); 4493 sym[sym_idx].SetType(type); 4494 if (set_value) { 4495 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 4496 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 4497 if (symbol_section) 4498 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress()); 4499 } 4500 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4501 if (nlist.n_desc & N_WEAK_REF) 4502 sym[sym_idx].SetIsWeak(true); 4503 4504 if (symbol_byte_size > 0) 4505 sym[sym_idx].SetByteSize(symbol_byte_size); 4506 4507 if (demangled_is_synthesized) 4508 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4509 4510 ++sym_idx; 4511 return true; 4512 }; 4513 4514 // First parse all the nlists but don't process them yet. See the next 4515 // comment for an explanation why. 4516 std::vector<struct nlist_64> nlists; 4517 nlists.reserve(symtab_load_command.nsyms); 4518 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { 4519 if (auto nlist = 4520 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) 4521 nlists.push_back(*nlist); 4522 else 4523 break; 4524 } 4525 4526 // Now parse all the debug symbols. This is needed to merge non-debug 4527 // symbols in the next step. Non-debug symbols are always coalesced into 4528 // the debug symbol. Doing this in one step would mean that some symbols 4529 // won't be merged. 4530 nlist_idx = 0; 4531 for (auto &nlist : nlists) { 4532 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) 4533 break; 4534 } 4535 4536 // Finally parse all the non debug symbols.
4537 nlist_idx = 0; 4538 for (auto &nlist : nlists) { 4539 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) 4540 break; 4541 } 4542 4543 for (const auto &pos : reexport_shlib_needs_fixup) { 4544 const auto undef_pos = undefined_name_to_desc.find(pos.second); 4545 if (undef_pos != undefined_name_to_desc.end()) { 4546 const uint8_t dylib_ordinal = 4547 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 4548 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 4549 sym[pos.first].SetReExportedSymbolSharedLibrary( 4550 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 4551 } 4552 } 4553 } 4554 4555 // Count how many trie symbols we'll add to the symbol table 4556 int trie_symbol_table_augment_count = 0; 4557 for (auto &e : external_sym_trie_entries) { 4558 if (!symbols_added.contains(e.entry.address)) 4559 trie_symbol_table_augment_count++; 4560 } 4561 4562 if (num_syms < sym_idx + trie_symbol_table_augment_count) { 4563 num_syms = sym_idx + trie_symbol_table_augment_count; 4564 sym = symtab.Resize(num_syms); 4565 } 4566 uint32_t synthetic_sym_id = symtab_load_command.nsyms; 4567 4568 // Add symbols from the trie to the symbol table. 4569 for (auto &e : external_sym_trie_entries) { 4570 if (symbols_added.contains(e.entry.address)) 4571 continue; 4572 4573 // Find the section that this trie address is in, use that to annotate 4574 // symbol type as we add the trie address and name to the symbol table. 
4575 Address symbol_addr; 4576 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { 4577 SectionSP symbol_section(symbol_addr.GetSection()); 4578 const char *symbol_name = e.entry.name.GetCString(); 4579 bool demangled_is_synthesized = false; 4580 SymbolType type = 4581 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp, 4582 data_section_sp, data_dirty_section_sp, 4583 data_const_section_sp, symbol_section); 4584 4585 sym[sym_idx].SetType(type); 4586 if (symbol_section) { 4587 sym[sym_idx].SetID(synthetic_sym_id++); 4588 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name)); 4589 if (demangled_is_synthesized) 4590 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4591 sym[sym_idx].SetIsSynthetic(true); 4592 sym[sym_idx].SetExternal(true); 4593 sym[sym_idx].GetAddressRef() = symbol_addr; 4594 add_symbol_addr(symbol_addr.GetFileAddress()); 4595 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) 4596 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); 4597 ++sym_idx; 4598 } 4599 } 4600 } 4601 4602 if (function_starts_count > 0) { 4603 uint32_t num_synthetic_function_symbols = 0; 4604 for (i = 0; i < function_starts_count; ++i) { 4605 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr)) 4606 ++num_synthetic_function_symbols; 4607 } 4608 4609 if (num_synthetic_function_symbols > 0) { 4610 if (num_syms < sym_idx + num_synthetic_function_symbols) { 4611 num_syms = sym_idx + num_synthetic_function_symbols; 4612 sym = symtab.Resize(num_syms); 4613 } 4614 for (i = 0; i < function_starts_count; ++i) { 4615 const FunctionStarts::Entry *func_start_entry = 4616 function_starts.GetEntryAtIndex(i); 4617 if (!symbols_added.contains(func_start_entry->addr)) { 4618 addr_t symbol_file_addr = func_start_entry->addr; 4619 uint32_t symbol_flags = 0; 4620 if (func_start_entry->data) 4621 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 4622 Address symbol_addr; 4623 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { 
4624 SectionSP symbol_section(symbol_addr.GetSection()); 4625 uint32_t symbol_byte_size = 0; 4626 if (symbol_section) { 4627 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4628 const FunctionStarts::Entry *next_func_start_entry = 4629 function_starts.FindNextEntry(func_start_entry); 4630 const addr_t section_end_file_addr = 4631 section_file_addr + symbol_section->GetByteSize(); 4632 if (next_func_start_entry) { 4633 addr_t next_symbol_file_addr = next_func_start_entry->addr; 4634 if (is_arm) 4635 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4636 symbol_byte_size = std::min<lldb::addr_t>( 4637 next_symbol_file_addr - symbol_file_addr, 4638 section_end_file_addr - symbol_file_addr); 4639 } else { 4640 symbol_byte_size = section_end_file_addr - symbol_file_addr; 4641 } 4642 sym[sym_idx].SetID(synthetic_sym_id++); 4643 // Don't set the name for any synthetic symbols, the Symbol 4644 // object will generate one if needed when the name is accessed 4645 // via accessors. 4646 sym[sym_idx].GetMangled().SetDemangledName(ConstString()); 4647 sym[sym_idx].SetType(eSymbolTypeCode); 4648 sym[sym_idx].SetIsSynthetic(true); 4649 sym[sym_idx].GetAddressRef() = symbol_addr; 4650 add_symbol_addr(symbol_addr.GetFileAddress()); 4651 if (symbol_flags) 4652 sym[sym_idx].SetFlags(symbol_flags); 4653 if (symbol_byte_size) 4654 sym[sym_idx].SetByteSize(symbol_byte_size); 4655 ++sym_idx; 4656 } 4657 } 4658 } 4659 } 4660 } 4661 } 4662 4663 // Trim our symbols down to just what we ended up with after removing any 4664 // symbols. 
4665 if (sym_idx < num_syms) { 4666 num_syms = sym_idx; 4667 sym = symtab.Resize(num_syms); 4668 } 4669 4670 // Now synthesize indirect symbols 4671 if (m_dysymtab.nindirectsyms != 0) { 4672 if (indirect_symbol_index_data.GetByteSize()) { 4673 NListIndexToSymbolIndexMap::const_iterator end_index_pos = 4674 m_nlist_idx_to_sym_idx.end(); 4675 4676 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size(); 4677 ++sect_idx) { 4678 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) == 4679 S_SYMBOL_STUBS) { 4680 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2; 4681 if (symbol_stub_byte_size == 0) 4682 continue; 4683 4684 const uint32_t num_symbol_stubs = 4685 m_mach_sections[sect_idx].size / symbol_stub_byte_size; 4686 4687 if (num_symbol_stubs == 0) 4688 continue; 4689 4690 const uint32_t symbol_stub_index_offset = 4691 m_mach_sections[sect_idx].reserved1; 4692 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) { 4693 const uint32_t symbol_stub_index = 4694 symbol_stub_index_offset + stub_idx; 4695 const lldb::addr_t symbol_stub_addr = 4696 m_mach_sections[sect_idx].addr + 4697 (stub_idx * symbol_stub_byte_size); 4698 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4; 4699 if (indirect_symbol_index_data.ValidOffsetForDataOfSize( 4700 symbol_stub_offset, 4)) { 4701 const uint32_t stub_sym_id = 4702 indirect_symbol_index_data.GetU32(&symbol_stub_offset); 4703 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL)) 4704 continue; 4705 4706 NListIndexToSymbolIndexMap::const_iterator index_pos = 4707 m_nlist_idx_to_sym_idx.find(stub_sym_id); 4708 Symbol *stub_symbol = nullptr; 4709 if (index_pos != end_index_pos) { 4710 // We have a remapping from the original nlist index to a 4711 // current symbol index, so just look this up by index 4712 stub_symbol = symtab.SymbolAtIndex(index_pos->second); 4713 } else { 4714 // We need to lookup a symbol using the original nlist symbol 4715 // index since this index is 
coming from the S_SYMBOL_STUBS 4716 stub_symbol = symtab.FindSymbolByID(stub_sym_id); 4717 } 4718 4719 if (stub_symbol) { 4720 Address so_addr(symbol_stub_addr, section_list); 4721 4722 if (stub_symbol->GetType() == eSymbolTypeUndefined) { 4723 // Change the external symbol into a trampoline that makes 4724 // sense These symbols were N_UNDF N_EXT, and are useless 4725 // to us, so we can re-use them so we don't have to make up 4726 // a synthetic symbol for no good reason. 4727 if (resolver_addresses.find(symbol_stub_addr) == 4728 resolver_addresses.end()) 4729 stub_symbol->SetType(eSymbolTypeTrampoline); 4730 else 4731 stub_symbol->SetType(eSymbolTypeResolver); 4732 stub_symbol->SetExternal(false); 4733 stub_symbol->GetAddressRef() = so_addr; 4734 stub_symbol->SetByteSize(symbol_stub_byte_size); 4735 } else { 4736 // Make a synthetic symbol to describe the trampoline stub 4737 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled()); 4738 if (sym_idx >= num_syms) { 4739 sym = symtab.Resize(++num_syms); 4740 stub_symbol = nullptr; // this pointer no longer valid 4741 } 4742 sym[sym_idx].SetID(synthetic_sym_id++); 4743 sym[sym_idx].GetMangled() = stub_symbol_mangled_name; 4744 if (resolver_addresses.find(symbol_stub_addr) == 4745 resolver_addresses.end()) 4746 sym[sym_idx].SetType(eSymbolTypeTrampoline); 4747 else 4748 sym[sym_idx].SetType(eSymbolTypeResolver); 4749 sym[sym_idx].SetIsSynthetic(true); 4750 sym[sym_idx].GetAddressRef() = so_addr; 4751 add_symbol_addr(so_addr.GetFileAddress()); 4752 sym[sym_idx].SetByteSize(symbol_stub_byte_size); 4753 ++sym_idx; 4754 } 4755 } else { 4756 if (log) 4757 log->Warning("symbol stub referencing symbol table symbol " 4758 "%u that isn't in our minimal symbol table, " 4759 "fix this!!!", 4760 stub_sym_id); 4761 } 4762 } 4763 } 4764 } 4765 } 4766 } 4767 } 4768 4769 if (!reexport_trie_entries.empty()) { 4770 for (const auto &e : reexport_trie_entries) { 4771 if (e.entry.import_name) { 4772 // Only add indirect symbols from 
the Trie entries if we didn't have 4773 // a N_INDR nlist entry for this already 4774 if (indirect_symbol_names.find(e.entry.name) == 4775 indirect_symbol_names.end()) { 4776 // Make a synthetic symbol to describe re-exported symbol. 4777 if (sym_idx >= num_syms) 4778 sym = symtab.Resize(++num_syms); 4779 sym[sym_idx].SetID(synthetic_sym_id++); 4780 sym[sym_idx].GetMangled() = Mangled(e.entry.name); 4781 sym[sym_idx].SetType(eSymbolTypeReExported); 4782 sym[sym_idx].SetIsSynthetic(true); 4783 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name); 4784 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) { 4785 sym[sym_idx].SetReExportedSymbolSharedLibrary( 4786 dylib_files.GetFileSpecAtIndex(e.entry.other - 1)); 4787 } 4788 ++sym_idx; 4789 } 4790 } 4791 } 4792 } 4793 } 4794 4795 void ObjectFileMachO::Dump(Stream *s) { 4796 ModuleSP module_sp(GetModule()); 4797 if (module_sp) { 4798 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 4799 s->Printf("%p: ", static_cast<void *>(this)); 4800 s->Indent(); 4801 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64) 4802 s->PutCString("ObjectFileMachO64"); 4803 else 4804 s->PutCString("ObjectFileMachO32"); 4805 4806 *s << ", file = '" << m_file; 4807 ModuleSpecList all_specs; 4808 ModuleSpec base_spec; 4809 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 4810 base_spec, all_specs); 4811 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 4812 *s << "', triple"; 4813 if (e) 4814 s->Printf("[%d]", i); 4815 *s << " = "; 4816 *s << all_specs.GetModuleSpecRefAtIndex(i) 4817 .GetArchitecture() 4818 .GetTriple() 4819 .getTriple(); 4820 } 4821 *s << "\n"; 4822 SectionList *sections = GetSectionList(); 4823 if (sections) 4824 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 4825 UINT32_MAX); 4826 4827 if (m_symtab_up) 4828 m_symtab_up->Dump(s, nullptr, eSortOrderNone); 4829 } 4830 } 4831 4832 UUID ObjectFileMachO::GetUUID(const 
// Map a legacy LC_VERSION_MIN_* load command constant to the OS name that
// llvm::Triple uses for the corresponding operating system. Any other value
// is a programming error and hits llvm_unreachable.
static llvm::StringRef GetOSName(uint32_t cmd) {
  llvm::Triple::OSType os_type;
  switch (cmd) {
  case llvm::MachO::LC_VERSION_MIN_MACOSX:
    os_type = llvm::Triple::MacOSX;
    break;
  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
    os_type = llvm::Triple::IOS;
    break;
  case llvm::MachO::LC_VERSION_MIN_TVOS:
    os_type = llvm::Triple::TvOS;
    break;
  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
    os_type = llvm::Triple::WatchOS;
    break;
  default:
    llvm_unreachable("unexpected LC_VERSION load command");
  }
  return llvm::Triple::getOSTypeName(os_type);
}
// Unpacked form of the 32-bit version word stored in the LC_VERSION_MIN_* and
// LC_BUILD_VERSION load commands: the major version lives in the upper 16
// bits, the minor version in bits 8-15 and the patch version in bits 0-7.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    const uint32_t kMajorShift = 16;
    const uint32_t kMinorShift = 8;
    const uint32_t kByteMask = 0xffu;

    major_version = version >> kMajorShift;
    minor_version = (version >> kMinorShift) & kByteMask;
    patch_version = version & kByteMask;
  }
};
4982 base_triple.setVendor(llvm::Triple::Apple); 4983 } else { 4984 // Set vendor to an unspecified unknown or a "*" so it can match any 4985 // vendor This is required for correct behavior of EFI debugging on 4986 // x86_64 4987 base_triple.setVendor(llvm::Triple::UnknownVendor); 4988 base_triple.setVendorName(llvm::StringRef()); 4989 } 4990 return add_triple(base_triple); 4991 } 4992 4993 llvm::MachO::load_command load_cmd; 4994 4995 // See if there is an LC_VERSION_MIN_* load command that can give 4996 // us the OS type. 4997 lldb::offset_t offset = lc_offset; 4998 for (uint32_t i = 0; i < header.ncmds; ++i) { 4999 const lldb::offset_t cmd_offset = offset; 5000 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 5001 break; 5002 5003 llvm::MachO::version_min_command version_min; 5004 switch (load_cmd.cmd) { 5005 case llvm::MachO::LC_VERSION_MIN_MACOSX: 5006 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 5007 case llvm::MachO::LC_VERSION_MIN_TVOS: 5008 case llvm::MachO::LC_VERSION_MIN_WATCHOS: { 5009 if (load_cmd.cmdsize != sizeof(version_min)) 5010 break; 5011 if (data.ExtractBytes(cmd_offset, sizeof(version_min), 5012 data.GetByteOrder(), &version_min) == 0) 5013 break; 5014 MinOS min_os(version_min.version); 5015 llvm::SmallString<32> os_name; 5016 llvm::raw_svector_ostream os(os_name); 5017 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.' 5018 << min_os.minor_version << '.' << min_os.patch_version; 5019 5020 auto triple = base_triple; 5021 triple.setOSName(os.str()); 5022 5023 // Disambiguate legacy simulator platforms. 5024 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX && 5025 (base_triple.getArch() == llvm::Triple::x86_64 || 5026 base_triple.getArch() == llvm::Triple::x86)) { 5027 // The combination of legacy LC_VERSION_MIN load command and 5028 // x86 architecture always indicates a simulator environment. 5029 // The combination of LC_VERSION_MIN and arm architecture only 5030 // appears for native binaries. 
Back-deploying simulator 5031 // binaries on Apple Silicon Macs use the modern unambigous 5032 // LC_BUILD_VERSION load commands; no special handling required. 5033 triple.setEnvironment(llvm::Triple::Simulator); 5034 } 5035 add_triple(triple); 5036 break; 5037 } 5038 default: 5039 break; 5040 } 5041 5042 offset = cmd_offset + load_cmd.cmdsize; 5043 } 5044 5045 // See if there are LC_BUILD_VERSION load commands that can give 5046 // us the OS type. 5047 offset = lc_offset; 5048 for (uint32_t i = 0; i < header.ncmds; ++i) { 5049 const lldb::offset_t cmd_offset = offset; 5050 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 5051 break; 5052 5053 do { 5054 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) { 5055 llvm::MachO::build_version_command build_version; 5056 if (load_cmd.cmdsize < sizeof(build_version)) { 5057 // Malformed load command. 5058 break; 5059 } 5060 if (data.ExtractBytes(cmd_offset, sizeof(build_version), 5061 data.GetByteOrder(), &build_version) == 0) 5062 break; 5063 MinOS min_os(build_version.minos); 5064 OSEnv os_env(build_version.platform); 5065 llvm::SmallString<16> os_name; 5066 llvm::raw_svector_ostream os(os_name); 5067 os << os_env.os_type << min_os.major_version << '.' 5068 << min_os.minor_version << '.' 
<< min_os.patch_version; 5069 auto triple = base_triple; 5070 triple.setOSName(os.str()); 5071 os_name.clear(); 5072 if (!os_env.environment.empty()) 5073 triple.setEnvironmentName(os_env.environment); 5074 add_triple(triple); 5075 } 5076 } while (false); 5077 offset = cmd_offset + load_cmd.cmdsize; 5078 } 5079 5080 if (!found_any) { 5081 add_triple(base_triple); 5082 } 5083 } 5084 5085 ArchSpec ObjectFileMachO::GetArchitecture( 5086 ModuleSP module_sp, const llvm::MachO::mach_header &header, 5087 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) { 5088 ModuleSpecList all_specs; 5089 ModuleSpec base_spec; 5090 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic), 5091 base_spec, all_specs); 5092 5093 // If the object file offers multiple alternative load commands, 5094 // pick the one that matches the module. 5095 if (module_sp) { 5096 const ArchSpec &module_arch = module_sp->GetArchitecture(); 5097 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 5098 ArchSpec mach_arch = 5099 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 5100 if (module_arch.IsCompatibleMatch(mach_arch)) 5101 return mach_arch; 5102 } 5103 } 5104 5105 // Return the first arch we found. 
5106 if (all_specs.GetSize() == 0) 5107 return {}; 5108 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture(); 5109 } 5110 5111 UUID ObjectFileMachO::GetUUID() { 5112 ModuleSP module_sp(GetModule()); 5113 if (module_sp) { 5114 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5115 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5116 return GetUUID(m_header, m_data, offset); 5117 } 5118 return UUID(); 5119 } 5120 5121 uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) { 5122 ModuleSP module_sp = GetModule(); 5123 if (!module_sp) 5124 return 0; 5125 5126 uint32_t count = 0; 5127 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5128 llvm::MachO::load_command load_cmd; 5129 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5130 std::vector<std::string> rpath_paths; 5131 std::vector<std::string> rpath_relative_paths; 5132 std::vector<std::string> at_exec_relative_paths; 5133 uint32_t i; 5134 for (i = 0; i < m_header.ncmds; ++i) { 5135 const uint32_t cmd_offset = offset; 5136 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5137 break; 5138 5139 switch (load_cmd.cmd) { 5140 case LC_RPATH: 5141 case LC_LOAD_DYLIB: 5142 case LC_LOAD_WEAK_DYLIB: 5143 case LC_REEXPORT_DYLIB: 5144 case LC_LOAD_DYLINKER: 5145 case LC_LOADFVMLIB: 5146 case LC_LOAD_UPWARD_DYLIB: { 5147 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 5148 // For LC_LOAD_DYLIB there is an alternate encoding 5149 // which adds a uint32_t `flags` field for `DYLD_USE_*` 5150 // flags. This can be detected by a timestamp field with 5151 // the `DYLIB_USE_MARKER` constant value. 
5152 bool is_delayed_init = false; 5153 uint32_t use_command_marker = m_data.GetU32(&offset); 5154 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) { 5155 offset += 4; /* uint32_t current_version */ 5156 offset += 4; /* uint32_t compat_version */ 5157 uint32_t flags = m_data.GetU32(&offset); 5158 // If this LC_LOAD_DYLIB is marked delay-init, 5159 // don't report it as a dependent library -- it 5160 // may be loaded in the process at some point, 5161 // but will most likely not be load at launch. 5162 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */) 5163 is_delayed_init = true; 5164 } 5165 const char *path = m_data.PeekCStr(name_offset); 5166 if (path && !is_delayed_init) { 5167 if (load_cmd.cmd == LC_RPATH) 5168 rpath_paths.push_back(path); 5169 else { 5170 if (path[0] == '@') { 5171 if (strncmp(path, "@rpath", strlen("@rpath")) == 0) 5172 rpath_relative_paths.push_back(path + strlen("@rpath")); 5173 else if (strncmp(path, "@executable_path", 5174 strlen("@executable_path")) == 0) 5175 at_exec_relative_paths.push_back(path + 5176 strlen("@executable_path")); 5177 } else { 5178 FileSpec file_spec(path); 5179 if (files.AppendIfUnique(file_spec)) 5180 count++; 5181 } 5182 } 5183 } 5184 } break; 5185 5186 default: 5187 break; 5188 } 5189 offset = cmd_offset + load_cmd.cmdsize; 5190 } 5191 5192 FileSpec this_file_spec(m_file); 5193 FileSystem::Instance().Resolve(this_file_spec); 5194 5195 if (!rpath_paths.empty()) { 5196 // Fixup all LC_RPATH values to be absolute paths. 
5197 const std::string this_directory = 5198 this_file_spec.GetDirectory().GetString(); 5199 for (auto &rpath : rpath_paths) { 5200 if (llvm::StringRef(rpath).starts_with(g_loader_path)) 5201 rpath = this_directory + rpath.substr(g_loader_path.size()); 5202 else if (llvm::StringRef(rpath).starts_with(g_executable_path)) 5203 rpath = this_directory + rpath.substr(g_executable_path.size()); 5204 } 5205 5206 for (const auto &rpath_relative_path : rpath_relative_paths) { 5207 for (const auto &rpath : rpath_paths) { 5208 std::string path = rpath; 5209 path += rpath_relative_path; 5210 // It is OK to resolve this path because we must find a file on disk 5211 // for us to accept it anyway if it is rpath relative. 5212 FileSpec file_spec(path); 5213 FileSystem::Instance().Resolve(file_spec); 5214 if (FileSystem::Instance().Exists(file_spec) && 5215 files.AppendIfUnique(file_spec)) { 5216 count++; 5217 break; 5218 } 5219 } 5220 } 5221 } 5222 5223 // We may have @executable_paths but no RPATHS. Figure those out here. 5224 // Only do this if this object file is the executable. We have no way to 5225 // get back to the actual executable otherwise, so we won't get the right 5226 // path. 5227 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) { 5228 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent(); 5229 for (const auto &at_exec_relative_path : at_exec_relative_paths) { 5230 FileSpec file_spec = 5231 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path); 5232 if (FileSystem::Instance().Exists(file_spec) && 5233 files.AppendIfUnique(file_spec)) 5234 count++; 5235 } 5236 } 5237 return count; 5238 } 5239 5240 lldb_private::Address ObjectFileMachO::GetEntryPointAddress() { 5241 // If the object file is not an executable it can't hold the entry point. 5242 // m_entry_point_address is initialized to an invalid address, so we can just 5243 // return that. 
If m_entry_point_address is valid it means we've found it 5244 // already, so return the cached value. 5245 5246 if ((!IsExecutable() && !IsDynamicLoader()) || 5247 m_entry_point_address.IsValid()) { 5248 return m_entry_point_address; 5249 } 5250 5251 // Otherwise, look for the UnixThread or Thread command. The data for the 5252 // Thread command is given in /usr/include/mach-o.h, but it is basically: 5253 // 5254 // uint32_t flavor - this is the flavor argument you would pass to 5255 // thread_get_state 5256 // uint32_t count - this is the count of longs in the thread state data 5257 // struct XXX_thread_state state - this is the structure from 5258 // <machine/thread_status.h> corresponding to the flavor. 5259 // <repeat this trio> 5260 // 5261 // So we just keep reading the various register flavors till we find the GPR 5262 // one, then read the PC out of there. 5263 // FIXME: We will need to have a "RegisterContext data provider" class at some 5264 // point that can get all the registers 5265 // out of data in this form & attach them to a given thread. That should 5266 // underlie the MacOS X User process plugin, and we'll also need it for the 5267 // MacOS X Core File process plugin. When we have that we can also use it 5268 // here. 
5269 // 5270 // For now we hard-code the offsets and flavors we need: 5271 // 5272 // 5273 5274 ModuleSP module_sp(GetModule()); 5275 if (module_sp) { 5276 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5277 llvm::MachO::load_command load_cmd; 5278 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5279 uint32_t i; 5280 lldb::addr_t start_address = LLDB_INVALID_ADDRESS; 5281 bool done = false; 5282 5283 for (i = 0; i < m_header.ncmds; ++i) { 5284 const lldb::offset_t cmd_offset = offset; 5285 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5286 break; 5287 5288 switch (load_cmd.cmd) { 5289 case LC_UNIXTHREAD: 5290 case LC_THREAD: { 5291 while (offset < cmd_offset + load_cmd.cmdsize) { 5292 uint32_t flavor = m_data.GetU32(&offset); 5293 uint32_t count = m_data.GetU32(&offset); 5294 if (count == 0) { 5295 // We've gotten off somehow, log and exit; 5296 return m_entry_point_address; 5297 } 5298 5299 switch (m_header.cputype) { 5300 case llvm::MachO::CPU_TYPE_ARM: 5301 if (flavor == 1 || 5302 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32 5303 // from mach/arm/thread_status.h 5304 { 5305 offset += 60; // This is the offset of pc in the GPR thread state 5306 // data structure. 5307 start_address = m_data.GetU32(&offset); 5308 done = true; 5309 } 5310 break; 5311 case llvm::MachO::CPU_TYPE_ARM64: 5312 case llvm::MachO::CPU_TYPE_ARM64_32: 5313 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h 5314 { 5315 offset += 256; // This is the offset of pc in the GPR thread state 5316 // data structure. 5317 start_address = m_data.GetU64(&offset); 5318 done = true; 5319 } 5320 break; 5321 case llvm::MachO::CPU_TYPE_I386: 5322 if (flavor == 5323 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h 5324 { 5325 offset += 40; // This is the offset of eip in the GPR thread state 5326 // data structure. 
5327 start_address = m_data.GetU32(&offset); 5328 done = true; 5329 } 5330 break; 5331 case llvm::MachO::CPU_TYPE_X86_64: 5332 if (flavor == 5333 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h 5334 { 5335 offset += 16 * 8; // This is the offset of rip in the GPR thread 5336 // state data structure. 5337 start_address = m_data.GetU64(&offset); 5338 done = true; 5339 } 5340 break; 5341 default: 5342 return m_entry_point_address; 5343 } 5344 // Haven't found the GPR flavor yet, skip over the data for this 5345 // flavor: 5346 if (done) 5347 break; 5348 offset += count * 4; 5349 } 5350 } break; 5351 case LC_MAIN: { 5352 uint64_t entryoffset = m_data.GetU64(&offset); 5353 SectionSP text_segment_sp = 5354 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 5355 if (text_segment_sp) { 5356 done = true; 5357 start_address = text_segment_sp->GetFileAddress() + entryoffset; 5358 } 5359 } break; 5360 5361 default: 5362 break; 5363 } 5364 if (done) 5365 break; 5366 5367 // Go to the next load command: 5368 offset = cmd_offset + load_cmd.cmdsize; 5369 } 5370 5371 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) { 5372 if (GetSymtab()) { 5373 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType( 5374 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode, 5375 Symtab::eDebugAny, Symtab::eVisibilityAny); 5376 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) { 5377 start_address = dyld_start_sym->GetAddress().GetFileAddress(); 5378 } 5379 } 5380 } 5381 5382 if (start_address != LLDB_INVALID_ADDRESS) { 5383 // We got the start address from the load commands, so now resolve that 5384 // address in the sections of this ObjectFile: 5385 if (!m_entry_point_address.ResolveAddressUsingFileSections( 5386 start_address, GetSectionList())) { 5387 m_entry_point_address.Clear(); 5388 } 5389 } else { 5390 // We couldn't read the UnixThread load command - maybe it wasn't there. 
5391 // As a fallback look for the "start" symbol in the main executable. 5392 5393 ModuleSP module_sp(GetModule()); 5394 5395 if (module_sp) { 5396 SymbolContextList contexts; 5397 SymbolContext context; 5398 module_sp->FindSymbolsWithNameAndType(ConstString("start"), 5399 eSymbolTypeCode, contexts); 5400 if (contexts.GetSize()) { 5401 if (contexts.GetContextAtIndex(0, context)) 5402 m_entry_point_address = context.symbol->GetAddress(); 5403 } 5404 } 5405 } 5406 } 5407 5408 return m_entry_point_address; 5409 } 5410 5411 lldb_private::Address ObjectFileMachO::GetBaseAddress() { 5412 lldb_private::Address header_addr; 5413 SectionList *section_list = GetSectionList(); 5414 if (section_list) { 5415 SectionSP text_segment_sp( 5416 section_list->FindSectionByName(GetSegmentNameTEXT())); 5417 if (text_segment_sp) { 5418 header_addr.SetSection(text_segment_sp); 5419 header_addr.SetOffset(0); 5420 } 5421 } 5422 return header_addr; 5423 } 5424 5425 uint32_t ObjectFileMachO::GetNumThreadContexts() { 5426 ModuleSP module_sp(GetModule()); 5427 if (module_sp) { 5428 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5429 if (!m_thread_context_offsets_valid) { 5430 m_thread_context_offsets_valid = true; 5431 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5432 FileRangeArray::Entry file_range; 5433 llvm::MachO::thread_command thread_cmd; 5434 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5435 const uint32_t cmd_offset = offset; 5436 if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr) 5437 break; 5438 5439 if (thread_cmd.cmd == LC_THREAD) { 5440 file_range.SetRangeBase(offset); 5441 file_range.SetByteSize(thread_cmd.cmdsize - 8); 5442 m_thread_context_offsets.Append(file_range); 5443 } 5444 offset = cmd_offset + thread_cmd.cmdsize; 5445 } 5446 } 5447 } 5448 return m_thread_context_offsets.GetSize(); 5449 } 5450 5451 std::vector<std::tuple<offset_t, offset_t>> 5452 ObjectFileMachO::FindLC_NOTEByName(std::string name) { 5453 
std::vector<std::tuple<offset_t, offset_t>> results; 5454 ModuleSP module_sp(GetModule()); 5455 if (module_sp) { 5456 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5457 5458 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5459 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5460 const uint32_t cmd_offset = offset; 5461 llvm::MachO::load_command lc = {}; 5462 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 5463 break; 5464 if (lc.cmd == LC_NOTE) { 5465 char data_owner[17]; 5466 m_data.CopyData(offset, 16, data_owner); 5467 data_owner[16] = '\0'; 5468 offset += 16; 5469 5470 if (name == data_owner) { 5471 offset_t payload_offset = m_data.GetU64_unchecked(&offset); 5472 offset_t payload_size = m_data.GetU64_unchecked(&offset); 5473 results.push_back({payload_offset, payload_size}); 5474 } 5475 } 5476 offset = cmd_offset + lc.cmdsize; 5477 } 5478 } 5479 return results; 5480 } 5481 5482 std::string ObjectFileMachO::GetIdentifierString() { 5483 Log *log( 5484 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5485 ModuleSP module_sp(GetModule()); 5486 if (module_sp) { 5487 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5488 5489 auto lc_notes = FindLC_NOTEByName("kern ver str"); 5490 for (auto lc_note : lc_notes) { 5491 offset_t payload_offset = std::get<0>(lc_note); 5492 offset_t payload_size = std::get<1>(lc_note); 5493 uint32_t version; 5494 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5495 if (version == 1) { 5496 uint32_t strsize = payload_size - sizeof(uint32_t); 5497 std::string result(strsize, '\0'); 5498 m_data.CopyData(payload_offset, strsize, result.data()); 5499 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'", 5500 result.c_str()); 5501 return result; 5502 } 5503 } 5504 } 5505 5506 // Second, make a pass over the load commands looking for an obsolete 5507 // LC_IDENT load command. 
5508 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5509 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5510 const uint32_t cmd_offset = offset; 5511 llvm::MachO::ident_command ident_command; 5512 if (m_data.GetU32(&offset, &ident_command, 2) == nullptr) 5513 break; 5514 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) { 5515 std::string result(ident_command.cmdsize, '\0'); 5516 if (m_data.CopyData(offset, ident_command.cmdsize, result.data()) == 5517 ident_command.cmdsize) { 5518 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str()); 5519 return result; 5520 } 5521 } 5522 offset = cmd_offset + ident_command.cmdsize; 5523 } 5524 } 5525 return {}; 5526 } 5527 5528 AddressableBits ObjectFileMachO::GetAddressableBits() { 5529 AddressableBits addressable_bits; 5530 5531 Log *log(GetLog(LLDBLog::Process)); 5532 ModuleSP module_sp(GetModule()); 5533 if (module_sp) { 5534 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5535 auto lc_notes = FindLC_NOTEByName("addrable bits"); 5536 for (auto lc_note : lc_notes) { 5537 offset_t payload_offset = std::get<0>(lc_note); 5538 uint32_t version; 5539 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5540 if (version == 3) { 5541 uint32_t num_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5542 addressable_bits.SetAddressableBits(num_addr_bits); 5543 LLDB_LOGF(log, 5544 "LC_NOTE 'addrable bits' v3 found, value %d " 5545 "bits", 5546 num_addr_bits); 5547 } 5548 if (version == 4) { 5549 uint32_t lo_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5550 uint32_t hi_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5551 5552 if (lo_addr_bits == hi_addr_bits) 5553 addressable_bits.SetAddressableBits(lo_addr_bits); 5554 else 5555 addressable_bits.SetAddressableBits(lo_addr_bits, hi_addr_bits); 5556 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits", 5557 lo_addr_bits, hi_addr_bits); 5558 } 5559 } 5560 } 5561 } 5562 return 
addressable_bits; 5563 } 5564 5565 bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, 5566 bool &value_is_offset, 5567 UUID &uuid, 5568 ObjectFile::BinaryType &type) { 5569 Log *log( 5570 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5571 value = LLDB_INVALID_ADDRESS; 5572 value_is_offset = false; 5573 uuid.Clear(); 5574 uint32_t log2_pagesize = 0; // not currently passed up to caller 5575 uint32_t platform = 0; // not currently passed up to caller 5576 ModuleSP module_sp(GetModule()); 5577 if (module_sp) { 5578 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5579 5580 auto lc_notes = FindLC_NOTEByName("main bin spec"); 5581 for (auto lc_note : lc_notes) { 5582 offset_t payload_offset = std::get<0>(lc_note); 5583 5584 // struct main_bin_spec 5585 // { 5586 // uint32_t version; // currently 2 5587 // uint32_t type; // 0 == unspecified, 1 == kernel, 5588 // // 2 == user process, 5589 // // 3 == standalone binary 5590 // uint64_t address; // UINT64_MAX if address not specified 5591 // uint64_t slide; // slide, UINT64_MAX if unspecified 5592 // // 0 if no slide needs to be applied to 5593 // // file address 5594 // uuid_t uuid; // all zero's if uuid not specified 5595 // uint32_t log2_pagesize; // process page size in log base 2, 5596 // // e.g. 4k pages are 12. 5597 // // 0 for unspecified 5598 // uint32_t platform; // The Mach-O platform for this corefile. 5599 // // 0 for unspecified. 5600 // // The values are defined in 5601 // // <mach-o/loader.h>, PLATFORM_*. 
5602 // } __attribute((packed)); 5603 5604 // "main bin spec" (main binary specification) data payload is 5605 // formatted: 5606 // uint32_t version [currently 1] 5607 // uint32_t type [0 == unspecified, 1 == kernel, 5608 // 2 == user process, 3 == firmware ] 5609 // uint64_t address [ UINT64_MAX if address not specified ] 5610 // uuid_t uuid [ all zero's if uuid not specified ] 5611 // uint32_t log2_pagesize [ process page size in log base 5612 // 2, e.g. 4k pages are 12. 5613 // 0 for unspecified ] 5614 // uint32_t unused [ for alignment ] 5615 5616 uint32_t version; 5617 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr && 5618 version <= 2) { 5619 uint32_t binspec_type = 0; 5620 uuid_t raw_uuid; 5621 memset(raw_uuid, 0, sizeof(uuid_t)); 5622 5623 if (!m_data.GetU32(&payload_offset, &binspec_type, 1)) 5624 return false; 5625 if (!m_data.GetU64(&payload_offset, &value, 1)) 5626 return false; 5627 uint64_t slide = LLDB_INVALID_ADDRESS; 5628 if (version > 1 && !m_data.GetU64(&payload_offset, &slide, 1)) 5629 return false; 5630 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) { 5631 value = slide; 5632 value_is_offset = true; 5633 } 5634 5635 if (m_data.CopyData(payload_offset, sizeof(uuid_t), raw_uuid) != 0) { 5636 uuid = UUID(raw_uuid, sizeof(uuid_t)); 5637 // convert the "main bin spec" type into our 5638 // ObjectFile::BinaryType enum 5639 const char *typestr = "unrecognized type"; 5640 switch (binspec_type) { 5641 case 0: 5642 type = eBinaryTypeUnknown; 5643 typestr = "uknown"; 5644 break; 5645 case 1: 5646 type = eBinaryTypeKernel; 5647 typestr = "xnu kernel"; 5648 break; 5649 case 2: 5650 type = eBinaryTypeUser; 5651 typestr = "userland dyld"; 5652 break; 5653 case 3: 5654 type = eBinaryTypeStandalone; 5655 typestr = "standalone"; 5656 break; 5657 } 5658 LLDB_LOGF(log, 5659 "LC_NOTE 'main bin spec' found, version %d type %d " 5660 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s", 5661 version, type, typestr, value, 5662 
value_is_offset ? "true" : "false", 5663 uuid.GetAsString().c_str()); 5664 if (!m_data.GetU32(&payload_offset, &log2_pagesize, 1)) 5665 return false; 5666 if (version > 1 && !m_data.GetU32(&payload_offset, &platform, 1)) 5667 return false; 5668 return true; 5669 } 5670 } 5671 } 5672 } 5673 return false; 5674 } 5675 5676 bool ObjectFileMachO::GetCorefileThreadExtraInfos( 5677 std::vector<lldb::tid_t> &tids) { 5678 tids.clear(); 5679 ModuleSP module_sp(GetModule()); 5680 if (module_sp) { 5681 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5682 5683 Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread)); 5684 auto lc_notes = FindLC_NOTEByName("process metadata"); 5685 for (auto lc_note : lc_notes) { 5686 offset_t payload_offset = std::get<0>(lc_note); 5687 offset_t strsize = std::get<1>(lc_note); 5688 std::string buf(strsize, '\0'); 5689 if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) { 5690 LLDB_LOGF(log, 5691 "Unable to read %" PRIu64 5692 " bytes of 'process metadata' LC_NOTE JSON contents", 5693 strsize); 5694 return false; 5695 } 5696 while (buf.back() == '\0') 5697 buf.resize(buf.size() - 1); 5698 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf); 5699 StructuredData::Dictionary *dict = object_sp->GetAsDictionary(); 5700 if (!dict) { 5701 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not " 5702 "get a dictionary."); 5703 return false; 5704 } 5705 StructuredData::Array *threads; 5706 if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) { 5707 LLDB_LOGF(log, 5708 "'process metadata' LC_NOTE does not have a 'threads' key"); 5709 return false; 5710 } 5711 if (threads->GetSize() != GetNumThreadContexts()) { 5712 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of " 5713 "threads does not match number of LC_THREADS."); 5714 return false; 5715 } 5716 const size_t num_threads = threads->GetSize(); 5717 for (size_t i = 0; i < num_threads; i++) { 5718 
std::optional<StructuredData::Dictionary *> maybe_thread = 5719 threads->GetItemAtIndexAsDictionary(i); 5720 if (!maybe_thread) { 5721 LLDB_LOGF(log, 5722 "Unable to read 'process metadata' LC_NOTE, threads " 5723 "array does not have a dictionary at index %zu.", 5724 i); 5725 return false; 5726 } 5727 StructuredData::Dictionary *thread = *maybe_thread; 5728 lldb::tid_t tid = LLDB_INVALID_THREAD_ID; 5729 if (thread->GetValueForKeyAsInteger<lldb::tid_t>("thread_id", tid)) 5730 if (tid == 0) 5731 tid = LLDB_INVALID_THREAD_ID; 5732 tids.push_back(tid); 5733 } 5734 5735 if (log) { 5736 StreamString logmsg; 5737 logmsg.Printf("LC_NOTE 'process metadata' found: "); 5738 dict->Dump(logmsg, /* pretty_print */ false); 5739 LLDB_LOGF(log, "%s", logmsg.GetData()); 5740 } 5741 return true; 5742 } 5743 } 5744 return false; 5745 } 5746 5747 lldb::RegisterContextSP 5748 ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx, 5749 lldb_private::Thread &thread) { 5750 lldb::RegisterContextSP reg_ctx_sp; 5751 5752 ModuleSP module_sp(GetModule()); 5753 if (module_sp) { 5754 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5755 if (!m_thread_context_offsets_valid) 5756 GetNumThreadContexts(); 5757 5758 const FileRangeArray::Entry *thread_context_file_range = 5759 m_thread_context_offsets.GetEntryAtIndex(idx); 5760 if (thread_context_file_range) { 5761 5762 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(), 5763 thread_context_file_range->GetByteSize()); 5764 5765 switch (m_header.cputype) { 5766 case llvm::MachO::CPU_TYPE_ARM64: 5767 case llvm::MachO::CPU_TYPE_ARM64_32: 5768 reg_ctx_sp = 5769 std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data); 5770 break; 5771 5772 case llvm::MachO::CPU_TYPE_ARM: 5773 reg_ctx_sp = 5774 std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data); 5775 break; 5776 5777 case llvm::MachO::CPU_TYPE_I386: 5778 reg_ctx_sp = 5779 std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data); 5780 break; 
5781 5782 case llvm::MachO::CPU_TYPE_X86_64: 5783 reg_ctx_sp = 5784 std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data); 5785 break; 5786 } 5787 } 5788 } 5789 return reg_ctx_sp; 5790 } 5791 5792 ObjectFile::Type ObjectFileMachO::CalculateType() { 5793 switch (m_header.filetype) { 5794 case MH_OBJECT: // 0x1u 5795 if (GetAddressByteSize() == 4) { 5796 // 32 bit kexts are just object files, but they do have a valid 5797 // UUID load command. 5798 if (GetUUID()) { 5799 // this checking for the UUID load command is not enough we could 5800 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5801 // this is required of kexts 5802 if (m_strata == eStrataInvalid) 5803 m_strata = eStrataKernel; 5804 return eTypeSharedLibrary; 5805 } 5806 } 5807 return eTypeObjectFile; 5808 5809 case MH_EXECUTE: 5810 return eTypeExecutable; // 0x2u 5811 case MH_FVMLIB: 5812 return eTypeSharedLibrary; // 0x3u 5813 case MH_CORE: 5814 return eTypeCoreFile; // 0x4u 5815 case MH_PRELOAD: 5816 return eTypeSharedLibrary; // 0x5u 5817 case MH_DYLIB: 5818 return eTypeSharedLibrary; // 0x6u 5819 case MH_DYLINKER: 5820 return eTypeDynamicLinker; // 0x7u 5821 case MH_BUNDLE: 5822 return eTypeSharedLibrary; // 0x8u 5823 case MH_DYLIB_STUB: 5824 return eTypeStubLibrary; // 0x9u 5825 case MH_DSYM: 5826 return eTypeDebugInfo; // 0xAu 5827 case MH_KEXT_BUNDLE: 5828 return eTypeSharedLibrary; // 0xBu 5829 default: 5830 break; 5831 } 5832 return eTypeUnknown; 5833 } 5834 5835 ObjectFile::Strata ObjectFileMachO::CalculateStrata() { 5836 switch (m_header.filetype) { 5837 case MH_OBJECT: // 0x1u 5838 { 5839 // 32 bit kexts are just object files, but they do have a valid 5840 // UUID load command. 
5841 if (GetUUID()) { 5842 // this checking for the UUID load command is not enough we could 5843 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5844 // this is required of kexts 5845 if (m_type == eTypeInvalid) 5846 m_type = eTypeSharedLibrary; 5847 5848 return eStrataKernel; 5849 } 5850 } 5851 return eStrataUnknown; 5852 5853 case MH_EXECUTE: // 0x2u 5854 // Check for the MH_DYLDLINK bit in the flags 5855 if (m_header.flags & MH_DYLDLINK) { 5856 return eStrataUser; 5857 } else { 5858 SectionList *section_list = GetSectionList(); 5859 if (section_list) { 5860 static ConstString g_kld_section_name("__KLD"); 5861 if (section_list->FindSectionByName(g_kld_section_name)) 5862 return eStrataKernel; 5863 } 5864 } 5865 return eStrataRawImage; 5866 5867 case MH_FVMLIB: 5868 return eStrataUser; // 0x3u 5869 case MH_CORE: 5870 return eStrataUnknown; // 0x4u 5871 case MH_PRELOAD: 5872 return eStrataRawImage; // 0x5u 5873 case MH_DYLIB: 5874 return eStrataUser; // 0x6u 5875 case MH_DYLINKER: 5876 return eStrataUser; // 0x7u 5877 case MH_BUNDLE: 5878 return eStrataUser; // 0x8u 5879 case MH_DYLIB_STUB: 5880 return eStrataUser; // 0x9u 5881 case MH_DSYM: 5882 return eStrataUnknown; // 0xAu 5883 case MH_KEXT_BUNDLE: 5884 return eStrataKernel; // 0xBu 5885 default: 5886 break; 5887 } 5888 return eStrataUnknown; 5889 } 5890 5891 llvm::VersionTuple ObjectFileMachO::GetVersion() { 5892 ModuleSP module_sp(GetModule()); 5893 if (module_sp) { 5894 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5895 llvm::MachO::dylib_command load_cmd; 5896 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5897 uint32_t version_cmd = 0; 5898 uint64_t version = 0; 5899 uint32_t i; 5900 for (i = 0; i < m_header.ncmds; ++i) { 5901 const lldb::offset_t cmd_offset = offset; 5902 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5903 break; 5904 5905 if (load_cmd.cmd == LC_ID_DYLIB) { 5906 if (version_cmd == 0) { 5907 version_cmd = load_cmd.cmd; 
5908 if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr) 5909 break; 5910 version = load_cmd.dylib.current_version; 5911 } 5912 break; // Break for now unless there is another more complete version 5913 // number load command in the future. 5914 } 5915 offset = cmd_offset + load_cmd.cmdsize; 5916 } 5917 5918 if (version_cmd == LC_ID_DYLIB) { 5919 unsigned major = (version & 0xFFFF0000ull) >> 16; 5920 unsigned minor = (version & 0x0000FF00ull) >> 8; 5921 unsigned subminor = (version & 0x000000FFull); 5922 return llvm::VersionTuple(major, minor, subminor); 5923 } 5924 } 5925 return llvm::VersionTuple(); 5926 } 5927 5928 ArchSpec ObjectFileMachO::GetArchitecture() { 5929 ModuleSP module_sp(GetModule()); 5930 ArchSpec arch; 5931 if (module_sp) { 5932 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5933 5934 return GetArchitecture(module_sp, m_header, m_data, 5935 MachHeaderSizeFromMagic(m_header.magic)); 5936 } 5937 return arch; 5938 } 5939 5940 void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process, 5941 addr_t &base_addr, UUID &uuid) { 5942 uuid.Clear(); 5943 base_addr = LLDB_INVALID_ADDRESS; 5944 if (process && process->GetDynamicLoader()) { 5945 DynamicLoader *dl = process->GetDynamicLoader(); 5946 LazyBool using_shared_cache; 5947 LazyBool private_shared_cache; 5948 dl->GetSharedCacheInformation(base_addr, uuid, using_shared_cache, 5949 private_shared_cache); 5950 } 5951 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5952 LLDB_LOGF( 5953 log, 5954 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64, 5955 uuid.GetAsString().c_str(), base_addr); 5956 } 5957 5958 // From dyld SPI header dyld_process_info.h 5959 typedef void *dyld_process_info; 5960 struct lldb_copy__dyld_process_cache_info { 5961 uuid_t cacheUUID; // UUID of cache used by process 5962 uint64_t cacheBaseAddress; // load address of dyld shared cache 5963 bool noCache; // process is running without a dyld cache 5964 bool privateCache; // 
process is using a private copy of its dyld cache 5965 }; 5966 5967 // #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with 5968 // llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile 5969 // errors. So we need to use the actual underlying types of task_t and 5970 // kern_return_t below. 5971 extern "C" unsigned int /*task_t*/ mach_task_self(); 5972 5973 void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) { 5974 uuid.Clear(); 5975 base_addr = LLDB_INVALID_ADDRESS; 5976 5977 #if defined(__APPLE__) 5978 uint8_t *(*dyld_get_all_image_infos)(void); 5979 dyld_get_all_image_infos = 5980 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos"); 5981 if (dyld_get_all_image_infos) { 5982 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos(); 5983 if (dyld_all_image_infos_address) { 5984 uint32_t *version = (uint32_t *) 5985 dyld_all_image_infos_address; // version <mach-o/dyld_images.h> 5986 if (*version >= 13) { 5987 uuid_t *sharedCacheUUID_address = 0; 5988 int wordsize = sizeof(uint8_t *); 5989 if (wordsize == 8) { 5990 sharedCacheUUID_address = 5991 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 5992 160); // sharedCacheUUID <mach-o/dyld_images.h> 5993 if (*version >= 15) 5994 base_addr = 5995 *(uint64_t 5996 *)((uint8_t *)dyld_all_image_infos_address + 5997 176); // sharedCacheBaseAddress <mach-o/dyld_images.h> 5998 } else { 5999 sharedCacheUUID_address = 6000 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 6001 84); // sharedCacheUUID <mach-o/dyld_images.h> 6002 if (*version >= 15) { 6003 base_addr = 0; 6004 base_addr = 6005 *(uint32_t 6006 *)((uint8_t *)dyld_all_image_infos_address + 6007 100); // sharedCacheBaseAddress <mach-o/dyld_images.h> 6008 } 6009 } 6010 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t)); 6011 } 6012 } 6013 } else { 6014 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI 6015 dyld_process_info (*dyld_process_info_create)( 
6016 unsigned int /* task_t */ task, uint64_t timestamp, 6017 unsigned int /*kern_return_t*/ *kernelError); 6018 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo); 6019 void (*dyld_process_info_release)(dyld_process_info info); 6020 6021 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t, 6022 unsigned int /*kern_return_t*/ *)) 6023 dlsym(RTLD_DEFAULT, "_dyld_process_info_create"); 6024 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym( 6025 RTLD_DEFAULT, "_dyld_process_info_get_cache"); 6026 dyld_process_info_release = 6027 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release"); 6028 6029 if (dyld_process_info_create && dyld_process_info_get_cache) { 6030 unsigned int /*kern_return_t */ kern_ret; 6031 dyld_process_info process_info = 6032 dyld_process_info_create(::mach_task_self(), 0, &kern_ret); 6033 if (process_info) { 6034 struct lldb_copy__dyld_process_cache_info sc_info; 6035 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info)); 6036 dyld_process_info_get_cache(process_info, &sc_info); 6037 if (sc_info.cacheBaseAddress != 0) { 6038 base_addr = sc_info.cacheBaseAddress; 6039 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t)); 6040 } 6041 dyld_process_info_release(process_info); 6042 } 6043 } 6044 } 6045 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 6046 if (log && uuid.IsValid()) 6047 LLDB_LOGF(log, 6048 "lldb's in-memory shared cache has a UUID of %s base address of " 6049 "0x%" PRIx64, 6050 uuid.GetAsString().c_str(), base_addr); 6051 #endif 6052 } 6053 6054 static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data, 6055 lldb::offset_t offset, 6056 size_t ncmds) { 6057 for (size_t i = 0; i < ncmds; i++) { 6058 const lldb::offset_t load_cmd_offset = offset; 6059 llvm::MachO::load_command lc = {}; 6060 if (data.GetU32(&offset, &lc.cmd, 2) == nullptr) 6061 break; 6062 6063 uint32_t version = 0; 6064 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX || 6065 lc.cmd == 
// Return the version of the SDK this binary was built against, as recorded
// in the `sdk` field of the first LC_VERSION_MIN_* or LC_BUILD_VERSION load
// command that carries a non-zero one.  The result (possibly an empty
// VersionTuple when no SDK version is recorded) is cached in m_sdk_versions.
//
// This intentionally does not reuse FindMinimumVersionInfo(): that helper
// extracts the minimum deployment version (`version` / `minos`), which is a
// different field from the SDK version.
llvm::VersionTuple ObjectFileMachO::GetSDKVersion() {
  if (m_sdk_versions)
    return *m_sdk_versions;

  llvm::VersionTuple sdk_version;
  lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
    const lldb::offset_t load_cmd_offset = offset;
    llvm::MachO::load_command lc = {};
    if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
      break;

    bool has_sdk_field = false;
    if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
        lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
        lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS ||
        lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) {
      // struct version_min_command {
      //   uint32_t cmd;     // LC_VERSION_MIN_*
      //   uint32_t cmdsize;
      //   uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz
      //   uint32_t sdk;     // X.Y.Z encoded in nibbles xxxx.yy.zz
      // };
      offset += sizeof(uint32_t); // Skip over version
      has_sdk_field = true;
    } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) {
      // struct build_version_command {
      //   uint32_t cmd;     // LC_BUILD_VERSION
      //   uint32_t cmdsize;
      //   uint32_t platform;
      //   uint32_t minos;   // X.Y.Z encoded in nibbles xxxx.yy.zz
      //   uint32_t sdk;     // X.Y.Z encoded in nibbles xxxx.yy.zz
      //   uint32_t ntools;
      // };
      offset += 2 * sizeof(uint32_t); // Skip over platform and minos
      has_sdk_field = true;
    }

    if (has_sdk_field) {
      // GetU32 yields 0 if the field lies beyond the available data, which
      // is handled the same way as "no SDK recorded".
      const uint32_t sdk = m_data.GetU32(&offset);
      const uint32_t xxxx = sdk >> 16;
      const uint32_t yy = (sdk >> 8) & 0xffu;
      const uint32_t zz = sdk & 0xffu;
      if (xxxx) {
        sdk_version = llvm::VersionTuple(xxxx, yy, zz);
        break;
      }
    }
    offset = load_cmd_offset + lc.cmdsize;
  }

  m_sdk_versions = sdk_version;
  return *m_sdk_versions;
}
6123 return m_header.filetype == llvm::MachO::MH_DSYM; 6124 } 6125 6126 bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() { 6127 return m_allow_assembly_emulation_unwind_plans; 6128 } 6129 6130 Section *ObjectFileMachO::GetMachHeaderSection() { 6131 // Find the first address of the mach header which is the first non-zero file 6132 // sized section whose file offset is zero. This is the base file address of 6133 // the mach-o file which can be subtracted from the vmaddr of the other 6134 // segments found in memory and added to the load address 6135 ModuleSP module_sp = GetModule(); 6136 if (!module_sp) 6137 return nullptr; 6138 SectionList *section_list = GetSectionList(); 6139 if (!section_list) 6140 return nullptr; 6141 6142 // Some binaries can have a TEXT segment with a non-zero file offset. 6143 // Binaries in the shared cache are one example. Some hand-generated 6144 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order 6145 // in the file, even though they're laid out correctly in vmaddr terms. 6146 SectionSP text_segment_sp = 6147 section_list->FindSectionByName(GetSegmentNameTEXT()); 6148 if (text_segment_sp.get() && SectionIsLoadable(text_segment_sp.get())) 6149 return text_segment_sp.get(); 6150 6151 const size_t num_sections = section_list->GetSize(); 6152 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6153 Section *section = section_list->GetSectionAtIndex(sect_idx).get(); 6154 if (section->GetFileOffset() == 0 && SectionIsLoadable(section)) 6155 return section; 6156 } 6157 6158 return nullptr; 6159 } 6160 6161 bool ObjectFileMachO::SectionIsLoadable(const Section *section) { 6162 if (!section) 6163 return false; 6164 if (section->IsThreadSpecific()) 6165 return false; 6166 if (GetModule().get() != section->GetModule().get()) 6167 return false; 6168 // firmware style binaries with llvm gcov segment do 6169 // not have that segment mapped into memory. 
6170 if (section->GetName() == GetSegmentNameLLVM_COV()) { 6171 const Strata strata = GetStrata(); 6172 if (strata == eStrataKernel || strata == eStrataRawImage) 6173 return false; 6174 } 6175 // Be careful with __LINKEDIT and __DWARF segments 6176 if (section->GetName() == GetSegmentNameLINKEDIT() || 6177 section->GetName() == GetSegmentNameDWARF()) { 6178 // Only map __LINKEDIT and __DWARF if we have an in memory image and 6179 // this isn't a kernel binary like a kext or mach_kernel. 6180 const bool is_memory_image = (bool)m_process_wp.lock(); 6181 const Strata strata = GetStrata(); 6182 if (is_memory_image == false || strata == eStrataKernel) 6183 return false; 6184 } 6185 return true; 6186 } 6187 6188 lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage( 6189 lldb::addr_t header_load_address, const Section *header_section, 6190 const Section *section) { 6191 ModuleSP module_sp = GetModule(); 6192 if (module_sp && header_section && section && 6193 header_load_address != LLDB_INVALID_ADDRESS) { 6194 lldb::addr_t file_addr = header_section->GetFileAddress(); 6195 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section)) 6196 return section->GetFileAddress() - file_addr + header_load_address; 6197 } 6198 return LLDB_INVALID_ADDRESS; 6199 } 6200 6201 bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value, 6202 bool value_is_offset) { 6203 Log *log(GetLog(LLDBLog::DynamicLoader)); 6204 ModuleSP module_sp = GetModule(); 6205 if (!module_sp) 6206 return false; 6207 6208 SectionList *section_list = GetSectionList(); 6209 if (!section_list) 6210 return false; 6211 6212 size_t num_loaded_sections = 0; 6213 const size_t num_sections = section_list->GetSize(); 6214 6215 // Warn if some top-level segments map to the same address. The binary may be 6216 // malformed. 
6217 const bool warn_multiple = true; 6218 6219 if (log) { 6220 StreamString logmsg; 6221 logmsg << "ObjectFileMachO::SetLoadAddress "; 6222 if (GetFileSpec()) 6223 logmsg << "path='" << GetFileSpec().GetPath() << "' "; 6224 if (GetUUID()) { 6225 logmsg << "uuid=" << GetUUID().GetAsString(); 6226 } 6227 LLDB_LOGF(log, "%s", logmsg.GetData()); 6228 } 6229 if (value_is_offset) { 6230 // "value" is an offset to apply to each top level segment 6231 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6232 // Iterate through the object file sections to find all of the 6233 // sections that size on disk (to avoid __PAGEZERO) and load them 6234 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6235 if (SectionIsLoadable(section_sp.get())) { 6236 LLDB_LOGF(log, 6237 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6238 "0x%" PRIx64, 6239 section_sp->GetName().AsCString(), 6240 section_sp->GetFileAddress() + value); 6241 if (target.GetSectionLoadList().SetSectionLoadAddress( 6242 section_sp, section_sp->GetFileAddress() + value, 6243 warn_multiple)) 6244 ++num_loaded_sections; 6245 } 6246 } 6247 } else { 6248 // "value" is the new base address of the mach_header, adjust each 6249 // section accordingly 6250 6251 Section *mach_header_section = GetMachHeaderSection(); 6252 if (mach_header_section) { 6253 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6254 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6255 6256 lldb::addr_t section_load_addr = 6257 CalculateSectionLoadAddressForMemoryImage( 6258 value, mach_header_section, section_sp.get()); 6259 if (section_load_addr != LLDB_INVALID_ADDRESS) { 6260 LLDB_LOGF(log, 6261 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6262 "0x%" PRIx64, 6263 section_sp->GetName().AsCString(), section_load_addr); 6264 if (target.GetSectionLoadList().SetSectionLoadAddress( 6265 section_sp, section_load_addr, warn_multiple)) 6266 ++num_loaded_sections; 6267 } 
// One (segment name, load address) record written into the "all image infos"
// LC_NOTE payload.
struct segment_vmaddr {
  // Segment name, zero-filled by default.  Like the name in a Mach-O
  // LC_SEGMENT, it is not nul-terminated when all 16 characters are used.
  char segname[16] = {};
  // Load address of the segment; UINT64_MAX until one is assigned.
  uint64_t vmaddr = UINT64_MAX;
  uint64_t unused = 0;

  segment_vmaddr() = default;
  // Member-wise copy: duplicates all 16 name bytes plus both integers.
  segment_vmaddr(const segment_vmaddr &rhs) = default;
};
6332 // 6333 // The placement of this payload is a little tricky. We're 6334 // laying this out as 6335 // 6336 // 1. header (struct all_image_info_header) 6337 // 2. Array of fixed-size (struct image_entry)'s, one 6338 // per binary image present in the process. 6339 // 3. Arrays of (struct segment_vmaddr)'s, a varying number 6340 // for each binary image. 6341 // 4. Variable length c-strings of binary image filepaths, 6342 // one per binary. 6343 // 6344 // To compute where everything will be laid out in the 6345 // payload, we need to iterate over the images and calculate 6346 // how many segment_vmaddr structures each image will need, 6347 // and how long each image's filepath c-string is. There 6348 // are some multiple passes over the image list while calculating 6349 // everything. 6350 6351 static offset_t 6352 CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp, 6353 offset_t initial_file_offset, 6354 StreamString &all_image_infos_payload, 6355 lldb_private::SaveCoreOptions &options) { 6356 Target &target = process_sp->GetTarget(); 6357 ModuleList modules = target.GetImages(); 6358 6359 // stack-only corefiles have no reason to include binaries that 6360 // are not executing; we're trying to make the smallest corefile 6361 // we can, so leave the rest out. 
6362 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly) 6363 modules.Clear(); 6364 6365 std::set<std::string> executing_uuids; 6366 std::vector<ThreadSP> thread_list = 6367 process_sp->CalculateCoreFileThreadList(options); 6368 for (const ThreadSP &thread_sp : thread_list) { 6369 uint32_t stack_frame_count = thread_sp->GetStackFrameCount(); 6370 for (uint32_t j = 0; j < stack_frame_count; j++) { 6371 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(j); 6372 Address pc = stack_frame_sp->GetFrameCodeAddress(); 6373 ModuleSP module_sp = pc.GetModule(); 6374 if (module_sp) { 6375 UUID uuid = module_sp->GetUUID(); 6376 if (uuid.IsValid()) { 6377 executing_uuids.insert(uuid.GetAsString()); 6378 modules.AppendIfNeeded(module_sp); 6379 } 6380 } 6381 } 6382 } 6383 size_t modules_count = modules.GetSize(); 6384 6385 struct all_image_infos_header infos; 6386 infos.version = 1; 6387 infos.imgcount = modules_count; 6388 infos.entries_size = sizeof(image_entry); 6389 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header); 6390 infos.unused = 0; 6391 6392 all_image_infos_payload.PutHex32(infos.version); 6393 all_image_infos_payload.PutHex32(infos.imgcount); 6394 all_image_infos_payload.PutHex64(infos.entries_fileoff); 6395 all_image_infos_payload.PutHex32(infos.entries_size); 6396 all_image_infos_payload.PutHex32(infos.unused); 6397 6398 // First create the structures for all of the segment name+vmaddr vectors 6399 // for each module, so we will know the size of them as we add the 6400 // module entries. 
6401 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs; 6402 for (size_t i = 0; i < modules_count; i++) { 6403 ModuleSP module = modules.GetModuleAtIndex(i); 6404 6405 SectionList *sections = module->GetSectionList(); 6406 size_t sections_count = sections->GetSize(); 6407 std::vector<segment_vmaddr> segment_vmaddrs; 6408 for (size_t j = 0; j < sections_count; j++) { 6409 SectionSP section = sections->GetSectionAtIndex(j); 6410 if (!section->GetParent().get()) { 6411 addr_t vmaddr = section->GetLoadBaseAddress(&target); 6412 if (vmaddr == LLDB_INVALID_ADDRESS) 6413 continue; 6414 ConstString name = section->GetName(); 6415 segment_vmaddr seg_vmaddr; 6416 // This is the uncommon case where strncpy is exactly 6417 // the right one, doesn't need to be nul terminated. 6418 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and 6419 // is not guaranteed to be nul-terminated if all 16 characters are 6420 // used. 6421 // coverity[buffer_size_warning] 6422 strncpy(seg_vmaddr.segname, name.AsCString(), 6423 sizeof(seg_vmaddr.segname)); 6424 seg_vmaddr.vmaddr = vmaddr; 6425 seg_vmaddr.unused = 0; 6426 segment_vmaddrs.push_back(seg_vmaddr); 6427 } 6428 } 6429 modules_segment_vmaddrs.push_back(segment_vmaddrs); 6430 } 6431 6432 offset_t size_of_vmaddr_structs = 0; 6433 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6434 size_of_vmaddr_structs += 6435 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr); 6436 } 6437 6438 offset_t size_of_filepath_cstrings = 0; 6439 for (size_t i = 0; i < modules_count; i++) { 6440 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6441 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1; 6442 } 6443 6444 // Calculate the file offsets of our "all image infos" payload in the 6445 // corefile. initial_file_offset the original value passed in to this method. 
6446 6447 offset_t start_of_entries = 6448 initial_file_offset + sizeof(all_image_infos_header); 6449 offset_t start_of_seg_vmaddrs = 6450 start_of_entries + sizeof(image_entry) * modules_count; 6451 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs; 6452 6453 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings; 6454 6455 // Now write the one-per-module 'struct image_entry' into the 6456 // StringStream; keep track of where the struct segment_vmaddr 6457 // entries for each module will end up in the corefile. 6458 6459 offset_t current_string_offset = start_of_filenames; 6460 offset_t current_segaddrs_offset = start_of_seg_vmaddrs; 6461 std::vector<struct image_entry> image_entries; 6462 for (size_t i = 0; i < modules_count; i++) { 6463 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6464 6465 struct image_entry ent; 6466 memcpy(&ent.uuid, module_sp->GetUUID().GetBytes().data(), sizeof(ent.uuid)); 6467 if (modules_segment_vmaddrs[i].size() > 0) { 6468 ent.segment_count = modules_segment_vmaddrs[i].size(); 6469 ent.seg_addrs_offset = current_segaddrs_offset; 6470 } 6471 ent.filepath_offset = current_string_offset; 6472 ObjectFile *objfile = module_sp->GetObjectFile(); 6473 if (objfile) { 6474 Address base_addr(objfile->GetBaseAddress()); 6475 if (base_addr.IsValid()) { 6476 ent.load_address = base_addr.GetLoadAddress(&target); 6477 } 6478 } 6479 6480 all_image_infos_payload.PutHex64(ent.filepath_offset); 6481 all_image_infos_payload.PutRawBytes(ent.uuid, sizeof(ent.uuid)); 6482 all_image_infos_payload.PutHex64(ent.load_address); 6483 all_image_infos_payload.PutHex64(ent.seg_addrs_offset); 6484 all_image_infos_payload.PutHex32(ent.segment_count); 6485 6486 if (executing_uuids.find(module_sp->GetUUID().GetAsString()) != 6487 executing_uuids.end()) 6488 all_image_infos_payload.PutHex32(1); 6489 else 6490 all_image_infos_payload.PutHex32(0); 6491 6492 current_segaddrs_offset += ent.segment_count * 
sizeof(segment_vmaddr); 6493 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1; 6494 } 6495 6496 // Now write the struct segment_vmaddr entries into the StringStream. 6497 6498 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6499 if (modules_segment_vmaddrs[i].size() == 0) 6500 continue; 6501 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) { 6502 all_image_infos_payload.PutRawBytes(segvm.segname, sizeof(segvm.segname)); 6503 all_image_infos_payload.PutHex64(segvm.vmaddr); 6504 all_image_infos_payload.PutHex64(segvm.unused); 6505 } 6506 } 6507 6508 for (size_t i = 0; i < modules_count; i++) { 6509 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6510 std::string filepath = module_sp->GetFileSpec().GetPath(); 6511 all_image_infos_payload.PutRawBytes(filepath.data(), filepath.size() + 1); 6512 } 6513 6514 return final_file_offset; 6515 } 6516 6517 // Temp struct used to combine contiguous memory regions with 6518 // identical permissions. 6519 struct page_object { 6520 addr_t addr; 6521 addr_t size; 6522 uint32_t prot; 6523 }; 6524 6525 bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp, 6526 lldb_private::SaveCoreOptions &options, 6527 Status &error) { 6528 // The FileSpec and Process are already checked in PluginManager::SaveCore. 
6529 assert(options.GetOutputFile().has_value()); 6530 assert(process_sp); 6531 const FileSpec outfile = options.GetOutputFile().value(); 6532 6533 // MachO defaults to dirty pages 6534 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified) 6535 options.SetStyle(eSaveCoreDirtyOnly); 6536 6537 Target &target = process_sp->GetTarget(); 6538 const ArchSpec target_arch = target.GetArchitecture(); 6539 const llvm::Triple &target_triple = target_arch.GetTriple(); 6540 if (target_triple.getVendor() == llvm::Triple::Apple && 6541 (target_triple.getOS() == llvm::Triple::MacOSX || 6542 target_triple.getOS() == llvm::Triple::IOS || 6543 target_triple.getOS() == llvm::Triple::WatchOS || 6544 target_triple.getOS() == llvm::Triple::TvOS || 6545 target_triple.getOS() == llvm::Triple::XROS)) { 6546 // NEED_BRIDGEOS_TRIPLE target_triple.getOS() == llvm::Triple::BridgeOS)) 6547 // { 6548 bool make_core = false; 6549 switch (target_arch.GetMachine()) { 6550 case llvm::Triple::aarch64: 6551 case llvm::Triple::aarch64_32: 6552 case llvm::Triple::arm: 6553 case llvm::Triple::thumb: 6554 case llvm::Triple::x86: 6555 case llvm::Triple::x86_64: 6556 make_core = true; 6557 break; 6558 default: 6559 error.SetErrorStringWithFormat("unsupported core architecture: %s", 6560 target_triple.str().c_str()); 6561 break; 6562 } 6563 6564 if (make_core) { 6565 Process::CoreFileMemoryRanges core_ranges; 6566 error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges); 6567 if (error.Success()) { 6568 const uint32_t addr_byte_size = target_arch.GetAddressByteSize(); 6569 const ByteOrder byte_order = target_arch.GetByteOrder(); 6570 std::vector<llvm::MachO::segment_command_64> segment_load_commands; 6571 for (const auto &core_range : core_ranges) { 6572 uint32_t cmd_type = LC_SEGMENT_64; 6573 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64); 6574 if (addr_byte_size == 4) { 6575 cmd_type = LC_SEGMENT; 6576 segment_size = sizeof(llvm::MachO::segment_command); 6577 } 6578 // 
Skip any ranges with no read/write/execute permissions and empty 6579 // ranges. 6580 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0) 6581 continue; 6582 uint32_t vm_prot = 0; 6583 if (core_range.lldb_permissions & ePermissionsReadable) 6584 vm_prot |= VM_PROT_READ; 6585 if (core_range.lldb_permissions & ePermissionsWritable) 6586 vm_prot |= VM_PROT_WRITE; 6587 if (core_range.lldb_permissions & ePermissionsExecutable) 6588 vm_prot |= VM_PROT_EXECUTE; 6589 const addr_t vm_addr = core_range.range.start(); 6590 const addr_t vm_size = core_range.range.size(); 6591 llvm::MachO::segment_command_64 segment = { 6592 cmd_type, // uint32_t cmd; 6593 segment_size, // uint32_t cmdsize; 6594 {0}, // char segname[16]; 6595 vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O 6596 vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O 6597 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O 6598 vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O 6599 vm_prot, // uint32_t maxprot; 6600 vm_prot, // uint32_t initprot; 6601 0, // uint32_t nsects; 6602 0}; // uint32_t flags; 6603 segment_load_commands.push_back(segment); 6604 } 6605 6606 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order); 6607 6608 llvm::MachO::mach_header_64 mach_header; 6609 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC; 6610 mach_header.cputype = target_arch.GetMachOCPUType(); 6611 mach_header.cpusubtype = target_arch.GetMachOCPUSubType(); 6612 mach_header.filetype = MH_CORE; 6613 mach_header.ncmds = segment_load_commands.size(); 6614 mach_header.flags = 0; 6615 mach_header.reserved = 0; 6616 ThreadList &thread_list = process_sp->GetThreadList(); 6617 const uint32_t num_threads = thread_list.GetSize(); 6618 6619 // Make an array of LC_THREAD data items. Each one contains the 6620 // contents of the LC_THREAD load command. 
The data doesn't contain 6621 // the load command + load command size, we will add the load command 6622 // and load command size as we emit the data. 6623 std::vector<StreamString> LC_THREAD_datas(num_threads); 6624 for (auto &LC_THREAD_data : LC_THREAD_datas) { 6625 LC_THREAD_data.GetFlags().Set(Stream::eBinary); 6626 LC_THREAD_data.SetAddressByteSize(addr_byte_size); 6627 LC_THREAD_data.SetByteOrder(byte_order); 6628 } 6629 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { 6630 ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); 6631 if (thread_sp) { 6632 switch (mach_header.cputype) { 6633 case llvm::MachO::CPU_TYPE_ARM64: 6634 case llvm::MachO::CPU_TYPE_ARM64_32: 6635 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD( 6636 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6637 break; 6638 6639 case llvm::MachO::CPU_TYPE_ARM: 6640 RegisterContextDarwin_arm_Mach::Create_LC_THREAD( 6641 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6642 break; 6643 6644 case llvm::MachO::CPU_TYPE_I386: 6645 RegisterContextDarwin_i386_Mach::Create_LC_THREAD( 6646 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6647 break; 6648 6649 case llvm::MachO::CPU_TYPE_X86_64: 6650 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD( 6651 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6652 break; 6653 } 6654 } 6655 } 6656 6657 // The size of the load command is the size of the segments... 
6658 if (addr_byte_size == 8) { 6659 mach_header.sizeofcmds = segment_load_commands.size() * 6660 sizeof(llvm::MachO::segment_command_64); 6661 } else { 6662 mach_header.sizeofcmds = segment_load_commands.size() * 6663 sizeof(llvm::MachO::segment_command); 6664 } 6665 6666 // and the size of all LC_THREAD load command 6667 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6668 ++mach_header.ncmds; 6669 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize(); 6670 } 6671 6672 // Bits will be set to indicate which bits are NOT used in 6673 // addressing in this process or 0 for unknown. 6674 uint64_t address_mask = process_sp->GetCodeAddressMask(); 6675 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6676 // LC_NOTE "addrable bits" 6677 mach_header.ncmds++; 6678 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6679 } 6680 6681 // LC_NOTE "process metadata" 6682 mach_header.ncmds++; 6683 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6684 6685 // LC_NOTE "all image infos" 6686 mach_header.ncmds++; 6687 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6688 6689 // Write the mach header 6690 buffer.PutHex32(mach_header.magic); 6691 buffer.PutHex32(mach_header.cputype); 6692 buffer.PutHex32(mach_header.cpusubtype); 6693 buffer.PutHex32(mach_header.filetype); 6694 buffer.PutHex32(mach_header.ncmds); 6695 buffer.PutHex32(mach_header.sizeofcmds); 6696 buffer.PutHex32(mach_header.flags); 6697 if (addr_byte_size == 8) { 6698 buffer.PutHex32(mach_header.reserved); 6699 } 6700 6701 // Skip the mach header and all load commands and align to the next 6702 // 0x1000 byte boundary 6703 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds; 6704 6705 file_offset = llvm::alignTo(file_offset, 16); 6706 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes; 6707 6708 // Add "addrable bits" LC_NOTE when an address mask is available 6709 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6710 std::unique_ptr<LCNoteEntry> 
addrable_bits_lcnote_up( 6711 new LCNoteEntry(addr_byte_size, byte_order)); 6712 addrable_bits_lcnote_up->name = "addrable bits"; 6713 addrable_bits_lcnote_up->payload_file_offset = file_offset; 6714 int bits = std::bitset<64>(~address_mask).count(); 6715 addrable_bits_lcnote_up->payload.PutHex32(4); // version 6716 addrable_bits_lcnote_up->payload.PutHex32( 6717 bits); // # of bits used for low addresses 6718 addrable_bits_lcnote_up->payload.PutHex32( 6719 bits); // # of bits used for high addresses 6720 addrable_bits_lcnote_up->payload.PutHex32(0); // reserved 6721 6722 file_offset += addrable_bits_lcnote_up->payload.GetSize(); 6723 6724 lc_notes.push_back(std::move(addrable_bits_lcnote_up)); 6725 } 6726 6727 // Add "process metadata" LC_NOTE 6728 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up( 6729 new LCNoteEntry(addr_byte_size, byte_order)); 6730 thread_extrainfo_lcnote_up->name = "process metadata"; 6731 thread_extrainfo_lcnote_up->payload_file_offset = file_offset; 6732 6733 StructuredData::DictionarySP dict( 6734 std::make_shared<StructuredData::Dictionary>()); 6735 StructuredData::ArraySP threads( 6736 std::make_shared<StructuredData::Array>()); 6737 for (const ThreadSP &thread_sp : 6738 process_sp->CalculateCoreFileThreadList(options)) { 6739 StructuredData::DictionarySP thread( 6740 std::make_shared<StructuredData::Dictionary>()); 6741 thread->AddIntegerItem("thread_id", thread_sp->GetID()); 6742 threads->AddItem(thread); 6743 } 6744 dict->AddItem("threads", threads); 6745 StreamString strm; 6746 dict->Dump(strm, /* pretty */ false); 6747 thread_extrainfo_lcnote_up->payload.PutRawBytes(strm.GetData(), 6748 strm.GetSize()); 6749 6750 file_offset += thread_extrainfo_lcnote_up->payload.GetSize(); 6751 file_offset = llvm::alignTo(file_offset, 16); 6752 lc_notes.push_back(std::move(thread_extrainfo_lcnote_up)); 6753 6754 // Add "all image infos" LC_NOTE 6755 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up( 6756 new LCNoteEntry(addr_byte_size, 
byte_order)); 6757 all_image_infos_lcnote_up->name = "all image infos"; 6758 all_image_infos_lcnote_up->payload_file_offset = file_offset; 6759 file_offset = CreateAllImageInfosPayload( 6760 process_sp, file_offset, all_image_infos_lcnote_up->payload, 6761 options); 6762 lc_notes.push_back(std::move(all_image_infos_lcnote_up)); 6763 6764 // Add LC_NOTE load commands 6765 for (auto &lcnote : lc_notes) { 6766 // Add the LC_NOTE load command to the file. 6767 buffer.PutHex32(LC_NOTE); 6768 buffer.PutHex32(sizeof(llvm::MachO::note_command)); 6769 char namebuf[16]; 6770 memset(namebuf, 0, sizeof(namebuf)); 6771 // This is the uncommon case where strncpy is exactly 6772 // the right one, doesn't need to be nul terminated. 6773 // LC_NOTE name field is char[16] and is not guaranteed to be 6774 // nul-terminated. 6775 // coverity[buffer_size_warning] 6776 strncpy(namebuf, lcnote->name.c_str(), sizeof(namebuf)); 6777 buffer.PutRawBytes(namebuf, sizeof(namebuf)); 6778 buffer.PutHex64(lcnote->payload_file_offset); 6779 buffer.PutHex64(lcnote->payload.GetSize()); 6780 } 6781 6782 // Align to 4096-byte page boundary for the LC_SEGMENTs. 
6783 file_offset = llvm::alignTo(file_offset, 4096); 6784 6785 for (auto &segment : segment_load_commands) { 6786 segment.fileoff = file_offset; 6787 file_offset += segment.filesize; 6788 } 6789 6790 // Write out all of the LC_THREAD load commands 6791 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6792 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize(); 6793 buffer.PutHex32(LC_THREAD); 6794 buffer.PutHex32(8 + LC_THREAD_data_size); // cmd + cmdsize + data 6795 buffer.Write(LC_THREAD_data.GetString().data(), LC_THREAD_data_size); 6796 } 6797 6798 // Write out all of the segment load commands 6799 for (const auto &segment : segment_load_commands) { 6800 buffer.PutHex32(segment.cmd); 6801 buffer.PutHex32(segment.cmdsize); 6802 buffer.PutRawBytes(segment.segname, sizeof(segment.segname)); 6803 if (addr_byte_size == 8) { 6804 buffer.PutHex64(segment.vmaddr); 6805 buffer.PutHex64(segment.vmsize); 6806 buffer.PutHex64(segment.fileoff); 6807 buffer.PutHex64(segment.filesize); 6808 } else { 6809 buffer.PutHex32(static_cast<uint32_t>(segment.vmaddr)); 6810 buffer.PutHex32(static_cast<uint32_t>(segment.vmsize)); 6811 buffer.PutHex32(static_cast<uint32_t>(segment.fileoff)); 6812 buffer.PutHex32(static_cast<uint32_t>(segment.filesize)); 6813 } 6814 buffer.PutHex32(segment.maxprot); 6815 buffer.PutHex32(segment.initprot); 6816 buffer.PutHex32(segment.nsects); 6817 buffer.PutHex32(segment.flags); 6818 } 6819 6820 std::string core_file_path(outfile.GetPath()); 6821 auto core_file = FileSystem::Instance().Open( 6822 outfile, File::eOpenOptionWriteOnly | File::eOpenOptionTruncate | 6823 File::eOpenOptionCanCreate); 6824 if (!core_file) { 6825 error = core_file.takeError(); 6826 } else { 6827 // Read 1 page at a time 6828 uint8_t bytes[0x1000]; 6829 // Write the mach header and load commands out to the core file 6830 size_t bytes_written = buffer.GetString().size(); 6831 error = 6832 core_file.get()->Write(buffer.GetString().data(), bytes_written); 6833 if 
(error.Success()) { 6834 6835 for (auto &lcnote : lc_notes) { 6836 if (core_file.get()->SeekFromStart(lcnote->payload_file_offset) == 6837 -1) { 6838 error.SetErrorStringWithFormat("Unable to seek to corefile pos " 6839 "to write '%s' LC_NOTE payload", 6840 lcnote->name.c_str()); 6841 return false; 6842 } 6843 bytes_written = lcnote->payload.GetSize(); 6844 error = core_file.get()->Write(lcnote->payload.GetData(), 6845 bytes_written); 6846 if (!error.Success()) 6847 return false; 6848 } 6849 6850 // Now write the file data for all memory segments in the process 6851 for (const auto &segment : segment_load_commands) { 6852 if (core_file.get()->SeekFromStart(segment.fileoff) == -1) { 6853 error.SetErrorStringWithFormat( 6854 "unable to seek to offset 0x%" PRIx64 " in '%s'", 6855 segment.fileoff, core_file_path.c_str()); 6856 break; 6857 } 6858 6859 target.GetDebugger().GetAsyncOutputStream()->Printf( 6860 "Saving %" PRId64 6861 " bytes of data for memory region at 0x%" PRIx64 "\n", 6862 segment.vmsize, segment.vmaddr); 6863 addr_t bytes_left = segment.vmsize; 6864 addr_t addr = segment.vmaddr; 6865 Status memory_read_error; 6866 while (bytes_left > 0 && error.Success()) { 6867 const size_t bytes_to_read = 6868 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left; 6869 6870 // In a savecore setting, we don't really care about caching, 6871 // as the data is dumped and very likely never read again, 6872 // so we call ReadMemoryFromInferior to bypass it. 
6873 const size_t bytes_read = process_sp->ReadMemoryFromInferior( 6874 addr, bytes, bytes_to_read, memory_read_error); 6875 6876 if (bytes_read == bytes_to_read) { 6877 size_t bytes_written = bytes_read; 6878 error = core_file.get()->Write(bytes, bytes_written); 6879 bytes_left -= bytes_read; 6880 addr += bytes_read; 6881 } else { 6882 // Some pages within regions are not readable, those should 6883 // be zero filled 6884 memset(bytes, 0, bytes_to_read); 6885 size_t bytes_written = bytes_to_read; 6886 error = core_file.get()->Write(bytes, bytes_written); 6887 bytes_left -= bytes_to_read; 6888 addr += bytes_to_read; 6889 } 6890 } 6891 } 6892 } 6893 } 6894 } 6895 } 6896 return true; // This is the right plug to handle saving core files for 6897 // this process 6898 } 6899 return false; 6900 } 6901 6902 ObjectFileMachO::MachOCorefileAllImageInfos 6903 ObjectFileMachO::GetCorefileAllImageInfos() { 6904 MachOCorefileAllImageInfos image_infos; 6905 Log *log(GetLog(LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process | 6906 LLDBLog::DynamicLoader)); 6907 6908 auto lc_notes = FindLC_NOTEByName("all image infos"); 6909 for (auto lc_note : lc_notes) { 6910 offset_t payload_offset = std::get<0>(lc_note); 6911 // Read the struct all_image_infos_header. 6912 uint32_t version = m_data.GetU32(&payload_offset); 6913 if (version != 1) { 6914 return image_infos; 6915 } 6916 uint32_t imgcount = m_data.GetU32(&payload_offset); 6917 uint64_t entries_fileoff = m_data.GetU64(&payload_offset); 6918 // 'entries_size' is not used, nor is the 'unused' entry. 6919 // offset += 4; // uint32_t entries_size; 6920 // offset += 4; // uint32_t unused; 6921 6922 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images", 6923 version, imgcount); 6924 payload_offset = entries_fileoff; 6925 for (uint32_t i = 0; i < imgcount; i++) { 6926 // Read the struct image_entry. 
6927 offset_t filepath_offset = m_data.GetU64(&payload_offset); 6928 uuid_t uuid; 6929 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6930 sizeof(uuid_t)); 6931 uint64_t load_address = m_data.GetU64(&payload_offset); 6932 offset_t seg_addrs_offset = m_data.GetU64(&payload_offset); 6933 uint32_t segment_count = m_data.GetU32(&payload_offset); 6934 uint32_t currently_executing = m_data.GetU32(&payload_offset); 6935 6936 MachOCorefileImageEntry image_entry; 6937 image_entry.filename = (const char *)m_data.GetCStr(&filepath_offset); 6938 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6939 image_entry.load_address = load_address; 6940 image_entry.currently_executing = currently_executing; 6941 6942 offset_t seg_vmaddrs_offset = seg_addrs_offset; 6943 for (uint32_t j = 0; j < segment_count; j++) { 6944 char segname[17]; 6945 m_data.CopyData(seg_vmaddrs_offset, 16, segname); 6946 segname[16] = '\0'; 6947 seg_vmaddrs_offset += 16; 6948 uint64_t vmaddr = m_data.GetU64(&seg_vmaddrs_offset); 6949 seg_vmaddrs_offset += 8; /* unused */ 6950 6951 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr}; 6952 image_entry.segment_load_addresses.push_back(new_seg); 6953 } 6954 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s", 6955 image_entry.filename.c_str(), 6956 image_entry.uuid.GetAsString().c_str(), 6957 image_entry.load_address, 6958 image_entry.currently_executing ? 
"currently executing" 6959 : "not currently executing"); 6960 image_infos.all_image_infos.push_back(image_entry); 6961 } 6962 } 6963 6964 lc_notes = FindLC_NOTEByName("load binary"); 6965 for (auto lc_note : lc_notes) { 6966 offset_t payload_offset = std::get<0>(lc_note); 6967 uint32_t version = m_data.GetU32(&payload_offset); 6968 if (version == 1) { 6969 uuid_t uuid; 6970 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6971 sizeof(uuid_t)); 6972 uint64_t load_address = m_data.GetU64(&payload_offset); 6973 uint64_t slide = m_data.GetU64(&payload_offset); 6974 std::string filename = m_data.GetCStr(&payload_offset); 6975 6976 MachOCorefileImageEntry image_entry; 6977 image_entry.filename = filename; 6978 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6979 image_entry.load_address = load_address; 6980 image_entry.slide = slide; 6981 image_entry.currently_executing = true; 6982 image_infos.all_image_infos.push_back(image_entry); 6983 LLDB_LOGF(log, 6984 "LC_NOTE 'load binary' found, filename %s uuid %s load " 6985 "address 0x%" PRIx64 " slide 0x%" PRIx64, 6986 filename.c_str(), 6987 image_entry.uuid.IsValid() 6988 ? image_entry.uuid.GetAsString().c_str() 6989 : "00000000-0000-0000-0000-000000000000", 6990 load_address, slide); 6991 } 6992 } 6993 6994 return image_infos; 6995 } 6996 6997 bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { 6998 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos(); 6999 Log *log = GetLog(LLDBLog::Object | LLDBLog::DynamicLoader); 7000 Status error; 7001 7002 bool found_platform_binary = false; 7003 ModuleList added_modules; 7004 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) { 7005 ModuleSP module_sp, local_filesystem_module_sp; 7006 7007 // If this is a platform binary, it has been loaded (or registered with 7008 // the DynamicLoader to be loaded), we don't need to do any further 7009 // processing. 
We're not going to call ModulesDidLoad on this in this 7010 // method, so notify==true. 7011 if (process.GetTarget() 7012 .GetDebugger() 7013 .GetPlatformList() 7014 .LoadPlatformBinaryAndSetup(&process, image.load_address, 7015 true /* notify */)) { 7016 LLDB_LOGF(log, 7017 "ObjectFileMachO::%s binary at 0x%" PRIx64 7018 " is a platform binary, has been handled by a Platform plugin.", 7019 __FUNCTION__, image.load_address); 7020 continue; 7021 } 7022 7023 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS; 7024 uint64_t value = value_is_offset ? image.slide : image.load_address; 7025 if (value_is_offset && value == LLDB_INVALID_ADDRESS) { 7026 // We have neither address nor slide; so we will find the binary 7027 // by UUID and load it at slide/offset 0. 7028 value = 0; 7029 } 7030 7031 // We have either a UUID, or we have a load address which 7032 // and can try to read load commands and find a UUID. 7033 if (image.uuid.IsValid() || 7034 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) { 7035 const bool set_load_address = image.segment_load_addresses.size() == 0; 7036 const bool notify = false; 7037 // Userland Darwin binaries will have segment load addresses via 7038 // the `all image infos` LC_NOTE. 7039 const bool allow_memory_image_last_resort = 7040 image.segment_load_addresses.size(); 7041 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress( 7042 &process, image.filename, image.uuid, value, value_is_offset, 7043 image.currently_executing, notify, set_load_address, 7044 allow_memory_image_last_resort); 7045 } 7046 7047 // We have a ModuleSP to load in the Target. Load it at the 7048 // correct address/slide and notify/load scripting resources. 
7049 if (module_sp) { 7050 added_modules.Append(module_sp, false /* notify */); 7051 7052 // We have a list of segment load address 7053 if (image.segment_load_addresses.size() > 0) { 7054 if (log) { 7055 std::string uuidstr = image.uuid.GetAsString(); 7056 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7057 "UUID %s with section load addresses", 7058 module_sp->GetFileSpec().GetPath().c_str(), 7059 uuidstr.c_str()); 7060 } 7061 for (auto name_vmaddr_tuple : image.segment_load_addresses) { 7062 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList(); 7063 if (sectlist) { 7064 SectionSP sect_sp = 7065 sectlist->FindSectionByName(std::get<0>(name_vmaddr_tuple)); 7066 if (sect_sp) { 7067 process.GetTarget().SetSectionLoadAddress( 7068 sect_sp, std::get<1>(name_vmaddr_tuple)); 7069 } 7070 } 7071 } 7072 } else { 7073 if (log) { 7074 std::string uuidstr = image.uuid.GetAsString(); 7075 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7076 "UUID %s with %s 0x%" PRIx64, 7077 module_sp->GetFileSpec().GetPath().c_str(), 7078 uuidstr.c_str(), 7079 value_is_offset ? "slide" : "load address", value); 7080 } 7081 bool changed; 7082 module_sp->SetLoadAddress(process.GetTarget(), value, value_is_offset, 7083 changed); 7084 } 7085 } 7086 } 7087 if (added_modules.GetSize() > 0) { 7088 process.GetTarget().ModulesDidLoad(added_modules); 7089 process.Flush(); 7090 return true; 7091 } 7092 // Return true if the only binary we found was the platform binary, 7093 // and it was loaded outside the scope of this method. 7094 if (found_platform_binary) 7095 return true; 7096 7097 // No binaries. 7098 return false; 7099 } 7100