1 //===-- ObjectFileMachO.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/ScopeExit.h" 10 #include "llvm/ADT/StringRef.h" 11 12 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h" 13 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h" 14 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h" 15 #include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h" 16 #include "lldb/Core/Debugger.h" 17 #include "lldb/Core/Module.h" 18 #include "lldb/Core/ModuleSpec.h" 19 #include "lldb/Core/PluginManager.h" 20 #include "lldb/Core/Progress.h" 21 #include "lldb/Core/Section.h" 22 #include "lldb/Host/Host.h" 23 #include "lldb/Symbol/DWARFCallFrameInfo.h" 24 #include "lldb/Symbol/ObjectFile.h" 25 #include "lldb/Target/DynamicLoader.h" 26 #include "lldb/Target/MemoryRegionInfo.h" 27 #include "lldb/Target/Platform.h" 28 #include "lldb/Target/Process.h" 29 #include "lldb/Target/SectionLoadList.h" 30 #include "lldb/Target/Target.h" 31 #include "lldb/Target/Thread.h" 32 #include "lldb/Target/ThreadList.h" 33 #include "lldb/Utility/ArchSpec.h" 34 #include "lldb/Utility/DataBuffer.h" 35 #include "lldb/Utility/FileSpec.h" 36 #include "lldb/Utility/FileSpecList.h" 37 #include "lldb/Utility/LLDBLog.h" 38 #include "lldb/Utility/Log.h" 39 #include "lldb/Utility/RangeMap.h" 40 #include "lldb/Utility/RegisterValue.h" 41 #include "lldb/Utility/Status.h" 42 #include "lldb/Utility/StreamString.h" 43 #include "lldb/Utility/Timer.h" 44 #include "lldb/Utility/UUID.h" 45 46 #include "lldb/Host/SafeMachO.h" 47 48 #include "llvm/ADT/DenseSet.h" 49 #include "llvm/Support/FormatVariadic.h" 50 #include "llvm/Support/MemoryBuffer.h" 51 52 #include "ObjectFileMachO.h" 53 54 #if defined(__APPLE__) 55 #include <TargetConditionals.h> 56 // GetLLDBSharedCacheUUID() needs to call dlsym() 57 #include <dlfcn.h> 58 #include <mach/mach_init.h> 59 #include <mach/vm_map.h> 60 #include <lldb/Host/SafeMachO.h> 61 #endif 62 63 #ifndef __APPLE__ 64 #include "lldb/Utility/AppleUuidCompatibility.h" 65 #else 66 #include <uuid/uuid.h> 67 #endif 68 69 #include <bitset> 70 #include <memory> 71 #include <optional> 72 73 // Unfortunately the signpost header pulls in the system MachO header, too. 74 #ifdef CPU_TYPE_ARM 75 #undef CPU_TYPE_ARM 76 #endif 77 #ifdef CPU_TYPE_ARM64 78 #undef CPU_TYPE_ARM64 79 #endif 80 #ifdef CPU_TYPE_ARM64_32 81 #undef CPU_TYPE_ARM64_32 82 #endif 83 #ifdef CPU_TYPE_I386 84 #undef CPU_TYPE_I386 85 #endif 86 #ifdef CPU_TYPE_X86_64 87 #undef CPU_TYPE_X86_64 88 #endif 89 #ifdef MH_DYLINKER 90 #undef MH_DYLINKER 91 #endif 92 #ifdef MH_OBJECT 93 #undef MH_OBJECT 94 #endif 95 #ifdef LC_VERSION_MIN_MACOSX 96 #undef LC_VERSION_MIN_MACOSX 97 #endif 98 #ifdef LC_VERSION_MIN_IPHONEOS 99 #undef LC_VERSION_MIN_IPHONEOS 100 #endif 101 #ifdef LC_VERSION_MIN_TVOS 102 #undef LC_VERSION_MIN_TVOS 103 #endif 104 #ifdef LC_VERSION_MIN_WATCHOS 105 #undef LC_VERSION_MIN_WATCHOS 106 #endif 107 #ifdef LC_BUILD_VERSION 108 #undef LC_BUILD_VERSION 109 #endif 110 #ifdef PLATFORM_MACOS 111 #undef PLATFORM_MACOS 112 #endif 113 #ifdef PLATFORM_MACCATALYST 114 #undef PLATFORM_MACCATALYST 115 #endif 116 #ifdef PLATFORM_IOS 117 #undef PLATFORM_IOS 118 #endif 119 #ifdef PLATFORM_IOSSIMULATOR 120 #undef PLATFORM_IOSSIMULATOR 121 #endif 122 #ifdef PLATFORM_TVOS 123 #undef PLATFORM_TVOS 124 #endif 125 #ifdef PLATFORM_TVOSSIMULATOR 126 #undef PLATFORM_TVOSSIMULATOR 127 #endif 128 #ifdef PLATFORM_WATCHOS 129 #undef PLATFORM_WATCHOS 130 #endif 131 #ifdef PLATFORM_WATCHOSSIMULATOR 132 #undef PLATFORM_WATCHOSSIMULATOR 133 #endif 134 135 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull 136 using namespace lldb; 137 using namespace lldb_private; 138 using namespace llvm::MachO; 139 140 static constexpr llvm::StringLiteral g_loader_path = "@loader_path"; 141 static constexpr llvm::StringLiteral g_executable_path = "@executable_path"; 142 143 LLDB_PLUGIN_DEFINE(ObjectFileMachO) 144 145 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name, 146 const char *alt_name, size_t reg_byte_size, 147 Stream &data) { 148 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name); 149 if (reg_info == nullptr) 150 reg_info = reg_ctx->GetRegisterInfoByName(alt_name); 151 if (reg_info) { 152 lldb_private::RegisterValue reg_value; 153 if (reg_ctx->ReadRegister(reg_info, reg_value)) { 154 if (reg_info->byte_size >= reg_byte_size) 155 data.Write(reg_value.GetBytes(), reg_byte_size); 156 else { 157 data.Write(reg_value.GetBytes(), reg_info->byte_size); 158 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i) 159 data.PutChar(0); 160 } 161 return; 162 } 163 } 164 // Just write zeros if all else fails 165 for (size_t i = 0; i < reg_byte_size; ++i) 166 data.PutChar(0); 167 } 168 169 class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 { 170 public: 171 RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread, 172 const DataExtractor &data) 173 : RegisterContextDarwin_x86_64(thread, 0) { 174 SetRegisterDataFrom_LC_THREAD(data); 175 } 176 177 void InvalidateAllRegisters() override { 178 // Do nothing... registers are always valid... 179 } 180 181 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 182 lldb::offset_t offset = 0; 183 SetError(GPRRegSet, Read, -1); 184 SetError(FPURegSet, Read, -1); 185 SetError(EXCRegSet, Read, -1); 186 bool done = false; 187 188 while (!done) { 189 int flavor = data.GetU32(&offset); 190 if (flavor == 0) 191 done = true; 192 else { 193 uint32_t i; 194 uint32_t count = data.GetU32(&offset); 195 switch (flavor) { 196 case GPRRegSet: 197 for (i = 0; i < count; ++i) 198 (&gpr.rax)[i] = data.GetU64(&offset); 199 SetError(GPRRegSet, Read, 0); 200 done = true; 201 202 break; 203 case FPURegSet: 204 // TODO: fill in FPU regs.... 205 // SetError (FPURegSet, Read, -1); 206 done = true; 207 208 break; 209 case EXCRegSet: 210 exc.trapno = data.GetU32(&offset); 211 exc.err = data.GetU32(&offset); 212 exc.faultvaddr = data.GetU64(&offset); 213 SetError(EXCRegSet, Read, 0); 214 done = true; 215 break; 216 case 7: 217 case 8: 218 case 9: 219 // fancy flavors that encapsulate of the above flavors... 220 break; 221 222 default: 223 done = true; 224 break; 225 } 226 } 227 } 228 } 229 230 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 231 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 232 if (reg_ctx_sp) { 233 RegisterContext *reg_ctx = reg_ctx_sp.get(); 234 235 data.PutHex32(GPRRegSet); // Flavor 236 data.PutHex32(GPRWordCount); 237 PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data); 238 PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data); 239 PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data); 240 PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data); 241 PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data); 242 PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data); 243 PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data); 244 PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data); 245 PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data); 246 PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data); 247 PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data); 248 PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data); 249 PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data); 250 PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data); 251 PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data); 252 PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data); 253 PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data); 254 PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data); 255 PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data); 256 PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data); 257 PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data); 258 259 // // Write out the FPU registers 260 // const size_t fpu_byte_size = sizeof(FPU); 261 // size_t bytes_written = 0; 262 // data.PutHex32 (FPURegSet); 263 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t)); 264 // bytes_written += data.PutHex32(0); // uint32_t pad[0] 265 // bytes_written += data.PutHex32(0); // uint32_t pad[1] 266 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2, 267 // data); // uint16_t fcw; // "fctrl" 268 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2, 269 // data); // uint16_t fsw; // "fstat" 270 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1, 271 // data); // uint8_t ftw; // "ftag" 272 // bytes_written += data.PutHex8 (0); // uint8_t pad1; 273 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2, 274 // data); // uint16_t fop; // "fop" 275 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4, 276 // data); // uint32_t ip; // "fioff" 277 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2, 278 // data); // uint16_t cs; // "fiseg" 279 // bytes_written += data.PutHex16 (0); // uint16_t pad2; 280 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4, 281 // data); // uint32_t dp; // "fooff" 282 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2, 283 // data); // uint16_t ds; // "foseg" 284 // bytes_written += data.PutHex16 (0); // uint16_t pad3; 285 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4, 286 // data); // uint32_t mxcsr; 287 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL, 288 // 4, data);// uint32_t mxcsrmask; 289 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL, 290 // sizeof(MMSReg), data); 291 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL, 292 // sizeof(MMSReg), data); 293 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL, 294 // sizeof(MMSReg), data); 295 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL, 296 // sizeof(MMSReg), data); 297 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL, 298 // sizeof(MMSReg), data); 299 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL, 300 // sizeof(MMSReg), data); 301 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL, 302 // sizeof(MMSReg), data); 303 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL, 304 // sizeof(MMSReg), data); 305 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL, 306 // sizeof(XMMReg), data); 307 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL, 308 // sizeof(XMMReg), data); 309 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL, 310 // sizeof(XMMReg), data); 311 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL, 312 // sizeof(XMMReg), data); 313 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL, 314 // sizeof(XMMReg), data); 315 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL, 316 // sizeof(XMMReg), data); 317 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL, 318 // sizeof(XMMReg), data); 319 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL, 320 // sizeof(XMMReg), data); 321 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL, 322 // sizeof(XMMReg), data); 323 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL, 324 // sizeof(XMMReg), data); 325 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL, 326 // sizeof(XMMReg), data); 327 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL, 328 // sizeof(XMMReg), data); 329 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL, 330 // sizeof(XMMReg), data); 331 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL, 332 // sizeof(XMMReg), data); 333 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL, 334 // sizeof(XMMReg), data); 335 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL, 336 // sizeof(XMMReg), data); 337 // 338 // // Fill rest with zeros 339 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++ 340 // i) 341 // data.PutChar(0); 342 343 // Write out the EXC registers 344 data.PutHex32(EXCRegSet); 345 data.PutHex32(EXCWordCount); 346 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data); 347 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data); 348 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data); 349 return true; 350 } 351 return false; 352 } 353 354 protected: 355 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; } 356 357 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; } 358 359 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; } 360 361 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 362 return 0; 363 } 364 365 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 366 return 0; 367 } 368 369 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 370 return 0; 371 } 372 }; 373 374 class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 { 375 public: 376 RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread, 377 const DataExtractor &data) 378 : RegisterContextDarwin_i386(thread, 0) { 379 SetRegisterDataFrom_LC_THREAD(data); 380 } 381 382 void InvalidateAllRegisters() override { 383 // Do nothing... registers are always valid... 384 } 385 386 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 387 lldb::offset_t offset = 0; 388 SetError(GPRRegSet, Read, -1); 389 SetError(FPURegSet, Read, -1); 390 SetError(EXCRegSet, Read, -1); 391 bool done = false; 392 393 while (!done) { 394 int flavor = data.GetU32(&offset); 395 if (flavor == 0) 396 done = true; 397 else { 398 uint32_t i; 399 uint32_t count = data.GetU32(&offset); 400 switch (flavor) { 401 case GPRRegSet: 402 for (i = 0; i < count; ++i) 403 (&gpr.eax)[i] = data.GetU32(&offset); 404 SetError(GPRRegSet, Read, 0); 405 done = true; 406 407 break; 408 case FPURegSet: 409 // TODO: fill in FPU regs.... 410 // SetError (FPURegSet, Read, -1); 411 done = true; 412 413 break; 414 case EXCRegSet: 415 exc.trapno = data.GetU32(&offset); 416 exc.err = data.GetU32(&offset); 417 exc.faultvaddr = data.GetU32(&offset); 418 SetError(EXCRegSet, Read, 0); 419 done = true; 420 break; 421 case 7: 422 case 8: 423 case 9: 424 // fancy flavors that encapsulate of the above flavors... 425 break; 426 427 default: 428 done = true; 429 break; 430 } 431 } 432 } 433 } 434 435 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 436 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 437 if (reg_ctx_sp) { 438 RegisterContext *reg_ctx = reg_ctx_sp.get(); 439 440 data.PutHex32(GPRRegSet); // Flavor 441 data.PutHex32(GPRWordCount); 442 PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data); 443 PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data); 444 PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data); 445 PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data); 446 PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data); 447 PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data); 448 PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data); 449 PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data); 450 PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data); 451 PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data); 452 PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data); 453 PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data); 454 PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data); 455 PrintRegisterValue(reg_ctx, "es", nullptr, 4, data); 456 PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data); 457 PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data); 458 459 // Write out the EXC registers 460 data.PutHex32(EXCRegSet); 461 data.PutHex32(EXCWordCount); 462 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data); 463 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data); 464 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data); 465 return true; 466 } 467 return false; 468 } 469 470 protected: 471 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; } 472 473 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; } 474 475 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; } 476 477 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 478 return 0; 479 } 480 481 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 482 return 0; 483 } 484 485 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 486 return 0; 487 } 488 }; 489 490 class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm { 491 public: 492 RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread, 493 const DataExtractor &data) 494 : RegisterContextDarwin_arm(thread, 0) { 495 SetRegisterDataFrom_LC_THREAD(data); 496 } 497 498 void InvalidateAllRegisters() override { 499 // Do nothing... registers are always valid... 500 } 501 502 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 503 lldb::offset_t offset = 0; 504 SetError(GPRRegSet, Read, -1); 505 SetError(FPURegSet, Read, -1); 506 SetError(EXCRegSet, Read, -1); 507 bool done = false; 508 509 while (!done) { 510 int flavor = data.GetU32(&offset); 511 uint32_t count = data.GetU32(&offset); 512 lldb::offset_t next_thread_state = offset + (count * 4); 513 switch (flavor) { 514 case GPRAltRegSet: 515 case GPRRegSet: { 516 // r0-r15, plus CPSR 517 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1; 518 if (count == gpr_buf_count) { 519 for (uint32_t i = 0; i < (count - 1); ++i) { 520 gpr.r[i] = data.GetU32(&offset); 521 } 522 gpr.cpsr = data.GetU32(&offset); 523 524 SetError(GPRRegSet, Read, 0); 525 } 526 } 527 offset = next_thread_state; 528 break; 529 530 case FPURegSet: { 531 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats; 532 const int fpu_reg_buf_size = sizeof(fpu.floats); 533 if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle, 534 fpu_reg_buf) == fpu_reg_buf_size) { 535 offset += fpu_reg_buf_size; 536 fpu.fpscr = data.GetU32(&offset); 537 SetError(FPURegSet, Read, 0); 538 } else { 539 done = true; 540 } 541 } 542 offset = next_thread_state; 543 break; 544 545 case EXCRegSet: 546 if (count == 3) { 547 exc.exception = data.GetU32(&offset); 548 exc.fsr = data.GetU32(&offset); 549 exc.far = data.GetU32(&offset); 550 SetError(EXCRegSet, Read, 0); 551 } 552 done = true; 553 offset = next_thread_state; 554 break; 555 556 // Unknown register set flavor, stop trying to parse. 557 default: 558 done = true; 559 } 560 } 561 } 562 563 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 564 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 565 if (reg_ctx_sp) { 566 RegisterContext *reg_ctx = reg_ctx_sp.get(); 567 568 data.PutHex32(GPRRegSet); // Flavor 569 data.PutHex32(GPRWordCount); 570 PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data); 571 PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data); 572 PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data); 573 PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data); 574 PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data); 575 PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data); 576 PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data); 577 PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data); 578 PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data); 579 PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data); 580 PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data); 581 PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data); 582 PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data); 583 PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data); 584 PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data); 585 PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data); 586 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data); 587 588 // Write out the EXC registers 589 // data.PutHex32 (EXCRegSet); 590 // data.PutHex32 (EXCWordCount); 591 // WriteRegister (reg_ctx, "exception", NULL, 4, data); 592 // WriteRegister (reg_ctx, "fsr", NULL, 4, data); 593 // WriteRegister (reg_ctx, "far", NULL, 4, data); 594 return true; 595 } 596 return false; 597 } 598 599 protected: 600 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; } 601 602 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; } 603 604 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; } 605 606 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; } 607 608 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 609 return 0; 610 } 611 612 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 613 return 0; 614 } 615 616 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 617 return 0; 618 } 619 620 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override { 621 return -1; 622 } 623 }; 624 625 class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { 626 public: 627 RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread, 628 const DataExtractor &data) 629 : RegisterContextDarwin_arm64(thread, 0) { 630 SetRegisterDataFrom_LC_THREAD(data); 631 } 632 633 void InvalidateAllRegisters() override { 634 // Do nothing... registers are always valid... 635 } 636 637 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) { 638 lldb::offset_t offset = 0; 639 SetError(GPRRegSet, Read, -1); 640 SetError(FPURegSet, Read, -1); 641 SetError(EXCRegSet, Read, -1); 642 bool done = false; 643 while (!done) { 644 int flavor = data.GetU32(&offset); 645 uint32_t count = data.GetU32(&offset); 646 lldb::offset_t next_thread_state = offset + (count * 4); 647 switch (flavor) { 648 case GPRRegSet: 649 // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1 650 // 32-bit register) 651 if (count >= (33 * 2) + 1) { 652 for (uint32_t i = 0; i < 29; ++i) 653 gpr.x[i] = data.GetU64(&offset); 654 gpr.fp = data.GetU64(&offset); 655 gpr.lr = data.GetU64(&offset); 656 gpr.sp = data.GetU64(&offset); 657 gpr.pc = data.GetU64(&offset); 658 gpr.cpsr = data.GetU32(&offset); 659 SetError(GPRRegSet, Read, 0); 660 } 661 offset = next_thread_state; 662 break; 663 case FPURegSet: { 664 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0]; 665 const int fpu_reg_buf_size = sizeof(fpu); 666 if (fpu_reg_buf_size == count * sizeof(uint32_t) && 667 data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle, 668 fpu_reg_buf) == fpu_reg_buf_size) { 669 SetError(FPURegSet, Read, 0); 670 } else { 671 done = true; 672 } 673 } 674 offset = next_thread_state; 675 break; 676 case EXCRegSet: 677 if (count == 4) { 678 exc.far = data.GetU64(&offset); 679 exc.esr = data.GetU32(&offset); 680 exc.exception = data.GetU32(&offset); 681 SetError(EXCRegSet, Read, 0); 682 } 683 offset = next_thread_state; 684 break; 685 default: 686 done = true; 687 break; 688 } 689 } 690 } 691 692 static bool Create_LC_THREAD(Thread *thread, Stream &data) { 693 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext()); 694 if (reg_ctx_sp) { 695 RegisterContext *reg_ctx = reg_ctx_sp.get(); 696 697 data.PutHex32(GPRRegSet); // Flavor 698 data.PutHex32(GPRWordCount); 699 PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data); 700 PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data); 701 PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data); 702 PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data); 703 PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data); 704 PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data); 705 PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data); 706 PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data); 707 PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data); 708 PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data); 709 PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data); 710 PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data); 711 PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data); 712 PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data); 713 PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data); 714 PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data); 715 PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data); 716 PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data); 717 PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data); 718 PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data); 719 PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data); 720 PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data); 721 PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data); 722 PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data); 723 PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data); 724 PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data); 725 PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data); 726 PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data); 727 PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data); 728 PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data); 729 PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data); 730 PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data); 731 PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data); 732 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data); 733 data.PutHex32(0); // uint32_t pad at the end 734 735 // Write out the EXC registers 736 data.PutHex32(EXCRegSet); 737 data.PutHex32(EXCWordCount); 738 PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); 739 PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); 740 PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); 741 return true; 742 } 743 return false; 744 } 745 746 protected: 747 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; } 748 749 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; } 750 751 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; } 752 753 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; } 754 755 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override { 756 return 0; 757 } 758 759 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override { 760 return 0; 761 } 762 763 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override { 764 return 0; 765 } 766 767 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override { 768 return -1; 769 } 770 }; 771 772 static uint32_t MachHeaderSizeFromMagic(uint32_t magic) { 773 switch (magic) { 774 case MH_MAGIC: 775 case MH_CIGAM: 776 return sizeof(struct llvm::MachO::mach_header); 777 778 case MH_MAGIC_64: 779 case MH_CIGAM_64: 780 return sizeof(struct llvm::MachO::mach_header_64); 781 break; 782 783 default: 784 break; 785 } 786 return 0; 787 } 788 789 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008 790 791 char ObjectFileMachO::ID; 792 793 void ObjectFileMachO::Initialize() { 794 PluginManager::RegisterPlugin( 795 GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance, 796 CreateMemoryInstance, GetModuleSpecifications, SaveCore); 797 } 798 799 void ObjectFileMachO::Terminate() { 800 PluginManager::UnregisterPlugin(CreateInstance); 801 } 802 803 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp, 804 DataBufferSP data_sp, 805 lldb::offset_t data_offset, 806 const FileSpec *file, 807 lldb::offset_t file_offset, 808 lldb::offset_t length) { 809 if (!data_sp) { 810 data_sp = MapFileData(*file, length, file_offset); 811 if (!data_sp) 812 return nullptr; 813 data_offset = 0; 814 } 815 816 if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length)) 817 return nullptr; 818 819 // Update the data to contain the entire file if it doesn't already 820 if (data_sp->GetByteSize() < length) { 821 data_sp = MapFileData(*file, length, file_offset); 822 if (!data_sp) 823 return nullptr; 824 data_offset = 0; 825 } 826 auto objfile_up = std::make_unique<ObjectFileMachO>( 827 module_sp, data_sp, data_offset, file, file_offset, length); 828 if (!objfile_up || !objfile_up->ParseHeader()) 829 return nullptr; 830 831 return objfile_up.release(); 832 } 833 834 ObjectFile *ObjectFileMachO::CreateMemoryInstance( 835 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp, 836 const ProcessSP &process_sp, lldb::addr_t header_addr) { 837 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) { 838 std::unique_ptr<ObjectFile> objfile_up( 839 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr)); 840 if (objfile_up.get() && objfile_up->ParseHeader()) 841 return objfile_up.release(); 842 } 843 return nullptr; 844 } 845 846 size_t ObjectFileMachO::GetModuleSpecifications( 847 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, 848 lldb::offset_t data_offset, lldb::offset_t file_offset, 849 lldb::offset_t length, lldb_private::ModuleSpecList &specs) { 850 const size_t initial_count = specs.GetSize(); 851 852 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) { 853 DataExtractor data; 854 data.SetData(data_sp); 855 llvm::MachO::mach_header header; 856 if (ParseHeader(data, &data_offset, header)) { 857 size_t header_and_load_cmds = 858 header.sizeofcmds + MachHeaderSizeFromMagic(header.magic); 859 if (header_and_load_cmds >= data_sp->GetByteSize()) { 860 data_sp = MapFileData(file, header_and_load_cmds, file_offset); 861 data.SetData(data_sp); 862 data_offset = MachHeaderSizeFromMagic(header.magic); 863 } 864 if (data_sp) { 865 ModuleSpec base_spec; 866 base_spec.GetFileSpec() = file; 867 base_spec.SetObjectOffset(file_offset); 868 base_spec.SetObjectSize(length); 869 GetAllArchSpecs(header, data, data_offset, base_spec, specs); 870 } 871 } 872 } 873 return specs.GetSize() - initial_count; 874 } 875 876 ConstString ObjectFileMachO::GetSegmentNameTEXT() { 877 static ConstString g_segment_name_TEXT("__TEXT"); 878 return g_segment_name_TEXT; 879 } 880 881 ConstString ObjectFileMachO::GetSegmentNameDATA() { 882 static ConstString g_segment_name_DATA("__DATA"); 883 return g_segment_name_DATA; 884 } 885 886 ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() { 887 static ConstString g_segment_name("__DATA_DIRTY"); 888 return g_segment_name; 889 } 890 891 ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() { 892 static ConstString g_segment_name("__DATA_CONST"); 893 return g_segment_name; 894 } 895 896 ConstString ObjectFileMachO::GetSegmentNameOBJC() { 897 static ConstString g_segment_name_OBJC("__OBJC"); 898 return g_segment_name_OBJC; 899 } 900 901 ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() { 902 static ConstString g_section_name_LINKEDIT("__LINKEDIT"); 903 return g_section_name_LINKEDIT; 904 } 905 906 ConstString ObjectFileMachO::GetSegmentNameDWARF() { 907 static ConstString g_section_name("__DWARF"); 908 return g_section_name; 909 } 910 911 ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() { 912 static ConstString g_section_name("__LLVM_COV"); 913 return g_section_name; 914 } 915 916 ConstString ObjectFileMachO::GetSectionNameEHFrame() { 917 static ConstString g_section_name_eh_frame("__eh_frame"); 918 return g_section_name_eh_frame; 919 } 920 921 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp, 922 lldb::addr_t data_offset, 923 lldb::addr_t data_length) { 924 DataExtractor data; 925 data.SetData(data_sp, data_offset, data_length); 926 lldb::offset_t offset = 0; 927 uint32_t magic = data.GetU32(&offset); 928 929 offset += 4; // cputype 930 offset += 4; // cpusubtype 931 uint32_t filetype = data.GetU32(&offset); 932 933 // A fileset has a Mach-O header but is not an 934 // individual file and must be handled via an 935 // ObjectContainer plugin. 936 if (filetype == llvm::MachO::MH_FILESET) 937 return false; 938 939 return MachHeaderSizeFromMagic(magic) != 0; 940 } 941 942 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp, 943 DataBufferSP data_sp, 944 lldb::offset_t data_offset, 945 const FileSpec *file, 946 lldb::offset_t file_offset, 947 lldb::offset_t length) 948 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset), 949 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(), 950 m_thread_context_offsets_valid(false), m_reexported_dylibs(), 951 m_allow_assembly_emulation_unwind_plans(true) { 952 ::memset(&m_header, 0, sizeof(m_header)); 953 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 954 } 955 956 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp, 957 lldb::WritableDataBufferSP header_data_sp, 958 const lldb::ProcessSP &process_sp, 959 lldb::addr_t header_addr) 960 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), 961 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(), 962 m_thread_context_offsets_valid(false), m_reexported_dylibs(), 963 m_allow_assembly_emulation_unwind_plans(true) { 964 ::memset(&m_header, 0, sizeof(m_header)); 965 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 966 } 967 968 bool ObjectFileMachO::ParseHeader(DataExtractor &data, 969 lldb::offset_t *data_offset_ptr, 970 llvm::MachO::mach_header &header) { 971 data.SetByteOrder(endian::InlHostByteOrder()); 972 // Leave magic in the original byte order 973 header.magic = data.GetU32(data_offset_ptr); 974 bool can_parse = false; 975 bool is_64_bit = false; 976 switch (header.magic) { 977 case MH_MAGIC: 978 data.SetByteOrder(endian::InlHostByteOrder()); 979 data.SetAddressByteSize(4); 980 can_parse = true; 981 break; 982 983 case MH_MAGIC_64: 984 data.SetByteOrder(endian::InlHostByteOrder()); 985 data.SetAddressByteSize(8); 986 can_parse = true; 987 is_64_bit = true; 988 break; 989 990 case MH_CIGAM: 991 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 992 ? eByteOrderLittle 993 : eByteOrderBig); 994 data.SetAddressByteSize(4); 995 can_parse = true; 996 break; 997 998 case MH_CIGAM_64: 999 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1000 ? eByteOrderLittle 1001 : eByteOrderBig); 1002 data.SetAddressByteSize(8); 1003 is_64_bit = true; 1004 can_parse = true; 1005 break; 1006 1007 default: 1008 break; 1009 } 1010 1011 if (can_parse) { 1012 data.GetU32(data_offset_ptr, &header.cputype, 6); 1013 if (is_64_bit) 1014 *data_offset_ptr += 4; 1015 return true; 1016 } else { 1017 memset(&header, 0, sizeof(header)); 1018 } 1019 return false; 1020 } 1021 1022 bool ObjectFileMachO::ParseHeader() { 1023 ModuleSP module_sp(GetModule()); 1024 if (!module_sp) 1025 return false; 1026 1027 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 1028 bool can_parse = false; 1029 lldb::offset_t offset = 0; 1030 m_data.SetByteOrder(endian::InlHostByteOrder()); 1031 // Leave magic in the original byte order 1032 m_header.magic = m_data.GetU32(&offset); 1033 switch (m_header.magic) { 1034 case MH_MAGIC: 1035 m_data.SetByteOrder(endian::InlHostByteOrder()); 1036 m_data.SetAddressByteSize(4); 1037 can_parse = true; 1038 break; 1039 1040 case MH_MAGIC_64: 1041 m_data.SetByteOrder(endian::InlHostByteOrder()); 1042 m_data.SetAddressByteSize(8); 1043 can_parse = true; 1044 break; 1045 1046 case MH_CIGAM: 1047 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1048 ? eByteOrderLittle 1049 : eByteOrderBig); 1050 m_data.SetAddressByteSize(4); 1051 can_parse = true; 1052 break; 1053 1054 case MH_CIGAM_64: 1055 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig 1056 ? eByteOrderLittle 1057 : eByteOrderBig); 1058 m_data.SetAddressByteSize(8); 1059 can_parse = true; 1060 break; 1061 1062 default: 1063 break; 1064 } 1065 1066 if (can_parse) { 1067 m_data.GetU32(&offset, &m_header.cputype, 6); 1068 1069 ModuleSpecList all_specs; 1070 ModuleSpec base_spec; 1071 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 1072 base_spec, all_specs); 1073 1074 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 1075 ArchSpec mach_arch = 1076 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 1077 1078 // Check if the module has a required architecture 1079 const ArchSpec &module_arch = module_sp->GetArchitecture(); 1080 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch)) 1081 continue; 1082 1083 if (SetModulesArchitecture(mach_arch)) { 1084 const size_t header_and_lc_size = 1085 m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic); 1086 if (m_data.GetByteSize() < header_and_lc_size) { 1087 DataBufferSP data_sp; 1088 ProcessSP process_sp(m_process_wp.lock()); 1089 if (process_sp) { 1090 data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size); 1091 } else { 1092 // Read in all only the load command data from the file on disk 1093 data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset); 1094 if (data_sp->GetByteSize() != header_and_lc_size) 1095 continue; 1096 } 1097 if (data_sp) 1098 m_data.SetData(data_sp); 1099 } 1100 } 1101 return true; 1102 } 1103 // None found. 1104 return false; 1105 } else { 1106 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header)); 1107 } 1108 return false; 1109 } 1110 1111 ByteOrder ObjectFileMachO::GetByteOrder() const { 1112 return m_data.GetByteOrder(); 1113 } 1114 1115 bool ObjectFileMachO::IsExecutable() const { 1116 return m_header.filetype == MH_EXECUTE; 1117 } 1118 1119 bool ObjectFileMachO::IsDynamicLoader() const { 1120 return m_header.filetype == MH_DYLINKER; 1121 } 1122 1123 bool ObjectFileMachO::IsSharedCacheBinary() const { 1124 return m_header.flags & MH_DYLIB_IN_CACHE; 1125 } 1126 1127 bool ObjectFileMachO::IsKext() const { 1128 return m_header.filetype == MH_KEXT_BUNDLE; 1129 } 1130 1131 uint32_t ObjectFileMachO::GetAddressByteSize() const { 1132 return m_data.GetAddressByteSize(); 1133 } 1134 1135 AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { 1136 Symtab *symtab = GetSymtab(); 1137 if (!symtab) 1138 return AddressClass::eUnknown; 1139 1140 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr); 1141 if (symbol) { 1142 if (symbol->ValueIsAddress()) { 1143 SectionSP section_sp(symbol->GetAddressRef().GetSection()); 1144 if (section_sp) { 1145 const lldb::SectionType section_type = section_sp->GetType(); 1146 switch (section_type) { 1147 case eSectionTypeInvalid: 1148 return AddressClass::eUnknown; 1149 1150 case eSectionTypeCode: 1151 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1152 // For ARM we have a bit in the n_desc field of the symbol that 1153 // tells us ARM/Thumb which is bit 0x0008. 1154 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB) 1155 return AddressClass::eCodeAlternateISA; 1156 } 1157 return AddressClass::eCode; 1158 1159 case eSectionTypeContainer: 1160 return AddressClass::eUnknown; 1161 1162 case eSectionTypeData: 1163 case eSectionTypeDataCString: 1164 case eSectionTypeDataCStringPointers: 1165 case eSectionTypeDataSymbolAddress: 1166 case eSectionTypeData4: 1167 case eSectionTypeData8: 1168 case eSectionTypeData16: 1169 case eSectionTypeDataPointers: 1170 case eSectionTypeZeroFill: 1171 case eSectionTypeDataObjCMessageRefs: 1172 case eSectionTypeDataObjCCFStrings: 1173 case eSectionTypeGoSymtab: 1174 return AddressClass::eData; 1175 1176 case eSectionTypeDebug: 1177 case eSectionTypeDWARFDebugAbbrev: 1178 case eSectionTypeDWARFDebugAbbrevDwo: 1179 case eSectionTypeDWARFDebugAddr: 1180 case eSectionTypeDWARFDebugAranges: 1181 case eSectionTypeDWARFDebugCuIndex: 1182 case eSectionTypeDWARFDebugFrame: 1183 case eSectionTypeDWARFDebugInfo: 1184 case eSectionTypeDWARFDebugInfoDwo: 1185 case eSectionTypeDWARFDebugLine: 1186 case eSectionTypeDWARFDebugLineStr: 1187 case eSectionTypeDWARFDebugLoc: 1188 case eSectionTypeDWARFDebugLocDwo: 1189 case eSectionTypeDWARFDebugLocLists: 1190 case eSectionTypeDWARFDebugLocListsDwo: 1191 case eSectionTypeDWARFDebugMacInfo: 1192 case eSectionTypeDWARFDebugMacro: 1193 case eSectionTypeDWARFDebugNames: 1194 case eSectionTypeDWARFDebugPubNames: 1195 case eSectionTypeDWARFDebugPubTypes: 1196 case eSectionTypeDWARFDebugRanges: 1197 case eSectionTypeDWARFDebugRngLists: 1198 case eSectionTypeDWARFDebugRngListsDwo: 1199 case eSectionTypeDWARFDebugStr: 1200 case eSectionTypeDWARFDebugStrDwo: 1201 case eSectionTypeDWARFDebugStrOffsets: 1202 case eSectionTypeDWARFDebugStrOffsetsDwo: 1203 case eSectionTypeDWARFDebugTuIndex: 1204 case eSectionTypeDWARFDebugTypes: 1205 case eSectionTypeDWARFDebugTypesDwo: 1206 case eSectionTypeDWARFAppleNames: 1207 case eSectionTypeDWARFAppleTypes: 1208 case eSectionTypeDWARFAppleNamespaces: 1209 case eSectionTypeDWARFAppleObjC: 1210 case eSectionTypeDWARFGNUDebugAltLink: 1211 case eSectionTypeCTF: 1212 case eSectionTypeLLDBTypeSummaries: 1213 case eSectionTypeLLDBFormatters: 1214 case eSectionTypeSwiftModules: 1215 return AddressClass::eDebug; 1216 1217 case eSectionTypeEHFrame: 1218 case eSectionTypeARMexidx: 1219 case eSectionTypeARMextab: 1220 case eSectionTypeCompactUnwind: 1221 return AddressClass::eRuntime; 1222 1223 case eSectionTypeAbsoluteAddress: 1224 case eSectionTypeELFSymbolTable: 1225 case eSectionTypeELFDynamicSymbols: 1226 case eSectionTypeELFRelocationEntries: 1227 case eSectionTypeELFDynamicLinkInfo: 1228 case eSectionTypeOther: 1229 return AddressClass::eUnknown; 1230 } 1231 } 1232 } 1233 1234 const SymbolType symbol_type = symbol->GetType(); 1235 switch (symbol_type) { 1236 case eSymbolTypeAny: 1237 return AddressClass::eUnknown; 1238 case eSymbolTypeAbsolute: 1239 return AddressClass::eUnknown; 1240 1241 case eSymbolTypeCode: 1242 case eSymbolTypeTrampoline: 1243 case eSymbolTypeResolver: 1244 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) { 1245 // For ARM we have a bit in the n_desc field of the symbol that tells 1246 // us ARM/Thumb which is bit 0x0008. 1247 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB) 1248 return AddressClass::eCodeAlternateISA; 1249 } 1250 return AddressClass::eCode; 1251 1252 case eSymbolTypeData: 1253 return AddressClass::eData; 1254 case eSymbolTypeRuntime: 1255 return AddressClass::eRuntime; 1256 case eSymbolTypeException: 1257 return AddressClass::eRuntime; 1258 case eSymbolTypeSourceFile: 1259 return AddressClass::eDebug; 1260 case eSymbolTypeHeaderFile: 1261 return AddressClass::eDebug; 1262 case eSymbolTypeObjectFile: 1263 return AddressClass::eDebug; 1264 case eSymbolTypeCommonBlock: 1265 return AddressClass::eDebug; 1266 case eSymbolTypeBlock: 1267 return AddressClass::eDebug; 1268 case eSymbolTypeLocal: 1269 return AddressClass::eData; 1270 case eSymbolTypeParam: 1271 return AddressClass::eData; 1272 case eSymbolTypeVariable: 1273 return AddressClass::eData; 1274 case eSymbolTypeVariableType: 1275 return AddressClass::eDebug; 1276 case eSymbolTypeLineEntry: 1277 return AddressClass::eDebug; 1278 case eSymbolTypeLineHeader: 1279 return AddressClass::eDebug; 1280 case eSymbolTypeScopeBegin: 1281 return AddressClass::eDebug; 1282 case eSymbolTypeScopeEnd: 1283 return AddressClass::eDebug; 1284 case eSymbolTypeAdditional: 1285 return AddressClass::eUnknown; 1286 case eSymbolTypeCompiler: 1287 return AddressClass::eDebug; 1288 case eSymbolTypeInstrumentation: 1289 return AddressClass::eDebug; 1290 case eSymbolTypeUndefined: 1291 return AddressClass::eUnknown; 1292 case eSymbolTypeObjCClass: 1293 return AddressClass::eRuntime; 1294 case eSymbolTypeObjCMetaClass: 1295 return AddressClass::eRuntime; 1296 case eSymbolTypeObjCIVar: 1297 return AddressClass::eRuntime; 1298 case eSymbolTypeReExported: 1299 return AddressClass::eRuntime; 1300 } 1301 } 1302 return AddressClass::eUnknown; 1303 } 1304 1305 bool ObjectFileMachO::IsStripped() { 1306 if (m_dysymtab.cmd == 0) { 1307 ModuleSP module_sp(GetModule()); 1308 if (module_sp) { 1309 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1310 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1311 const lldb::offset_t load_cmd_offset = offset; 1312 1313 llvm::MachO::load_command lc = {}; 1314 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 1315 break; 1316 if (lc.cmd == LC_DYSYMTAB) { 1317 m_dysymtab.cmd = lc.cmd; 1318 m_dysymtab.cmdsize = lc.cmdsize; 1319 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1320 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) == 1321 nullptr) { 1322 // Clear m_dysymtab if we were unable to read all items from the 1323 // load command 1324 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab)); 1325 } 1326 } 1327 offset = load_cmd_offset + lc.cmdsize; 1328 } 1329 } 1330 } 1331 if (m_dysymtab.cmd) 1332 return m_dysymtab.nlocalsym <= 1; 1333 return false; 1334 } 1335 1336 ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() { 1337 EncryptedFileRanges result; 1338 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1339 1340 llvm::MachO::encryption_info_command encryption_cmd; 1341 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1342 const lldb::offset_t load_cmd_offset = offset; 1343 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr) 1344 break; 1345 1346 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the 1347 // 3 fields we care about, so treat them the same. 1348 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO || 1349 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) { 1350 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) { 1351 if (encryption_cmd.cryptid != 0) { 1352 EncryptedFileRanges::Entry entry; 1353 entry.SetRangeBase(encryption_cmd.cryptoff); 1354 entry.SetByteSize(encryption_cmd.cryptsize); 1355 result.Append(entry); 1356 } 1357 } 1358 } 1359 offset = load_cmd_offset + encryption_cmd.cmdsize; 1360 } 1361 1362 return result; 1363 } 1364 1365 void ObjectFileMachO::SanitizeSegmentCommand( 1366 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) { 1367 if (m_length == 0 || seg_cmd.filesize == 0) 1368 return; 1369 1370 if (IsSharedCacheBinary() && !IsInMemory()) { 1371 // In shared cache images, the load commands are relative to the 1372 // shared cache file, and not the specific image we are 1373 // examining. Let's fix this up so that it looks like a normal 1374 // image. 1375 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(), 1376 sizeof(seg_cmd.segname)) == 0) 1377 m_text_address = seg_cmd.vmaddr; 1378 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(), 1379 sizeof(seg_cmd.segname)) == 0) 1380 m_linkedit_original_offset = seg_cmd.fileoff; 1381 1382 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address; 1383 } 1384 1385 if (seg_cmd.fileoff > m_length) { 1386 // We have a load command that says it extends past the end of the file. 1387 // This is likely a corrupt file. We don't have any way to return an error 1388 // condition here (this method was likely invoked from something like 1389 // ObjectFile::GetSectionList()), so we just null out the section contents, 1390 // and dump a message to stdout. The most common case here is core file 1391 // debugging with a truncated file. 1392 const char *lc_segment_name = 1393 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1394 GetModule()->ReportWarning( 1395 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond " 1396 "the end of the file ({3:x16}), ignoring this section", 1397 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length); 1398 1399 seg_cmd.fileoff = 0; 1400 seg_cmd.filesize = 0; 1401 } 1402 1403 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) { 1404 // We have a load command that says it extends past the end of the file. 1405 // This is likely a corrupt file. We don't have any way to return an error 1406 // condition here (this method was likely invoked from something like 1407 // ObjectFile::GetSectionList()), so we just null out the section contents, 1408 // and dump a message to stdout. The most common case here is core file 1409 // debugging with a truncated file. 1410 const char *lc_segment_name = 1411 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; 1412 GetModule()->ReportWarning( 1413 "load command {0} {1} has a fileoff + filesize ({2:x16}) that " 1414 "extends beyond the end of the file ({3:x16}), the segment will be " 1415 "truncated to match", 1416 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length); 1417 1418 // Truncate the length 1419 seg_cmd.filesize = m_length - seg_cmd.fileoff; 1420 } 1421 } 1422 1423 static uint32_t 1424 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) { 1425 uint32_t result = 0; 1426 if (seg_cmd.initprot & VM_PROT_READ) 1427 result |= ePermissionsReadable; 1428 if (seg_cmd.initprot & VM_PROT_WRITE) 1429 result |= ePermissionsWritable; 1430 if (seg_cmd.initprot & VM_PROT_EXECUTE) 1431 result |= ePermissionsExecutable; 1432 return result; 1433 } 1434 1435 static lldb::SectionType GetSectionType(uint32_t flags, 1436 ConstString section_name) { 1437 1438 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS)) 1439 return eSectionTypeCode; 1440 1441 uint32_t mach_sect_type = flags & SECTION_TYPE; 1442 static ConstString g_sect_name_objc_data("__objc_data"); 1443 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs"); 1444 static ConstString g_sect_name_objc_selrefs("__objc_selrefs"); 1445 static ConstString g_sect_name_objc_classrefs("__objc_classrefs"); 1446 static ConstString g_sect_name_objc_superrefs("__objc_superrefs"); 1447 static ConstString g_sect_name_objc_const("__objc_const"); 1448 static ConstString g_sect_name_objc_classlist("__objc_classlist"); 1449 static ConstString g_sect_name_cfstring("__cfstring"); 1450 1451 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev"); 1452 static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo"); 1453 static ConstString g_sect_name_dwarf_debug_addr("__debug_addr"); 1454 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges"); 1455 static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index"); 1456 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame"); 1457 static ConstString g_sect_name_dwarf_debug_info("__debug_info"); 1458 static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo"); 1459 static ConstString g_sect_name_dwarf_debug_line("__debug_line"); 1460 static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo"); 1461 static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str"); 1462 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc"); 1463 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists"); 1464 static ConstString g_sect_name_dwarf_debug_loclists_dwo("__debug_loclists.dwo"); 1465 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo"); 1466 static ConstString g_sect_name_dwarf_debug_macro("__debug_macro"); 1467 static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo"); 1468 static ConstString g_sect_name_dwarf_debug_names("__debug_names"); 1469 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames"); 1470 static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes"); 1471 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges"); 1472 static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists"); 1473 static ConstString g_sect_name_dwarf_debug_str("__debug_str"); 1474 static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo"); 1475 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs"); 1476 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo"); 1477 static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index"); 1478 static ConstString g_sect_name_dwarf_debug_types("__debug_types"); 1479 static ConstString g_sect_name_dwarf_apple_names("__apple_names"); 1480 static ConstString g_sect_name_dwarf_apple_types("__apple_types"); 1481 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac"); 1482 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc"); 1483 static ConstString g_sect_name_eh_frame("__eh_frame"); 1484 static ConstString g_sect_name_compact_unwind("__unwind_info"); 1485 static ConstString g_sect_name_text("__text"); 1486 static ConstString g_sect_name_data("__data"); 1487 static ConstString g_sect_name_go_symtab("__gosymtab"); 1488 static ConstString g_sect_name_ctf("__ctf"); 1489 static ConstString g_sect_name_lldb_summaries("__lldbsummaries"); 1490 static ConstString g_sect_name_lldb_formatters("__lldbformatters"); 1491 static ConstString g_sect_name_swift_ast("__swift_ast"); 1492 1493 if (section_name == g_sect_name_dwarf_debug_abbrev) 1494 return eSectionTypeDWARFDebugAbbrev; 1495 if (section_name == g_sect_name_dwarf_debug_abbrev_dwo) 1496 return eSectionTypeDWARFDebugAbbrevDwo; 1497 if (section_name == g_sect_name_dwarf_debug_addr) 1498 return eSectionTypeDWARFDebugAddr; 1499 if (section_name == g_sect_name_dwarf_debug_aranges) 1500 return eSectionTypeDWARFDebugAranges; 1501 if (section_name == g_sect_name_dwarf_debug_cu_index) 1502 return eSectionTypeDWARFDebugCuIndex; 1503 if (section_name == g_sect_name_dwarf_debug_frame) 1504 return eSectionTypeDWARFDebugFrame; 1505 if (section_name == g_sect_name_dwarf_debug_info) 1506 return eSectionTypeDWARFDebugInfo; 1507 if (section_name == g_sect_name_dwarf_debug_info_dwo) 1508 return eSectionTypeDWARFDebugInfoDwo; 1509 if (section_name == g_sect_name_dwarf_debug_line) 1510 return eSectionTypeDWARFDebugLine; 1511 if (section_name == g_sect_name_dwarf_debug_line_dwo) 1512 return eSectionTypeDWARFDebugLine; // Same as debug_line. 1513 if (section_name == g_sect_name_dwarf_debug_line_str) 1514 return eSectionTypeDWARFDebugLineStr; 1515 if (section_name == g_sect_name_dwarf_debug_loc) 1516 return eSectionTypeDWARFDebugLoc; 1517 if (section_name == g_sect_name_dwarf_debug_loclists) 1518 return eSectionTypeDWARFDebugLocLists; 1519 if (section_name == g_sect_name_dwarf_debug_loclists_dwo) 1520 return eSectionTypeDWARFDebugLocListsDwo; 1521 if (section_name == g_sect_name_dwarf_debug_macinfo) 1522 return eSectionTypeDWARFDebugMacInfo; 1523 if (section_name == g_sect_name_dwarf_debug_macro) 1524 return eSectionTypeDWARFDebugMacro; 1525 if (section_name == g_sect_name_dwarf_debug_macro_dwo) 1526 return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro. 1527 if (section_name == g_sect_name_dwarf_debug_names) 1528 return eSectionTypeDWARFDebugNames; 1529 if (section_name == g_sect_name_dwarf_debug_pubnames) 1530 return eSectionTypeDWARFDebugPubNames; 1531 if (section_name == g_sect_name_dwarf_debug_pubtypes) 1532 return eSectionTypeDWARFDebugPubTypes; 1533 if (section_name == g_sect_name_dwarf_debug_ranges) 1534 return eSectionTypeDWARFDebugRanges; 1535 if (section_name == g_sect_name_dwarf_debug_rnglists) 1536 return eSectionTypeDWARFDebugRngLists; 1537 if (section_name == g_sect_name_dwarf_debug_str) 1538 return eSectionTypeDWARFDebugStr; 1539 if (section_name == g_sect_name_dwarf_debug_str_dwo) 1540 return eSectionTypeDWARFDebugStrDwo; 1541 if (section_name == g_sect_name_dwarf_debug_str_offs) 1542 return eSectionTypeDWARFDebugStrOffsets; 1543 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo) 1544 return eSectionTypeDWARFDebugStrOffsetsDwo; 1545 if (section_name == g_sect_name_dwarf_debug_tu_index) 1546 return eSectionTypeDWARFDebugTuIndex; 1547 if (section_name == g_sect_name_dwarf_debug_types) 1548 return eSectionTypeDWARFDebugTypes; 1549 if (section_name == g_sect_name_dwarf_apple_names) 1550 return eSectionTypeDWARFAppleNames; 1551 if (section_name == g_sect_name_dwarf_apple_types) 1552 return eSectionTypeDWARFAppleTypes; 1553 if (section_name == g_sect_name_dwarf_apple_namespaces) 1554 return eSectionTypeDWARFAppleNamespaces; 1555 if (section_name == g_sect_name_dwarf_apple_objc) 1556 return eSectionTypeDWARFAppleObjC; 1557 if (section_name == g_sect_name_objc_selrefs) 1558 return eSectionTypeDataCStringPointers; 1559 if (section_name == g_sect_name_objc_msgrefs) 1560 return eSectionTypeDataObjCMessageRefs; 1561 if (section_name == g_sect_name_eh_frame) 1562 return eSectionTypeEHFrame; 1563 if (section_name == g_sect_name_compact_unwind) 1564 return eSectionTypeCompactUnwind; 1565 if (section_name == g_sect_name_cfstring) 1566 return eSectionTypeDataObjCCFStrings; 1567 if (section_name == g_sect_name_go_symtab) 1568 return eSectionTypeGoSymtab; 1569 if (section_name == g_sect_name_ctf) 1570 return eSectionTypeCTF; 1571 if (section_name == g_sect_name_lldb_summaries) 1572 return lldb::eSectionTypeLLDBTypeSummaries; 1573 if (section_name == g_sect_name_lldb_formatters) 1574 return lldb::eSectionTypeLLDBFormatters; 1575 if (section_name == g_sect_name_swift_ast) 1576 return eSectionTypeSwiftModules; 1577 if (section_name == g_sect_name_objc_data || 1578 section_name == g_sect_name_objc_classrefs || 1579 section_name == g_sect_name_objc_superrefs || 1580 section_name == g_sect_name_objc_const || 1581 section_name == g_sect_name_objc_classlist) { 1582 return eSectionTypeDataPointers; 1583 } 1584 1585 switch (mach_sect_type) { 1586 // TODO: categorize sections by other flags for regular sections 1587 case S_REGULAR: 1588 if (section_name == g_sect_name_text) 1589 return eSectionTypeCode; 1590 if (section_name == g_sect_name_data) 1591 return eSectionTypeData; 1592 return eSectionTypeOther; 1593 case S_ZEROFILL: 1594 return eSectionTypeZeroFill; 1595 case S_CSTRING_LITERALS: // section with only literal C strings 1596 return eSectionTypeDataCString; 1597 case S_4BYTE_LITERALS: // section with only 4 byte literals 1598 return eSectionTypeData4; 1599 case S_8BYTE_LITERALS: // section with only 8 byte literals 1600 return eSectionTypeData8; 1601 case S_LITERAL_POINTERS: // section with only pointers to literals 1602 return eSectionTypeDataPointers; 1603 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers 1604 return eSectionTypeDataPointers; 1605 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers 1606 return eSectionTypeDataPointers; 1607 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in 1608 // the reserved2 field 1609 return eSectionTypeCode; 1610 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for 1611 // initialization 1612 return eSectionTypeDataPointers; 1613 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for 1614 // termination 1615 return eSectionTypeDataPointers; 1616 case S_COALESCED: 1617 return eSectionTypeOther; 1618 case S_GB_ZEROFILL: 1619 return eSectionTypeZeroFill; 1620 case S_INTERPOSING: // section with only pairs of function pointers for 1621 // interposing 1622 return eSectionTypeCode; 1623 case S_16BYTE_LITERALS: // section with only 16 byte literals 1624 return eSectionTypeData16; 1625 case S_DTRACE_DOF: 1626 return eSectionTypeDebug; 1627 case S_LAZY_DYLIB_SYMBOL_POINTERS: 1628 return eSectionTypeDataPointers; 1629 default: 1630 return eSectionTypeOther; 1631 } 1632 } 1633 1634 struct ObjectFileMachO::SegmentParsingContext { 1635 const EncryptedFileRanges EncryptedRanges; 1636 lldb_private::SectionList &UnifiedList; 1637 uint32_t NextSegmentIdx = 0; 1638 uint32_t NextSectionIdx = 0; 1639 bool FileAddressesChanged = false; 1640 1641 SegmentParsingContext(EncryptedFileRanges EncryptedRanges, 1642 lldb_private::SectionList &UnifiedList) 1643 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {} 1644 }; 1645 1646 void ObjectFileMachO::ProcessSegmentCommand( 1647 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset, 1648 uint32_t cmd_idx, SegmentParsingContext &context) { 1649 llvm::MachO::segment_command_64 load_cmd; 1650 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_)); 1651 1652 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16)) 1653 return; 1654 1655 ModuleSP module_sp = GetModule(); 1656 const bool is_core = GetType() == eTypeCoreFile; 1657 const bool is_dsym = (m_header.filetype == MH_DSYM); 1658 bool add_section = true; 1659 bool add_to_unified = true; 1660 ConstString const_segname( 1661 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname))); 1662 1663 SectionSP unified_section_sp( 1664 context.UnifiedList.FindSectionByName(const_segname)); 1665 if (is_dsym && unified_section_sp) { 1666 if (const_segname == GetSegmentNameLINKEDIT()) { 1667 // We need to keep the __LINKEDIT segment private to this object file 1668 // only 1669 add_to_unified = false; 1670 } else { 1671 // This is the dSYM file and this section has already been created by the 1672 // object file, no need to create it. 1673 add_section = false; 1674 } 1675 } 1676 load_cmd.vmaddr = m_data.GetAddress(&offset); 1677 load_cmd.vmsize = m_data.GetAddress(&offset); 1678 load_cmd.fileoff = m_data.GetAddress(&offset); 1679 load_cmd.filesize = m_data.GetAddress(&offset); 1680 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4)) 1681 return; 1682 1683 SanitizeSegmentCommand(load_cmd, cmd_idx); 1684 1685 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd); 1686 const bool segment_is_encrypted = 1687 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0; 1688 1689 // Use a segment ID of the segment index shifted left by 8 so they never 1690 // conflict with any of the sections. 1691 SectionSP segment_sp; 1692 if (add_section && (const_segname || is_core)) { 1693 segment_sp = std::make_shared<Section>( 1694 module_sp, // Module to which this section belongs 1695 this, // Object file to which this sections belongs 1696 ++context.NextSegmentIdx 1697 << 8, // Section ID is the 1 based segment index 1698 // shifted right by 8 bits as not to collide with any of the 256 1699 // section IDs that are possible 1700 const_segname, // Name of this section 1701 eSectionTypeContainer, // This section is a container of other 1702 // sections. 1703 load_cmd.vmaddr, // File VM address == addresses as they are 1704 // found in the object file 1705 load_cmd.vmsize, // VM size in bytes of this section 1706 load_cmd.fileoff, // Offset to the data for this section in 1707 // the file 1708 load_cmd.filesize, // Size in bytes of this section as found 1709 // in the file 1710 0, // Segments have no alignment information 1711 load_cmd.flags); // Flags for this section 1712 1713 segment_sp->SetIsEncrypted(segment_is_encrypted); 1714 m_sections_up->AddSection(segment_sp); 1715 segment_sp->SetPermissions(segment_permissions); 1716 if (add_to_unified) 1717 context.UnifiedList.AddSection(segment_sp); 1718 } else if (unified_section_sp) { 1719 // If this is a dSYM and the file addresses in the dSYM differ from the 1720 // file addresses in the ObjectFile, we must use the file base address for 1721 // the Section from the dSYM for the DWARF to resolve correctly. 1722 // This only happens with binaries in the shared cache in practice; 1723 // normally a mismatch like this would give a binary & dSYM that do not 1724 // match UUIDs. When a binary is included in the shared cache, its 1725 // segments are rearranged to optimize the shared cache, so its file 1726 // addresses will differ from what the ObjectFile had originally, 1727 // and what the dSYM has. 1728 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) { 1729 Log *log = GetLog(LLDBLog::Symbols); 1730 if (log) { 1731 log->Printf( 1732 "Installing dSYM's %s segment file address over ObjectFile's " 1733 "so symbol table/debug info resolves correctly for %s", 1734 const_segname.AsCString(), 1735 module_sp->GetFileSpec().GetFilename().AsCString()); 1736 } 1737 1738 // Make sure we've parsed the symbol table from the ObjectFile before 1739 // we go around changing its Sections. 1740 module_sp->GetObjectFile()->GetSymtab(); 1741 // eh_frame would present the same problems but we parse that on a per- 1742 // function basis as-needed so it's more difficult to remove its use of 1743 // the Sections. Realistically, the environments where this code path 1744 // will be taken will not have eh_frame sections. 1745 1746 unified_section_sp->SetFileAddress(load_cmd.vmaddr); 1747 1748 // Notify the module that the section addresses have been changed once 1749 // we're done so any file-address caches can be updated. 1750 context.FileAddressesChanged = true; 1751 } 1752 m_sections_up->AddSection(unified_section_sp); 1753 } 1754 1755 llvm::MachO::section_64 sect64; 1756 ::memset(§64, 0, sizeof(sect64)); 1757 // Push a section into our mach sections for the section at index zero 1758 // (NO_SECT) if we don't have any mach sections yet... 1759 if (m_mach_sections.empty()) 1760 m_mach_sections.push_back(sect64); 1761 uint32_t segment_sect_idx; 1762 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; 1763 1764 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; 1765 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; 1766 ++segment_sect_idx) { 1767 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname, 1768 sizeof(sect64.sectname)) == nullptr) 1769 break; 1770 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname, 1771 sizeof(sect64.segname)) == nullptr) 1772 break; 1773 sect64.addr = m_data.GetAddress(&offset); 1774 sect64.size = m_data.GetAddress(&offset); 1775 1776 if (m_data.GetU32(&offset, §64.offset, num_u32s) == nullptr) 1777 break; 1778 1779 if (IsSharedCacheBinary() && !IsInMemory()) { 1780 sect64.offset = sect64.addr - m_text_address; 1781 } 1782 1783 // Keep a list of mach sections around in case we need to get at data that 1784 // isn't stored in the abstracted Sections. 1785 m_mach_sections.push_back(sect64); 1786 1787 if (add_section) { 1788 ConstString section_name( 1789 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); 1790 if (!const_segname) { 1791 // We have a segment with no name so we need to conjure up segments 1792 // that correspond to the section's segname if there isn't already such 1793 // a section. If there is such a section, we resize the section so that 1794 // it spans all sections. We also mark these sections as fake so 1795 // address matches don't hit if they land in the gaps between the child 1796 // sections. 1797 const_segname.SetTrimmedCStringWithLength(sect64.segname, 1798 sizeof(sect64.segname)); 1799 segment_sp = context.UnifiedList.FindSectionByName(const_segname); 1800 if (segment_sp.get()) { 1801 Section *segment = segment_sp.get(); 1802 // Grow the section size as needed. 1803 const lldb::addr_t sect64_min_addr = sect64.addr; 1804 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size; 1805 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize(); 1806 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress(); 1807 const lldb::addr_t curr_seg_max_addr = 1808 curr_seg_min_addr + curr_seg_byte_size; 1809 if (sect64_min_addr >= curr_seg_min_addr) { 1810 const lldb::addr_t new_seg_byte_size = 1811 sect64_max_addr - curr_seg_min_addr; 1812 // Only grow the section size if needed 1813 if (new_seg_byte_size > curr_seg_byte_size) 1814 segment->SetByteSize(new_seg_byte_size); 1815 } else { 1816 // We need to change the base address of the segment and adjust the 1817 // child section offsets for all existing children. 1818 const lldb::addr_t slide_amount = 1819 sect64_min_addr - curr_seg_min_addr; 1820 segment->Slide(slide_amount, false); 1821 segment->GetChildren().Slide(-slide_amount, false); 1822 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr); 1823 } 1824 1825 // Grow the section size as needed. 1826 if (sect64.offset) { 1827 const lldb::addr_t segment_min_file_offset = 1828 segment->GetFileOffset(); 1829 const lldb::addr_t segment_max_file_offset = 1830 segment_min_file_offset + segment->GetFileSize(); 1831 1832 const lldb::addr_t section_min_file_offset = sect64.offset; 1833 const lldb::addr_t section_max_file_offset = 1834 section_min_file_offset + sect64.size; 1835 const lldb::addr_t new_file_offset = 1836 std::min(section_min_file_offset, segment_min_file_offset); 1837 const lldb::addr_t new_file_size = 1838 std::max(section_max_file_offset, segment_max_file_offset) - 1839 new_file_offset; 1840 segment->SetFileOffset(new_file_offset); 1841 segment->SetFileSize(new_file_size); 1842 } 1843 } else { 1844 // Create a fake section for the section's named segment 1845 segment_sp = std::make_shared<Section>( 1846 segment_sp, // Parent section 1847 module_sp, // Module to which this section belongs 1848 this, // Object file to which this section belongs 1849 ++context.NextSegmentIdx 1850 << 8, // Section ID is the 1 based segment index 1851 // shifted right by 8 bits as not to 1852 // collide with any of the 256 section IDs 1853 // that are possible 1854 const_segname, // Name of this section 1855 eSectionTypeContainer, // This section is a container of 1856 // other sections. 1857 sect64.addr, // File VM address == addresses as they are 1858 // found in the object file 1859 sect64.size, // VM size in bytes of this section 1860 sect64.offset, // Offset to the data for this section in 1861 // the file 1862 sect64.offset ? sect64.size : 0, // Size in bytes of 1863 // this section as 1864 // found in the file 1865 sect64.align, 1866 load_cmd.flags); // Flags for this section 1867 segment_sp->SetIsFake(true); 1868 segment_sp->SetPermissions(segment_permissions); 1869 m_sections_up->AddSection(segment_sp); 1870 if (add_to_unified) 1871 context.UnifiedList.AddSection(segment_sp); 1872 segment_sp->SetIsEncrypted(segment_is_encrypted); 1873 } 1874 } 1875 assert(segment_sp.get()); 1876 1877 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name); 1878 1879 SectionSP section_sp(new Section( 1880 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, 1881 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, 1882 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, 1883 sect64.flags)); 1884 // Set the section to be encrypted to match the segment 1885 1886 bool section_is_encrypted = false; 1887 if (!segment_is_encrypted && load_cmd.filesize != 0) 1888 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( 1889 sect64.offset) != nullptr; 1890 1891 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); 1892 section_sp->SetPermissions(segment_permissions); 1893 segment_sp->GetChildren().AddSection(section_sp); 1894 1895 if (segment_sp->IsFake()) { 1896 segment_sp.reset(); 1897 const_segname.Clear(); 1898 } 1899 } 1900 } 1901 if (segment_sp && is_dsym) { 1902 if (first_segment_sectID <= context.NextSectionIdx) { 1903 lldb::user_id_t sect_uid; 1904 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx; 1905 ++sect_uid) { 1906 SectionSP curr_section_sp( 1907 segment_sp->GetChildren().FindSectionByID(sect_uid)); 1908 SectionSP next_section_sp; 1909 if (sect_uid + 1 <= context.NextSectionIdx) 1910 next_section_sp = 1911 segment_sp->GetChildren().FindSectionByID(sect_uid + 1); 1912 1913 if (curr_section_sp.get()) { 1914 if (curr_section_sp->GetByteSize() == 0) { 1915 if (next_section_sp.get() != nullptr) 1916 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() - 1917 curr_section_sp->GetFileAddress()); 1918 else 1919 curr_section_sp->SetByteSize(load_cmd.vmsize); 1920 } 1921 } 1922 } 1923 } 1924 } 1925 } 1926 1927 void ObjectFileMachO::ProcessDysymtabCommand( 1928 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) { 1929 m_dysymtab.cmd = load_cmd.cmd; 1930 m_dysymtab.cmdsize = load_cmd.cmdsize; 1931 m_data.GetU32(&offset, &m_dysymtab.ilocalsym, 1932 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2); 1933 } 1934 1935 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) { 1936 if (m_sections_up) 1937 return; 1938 1939 m_sections_up = std::make_unique<SectionList>(); 1940 1941 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 1942 // bool dump_sections = false; 1943 ModuleSP module_sp(GetModule()); 1944 1945 offset = MachHeaderSizeFromMagic(m_header.magic); 1946 1947 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list); 1948 llvm::MachO::load_command load_cmd; 1949 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 1950 const lldb::offset_t load_cmd_offset = offset; 1951 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 1952 break; 1953 1954 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64) 1955 ProcessSegmentCommand(load_cmd, offset, i, context); 1956 else if (load_cmd.cmd == LC_DYSYMTAB) 1957 ProcessDysymtabCommand(load_cmd, offset); 1958 1959 offset = load_cmd_offset + load_cmd.cmdsize; 1960 } 1961 1962 if (context.FileAddressesChanged && module_sp) 1963 module_sp->SectionFileAddressesChanged(); 1964 } 1965 1966 class MachSymtabSectionInfo { 1967 public: 1968 MachSymtabSectionInfo(SectionList *section_list) 1969 : m_section_list(section_list), m_section_infos() { 1970 // Get the number of sections down to a depth of 1 to include all segments 1971 // and their sections, but no other sections that may be added for debug 1972 // map or 1973 m_section_infos.resize(section_list->GetNumSections(1)); 1974 } 1975 1976 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) { 1977 if (n_sect == 0) 1978 return SectionSP(); 1979 if (n_sect < m_section_infos.size()) { 1980 if (!m_section_infos[n_sect].section_sp) { 1981 SectionSP section_sp(m_section_list->FindSectionByID(n_sect)); 1982 m_section_infos[n_sect].section_sp = section_sp; 1983 if (section_sp) { 1984 m_section_infos[n_sect].vm_range.SetBaseAddress( 1985 section_sp->GetFileAddress()); 1986 m_section_infos[n_sect].vm_range.SetByteSize( 1987 section_sp->GetByteSize()); 1988 } else { 1989 std::string filename = "<unknown>"; 1990 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0)); 1991 if (first_section_sp) 1992 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath(); 1993 1994 Debugger::ReportError( 1995 llvm::formatv("unable to find section {0} for a symbol in " 1996 "{1}, corrupt file?", 1997 n_sect, filename)); 1998 } 1999 } 2000 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) { 2001 // Symbol is in section. 2002 return m_section_infos[n_sect].section_sp; 2003 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 && 2004 m_section_infos[n_sect].vm_range.GetBaseAddress() == 2005 file_addr) { 2006 // Symbol is in section with zero size, but has the same start address 2007 // as the section. This can happen with linker symbols (symbols that 2008 // start with the letter 'l' or 'L'. 2009 return m_section_infos[n_sect].section_sp; 2010 } 2011 } 2012 return m_section_list->FindSectionContainingFileAddress(file_addr); 2013 } 2014 2015 protected: 2016 struct SectionInfo { 2017 SectionInfo() : vm_range(), section_sp() {} 2018 2019 VMRange vm_range; 2020 SectionSP section_sp; 2021 }; 2022 SectionList *m_section_list; 2023 std::vector<SectionInfo> m_section_infos; 2024 }; 2025 2026 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63) 2027 struct TrieEntry { 2028 void Dump() const { 2029 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", 2030 static_cast<unsigned long long>(address), 2031 static_cast<unsigned long long>(flags), 2032 static_cast<unsigned long long>(other), name.GetCString()); 2033 if (import_name) 2034 printf(" -> \"%s\"\n", import_name.GetCString()); 2035 else 2036 printf("\n"); 2037 } 2038 ConstString name; 2039 uint64_t address = LLDB_INVALID_ADDRESS; 2040 uint64_t flags = 2041 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, 2042 // TRIE_SYMBOL_IS_THUMB 2043 uint64_t other = 0; 2044 ConstString import_name; 2045 }; 2046 2047 struct TrieEntryWithOffset { 2048 lldb::offset_t nodeOffset; 2049 TrieEntry entry; 2050 2051 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {} 2052 2053 void Dump(uint32_t idx) const { 2054 printf("[%3u] 0x%16.16llx: ", idx, 2055 static_cast<unsigned long long>(nodeOffset)); 2056 entry.Dump(); 2057 } 2058 2059 bool operator<(const TrieEntryWithOffset &other) const { 2060 return (nodeOffset < other.nodeOffset); 2061 } 2062 }; 2063 2064 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset, 2065 const bool is_arm, addr_t text_seg_base_addr, 2066 std::vector<llvm::StringRef> &nameSlices, 2067 std::set<lldb::addr_t> &resolver_addresses, 2068 std::vector<TrieEntryWithOffset> &reexports, 2069 std::vector<TrieEntryWithOffset> &ext_symbols) { 2070 if (!data.ValidOffset(offset)) 2071 return true; 2072 2073 // Terminal node -- end of a branch, possibly add this to 2074 // the symbol table or resolver table. 2075 const uint64_t terminalSize = data.GetULEB128(&offset); 2076 lldb::offset_t children_offset = offset + terminalSize; 2077 if (terminalSize != 0) { 2078 TrieEntryWithOffset e(offset); 2079 e.entry.flags = data.GetULEB128(&offset); 2080 const char *import_name = nullptr; 2081 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { 2082 e.entry.address = 0; 2083 e.entry.other = data.GetULEB128(&offset); // dylib ordinal 2084 import_name = data.GetCStr(&offset); 2085 } else { 2086 e.entry.address = data.GetULEB128(&offset); 2087 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2088 e.entry.address += text_seg_base_addr; 2089 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 2090 e.entry.other = data.GetULEB128(&offset); 2091 uint64_t resolver_addr = e.entry.other; 2092 if (text_seg_base_addr != LLDB_INVALID_ADDRESS) 2093 resolver_addr += text_seg_base_addr; 2094 if (is_arm) 2095 resolver_addr &= THUMB_ADDRESS_BIT_MASK; 2096 resolver_addresses.insert(resolver_addr); 2097 } else 2098 e.entry.other = 0; 2099 } 2100 bool add_this_entry = false; 2101 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && 2102 import_name && import_name[0]) { 2103 // add symbols that are reexport symbols with a valid import name. 2104 add_this_entry = true; 2105 } else if (e.entry.flags == 0 && 2106 (import_name == nullptr || import_name[0] == '\0')) { 2107 // add externally visible symbols, in case the nlist record has 2108 // been stripped/omitted. 2109 add_this_entry = true; 2110 } 2111 if (add_this_entry) { 2112 std::string name; 2113 if (!nameSlices.empty()) { 2114 for (auto name_slice : nameSlices) 2115 name.append(name_slice.data(), name_slice.size()); 2116 } 2117 if (name.size() > 1) { 2118 // Skip the leading '_' 2119 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1); 2120 } 2121 if (import_name) { 2122 // Skip the leading '_' 2123 e.entry.import_name.SetCString(import_name + 1); 2124 } 2125 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { 2126 reexports.push_back(e); 2127 } else { 2128 if (is_arm && (e.entry.address & 1)) { 2129 e.entry.flags |= TRIE_SYMBOL_IS_THUMB; 2130 e.entry.address &= THUMB_ADDRESS_BIT_MASK; 2131 } 2132 ext_symbols.push_back(e); 2133 } 2134 } 2135 } 2136 2137 const uint8_t childrenCount = data.GetU8(&children_offset); 2138 for (uint8_t i = 0; i < childrenCount; ++i) { 2139 const char *cstr = data.GetCStr(&children_offset); 2140 if (cstr) 2141 nameSlices.push_back(llvm::StringRef(cstr)); 2142 else 2143 return false; // Corrupt data 2144 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); 2145 if (childNodeOffset) { 2146 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr, 2147 nameSlices, resolver_addresses, reexports, 2148 ext_symbols)) { 2149 return false; 2150 } 2151 } 2152 nameSlices.pop_back(); 2153 } 2154 return true; 2155 } 2156 2157 static SymbolType GetSymbolType(const char *&symbol_name, 2158 bool &demangled_is_synthesized, 2159 const SectionSP &text_section_sp, 2160 const SectionSP &data_section_sp, 2161 const SectionSP &data_dirty_section_sp, 2162 const SectionSP &data_const_section_sp, 2163 const SectionSP &symbol_section) { 2164 SymbolType type = eSymbolTypeInvalid; 2165 2166 const char *symbol_sect_name = symbol_section->GetName().AsCString(); 2167 if (symbol_section->IsDescendant(text_section_sp.get())) { 2168 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 2169 S_ATTR_SELF_MODIFYING_CODE | 2170 S_ATTR_SOME_INSTRUCTIONS)) 2171 type = eSymbolTypeData; 2172 else 2173 type = eSymbolTypeCode; 2174 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 2175 symbol_section->IsDescendant(data_dirty_section_sp.get()) || 2176 symbol_section->IsDescendant(data_const_section_sp.get())) { 2177 if (symbol_sect_name && 2178 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 2179 type = eSymbolTypeRuntime; 2180 2181 if (symbol_name) { 2182 llvm::StringRef symbol_name_ref(symbol_name); 2183 if (symbol_name_ref.starts_with("OBJC_")) { 2184 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_"); 2185 static const llvm::StringRef g_objc_v2_prefix_metaclass( 2186 "OBJC_METACLASS_$_"); 2187 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_"); 2188 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 2189 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 2190 type = eSymbolTypeObjCClass; 2191 demangled_is_synthesized = true; 2192 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_metaclass)) { 2193 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 2194 type = eSymbolTypeObjCMetaClass; 2195 demangled_is_synthesized = true; 2196 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 2197 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 2198 type = eSymbolTypeObjCIVar; 2199 demangled_is_synthesized = true; 2200 } 2201 } 2202 } 2203 } else if (symbol_sect_name && 2204 ::strstr(symbol_sect_name, "__gcc_except_tab") == 2205 symbol_sect_name) { 2206 type = eSymbolTypeException; 2207 } else { 2208 type = eSymbolTypeData; 2209 } 2210 } else if (symbol_sect_name && 2211 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) { 2212 type = eSymbolTypeTrampoline; 2213 } 2214 return type; 2215 } 2216 2217 static std::optional<struct nlist_64> 2218 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, 2219 size_t nlist_byte_size) { 2220 struct nlist_64 nlist; 2221 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) 2222 return {}; 2223 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); 2224 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); 2225 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); 2226 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); 2227 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); 2228 return nlist; 2229 } 2230 2231 enum { DebugSymbols = true, NonDebugSymbols = false }; 2232 2233 void ObjectFileMachO::ParseSymtab(Symtab &symtab) { 2234 ModuleSP module_sp(GetModule()); 2235 if (!module_sp) 2236 return; 2237 2238 Log *log = GetLog(LLDBLog::Symbols); 2239 2240 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec(); 2241 const char *file_name = file.GetFilename().AsCString("<Unknown>"); 2242 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name); 2243 LLDB_LOG(log, "Parsing symbol table for {0}", file_name); 2244 Progress progress("Parsing symbol table", file_name); 2245 2246 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; 2247 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0}; 2248 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 2249 llvm::MachO::dysymtab_command dysymtab = m_dysymtab; 2250 SymtabCommandLargeOffsets symtab_load_command; 2251 // The data element of type bool indicates that this entry is thumb 2252 // code. 2253 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts; 2254 2255 // Record the address of every function/data that we add to the symtab. 2256 // We add symbols to the table in the order of most information (nlist 2257 // records) to least (function starts), and avoid duplicating symbols 2258 // via this set. 2259 llvm::DenseSet<addr_t> symbols_added; 2260 2261 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we 2262 // do not add the tombstone or empty keys to the set. 2263 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) { 2264 // Don't add the tombstone or empty keys. 2265 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1) 2266 return; 2267 symbols_added.insert(file_addr); 2268 }; 2269 FunctionStarts function_starts; 2270 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 2271 uint32_t i; 2272 FileSpecList dylib_files; 2273 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_"); 2274 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_"); 2275 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 2276 UUID image_uuid; 2277 2278 for (i = 0; i < m_header.ncmds; ++i) { 2279 const lldb::offset_t cmd_offset = offset; 2280 // Read in the load command and load command size 2281 llvm::MachO::load_command lc; 2282 if (m_data.GetU32(&offset, &lc, 2) == nullptr) 2283 break; 2284 // Watch for the symbol table load command 2285 switch (lc.cmd) { 2286 case LC_SYMTAB: 2287 // struct symtab_command { 2288 // uint32_t cmd; /* LC_SYMTAB */ 2289 // uint32_t cmdsize; /* sizeof(struct symtab_command) */ 2290 // uint32_t symoff; /* symbol table offset */ 2291 // uint32_t nsyms; /* number of symbol table entries */ 2292 // uint32_t stroff; /* string table offset */ 2293 // uint32_t strsize; /* string table size in bytes */ 2294 // }; 2295 symtab_load_command.cmd = lc.cmd; 2296 symtab_load_command.cmdsize = lc.cmdsize; 2297 symtab_load_command.symoff = m_data.GetU32(&offset); 2298 symtab_load_command.nsyms = m_data.GetU32(&offset); 2299 symtab_load_command.stroff = m_data.GetU32(&offset); 2300 symtab_load_command.strsize = m_data.GetU32(&offset); 2301 break; 2302 2303 case LC_DYLD_INFO: 2304 case LC_DYLD_INFO_ONLY: 2305 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) { 2306 dyld_info.cmd = lc.cmd; 2307 dyld_info.cmdsize = lc.cmdsize; 2308 } else { 2309 memset(&dyld_info, 0, sizeof(dyld_info)); 2310 } 2311 break; 2312 2313 case LC_LOAD_DYLIB: 2314 case LC_LOAD_WEAK_DYLIB: 2315 case LC_REEXPORT_DYLIB: 2316 case LC_LOADFVMLIB: 2317 case LC_LOAD_UPWARD_DYLIB: { 2318 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 2319 const char *path = m_data.PeekCStr(name_offset); 2320 if (path) { 2321 FileSpec file_spec(path); 2322 // Strip the path if there is @rpath, @executable, etc so we just use 2323 // the basename 2324 if (path[0] == '@') 2325 file_spec.ClearDirectory(); 2326 2327 if (lc.cmd == LC_REEXPORT_DYLIB) { 2328 m_reexported_dylibs.AppendIfUnique(file_spec); 2329 } 2330 2331 dylib_files.Append(file_spec); 2332 } 2333 } break; 2334 2335 case LC_DYLD_EXPORTS_TRIE: 2336 exports_trie_load_command.cmd = lc.cmd; 2337 exports_trie_load_command.cmdsize = lc.cmdsize; 2338 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) == 2339 nullptr) // fill in offset and size fields 2340 memset(&exports_trie_load_command, 0, 2341 sizeof(exports_trie_load_command)); 2342 break; 2343 case LC_FUNCTION_STARTS: 2344 function_starts_load_command.cmd = lc.cmd; 2345 function_starts_load_command.cmdsize = lc.cmdsize; 2346 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) == 2347 nullptr) // fill in data offset and size fields 2348 memset(&function_starts_load_command, 0, 2349 sizeof(function_starts_load_command)); 2350 break; 2351 2352 case LC_UUID: { 2353 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); 2354 2355 if (uuid_bytes) 2356 image_uuid = UUID(uuid_bytes, 16); 2357 break; 2358 } 2359 2360 default: 2361 break; 2362 } 2363 offset = cmd_offset + lc.cmdsize; 2364 } 2365 2366 if (!symtab_load_command.cmd) 2367 return; 2368 2369 SectionList *section_list = GetSectionList(); 2370 if (section_list == nullptr) 2371 return; 2372 2373 const uint32_t addr_byte_size = m_data.GetAddressByteSize(); 2374 const ByteOrder byte_order = m_data.GetByteOrder(); 2375 bool bit_width_32 = addr_byte_size == 4; 2376 const size_t nlist_byte_size = 2377 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64); 2378 2379 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size); 2380 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size); 2381 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size); 2382 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order, 2383 addr_byte_size); 2384 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size); 2385 2386 const addr_t nlist_data_byte_size = 2387 symtab_load_command.nsyms * nlist_byte_size; 2388 const addr_t strtab_data_byte_size = symtab_load_command.strsize; 2389 addr_t strtab_addr = LLDB_INVALID_ADDRESS; 2390 2391 ProcessSP process_sp(m_process_wp.lock()); 2392 Process *process = process_sp.get(); 2393 2394 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete; 2395 bool is_shared_cache_image = IsSharedCacheBinary(); 2396 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory(); 2397 SectionSP linkedit_section_sp( 2398 section_list->FindSectionByName(GetSegmentNameLINKEDIT())); 2399 2400 if (process && m_header.filetype != llvm::MachO::MH_OBJECT && 2401 !is_local_shared_cache_image) { 2402 Target &target = process->GetTarget(); 2403 2404 memory_module_load_level = target.GetMemoryModuleLoadLevel(); 2405 2406 // Reading mach file from memory in a process or core file... 2407 2408 if (linkedit_section_sp) { 2409 addr_t linkedit_load_addr = 2410 linkedit_section_sp->GetLoadBaseAddress(&target); 2411 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) { 2412 // We might be trying to access the symbol table before the 2413 // __LINKEDIT's load address has been set in the target. We can't 2414 // fail to read the symbol table, so calculate the right address 2415 // manually 2416 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage( 2417 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get()); 2418 } 2419 2420 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset(); 2421 const addr_t symoff_addr = linkedit_load_addr + 2422 symtab_load_command.symoff - 2423 linkedit_file_offset; 2424 strtab_addr = linkedit_load_addr + symtab_load_command.stroff - 2425 linkedit_file_offset; 2426 2427 // Always load dyld - the dynamic linker - from memory if we didn't 2428 // find a binary anywhere else. lldb will not register 2429 // dylib/framework/bundle loads/unloads if we don't have the dyld 2430 // symbols, we force dyld to load from memory despite the user's 2431 // target.memory-module-load-level setting. 2432 if (memory_module_load_level == eMemoryModuleLoadLevelComplete || 2433 m_header.filetype == llvm::MachO::MH_DYLINKER) { 2434 DataBufferSP nlist_data_sp( 2435 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size)); 2436 if (nlist_data_sp) 2437 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize()); 2438 if (dysymtab.nindirectsyms != 0) { 2439 const addr_t indirect_syms_addr = linkedit_load_addr + 2440 dysymtab.indirectsymoff - 2441 linkedit_file_offset; 2442 DataBufferSP indirect_syms_data_sp(ReadMemory( 2443 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4)); 2444 if (indirect_syms_data_sp) 2445 indirect_symbol_index_data.SetData( 2446 indirect_syms_data_sp, 0, indirect_syms_data_sp->GetByteSize()); 2447 // If this binary is outside the shared cache, 2448 // cache the string table. 2449 // Binaries in the shared cache all share a giant string table, 2450 // and we can't share the string tables across multiple 2451 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab 2452 // for every binary in the shared cache - it would be a big perf 2453 // problem. For binaries outside the shared cache, it's faster to 2454 // read the entire strtab at once instead of piece-by-piece as we 2455 // process the nlist records. 2456 if (!is_shared_cache_image) { 2457 DataBufferSP strtab_data_sp( 2458 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size)); 2459 if (strtab_data_sp) { 2460 strtab_data.SetData(strtab_data_sp, 0, 2461 strtab_data_sp->GetByteSize()); 2462 } 2463 } 2464 } 2465 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) { 2466 if (function_starts_load_command.cmd) { 2467 const addr_t func_start_addr = 2468 linkedit_load_addr + function_starts_load_command.dataoff - 2469 linkedit_file_offset; 2470 DataBufferSP func_start_data_sp( 2471 ReadMemory(process_sp, func_start_addr, 2472 function_starts_load_command.datasize)); 2473 if (func_start_data_sp) 2474 function_starts_data.SetData(func_start_data_sp, 0, 2475 func_start_data_sp->GetByteSize()); 2476 } 2477 } 2478 } 2479 } 2480 } else { 2481 if (is_local_shared_cache_image) { 2482 // The load commands in shared cache images are relative to the 2483 // beginning of the shared cache, not the library image. The 2484 // data we get handed when creating the ObjectFileMachO starts 2485 // at the beginning of a specific library and spans to the end 2486 // of the cache to be able to reach the shared LINKEDIT 2487 // segments. We need to convert the load command offsets to be 2488 // relative to the beginning of our specific image. 2489 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset(); 2490 lldb::offset_t linkedit_slide = 2491 linkedit_offset - m_linkedit_original_offset; 2492 symtab_load_command.symoff += linkedit_slide; 2493 symtab_load_command.stroff += linkedit_slide; 2494 dyld_info.export_off += linkedit_slide; 2495 dysymtab.indirectsymoff += linkedit_slide; 2496 function_starts_load_command.dataoff += linkedit_slide; 2497 exports_trie_load_command.dataoff += linkedit_slide; 2498 } 2499 2500 nlist_data.SetData(m_data, symtab_load_command.symoff, 2501 nlist_data_byte_size); 2502 strtab_data.SetData(m_data, symtab_load_command.stroff, 2503 strtab_data_byte_size); 2504 2505 // We shouldn't have exports data from both the LC_DYLD_INFO command 2506 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary: 2507 lldbassert(!((dyld_info.export_size > 0) 2508 && (exports_trie_load_command.datasize > 0))); 2509 if (dyld_info.export_size > 0) { 2510 dyld_trie_data.SetData(m_data, dyld_info.export_off, 2511 dyld_info.export_size); 2512 } else if (exports_trie_load_command.datasize > 0) { 2513 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff, 2514 exports_trie_load_command.datasize); 2515 } 2516 2517 if (dysymtab.nindirectsyms != 0) { 2518 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff, 2519 dysymtab.nindirectsyms * 4); 2520 } 2521 if (function_starts_load_command.cmd) { 2522 function_starts_data.SetData(m_data, function_starts_load_command.dataoff, 2523 function_starts_load_command.datasize); 2524 } 2525 } 2526 2527 const bool have_strtab_data = strtab_data.GetByteSize() > 0; 2528 2529 ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); 2530 ConstString g_segment_name_DATA = GetSegmentNameDATA(); 2531 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY(); 2532 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST(); 2533 ConstString g_segment_name_OBJC = GetSegmentNameOBJC(); 2534 ConstString g_section_name_eh_frame = GetSectionNameEHFrame(); 2535 SectionSP text_section_sp( 2536 section_list->FindSectionByName(g_segment_name_TEXT)); 2537 SectionSP data_section_sp( 2538 section_list->FindSectionByName(g_segment_name_DATA)); 2539 SectionSP data_dirty_section_sp( 2540 section_list->FindSectionByName(g_segment_name_DATA_DIRTY)); 2541 SectionSP data_const_section_sp( 2542 section_list->FindSectionByName(g_segment_name_DATA_CONST)); 2543 SectionSP objc_section_sp( 2544 section_list->FindSectionByName(g_segment_name_OBJC)); 2545 SectionSP eh_frame_section_sp; 2546 if (text_section_sp.get()) 2547 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName( 2548 g_section_name_eh_frame); 2549 else 2550 eh_frame_section_sp = 2551 section_list->FindSectionByName(g_section_name_eh_frame); 2552 2553 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM); 2554 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions(); 2555 2556 // lldb works best if it knows the start address of all functions in a 2557 // module. Linker symbols or debug info are normally the best source of 2558 // information for start addr / size but they may be stripped in a released 2559 // binary. Two additional sources of information exist in Mach-O binaries: 2560 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each 2561 // function's start address in the 2562 // binary, relative to the text section. 2563 // eh_frame - the eh_frame FDEs have the start addr & size of 2564 // each function 2565 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on 2566 // all modern binaries. 2567 // Binaries built to run on older releases may need to use eh_frame 2568 // information. 2569 2570 if (text_section_sp && function_starts_data.GetByteSize()) { 2571 FunctionStarts::Entry function_start_entry; 2572 function_start_entry.data = false; 2573 lldb::offset_t function_start_offset = 0; 2574 function_start_entry.addr = text_section_sp->GetFileAddress(); 2575 uint64_t delta; 2576 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) > 2577 0) { 2578 // Now append the current entry 2579 function_start_entry.addr += delta; 2580 if (is_arm) { 2581 if (function_start_entry.addr & 1) { 2582 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2583 function_start_entry.data = true; 2584 } else if (always_thumb) { 2585 function_start_entry.data = true; 2586 } 2587 } 2588 function_starts.Append(function_start_entry); 2589 } 2590 } else { 2591 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the 2592 // load command claiming an eh_frame but it doesn't actually have the 2593 // eh_frame content. And if we have a dSYM, we don't need to do any of 2594 // this fill-in-the-missing-symbols works anyway - the debug info should 2595 // give us all the functions in the module. 2596 if (text_section_sp.get() && eh_frame_section_sp.get() && 2597 m_type != eTypeDebugInfo) { 2598 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp, 2599 DWARFCallFrameInfo::EH); 2600 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions; 2601 eh_frame.GetFunctionAddressAndSizeVector(functions); 2602 addr_t text_base_addr = text_section_sp->GetFileAddress(); 2603 size_t count = functions.GetSize(); 2604 for (size_t i = 0; i < count; ++i) { 2605 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func = 2606 functions.GetEntryAtIndex(i); 2607 if (func) { 2608 FunctionStarts::Entry function_start_entry; 2609 function_start_entry.addr = func->base - text_base_addr; 2610 if (is_arm) { 2611 if (function_start_entry.addr & 1) { 2612 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; 2613 function_start_entry.data = true; 2614 } else if (always_thumb) { 2615 function_start_entry.data = true; 2616 } 2617 } 2618 function_starts.Append(function_start_entry); 2619 } 2620 } 2621 } 2622 } 2623 2624 const size_t function_starts_count = function_starts.GetSize(); 2625 2626 // For user process binaries (executables, dylibs, frameworks, bundles), if 2627 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're 2628 // going to assume the binary has been stripped. Don't allow assembly 2629 // language instruction emulation because we don't know proper function 2630 // start boundaries. 2631 // 2632 // For all other types of binaries (kernels, stand-alone bare board 2633 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame 2634 // sections - we should not make any assumptions about them based on that. 2635 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) { 2636 m_allow_assembly_emulation_unwind_plans = false; 2637 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind)); 2638 2639 if (unwind_or_symbol_log) 2640 module_sp->LogMessage( 2641 unwind_or_symbol_log, 2642 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds"); 2643 } 2644 2645 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get() 2646 ? eh_frame_section_sp->GetID() 2647 : static_cast<user_id_t>(NO_SECT); 2648 2649 uint32_t N_SO_index = UINT32_MAX; 2650 2651 MachSymtabSectionInfo section_info(section_list); 2652 std::vector<uint32_t> N_FUN_indexes; 2653 std::vector<uint32_t> N_NSYM_indexes; 2654 std::vector<uint32_t> N_INCL_indexes; 2655 std::vector<uint32_t> N_BRAC_indexes; 2656 std::vector<uint32_t> N_COMM_indexes; 2657 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap; 2658 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap; 2659 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap; 2660 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx; 2661 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx; 2662 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx; 2663 // Any symbols that get merged into another will get an entry in this map 2664 // so we know 2665 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx; 2666 uint32_t nlist_idx = 0; 2667 Symbol *symbol_ptr = nullptr; 2668 2669 uint32_t sym_idx = 0; 2670 Symbol *sym = nullptr; 2671 size_t num_syms = 0; 2672 std::string memory_symbol_name; 2673 uint32_t unmapped_local_symbols_found = 0; 2674 2675 std::vector<TrieEntryWithOffset> reexport_trie_entries; 2676 std::vector<TrieEntryWithOffset> external_sym_trie_entries; 2677 std::set<lldb::addr_t> resolver_addresses; 2678 2679 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize(); 2680 if (dyld_trie_data_size > 0) { 2681 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size); 2682 SectionSP text_segment_sp = 2683 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 2684 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS; 2685 if (text_segment_sp) 2686 text_segment_file_addr = text_segment_sp->GetFileAddress(); 2687 std::vector<llvm::StringRef> nameSlices; 2688 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr, 2689 nameSlices, resolver_addresses, reexport_trie_entries, 2690 external_sym_trie_entries); 2691 } 2692 2693 typedef std::set<ConstString> IndirectSymbols; 2694 IndirectSymbols indirect_symbol_names; 2695 2696 #if TARGET_OS_IPHONE 2697 2698 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been 2699 // optimized by moving LOCAL symbols out of the memory mapped portion of 2700 // the DSC. The symbol information has all been retained, but it isn't 2701 // available in the normal nlist data. However, there *are* duplicate 2702 // entries of *some* 2703 // LOCAL symbols in the normal nlist data. To handle this situation 2704 // correctly, we must first attempt 2705 // to parse any DSC unmapped symbol information. If we find any, we set a 2706 // flag that tells the normal nlist parser to ignore all LOCAL symbols. 2707 2708 if (IsSharedCacheBinary()) { 2709 // Before we can start mapping the DSC, we need to make certain the 2710 // target process is actually using the cache we can find. 2711 2712 // Next we need to determine the correct path for the dyld shared cache. 2713 2714 ArchSpec header_arch = GetArchitecture(); 2715 2716 UUID dsc_uuid; 2717 UUID process_shared_cache_uuid; 2718 addr_t process_shared_cache_base_addr; 2719 2720 if (process) { 2721 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr, 2722 process_shared_cache_uuid); 2723 } 2724 2725 __block bool found_image = false; 2726 __block void *nlist_buffer = nullptr; 2727 __block unsigned nlist_count = 0; 2728 __block char *string_table = nullptr; 2729 __block vm_offset_t vm_nlist_memory = 0; 2730 __block mach_msg_type_number_t vm_nlist_bytes_read = 0; 2731 __block vm_offset_t vm_string_memory = 0; 2732 __block mach_msg_type_number_t vm_string_bytes_read = 0; 2733 2734 auto _ = llvm::make_scope_exit(^{ 2735 if (vm_nlist_memory) 2736 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read); 2737 if (vm_string_memory) 2738 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read); 2739 }); 2740 2741 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 2742 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 2743 UndefinedNameToDescMap undefined_name_to_desc; 2744 SymbolIndexToName reexport_shlib_needs_fixup; 2745 2746 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) { 2747 uuid_t cache_uuid; 2748 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid); 2749 if (found_image) 2750 return; 2751 2752 if (process_shared_cache_uuid.IsValid() && 2753 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16)) 2754 return; 2755 2756 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) { 2757 uuid_t dsc_image_uuid; 2758 if (found_image) 2759 return; 2760 2761 dyld_image_copy_uuid(image, &dsc_image_uuid); 2762 if (image_uuid != UUID::fromData(dsc_image_uuid, 16)) 2763 return; 2764 2765 found_image = true; 2766 2767 // Compute the size of the string table. We need to ask dyld for a 2768 // new SPI to avoid this step. 2769 dyld_image_local_nlist_content_4Symbolication( 2770 image, ^(const void *nlistStart, uint64_t nlistCount, 2771 const char *stringTable) { 2772 if (!nlistStart || !nlistCount) 2773 return; 2774 2775 // The buffers passed here are valid only inside the block. 2776 // Use vm_read to make a cheap copy of them available for our 2777 // processing later. 2778 kern_return_t ret = 2779 vm_read(mach_task_self(), (vm_address_t)nlistStart, 2780 nlist_byte_size * nlistCount, &vm_nlist_memory, 2781 &vm_nlist_bytes_read); 2782 if (ret != KERN_SUCCESS) 2783 return; 2784 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount); 2785 2786 // We don't know the size of the string table. It's cheaper 2787 // to map the whole VM region than to determine the size by 2788 // parsing all the nlist entries. 2789 vm_address_t string_address = (vm_address_t)stringTable; 2790 vm_size_t region_size; 2791 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; 2792 vm_region_basic_info_data_t info; 2793 memory_object_name_t object; 2794 ret = vm_region_64(mach_task_self(), &string_address, 2795 ®ion_size, VM_REGION_BASIC_INFO_64, 2796 (vm_region_info_t)&info, &info_count, &object); 2797 if (ret != KERN_SUCCESS) 2798 return; 2799 2800 ret = vm_read(mach_task_self(), (vm_address_t)stringTable, 2801 region_size - 2802 ((vm_address_t)stringTable - string_address), 2803 &vm_string_memory, &vm_string_bytes_read); 2804 if (ret != KERN_SUCCESS) 2805 return; 2806 2807 nlist_buffer = (void *)vm_nlist_memory; 2808 string_table = (char *)vm_string_memory; 2809 nlist_count = nlistCount; 2810 }); 2811 }); 2812 }); 2813 if (nlist_buffer) { 2814 DataExtractor dsc_local_symbols_data(nlist_buffer, 2815 nlist_count * nlist_byte_size, 2816 byte_order, addr_byte_size); 2817 unmapped_local_symbols_found = nlist_count; 2818 2819 // The normal nlist code cannot correctly size the Symbols 2820 // array, we need to allocate it here. 2821 sym = symtab.Resize( 2822 symtab_load_command.nsyms + m_dysymtab.nindirectsyms + 2823 unmapped_local_symbols_found - m_dysymtab.nlocalsym); 2824 num_syms = symtab.GetNumSymbols(); 2825 2826 lldb::offset_t nlist_data_offset = 0; 2827 2828 for (uint32_t nlist_index = 0; 2829 nlist_index < nlist_count; 2830 nlist_index++) { 2831 ///////////////////////////// 2832 { 2833 std::optional<struct nlist_64> nlist_maybe = 2834 ParseNList(dsc_local_symbols_data, nlist_data_offset, 2835 nlist_byte_size); 2836 if (!nlist_maybe) 2837 break; 2838 struct nlist_64 nlist = *nlist_maybe; 2839 2840 SymbolType type = eSymbolTypeInvalid; 2841 const char *symbol_name = string_table + nlist.n_strx; 2842 2843 if (symbol_name == NULL) { 2844 // No symbol should be NULL, even the symbols with no 2845 // string values should have an offset zero which 2846 // points to an empty C-string 2847 Debugger::ReportError(llvm::formatv( 2848 "DSC unmapped local symbol[{0}] has invalid " 2849 "string table offset {1:x} in {2}, ignoring symbol", 2850 nlist_index, nlist.n_strx, 2851 module_sp->GetFileSpec().GetPath()); 2852 continue; 2853 } 2854 if (symbol_name[0] == '\0') 2855 symbol_name = NULL; 2856 2857 const char *symbol_name_non_abi_mangled = NULL; 2858 2859 SectionSP symbol_section; 2860 uint32_t symbol_byte_size = 0; 2861 bool add_nlist = true; 2862 bool is_debug = ((nlist.n_type & N_STAB) != 0); 2863 bool demangled_is_synthesized = false; 2864 bool is_gsym = false; 2865 bool set_value = true; 2866 2867 assert(sym_idx < num_syms); 2868 2869 sym[sym_idx].SetDebug(is_debug); 2870 2871 if (is_debug) { 2872 switch (nlist.n_type) { 2873 case N_GSYM: 2874 // global symbol: name,,NO_SECT,type,0 2875 // Sometimes the N_GSYM value contains the address. 2876 2877 // FIXME: In the .o files, we have a GSYM and a debug 2878 // symbol for all the ObjC data. They 2879 // have the same address, but we want to ensure that 2880 // we always find only the real symbol, 'cause we 2881 // don't currently correctly attribute the 2882 // GSYM one to the ObjCClass/Ivar/MetaClass 2883 // symbol type. This is a temporary hack to make 2884 // sure the ObjectiveC symbols get treated correctly. 2885 // To do this right, we should coalesce all the GSYM 2886 // & global symbols that have the same address. 2887 2888 is_gsym = true; 2889 sym[sym_idx].SetExternal(true); 2890 2891 if (symbol_name && symbol_name[0] == '_' && 2892 symbol_name[1] == 'O') { 2893 llvm::StringRef symbol_name_ref(symbol_name); 2894 if (symbol_name_ref.starts_with( 2895 g_objc_v2_prefix_class)) { 2896 symbol_name_non_abi_mangled = symbol_name + 1; 2897 symbol_name = 2898 symbol_name + g_objc_v2_prefix_class.size(); 2899 type = eSymbolTypeObjCClass; 2900 demangled_is_synthesized = true; 2901 2902 } else if (symbol_name_ref.starts_with( 2903 g_objc_v2_prefix_metaclass)) { 2904 symbol_name_non_abi_mangled = symbol_name + 1; 2905 symbol_name = 2906 symbol_name + g_objc_v2_prefix_metaclass.size(); 2907 type = eSymbolTypeObjCMetaClass; 2908 demangled_is_synthesized = true; 2909 } else if (symbol_name_ref.starts_with( 2910 g_objc_v2_prefix_ivar)) { 2911 symbol_name_non_abi_mangled = symbol_name + 1; 2912 symbol_name = 2913 symbol_name + g_objc_v2_prefix_ivar.size(); 2914 type = eSymbolTypeObjCIVar; 2915 demangled_is_synthesized = true; 2916 } 2917 } else { 2918 if (nlist.n_value != 0) 2919 symbol_section = section_info.GetSection( 2920 nlist.n_sect, nlist.n_value); 2921 type = eSymbolTypeData; 2922 } 2923 break; 2924 2925 case N_FNAME: 2926 // procedure name (f77 kludge): name,,NO_SECT,0,0 2927 type = eSymbolTypeCompiler; 2928 break; 2929 2930 case N_FUN: 2931 // procedure: name,,n_sect,linenumber,address 2932 if (symbol_name) { 2933 type = eSymbolTypeCode; 2934 symbol_section = section_info.GetSection( 2935 nlist.n_sect, nlist.n_value); 2936 2937 N_FUN_addr_to_sym_idx.insert( 2938 std::make_pair(nlist.n_value, sym_idx)); 2939 // We use the current number of symbols in the 2940 // symbol table in lieu of using nlist_idx in case 2941 // we ever start trimming entries out 2942 N_FUN_indexes.push_back(sym_idx); 2943 } else { 2944 type = eSymbolTypeCompiler; 2945 2946 if (!N_FUN_indexes.empty()) { 2947 // Copy the size of the function into the 2948 // original 2949 // STAB entry so we don't have 2950 // to hunt for it later 2951 symtab.SymbolAtIndex(N_FUN_indexes.back()) 2952 ->SetByteSize(nlist.n_value); 2953 N_FUN_indexes.pop_back(); 2954 // We don't really need the end function STAB as 2955 // it contains the size which we already placed 2956 // with the original symbol, so don't add it if 2957 // we want a minimal symbol table 2958 add_nlist = false; 2959 } 2960 } 2961 break; 2962 2963 case N_STSYM: 2964 // static symbol: name,,n_sect,type,address 2965 N_STSYM_addr_to_sym_idx.insert( 2966 std::make_pair(nlist.n_value, sym_idx)); 2967 symbol_section = section_info.GetSection(nlist.n_sect, 2968 nlist.n_value); 2969 if (symbol_name && symbol_name[0]) { 2970 type = ObjectFile::GetSymbolTypeFromName( 2971 symbol_name + 1, eSymbolTypeData); 2972 } 2973 break; 2974 2975 case N_LCSYM: 2976 // .lcomm symbol: name,,n_sect,type,address 2977 symbol_section = section_info.GetSection(nlist.n_sect, 2978 nlist.n_value); 2979 type = eSymbolTypeCommonBlock; 2980 break; 2981 2982 case N_BNSYM: 2983 // We use the current number of symbols in the symbol 2984 // table in lieu of using nlist_idx in case we ever 2985 // start trimming entries out Skip these if we want 2986 // minimal symbol tables 2987 add_nlist = false; 2988 break; 2989 2990 case N_ENSYM: 2991 // Set the size of the N_BNSYM to the terminating 2992 // index of this N_ENSYM so that we can always skip 2993 // the entire symbol if we need to navigate more 2994 // quickly at the source level when parsing STABS 2995 // Skip these if we want minimal symbol tables 2996 add_nlist = false; 2997 break; 2998 2999 case N_OPT: 3000 // emitted with gcc2_compiled and in gcc source 3001 type = eSymbolTypeCompiler; 3002 break; 3003 3004 case N_RSYM: 3005 // register sym: name,,NO_SECT,type,register 3006 type = eSymbolTypeVariable; 3007 break; 3008 3009 case N_SLINE: 3010 // src line: 0,,n_sect,linenumber,address 3011 symbol_section = section_info.GetSection(nlist.n_sect, 3012 nlist.n_value); 3013 type = eSymbolTypeLineEntry; 3014 break; 3015 3016 case N_SSYM: 3017 // structure elt: name,,NO_SECT,type,struct_offset 3018 type = eSymbolTypeVariableType; 3019 break; 3020 3021 case N_SO: 3022 // source file name 3023 type = eSymbolTypeSourceFile; 3024 if (symbol_name == NULL) { 3025 add_nlist = false; 3026 if (N_SO_index != UINT32_MAX) { 3027 // Set the size of the N_SO to the terminating 3028 // index of this N_SO so that we can always skip 3029 // the entire N_SO if we need to navigate more 3030 // quickly at the source level when parsing STABS 3031 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3032 symbol_ptr->SetByteSize(sym_idx); 3033 symbol_ptr->SetSizeIsSibling(true); 3034 } 3035 N_NSYM_indexes.clear(); 3036 N_INCL_indexes.clear(); 3037 N_BRAC_indexes.clear(); 3038 N_COMM_indexes.clear(); 3039 N_FUN_indexes.clear(); 3040 N_SO_index = UINT32_MAX; 3041 } else { 3042 // We use the current number of symbols in the 3043 // symbol table in lieu of using nlist_idx in case 3044 // we ever start trimming entries out 3045 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3046 if (N_SO_has_full_path) { 3047 if ((N_SO_index == sym_idx - 1) && 3048 ((sym_idx - 1) < num_syms)) { 3049 // We have two consecutive N_SO entries where 3050 // the first contains a directory and the 3051 // second contains a full path. 3052 sym[sym_idx - 1].GetMangled().SetValue( 3053 ConstString(symbol_name)); 3054 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3055 add_nlist = false; 3056 } else { 3057 // This is the first entry in a N_SO that 3058 // contains a directory or 3059 // a full path to the source file 3060 N_SO_index = sym_idx; 3061 } 3062 } else if ((N_SO_index == sym_idx - 1) && 3063 ((sym_idx - 1) < num_syms)) { 3064 // This is usually the second N_SO entry that 3065 // contains just the filename, so here we combine 3066 // it with the first one if we are minimizing the 3067 // symbol table 3068 const char *so_path = sym[sym_idx - 1] 3069 .GetMangled() 3070 .GetDemangledName() 3071 .AsCString(); 3072 if (so_path && so_path[0]) { 3073 std::string full_so_path(so_path); 3074 const size_t double_slash_pos = 3075 full_so_path.find("//"); 3076 if (double_slash_pos != std::string::npos) { 3077 // The linker has been generating bad N_SO 3078 // entries with doubled up paths 3079 // in the format "%s%s" where the first 3080 // string in the DW_AT_comp_dir, and the 3081 // second is the directory for the source 3082 // file so you end up with a path that looks 3083 // like "/tmp/src//tmp/src/" 3084 FileSpec so_dir(so_path); 3085 if (!FileSystem::Instance().Exists(so_dir)) { 3086 so_dir.SetFile( 3087 &full_so_path[double_slash_pos + 1], 3088 FileSpec::Style::native); 3089 if (FileSystem::Instance().Exists(so_dir)) { 3090 // Trim off the incorrect path 3091 full_so_path.erase(0, double_slash_pos + 1); 3092 } 3093 } 3094 } 3095 if (*full_so_path.rbegin() != '/') 3096 full_so_path += '/'; 3097 full_so_path += symbol_name; 3098 sym[sym_idx - 1].GetMangled().SetValue( 3099 ConstString(full_so_path.c_str())); 3100 add_nlist = false; 3101 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3102 } 3103 } else { 3104 // This could be a relative path to a N_SO 3105 N_SO_index = sym_idx; 3106 } 3107 } 3108 break; 3109 3110 case N_OSO: 3111 // object file name: name,,0,0,st_mtime 3112 type = eSymbolTypeObjectFile; 3113 break; 3114 3115 case N_LSYM: 3116 // local sym: name,,NO_SECT,type,offset 3117 type = eSymbolTypeLocal; 3118 break; 3119 3120 // INCL scopes 3121 case N_BINCL: 3122 // include file beginning: name,,NO_SECT,0,sum We use 3123 // the current number of symbols in the symbol table 3124 // in lieu of using nlist_idx in case we ever start 3125 // trimming entries out 3126 N_INCL_indexes.push_back(sym_idx); 3127 type = eSymbolTypeScopeBegin; 3128 break; 3129 3130 case N_EINCL: 3131 // include file end: name,,NO_SECT,0,0 3132 // Set the size of the N_BINCL to the terminating 3133 // index of this N_EINCL so that we can always skip 3134 // the entire symbol if we need to navigate more 3135 // quickly at the source level when parsing STABS 3136 if (!N_INCL_indexes.empty()) { 3137 symbol_ptr = 3138 symtab.SymbolAtIndex(N_INCL_indexes.back()); 3139 symbol_ptr->SetByteSize(sym_idx + 1); 3140 symbol_ptr->SetSizeIsSibling(true); 3141 N_INCL_indexes.pop_back(); 3142 } 3143 type = eSymbolTypeScopeEnd; 3144 break; 3145 3146 case N_SOL: 3147 // #included file name: name,,n_sect,0,address 3148 type = eSymbolTypeHeaderFile; 3149 3150 // We currently don't use the header files on darwin 3151 add_nlist = false; 3152 break; 3153 3154 case N_PARAMS: 3155 // compiler parameters: name,,NO_SECT,0,0 3156 type = eSymbolTypeCompiler; 3157 break; 3158 3159 case N_VERSION: 3160 // compiler version: name,,NO_SECT,0,0 3161 type = eSymbolTypeCompiler; 3162 break; 3163 3164 case N_OLEVEL: 3165 // compiler -O level: name,,NO_SECT,0,0 3166 type = eSymbolTypeCompiler; 3167 break; 3168 3169 case N_PSYM: 3170 // parameter: name,,NO_SECT,type,offset 3171 type = eSymbolTypeVariable; 3172 break; 3173 3174 case N_ENTRY: 3175 // alternate entry: name,,n_sect,linenumber,address 3176 symbol_section = section_info.GetSection(nlist.n_sect, 3177 nlist.n_value); 3178 type = eSymbolTypeLineEntry; 3179 break; 3180 3181 // Left and Right Braces 3182 case N_LBRAC: 3183 // left bracket: 0,,NO_SECT,nesting level,address We 3184 // use the current number of symbols in the symbol 3185 // table in lieu of using nlist_idx in case we ever 3186 // start trimming entries out 3187 symbol_section = section_info.GetSection(nlist.n_sect, 3188 nlist.n_value); 3189 N_BRAC_indexes.push_back(sym_idx); 3190 type = eSymbolTypeScopeBegin; 3191 break; 3192 3193 case N_RBRAC: 3194 // right bracket: 0,,NO_SECT,nesting level,address 3195 // Set the size of the N_LBRAC to the terminating 3196 // index of this N_RBRAC so that we can always skip 3197 // the entire symbol if we need to navigate more 3198 // quickly at the source level when parsing STABS 3199 symbol_section = section_info.GetSection(nlist.n_sect, 3200 nlist.n_value); 3201 if (!N_BRAC_indexes.empty()) { 3202 symbol_ptr = 3203 symtab.SymbolAtIndex(N_BRAC_indexes.back()); 3204 symbol_ptr->SetByteSize(sym_idx + 1); 3205 symbol_ptr->SetSizeIsSibling(true); 3206 N_BRAC_indexes.pop_back(); 3207 } 3208 type = eSymbolTypeScopeEnd; 3209 break; 3210 3211 case N_EXCL: 3212 // deleted include file: name,,NO_SECT,0,sum 3213 type = eSymbolTypeHeaderFile; 3214 break; 3215 3216 // COMM scopes 3217 case N_BCOMM: 3218 // begin common: name,,NO_SECT,0,0 3219 // We use the current number of symbols in the symbol 3220 // table in lieu of using nlist_idx in case we ever 3221 // start trimming entries out 3222 type = eSymbolTypeScopeBegin; 3223 N_COMM_indexes.push_back(sym_idx); 3224 break; 3225 3226 case N_ECOML: 3227 // end common (local name): 0,,n_sect,0,address 3228 symbol_section = section_info.GetSection(nlist.n_sect, 3229 nlist.n_value); 3230 // Fall through 3231 3232 case N_ECOMM: 3233 // end common: name,,n_sect,0,0 3234 // Set the size of the N_BCOMM to the terminating 3235 // index of this N_ECOMM/N_ECOML so that we can 3236 // always skip the entire symbol if we need to 3237 // navigate more quickly at the source level when 3238 // parsing STABS 3239 if (!N_COMM_indexes.empty()) { 3240 symbol_ptr = 3241 symtab.SymbolAtIndex(N_COMM_indexes.back()); 3242 symbol_ptr->SetByteSize(sym_idx + 1); 3243 symbol_ptr->SetSizeIsSibling(true); 3244 N_COMM_indexes.pop_back(); 3245 } 3246 type = eSymbolTypeScopeEnd; 3247 break; 3248 3249 case N_LENG: 3250 // second stab entry with length information 3251 type = eSymbolTypeAdditional; 3252 break; 3253 3254 default: 3255 break; 3256 } 3257 } else { 3258 // uint8_t n_pext = N_PEXT & nlist.n_type; 3259 uint8_t n_type = N_TYPE & nlist.n_type; 3260 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 3261 3262 switch (n_type) { 3263 case N_INDR: { 3264 const char *reexport_name_cstr = 3265 strtab_data.PeekCStr(nlist.n_value); 3266 if (reexport_name_cstr && reexport_name_cstr[0]) { 3267 type = eSymbolTypeReExported; 3268 ConstString reexport_name( 3269 reexport_name_cstr + 3270 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 3271 sym[sym_idx].SetReExportedSymbolName(reexport_name); 3272 set_value = false; 3273 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 3274 indirect_symbol_names.insert(ConstString( 3275 symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 3276 } else 3277 type = eSymbolTypeUndefined; 3278 } break; 3279 3280 case N_UNDF: 3281 if (symbol_name && symbol_name[0]) { 3282 ConstString undefined_name( 3283 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)); 3284 undefined_name_to_desc[undefined_name] = nlist.n_desc; 3285 } 3286 // Fall through 3287 case N_PBUD: 3288 type = eSymbolTypeUndefined; 3289 break; 3290 3291 case N_ABS: 3292 type = eSymbolTypeAbsolute; 3293 break; 3294 3295 case N_SECT: { 3296 symbol_section = section_info.GetSection(nlist.n_sect, 3297 nlist.n_value); 3298 3299 if (symbol_section == NULL) { 3300 // TODO: warn about this? 3301 add_nlist = false; 3302 break; 3303 } 3304 3305 if (TEXT_eh_frame_sectID == nlist.n_sect) { 3306 type = eSymbolTypeException; 3307 } else { 3308 uint32_t section_type = 3309 symbol_section->Get() & SECTION_TYPE; 3310 3311 switch (section_type) { 3312 case S_CSTRING_LITERALS: 3313 type = eSymbolTypeData; 3314 break; // section with only literal C strings 3315 case S_4BYTE_LITERALS: 3316 type = eSymbolTypeData; 3317 break; // section with only 4 byte literals 3318 case S_8BYTE_LITERALS: 3319 type = eSymbolTypeData; 3320 break; // section with only 8 byte literals 3321 case S_LITERAL_POINTERS: 3322 type = eSymbolTypeTrampoline; 3323 break; // section with only pointers to literals 3324 case S_NON_LAZY_SYMBOL_POINTERS: 3325 type = eSymbolTypeTrampoline; 3326 break; // section with only non-lazy symbol 3327 // pointers 3328 case S_LAZY_SYMBOL_POINTERS: 3329 type = eSymbolTypeTrampoline; 3330 break; // section with only lazy symbol pointers 3331 case S_SYMBOL_STUBS: 3332 type = eSymbolTypeTrampoline; 3333 break; // section with only symbol stubs, byte 3334 // size of stub in the reserved2 field 3335 case S_MOD_INIT_FUNC_POINTERS: 3336 type = eSymbolTypeCode; 3337 break; // section with only function pointers for 3338 // initialization 3339 case S_MOD_TERM_FUNC_POINTERS: 3340 type = eSymbolTypeCode; 3341 break; // section with only function pointers for 3342 // termination 3343 case S_INTERPOSING: 3344 type = eSymbolTypeTrampoline; 3345 break; // section with only pairs of function 3346 // pointers for interposing 3347 case S_16BYTE_LITERALS: 3348 type = eSymbolTypeData; 3349 break; // section with only 16 byte literals 3350 case S_DTRACE_DOF: 3351 type = eSymbolTypeInstrumentation; 3352 break; 3353 case S_LAZY_DYLIB_SYMBOL_POINTERS: 3354 type = eSymbolTypeTrampoline; 3355 break; 3356 default: 3357 switch (symbol_section->GetType()) { 3358 case lldb::eSectionTypeCode: 3359 type = eSymbolTypeCode; 3360 break; 3361 case eSectionTypeData: 3362 case eSectionTypeDataCString: // Inlined C string 3363 // data 3364 case eSectionTypeDataCStringPointers: // Pointers 3365 // to C 3366 // string 3367 // data 3368 case eSectionTypeDataSymbolAddress: // Address of 3369 // a symbol in 3370 // the symbol 3371 // table 3372 case eSectionTypeData4: 3373 case eSectionTypeData8: 3374 case eSectionTypeData16: 3375 type = eSymbolTypeData; 3376 break; 3377 default: 3378 break; 3379 } 3380 break; 3381 } 3382 3383 if (type == eSymbolTypeInvalid) { 3384 const char *symbol_sect_name = 3385 symbol_section->GetName().AsCString(); 3386 if (symbol_section->IsDescendant( 3387 text_section_sp.get())) { 3388 if (symbol_section->IsClear( 3389 S_ATTR_PURE_INSTRUCTIONS | 3390 S_ATTR_SELF_MODIFYING_CODE | 3391 S_ATTR_SOME_INSTRUCTIONS)) 3392 type = eSymbolTypeData; 3393 else 3394 type = eSymbolTypeCode; 3395 } else if (symbol_section->IsDescendant( 3396 data_section_sp.get()) || 3397 symbol_section->IsDescendant( 3398 data_dirty_section_sp.get()) || 3399 symbol_section->IsDescendant( 3400 data_const_section_sp.get())) { 3401 if (symbol_sect_name && 3402 ::strstr(symbol_sect_name, "__objc") == 3403 symbol_sect_name) { 3404 type = eSymbolTypeRuntime; 3405 3406 if (symbol_name) { 3407 llvm::StringRef symbol_name_ref(symbol_name); 3408 if (symbol_name_ref.starts_with("_OBJC_")) { 3409 llvm::StringRef 3410 g_objc_v2_prefix_class( 3411 "_OBJC_CLASS_$_"); 3412 llvm::StringRef 3413 g_objc_v2_prefix_metaclass( 3414 "_OBJC_METACLASS_$_"); 3415 llvm::StringRef 3416 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); 3417 if (symbol_name_ref.starts_with( 3418 g_objc_v2_prefix_class)) { 3419 symbol_name_non_abi_mangled = 3420 symbol_name + 1; 3421 symbol_name = 3422 symbol_name + 3423 g_objc_v2_prefix_class.size(); 3424 type = eSymbolTypeObjCClass; 3425 demangled_is_synthesized = true; 3426 } else if ( 3427 symbol_name_ref.starts_with( 3428 g_objc_v2_prefix_metaclass)) { 3429 symbol_name_non_abi_mangled = 3430 symbol_name + 1; 3431 symbol_name = 3432 symbol_name + 3433 g_objc_v2_prefix_metaclass.size(); 3434 type = eSymbolTypeObjCMetaClass; 3435 demangled_is_synthesized = true; 3436 } else if (symbol_name_ref.starts_with( 3437 g_objc_v2_prefix_ivar)) { 3438 symbol_name_non_abi_mangled = 3439 symbol_name + 1; 3440 symbol_name = 3441 symbol_name + 3442 g_objc_v2_prefix_ivar.size(); 3443 type = eSymbolTypeObjCIVar; 3444 demangled_is_synthesized = true; 3445 } 3446 } 3447 } 3448 } else if (symbol_sect_name && 3449 ::strstr(symbol_sect_name, 3450 "__gcc_except_tab") == 3451 symbol_sect_name) { 3452 type = eSymbolTypeException; 3453 } else { 3454 type = eSymbolTypeData; 3455 } 3456 } else if (symbol_sect_name && 3457 ::strstr(symbol_sect_name, "__IMPORT") == 3458 symbol_sect_name) { 3459 type = eSymbolTypeTrampoline; 3460 } else if (symbol_section->IsDescendant( 3461 objc_section_sp.get())) { 3462 type = eSymbolTypeRuntime; 3463 if (symbol_name && symbol_name[0] == '.') { 3464 llvm::StringRef symbol_name_ref(symbol_name); 3465 llvm::StringRef 3466 g_objc_v1_prefix_class(".objc_class_name_"); 3467 if (symbol_name_ref.starts_with( 3468 g_objc_v1_prefix_class)) { 3469 symbol_name_non_abi_mangled = symbol_name; 3470 symbol_name = symbol_name + 3471 g_objc_v1_prefix_class.size(); 3472 type = eSymbolTypeObjCClass; 3473 demangled_is_synthesized = true; 3474 } 3475 } 3476 } 3477 } 3478 } 3479 } break; 3480 } 3481 } 3482 3483 if (add_nlist) { 3484 uint64_t symbol_value = nlist.n_value; 3485 if (symbol_name_non_abi_mangled) { 3486 sym[sym_idx].GetMangled().SetMangledName( 3487 ConstString(symbol_name_non_abi_mangled)); 3488 sym[sym_idx].GetMangled().SetDemangledName( 3489 ConstString(symbol_name)); 3490 } else { 3491 if (symbol_name && symbol_name[0] == '_') { 3492 symbol_name++; // Skip the leading underscore 3493 } 3494 3495 if (symbol_name) { 3496 ConstString const_symbol_name(symbol_name); 3497 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 3498 if (is_gsym && is_debug) { 3499 const char *gsym_name = 3500 sym[sym_idx] 3501 .GetMangled() 3502 .GetName(Mangled::ePreferMangled) 3503 .GetCString(); 3504 if (gsym_name) 3505 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 3506 } 3507 } 3508 } 3509 if (symbol_section) { 3510 const addr_t section_file_addr = 3511 symbol_section->GetFileAddress(); 3512 if (symbol_byte_size == 0 && 3513 function_starts_count > 0) { 3514 addr_t symbol_lookup_file_addr = nlist.n_value; 3515 // Do an exact address match for non-ARM addresses, 3516 // else get the closest since the symbol might be a 3517 // thumb symbol which has an address with bit zero 3518 // set 3519 FunctionStarts::Entry *func_start_entry = 3520 function_starts.FindEntry(symbol_lookup_file_addr, 3521 !is_arm); 3522 if (is_arm && func_start_entry) { 3523 // Verify that the function start address is the 3524 // symbol address (ARM) or the symbol address + 1 3525 // (thumb) 3526 if (func_start_entry->addr != 3527 symbol_lookup_file_addr && 3528 func_start_entry->addr != 3529 (symbol_lookup_file_addr + 1)) { 3530 // Not the right entry, NULL it out... 3531 func_start_entry = NULL; 3532 } 3533 } 3534 if (func_start_entry) { 3535 func_start_entry->data = true; 3536 3537 addr_t symbol_file_addr = func_start_entry->addr; 3538 uint32_t symbol_flags = 0; 3539 if (is_arm) { 3540 if (symbol_file_addr & 1) 3541 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 3542 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3543 } 3544 3545 const FunctionStarts::Entry *next_func_start_entry = 3546 function_starts.FindNextEntry(func_start_entry); 3547 const addr_t section_end_file_addr = 3548 section_file_addr + 3549 symbol_section->GetByteSize(); 3550 if (next_func_start_entry) { 3551 addr_t next_symbol_file_addr = 3552 next_func_start_entry->addr; 3553 // Be sure the clear the Thumb address bit when 3554 // we calculate the size from the current and 3555 // next address 3556 if (is_arm) 3557 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 3558 symbol_byte_size = std::min<lldb::addr_t>( 3559 next_symbol_file_addr - symbol_file_addr, 3560 section_end_file_addr - symbol_file_addr); 3561 } else { 3562 symbol_byte_size = 3563 section_end_file_addr - symbol_file_addr; 3564 } 3565 } 3566 } 3567 symbol_value -= section_file_addr; 3568 } 3569 3570 if (is_debug == false) { 3571 if (type == eSymbolTypeCode) { 3572 // See if we can find a N_FUN entry for any code 3573 // symbols. If we do find a match, and the name 3574 // matches, then we can merge the two into just the 3575 // function symbol to avoid duplicate entries in 3576 // the symbol table 3577 auto range = 3578 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 3579 if (range.first != range.second) { 3580 bool found_it = false; 3581 for (auto pos = range.first; pos != range.second; 3582 ++pos) { 3583 if (sym[sym_idx].GetMangled().GetName( 3584 Mangled::ePreferMangled) == 3585 sym[pos->second].GetMangled().GetName( 3586 Mangled::ePreferMangled)) { 3587 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3588 // We just need the flags from the linker 3589 // symbol, so put these flags 3590 // into the N_FUN flags to avoid duplicate 3591 // symbols in the symbol table 3592 sym[pos->second].SetExternal( 3593 sym[sym_idx].IsExternal()); 3594 sym[pos->second].SetFlags(nlist.n_type << 16 | 3595 nlist.n_desc); 3596 if (resolver_addresses.find(nlist.n_value) != 3597 resolver_addresses.end()) 3598 sym[pos->second].SetType(eSymbolTypeResolver); 3599 sym[sym_idx].Clear(); 3600 found_it = true; 3601 break; 3602 } 3603 } 3604 if (found_it) 3605 continue; 3606 } else { 3607 if (resolver_addresses.find(nlist.n_value) != 3608 resolver_addresses.end()) 3609 type = eSymbolTypeResolver; 3610 } 3611 } else if (type == eSymbolTypeData || 3612 type == eSymbolTypeObjCClass || 3613 type == eSymbolTypeObjCMetaClass || 3614 type == eSymbolTypeObjCIVar) { 3615 // See if we can find a N_STSYM entry for any data 3616 // symbols. If we do find a match, and the name 3617 // matches, then we can merge the two into just the 3618 // Static symbol to avoid duplicate entries in the 3619 // symbol table 3620 auto range = N_STSYM_addr_to_sym_idx.equal_range( 3621 nlist.n_value); 3622 if (range.first != range.second) { 3623 bool found_it = false; 3624 for (auto pos = range.first; pos != range.second; 3625 ++pos) { 3626 if (sym[sym_idx].GetMangled().GetName( 3627 Mangled::ePreferMangled) == 3628 sym[pos->second].GetMangled().GetName( 3629 Mangled::ePreferMangled)) { 3630 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 3631 // We just need the flags from the linker 3632 // symbol, so put these flags 3633 // into the N_STSYM flags to avoid duplicate 3634 // symbols in the symbol table 3635 sym[pos->second].SetExternal( 3636 sym[sym_idx].IsExternal()); 3637 sym[pos->second].SetFlags(nlist.n_type << 16 | 3638 nlist.n_desc); 3639 sym[sym_idx].Clear(); 3640 found_it = true; 3641 break; 3642 } 3643 } 3644 if (found_it) 3645 continue; 3646 } else { 3647 const char *gsym_name = 3648 sym[sym_idx] 3649 .GetMangled() 3650 .GetName(Mangled::ePreferMangled) 3651 .GetCString(); 3652 if (gsym_name) { 3653 // Combine N_GSYM stab entries with the non 3654 // stab symbol 3655 ConstNameToSymbolIndexMap::const_iterator pos = 3656 N_GSYM_name_to_sym_idx.find(gsym_name); 3657 if (pos != N_GSYM_name_to_sym_idx.end()) { 3658 const uint32_t GSYM_sym_idx = pos->second; 3659 m_nlist_idx_to_sym_idx[nlist_idx] = 3660 GSYM_sym_idx; 3661 // Copy the address, because often the N_GSYM 3662 // address has an invalid address of zero 3663 // when the global is a common symbol 3664 sym[GSYM_sym_idx].GetAddressRef().SetSection( 3665 symbol_section); 3666 sym[GSYM_sym_idx].GetAddressRef().SetOffset( 3667 symbol_value); 3668 add_symbol_addr(sym[GSYM_sym_idx] 3669 .GetAddress() 3670 .GetFileAddress()); 3671 // We just need the flags from the linker 3672 // symbol, so put these flags 3673 // into the N_GSYM flags to avoid duplicate 3674 // symbols in the symbol table 3675 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | 3676 nlist.n_desc); 3677 sym[sym_idx].Clear(); 3678 continue; 3679 } 3680 } 3681 } 3682 } 3683 } 3684 3685 sym[sym_idx].SetID(nlist_idx); 3686 sym[sym_idx].SetType(type); 3687 if (set_value) { 3688 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 3689 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 3690 add_symbol_addr( 3691 sym[sym_idx].GetAddress().GetFileAddress()); 3692 } 3693 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 3694 3695 if (symbol_byte_size > 0) 3696 sym[sym_idx].SetByteSize(symbol_byte_size); 3697 3698 if (demangled_is_synthesized) 3699 sym[sym_idx].SetDemangledNameIsSynthesized(true); 3700 ++sym_idx; 3701 } else { 3702 sym[sym_idx].Clear(); 3703 } 3704 } 3705 ///////////////////////////// 3706 } 3707 } 3708 3709 for (const auto &pos : reexport_shlib_needs_fixup) { 3710 const auto undef_pos = undefined_name_to_desc.find(pos.second); 3711 if (undef_pos != undefined_name_to_desc.end()) { 3712 const uint8_t dylib_ordinal = 3713 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 3714 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 3715 sym[pos.first].SetReExportedSymbolSharedLibrary( 3716 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 3717 } 3718 } 3719 } 3720 3721 #endif 3722 lldb::offset_t nlist_data_offset = 0; 3723 3724 if (nlist_data.GetByteSize() > 0) { 3725 3726 // If the sym array was not created while parsing the DSC unmapped 3727 // symbols, create it now. 3728 if (sym == nullptr) { 3729 sym = 3730 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms); 3731 num_syms = symtab.GetNumSymbols(); 3732 } 3733 3734 if (unmapped_local_symbols_found) { 3735 assert(m_dysymtab.ilocalsym == 0); 3736 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size); 3737 nlist_idx = m_dysymtab.nlocalsym; 3738 } else { 3739 nlist_idx = 0; 3740 } 3741 3742 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap; 3743 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName; 3744 UndefinedNameToDescMap undefined_name_to_desc; 3745 SymbolIndexToName reexport_shlib_needs_fixup; 3746 3747 // Symtab parsing is a huge mess. Everything is entangled and the code 3748 // requires access to a ridiculous amount of variables. LLDB depends 3749 // heavily on the proper merging of symbols and to get that right we need 3750 // to make sure we have parsed all the debug symbols first. Therefore we 3751 // invoke the lambda twice, once to parse only the debug symbols and then 3752 // once more to parse the remaining symbols. 3753 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, 3754 bool debug_only) { 3755 const bool is_debug = ((nlist.n_type & N_STAB) != 0); 3756 if (is_debug != debug_only) 3757 return true; 3758 3759 const char *symbol_name_non_abi_mangled = nullptr; 3760 const char *symbol_name = nullptr; 3761 3762 if (have_strtab_data) { 3763 symbol_name = strtab_data.PeekCStr(nlist.n_strx); 3764 3765 if (symbol_name == nullptr) { 3766 // No symbol should be NULL, even the symbols with no string values 3767 // should have an offset zero which points to an empty C-string 3768 Debugger::ReportError(llvm::formatv( 3769 "symbol[{0}] has invalid string table offset {1:x} in {2}, " 3770 "ignoring symbol", 3771 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath())); 3772 return true; 3773 } 3774 if (symbol_name[0] == '\0') 3775 symbol_name = nullptr; 3776 } else { 3777 const addr_t str_addr = strtab_addr + nlist.n_strx; 3778 Status str_error; 3779 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name, 3780 str_error)) 3781 symbol_name = memory_symbol_name.c_str(); 3782 } 3783 3784 SymbolType type = eSymbolTypeInvalid; 3785 SectionSP symbol_section; 3786 lldb::addr_t symbol_byte_size = 0; 3787 bool add_nlist = true; 3788 bool is_gsym = false; 3789 bool demangled_is_synthesized = false; 3790 bool set_value = true; 3791 3792 assert(sym_idx < num_syms); 3793 sym[sym_idx].SetDebug(is_debug); 3794 3795 if (is_debug) { 3796 switch (nlist.n_type) { 3797 case N_GSYM: 3798 // global symbol: name,,NO_SECT,type,0 3799 // Sometimes the N_GSYM value contains the address. 3800 3801 // FIXME: In the .o files, we have a GSYM and a debug symbol for all 3802 // the ObjC data. They 3803 // have the same address, but we want to ensure that we always find 3804 // only the real symbol, 'cause we don't currently correctly 3805 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol 3806 // type. This is a temporary hack to make sure the ObjectiveC 3807 // symbols get treated correctly. To do this right, we should 3808 // coalesce all the GSYM & global symbols that have the same 3809 // address. 3810 is_gsym = true; 3811 sym[sym_idx].SetExternal(true); 3812 3813 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') { 3814 llvm::StringRef symbol_name_ref(symbol_name); 3815 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 3816 symbol_name_non_abi_mangled = symbol_name + 1; 3817 symbol_name = symbol_name + g_objc_v2_prefix_class.size(); 3818 type = eSymbolTypeObjCClass; 3819 demangled_is_synthesized = true; 3820 3821 } else if (symbol_name_ref.starts_with( 3822 g_objc_v2_prefix_metaclass)) { 3823 symbol_name_non_abi_mangled = symbol_name + 1; 3824 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size(); 3825 type = eSymbolTypeObjCMetaClass; 3826 demangled_is_synthesized = true; 3827 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) { 3828 symbol_name_non_abi_mangled = symbol_name + 1; 3829 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size(); 3830 type = eSymbolTypeObjCIVar; 3831 demangled_is_synthesized = true; 3832 } 3833 } else { 3834 if (nlist.n_value != 0) 3835 symbol_section = 3836 section_info.GetSection(nlist.n_sect, nlist.n_value); 3837 type = eSymbolTypeData; 3838 } 3839 break; 3840 3841 case N_FNAME: 3842 // procedure name (f77 kludge): name,,NO_SECT,0,0 3843 type = eSymbolTypeCompiler; 3844 break; 3845 3846 case N_FUN: 3847 // procedure: name,,n_sect,linenumber,address 3848 if (symbol_name) { 3849 type = eSymbolTypeCode; 3850 symbol_section = 3851 section_info.GetSection(nlist.n_sect, nlist.n_value); 3852 3853 N_FUN_addr_to_sym_idx.insert( 3854 std::make_pair(nlist.n_value, sym_idx)); 3855 // We use the current number of symbols in the symbol table in 3856 // lieu of using nlist_idx in case we ever start trimming entries 3857 // out 3858 N_FUN_indexes.push_back(sym_idx); 3859 } else { 3860 type = eSymbolTypeCompiler; 3861 3862 if (!N_FUN_indexes.empty()) { 3863 // Copy the size of the function into the original STAB entry 3864 // so we don't have to hunt for it later 3865 symtab.SymbolAtIndex(N_FUN_indexes.back()) 3866 ->SetByteSize(nlist.n_value); 3867 N_FUN_indexes.pop_back(); 3868 // We don't really need the end function STAB as it contains 3869 // the size which we already placed with the original symbol, 3870 // so don't add it if we want a minimal symbol table 3871 add_nlist = false; 3872 } 3873 } 3874 break; 3875 3876 case N_STSYM: 3877 // static symbol: name,,n_sect,type,address 3878 N_STSYM_addr_to_sym_idx.insert( 3879 std::make_pair(nlist.n_value, sym_idx)); 3880 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3881 if (symbol_name && symbol_name[0]) { 3882 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1, 3883 eSymbolTypeData); 3884 } 3885 break; 3886 3887 case N_LCSYM: 3888 // .lcomm symbol: name,,n_sect,type,address 3889 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3890 type = eSymbolTypeCommonBlock; 3891 break; 3892 3893 case N_BNSYM: 3894 // We use the current number of symbols in the symbol table in lieu 3895 // of using nlist_idx in case we ever start trimming entries out 3896 // Skip these if we want minimal symbol tables 3897 add_nlist = false; 3898 break; 3899 3900 case N_ENSYM: 3901 // Set the size of the N_BNSYM to the terminating index of this 3902 // N_ENSYM so that we can always skip the entire symbol if we need 3903 // to navigate more quickly at the source level when parsing STABS 3904 // Skip these if we want minimal symbol tables 3905 add_nlist = false; 3906 break; 3907 3908 case N_OPT: 3909 // emitted with gcc2_compiled and in gcc source 3910 type = eSymbolTypeCompiler; 3911 break; 3912 3913 case N_RSYM: 3914 // register sym: name,,NO_SECT,type,register 3915 type = eSymbolTypeVariable; 3916 break; 3917 3918 case N_SLINE: 3919 // src line: 0,,n_sect,linenumber,address 3920 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 3921 type = eSymbolTypeLineEntry; 3922 break; 3923 3924 case N_SSYM: 3925 // structure elt: name,,NO_SECT,type,struct_offset 3926 type = eSymbolTypeVariableType; 3927 break; 3928 3929 case N_SO: 3930 // source file name 3931 type = eSymbolTypeSourceFile; 3932 if (symbol_name == nullptr) { 3933 add_nlist = false; 3934 if (N_SO_index != UINT32_MAX) { 3935 // Set the size of the N_SO to the terminating index of this 3936 // N_SO so that we can always skip the entire N_SO if we need 3937 // to navigate more quickly at the source level when parsing 3938 // STABS 3939 symbol_ptr = symtab.SymbolAtIndex(N_SO_index); 3940 symbol_ptr->SetByteSize(sym_idx); 3941 symbol_ptr->SetSizeIsSibling(true); 3942 } 3943 N_NSYM_indexes.clear(); 3944 N_INCL_indexes.clear(); 3945 N_BRAC_indexes.clear(); 3946 N_COMM_indexes.clear(); 3947 N_FUN_indexes.clear(); 3948 N_SO_index = UINT32_MAX; 3949 } else { 3950 // We use the current number of symbols in the symbol table in 3951 // lieu of using nlist_idx in case we ever start trimming entries 3952 // out 3953 const bool N_SO_has_full_path = symbol_name[0] == '/'; 3954 if (N_SO_has_full_path) { 3955 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) { 3956 // We have two consecutive N_SO entries where the first 3957 // contains a directory and the second contains a full path. 3958 sym[sym_idx - 1].GetMangled().SetValue( 3959 ConstString(symbol_name)); 3960 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 3961 add_nlist = false; 3962 } else { 3963 // This is the first entry in a N_SO that contains a 3964 // directory or a full path to the source file 3965 N_SO_index = sym_idx; 3966 } 3967 } else if ((N_SO_index == sym_idx - 1) && 3968 ((sym_idx - 1) < num_syms)) { 3969 // This is usually the second N_SO entry that contains just the 3970 // filename, so here we combine it with the first one if we are 3971 // minimizing the symbol table 3972 const char *so_path = 3973 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString(); 3974 if (so_path && so_path[0]) { 3975 std::string full_so_path(so_path); 3976 const size_t double_slash_pos = full_so_path.find("//"); 3977 if (double_slash_pos != std::string::npos) { 3978 // The linker has been generating bad N_SO entries with 3979 // doubled up paths in the format "%s%s" where the first 3980 // string in the DW_AT_comp_dir, and the second is the 3981 // directory for the source file so you end up with a path 3982 // that looks like "/tmp/src//tmp/src/" 3983 FileSpec so_dir(so_path); 3984 if (!FileSystem::Instance().Exists(so_dir)) { 3985 so_dir.SetFile(&full_so_path[double_slash_pos + 1], 3986 FileSpec::Style::native); 3987 if (FileSystem::Instance().Exists(so_dir)) { 3988 // Trim off the incorrect path 3989 full_so_path.erase(0, double_slash_pos + 1); 3990 } 3991 } 3992 } 3993 if (*full_so_path.rbegin() != '/') 3994 full_so_path += '/'; 3995 full_so_path += symbol_name; 3996 sym[sym_idx - 1].GetMangled().SetValue( 3997 ConstString(full_so_path.c_str())); 3998 add_nlist = false; 3999 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1; 4000 } 4001 } else { 4002 // This could be a relative path to a N_SO 4003 N_SO_index = sym_idx; 4004 } 4005 } 4006 break; 4007 4008 case N_OSO: 4009 // object file name: name,,0,0,st_mtime 4010 type = eSymbolTypeObjectFile; 4011 break; 4012 4013 case N_LSYM: 4014 // local sym: name,,NO_SECT,type,offset 4015 type = eSymbolTypeLocal; 4016 break; 4017 4018 // INCL scopes 4019 case N_BINCL: 4020 // include file beginning: name,,NO_SECT,0,sum We use the current 4021 // number of symbols in the symbol table in lieu of using nlist_idx 4022 // in case we ever start trimming entries out 4023 N_INCL_indexes.push_back(sym_idx); 4024 type = eSymbolTypeScopeBegin; 4025 break; 4026 4027 case N_EINCL: 4028 // include file end: name,,NO_SECT,0,0 4029 // Set the size of the N_BINCL to the terminating index of this 4030 // N_EINCL so that we can always skip the entire symbol if we need 4031 // to navigate more quickly at the source level when parsing STABS 4032 if (!N_INCL_indexes.empty()) { 4033 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back()); 4034 symbol_ptr->SetByteSize(sym_idx + 1); 4035 symbol_ptr->SetSizeIsSibling(true); 4036 N_INCL_indexes.pop_back(); 4037 } 4038 type = eSymbolTypeScopeEnd; 4039 break; 4040 4041 case N_SOL: 4042 // #included file name: name,,n_sect,0,address 4043 type = eSymbolTypeHeaderFile; 4044 4045 // We currently don't use the header files on darwin 4046 add_nlist = false; 4047 break; 4048 4049 case N_PARAMS: 4050 // compiler parameters: name,,NO_SECT,0,0 4051 type = eSymbolTypeCompiler; 4052 break; 4053 4054 case N_VERSION: 4055 // compiler version: name,,NO_SECT,0,0 4056 type = eSymbolTypeCompiler; 4057 break; 4058 4059 case N_OLEVEL: 4060 // compiler -O level: name,,NO_SECT,0,0 4061 type = eSymbolTypeCompiler; 4062 break; 4063 4064 case N_PSYM: 4065 // parameter: name,,NO_SECT,type,offset 4066 type = eSymbolTypeVariable; 4067 break; 4068 4069 case N_ENTRY: 4070 // alternate entry: name,,n_sect,linenumber,address 4071 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4072 type = eSymbolTypeLineEntry; 4073 break; 4074 4075 // Left and Right Braces 4076 case N_LBRAC: 4077 // left bracket: 0,,NO_SECT,nesting level,address We use the 4078 // current number of symbols in the symbol table in lieu of using 4079 // nlist_idx in case we ever start trimming entries out 4080 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4081 N_BRAC_indexes.push_back(sym_idx); 4082 type = eSymbolTypeScopeBegin; 4083 break; 4084 4085 case N_RBRAC: 4086 // right bracket: 0,,NO_SECT,nesting level,address Set the size of 4087 // the N_LBRAC to the terminating index of this N_RBRAC so that we 4088 // can always skip the entire symbol if we need to navigate more 4089 // quickly at the source level when parsing STABS 4090 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4091 if (!N_BRAC_indexes.empty()) { 4092 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back()); 4093 symbol_ptr->SetByteSize(sym_idx + 1); 4094 symbol_ptr->SetSizeIsSibling(true); 4095 N_BRAC_indexes.pop_back(); 4096 } 4097 type = eSymbolTypeScopeEnd; 4098 break; 4099 4100 case N_EXCL: 4101 // deleted include file: name,,NO_SECT,0,sum 4102 type = eSymbolTypeHeaderFile; 4103 break; 4104 4105 // COMM scopes 4106 case N_BCOMM: 4107 // begin common: name,,NO_SECT,0,0 4108 // We use the current number of symbols in the symbol table in lieu 4109 // of using nlist_idx in case we ever start trimming entries out 4110 type = eSymbolTypeScopeBegin; 4111 N_COMM_indexes.push_back(sym_idx); 4112 break; 4113 4114 case N_ECOML: 4115 // end common (local name): 0,,n_sect,0,address 4116 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4117 [[fallthrough]]; 4118 4119 case N_ECOMM: 4120 // end common: name,,n_sect,0,0 4121 // Set the size of the N_BCOMM to the terminating index of this 4122 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if 4123 // we need to navigate more quickly at the source level when 4124 // parsing STABS 4125 if (!N_COMM_indexes.empty()) { 4126 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back()); 4127 symbol_ptr->SetByteSize(sym_idx + 1); 4128 symbol_ptr->SetSizeIsSibling(true); 4129 N_COMM_indexes.pop_back(); 4130 } 4131 type = eSymbolTypeScopeEnd; 4132 break; 4133 4134 case N_LENG: 4135 // second stab entry with length information 4136 type = eSymbolTypeAdditional; 4137 break; 4138 4139 default: 4140 break; 4141 } 4142 } else { 4143 uint8_t n_type = N_TYPE & nlist.n_type; 4144 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); 4145 4146 switch (n_type) { 4147 case N_INDR: { 4148 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value); 4149 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) { 4150 type = eSymbolTypeReExported; 4151 ConstString reexport_name(reexport_name_cstr + 4152 ((reexport_name_cstr[0] == '_') ? 1 : 0)); 4153 sym[sym_idx].SetReExportedSymbolName(reexport_name); 4154 set_value = false; 4155 reexport_shlib_needs_fixup[sym_idx] = reexport_name; 4156 indirect_symbol_names.insert( 4157 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0))); 4158 } else 4159 type = eSymbolTypeUndefined; 4160 } break; 4161 4162 case N_UNDF: 4163 if (symbol_name && symbol_name[0]) { 4164 ConstString undefined_name(symbol_name + 4165 ((symbol_name[0] == '_') ? 1 : 0)); 4166 undefined_name_to_desc[undefined_name] = nlist.n_desc; 4167 } 4168 [[fallthrough]]; 4169 4170 case N_PBUD: 4171 type = eSymbolTypeUndefined; 4172 break; 4173 4174 case N_ABS: 4175 type = eSymbolTypeAbsolute; 4176 break; 4177 4178 case N_SECT: { 4179 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value); 4180 4181 if (!symbol_section) { 4182 // TODO: warn about this? 4183 add_nlist = false; 4184 break; 4185 } 4186 4187 if (TEXT_eh_frame_sectID == nlist.n_sect) { 4188 type = eSymbolTypeException; 4189 } else { 4190 uint32_t section_type = symbol_section->Get() & SECTION_TYPE; 4191 4192 switch (section_type) { 4193 case S_CSTRING_LITERALS: 4194 type = eSymbolTypeData; 4195 break; // section with only literal C strings 4196 case S_4BYTE_LITERALS: 4197 type = eSymbolTypeData; 4198 break; // section with only 4 byte literals 4199 case S_8BYTE_LITERALS: 4200 type = eSymbolTypeData; 4201 break; // section with only 8 byte literals 4202 case S_LITERAL_POINTERS: 4203 type = eSymbolTypeTrampoline; 4204 break; // section with only pointers to literals 4205 case S_NON_LAZY_SYMBOL_POINTERS: 4206 type = eSymbolTypeTrampoline; 4207 break; // section with only non-lazy symbol pointers 4208 case S_LAZY_SYMBOL_POINTERS: 4209 type = eSymbolTypeTrampoline; 4210 break; // section with only lazy symbol pointers 4211 case S_SYMBOL_STUBS: 4212 type = eSymbolTypeTrampoline; 4213 break; // section with only symbol stubs, byte size of stub in 4214 // the reserved2 field 4215 case S_MOD_INIT_FUNC_POINTERS: 4216 type = eSymbolTypeCode; 4217 break; // section with only function pointers for initialization 4218 case S_MOD_TERM_FUNC_POINTERS: 4219 type = eSymbolTypeCode; 4220 break; // section with only function pointers for termination 4221 case S_INTERPOSING: 4222 type = eSymbolTypeTrampoline; 4223 break; // section with only pairs of function pointers for 4224 // interposing 4225 case S_16BYTE_LITERALS: 4226 type = eSymbolTypeData; 4227 break; // section with only 16 byte literals 4228 case S_DTRACE_DOF: 4229 type = eSymbolTypeInstrumentation; 4230 break; 4231 case S_LAZY_DYLIB_SYMBOL_POINTERS: 4232 type = eSymbolTypeTrampoline; 4233 break; 4234 default: 4235 switch (symbol_section->GetType()) { 4236 case lldb::eSectionTypeCode: 4237 type = eSymbolTypeCode; 4238 break; 4239 case eSectionTypeData: 4240 case eSectionTypeDataCString: // Inlined C string data 4241 case eSectionTypeDataCStringPointers: // Pointers to C string 4242 // data 4243 case eSectionTypeDataSymbolAddress: // Address of a symbol in 4244 // the symbol table 4245 case eSectionTypeData4: 4246 case eSectionTypeData8: 4247 case eSectionTypeData16: 4248 type = eSymbolTypeData; 4249 break; 4250 default: 4251 break; 4252 } 4253 break; 4254 } 4255 4256 if (type == eSymbolTypeInvalid) { 4257 const char *symbol_sect_name = 4258 symbol_section->GetName().AsCString(); 4259 if (symbol_section->IsDescendant(text_section_sp.get())) { 4260 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS | 4261 S_ATTR_SELF_MODIFYING_CODE | 4262 S_ATTR_SOME_INSTRUCTIONS)) 4263 type = eSymbolTypeData; 4264 else 4265 type = eSymbolTypeCode; 4266 } else if (symbol_section->IsDescendant(data_section_sp.get()) || 4267 symbol_section->IsDescendant( 4268 data_dirty_section_sp.get()) || 4269 symbol_section->IsDescendant( 4270 data_const_section_sp.get())) { 4271 if (symbol_sect_name && 4272 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) { 4273 type = eSymbolTypeRuntime; 4274 4275 if (symbol_name) { 4276 llvm::StringRef symbol_name_ref(symbol_name); 4277 if (symbol_name_ref.starts_with("_OBJC_")) { 4278 llvm::StringRef g_objc_v2_prefix_class( 4279 "_OBJC_CLASS_$_"); 4280 llvm::StringRef g_objc_v2_prefix_metaclass( 4281 "_OBJC_METACLASS_$_"); 4282 llvm::StringRef g_objc_v2_prefix_ivar( 4283 "_OBJC_IVAR_$_"); 4284 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) { 4285 symbol_name_non_abi_mangled = symbol_name + 1; 4286 symbol_name = 4287 symbol_name + g_objc_v2_prefix_class.size(); 4288 type = eSymbolTypeObjCClass; 4289 demangled_is_synthesized = true; 4290 } else if (symbol_name_ref.starts_with( 4291 g_objc_v2_prefix_metaclass)) { 4292 symbol_name_non_abi_mangled = symbol_name + 1; 4293 symbol_name = 4294 symbol_name + g_objc_v2_prefix_metaclass.size(); 4295 type = eSymbolTypeObjCMetaClass; 4296 demangled_is_synthesized = true; 4297 } else if (symbol_name_ref.starts_with( 4298 g_objc_v2_prefix_ivar)) { 4299 symbol_name_non_abi_mangled = symbol_name + 1; 4300 symbol_name = 4301 symbol_name + g_objc_v2_prefix_ivar.size(); 4302 type = eSymbolTypeObjCIVar; 4303 demangled_is_synthesized = true; 4304 } 4305 } 4306 } 4307 } else if (symbol_sect_name && 4308 ::strstr(symbol_sect_name, "__gcc_except_tab") == 4309 symbol_sect_name) { 4310 type = eSymbolTypeException; 4311 } else { 4312 type = eSymbolTypeData; 4313 } 4314 } else if (symbol_sect_name && 4315 ::strstr(symbol_sect_name, "__IMPORT") == 4316 symbol_sect_name) { 4317 type = eSymbolTypeTrampoline; 4318 } else if (symbol_section->IsDescendant(objc_section_sp.get())) { 4319 type = eSymbolTypeRuntime; 4320 if (symbol_name && symbol_name[0] == '.') { 4321 llvm::StringRef symbol_name_ref(symbol_name); 4322 llvm::StringRef g_objc_v1_prefix_class( 4323 ".objc_class_name_"); 4324 if (symbol_name_ref.starts_with(g_objc_v1_prefix_class)) { 4325 symbol_name_non_abi_mangled = symbol_name; 4326 symbol_name = symbol_name + g_objc_v1_prefix_class.size(); 4327 type = eSymbolTypeObjCClass; 4328 demangled_is_synthesized = true; 4329 } 4330 } 4331 } 4332 } 4333 } 4334 } break; 4335 } 4336 } 4337 4338 if (!add_nlist) { 4339 sym[sym_idx].Clear(); 4340 return true; 4341 } 4342 4343 uint64_t symbol_value = nlist.n_value; 4344 4345 if (symbol_name_non_abi_mangled) { 4346 sym[sym_idx].GetMangled().SetMangledName( 4347 ConstString(symbol_name_non_abi_mangled)); 4348 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); 4349 } else { 4350 4351 if (symbol_name && symbol_name[0] == '_') { 4352 symbol_name++; // Skip the leading underscore 4353 } 4354 4355 if (symbol_name) { 4356 ConstString const_symbol_name(symbol_name); 4357 sym[sym_idx].GetMangled().SetValue(const_symbol_name); 4358 } 4359 } 4360 4361 if (is_gsym) { 4362 const char *gsym_name = sym[sym_idx] 4363 .GetMangled() 4364 .GetName(Mangled::ePreferMangled) 4365 .GetCString(); 4366 if (gsym_name) 4367 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; 4368 } 4369 4370 if (symbol_section) { 4371 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4372 if (symbol_byte_size == 0 && function_starts_count > 0) { 4373 addr_t symbol_lookup_file_addr = nlist.n_value; 4374 // Do an exact address match for non-ARM addresses, else get the 4375 // closest since the symbol might be a thumb symbol which has an 4376 // address with bit zero set. 4377 FunctionStarts::Entry *func_start_entry = 4378 function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); 4379 if (is_arm && func_start_entry) { 4380 // Verify that the function start address is the symbol address 4381 // (ARM) or the symbol address + 1 (thumb). 4382 if (func_start_entry->addr != symbol_lookup_file_addr && 4383 func_start_entry->addr != (symbol_lookup_file_addr + 1)) { 4384 // Not the right entry, NULL it out... 4385 func_start_entry = nullptr; 4386 } 4387 } 4388 if (func_start_entry) { 4389 func_start_entry->data = true; 4390 4391 addr_t symbol_file_addr = func_start_entry->addr; 4392 if (is_arm) 4393 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4394 4395 const FunctionStarts::Entry *next_func_start_entry = 4396 function_starts.FindNextEntry(func_start_entry); 4397 const addr_t section_end_file_addr = 4398 section_file_addr + symbol_section->GetByteSize(); 4399 if (next_func_start_entry) { 4400 addr_t next_symbol_file_addr = next_func_start_entry->addr; 4401 // Be sure the clear the Thumb address bit when we calculate the 4402 // size from the current and next address 4403 if (is_arm) 4404 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4405 symbol_byte_size = std::min<lldb::addr_t>( 4406 next_symbol_file_addr - symbol_file_addr, 4407 section_end_file_addr - symbol_file_addr); 4408 } else { 4409 symbol_byte_size = section_end_file_addr - symbol_file_addr; 4410 } 4411 } 4412 } 4413 symbol_value -= section_file_addr; 4414 } 4415 4416 if (!is_debug) { 4417 if (type == eSymbolTypeCode) { 4418 // See if we can find a N_FUN entry for any code symbols. If we do 4419 // find a match, and the name matches, then we can merge the two into 4420 // just the function symbol to avoid duplicate entries in the symbol 4421 // table. 4422 std::pair<ValueToSymbolIndexMap::const_iterator, 4423 ValueToSymbolIndexMap::const_iterator> 4424 range; 4425 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); 4426 if (range.first != range.second) { 4427 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4428 pos != range.second; ++pos) { 4429 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4430 sym[pos->second].GetMangled().GetName( 4431 Mangled::ePreferMangled)) { 4432 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4433 // We just need the flags from the linker symbol, so put these 4434 // flags into the N_FUN flags to avoid duplicate symbols in the 4435 // symbol table. 4436 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4437 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4438 if (resolver_addresses.find(nlist.n_value) != 4439 resolver_addresses.end()) 4440 sym[pos->second].SetType(eSymbolTypeResolver); 4441 sym[sym_idx].Clear(); 4442 return true; 4443 } 4444 } 4445 } else { 4446 if (resolver_addresses.find(nlist.n_value) != 4447 resolver_addresses.end()) 4448 type = eSymbolTypeResolver; 4449 } 4450 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || 4451 type == eSymbolTypeObjCMetaClass || 4452 type == eSymbolTypeObjCIVar) { 4453 // See if we can find a N_STSYM entry for any data symbols. If we do 4454 // find a match, and the name matches, then we can merge the two into 4455 // just the Static symbol to avoid duplicate entries in the symbol 4456 // table. 4457 std::pair<ValueToSymbolIndexMap::const_iterator, 4458 ValueToSymbolIndexMap::const_iterator> 4459 range; 4460 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); 4461 if (range.first != range.second) { 4462 for (ValueToSymbolIndexMap::const_iterator pos = range.first; 4463 pos != range.second; ++pos) { 4464 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == 4465 sym[pos->second].GetMangled().GetName( 4466 Mangled::ePreferMangled)) { 4467 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; 4468 // We just need the flags from the linker symbol, so put these 4469 // flags into the N_STSYM flags to avoid duplicate symbols in 4470 // the symbol table. 4471 sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); 4472 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4473 sym[sym_idx].Clear(); 4474 return true; 4475 } 4476 } 4477 } else { 4478 // Combine N_GSYM stab entries with the non stab symbol. 4479 const char *gsym_name = sym[sym_idx] 4480 .GetMangled() 4481 .GetName(Mangled::ePreferMangled) 4482 .GetCString(); 4483 if (gsym_name) { 4484 ConstNameToSymbolIndexMap::const_iterator pos = 4485 N_GSYM_name_to_sym_idx.find(gsym_name); 4486 if (pos != N_GSYM_name_to_sym_idx.end()) { 4487 const uint32_t GSYM_sym_idx = pos->second; 4488 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; 4489 // Copy the address, because often the N_GSYM address has an 4490 // invalid address of zero when the global is a common symbol. 4491 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); 4492 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); 4493 add_symbol_addr( 4494 sym[GSYM_sym_idx].GetAddress().GetFileAddress()); 4495 // We just need the flags from the linker symbol, so put these 4496 // flags into the N_GSYM flags to avoid duplicate symbols in 4497 // the symbol table. 4498 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4499 sym[sym_idx].Clear(); 4500 return true; 4501 } 4502 } 4503 } 4504 } 4505 } 4506 4507 sym[sym_idx].SetID(nlist_idx); 4508 sym[sym_idx].SetType(type); 4509 if (set_value) { 4510 sym[sym_idx].GetAddressRef().SetSection(symbol_section); 4511 sym[sym_idx].GetAddressRef().SetOffset(symbol_value); 4512 if (symbol_section) 4513 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress()); 4514 } 4515 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); 4516 if (nlist.n_desc & N_WEAK_REF) 4517 sym[sym_idx].SetIsWeak(true); 4518 4519 if (symbol_byte_size > 0) 4520 sym[sym_idx].SetByteSize(symbol_byte_size); 4521 4522 if (demangled_is_synthesized) 4523 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4524 4525 ++sym_idx; 4526 return true; 4527 }; 4528 4529 // First parse all the nlists but don't process them yet. See the next 4530 // comment for an explanation why. 4531 std::vector<struct nlist_64> nlists; 4532 nlists.reserve(symtab_load_command.nsyms); 4533 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { 4534 if (auto nlist = 4535 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) 4536 nlists.push_back(*nlist); 4537 else 4538 break; 4539 } 4540 4541 // Now parse all the debug symbols. This is needed to merge non-debug 4542 // symbols in the next step. Non-debug symbols are always coalesced into 4543 // the debug symbol. Doing this in one step would mean that some symbols 4544 // won't be merged. 4545 nlist_idx = 0; 4546 for (auto &nlist : nlists) { 4547 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) 4548 break; 4549 } 4550 4551 // Finally parse all the non debug symbols. 4552 nlist_idx = 0; 4553 for (auto &nlist : nlists) { 4554 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) 4555 break; 4556 } 4557 4558 for (const auto &pos : reexport_shlib_needs_fixup) { 4559 const auto undef_pos = undefined_name_to_desc.find(pos.second); 4560 if (undef_pos != undefined_name_to_desc.end()) { 4561 const uint8_t dylib_ordinal = 4562 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second); 4563 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize()) 4564 sym[pos.first].SetReExportedSymbolSharedLibrary( 4565 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1)); 4566 } 4567 } 4568 } 4569 4570 // Count how many trie symbols we'll add to the symbol table 4571 int trie_symbol_table_augment_count = 0; 4572 for (auto &e : external_sym_trie_entries) { 4573 if (!symbols_added.contains(e.entry.address)) 4574 trie_symbol_table_augment_count++; 4575 } 4576 4577 if (num_syms < sym_idx + trie_symbol_table_augment_count) { 4578 num_syms = sym_idx + trie_symbol_table_augment_count; 4579 sym = symtab.Resize(num_syms); 4580 } 4581 uint32_t synthetic_sym_id = symtab_load_command.nsyms; 4582 4583 // Add symbols from the trie to the symbol table. 4584 for (auto &e : external_sym_trie_entries) { 4585 if (symbols_added.contains(e.entry.address)) 4586 continue; 4587 4588 // Find the section that this trie address is in, use that to annotate 4589 // symbol type as we add the trie address and name to the symbol table. 4590 Address symbol_addr; 4591 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { 4592 SectionSP symbol_section(symbol_addr.GetSection()); 4593 const char *symbol_name = e.entry.name.GetCString(); 4594 bool demangled_is_synthesized = false; 4595 SymbolType type = 4596 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp, 4597 data_section_sp, data_dirty_section_sp, 4598 data_const_section_sp, symbol_section); 4599 4600 sym[sym_idx].SetType(type); 4601 if (symbol_section) { 4602 sym[sym_idx].SetID(synthetic_sym_id++); 4603 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name)); 4604 if (demangled_is_synthesized) 4605 sym[sym_idx].SetDemangledNameIsSynthesized(true); 4606 sym[sym_idx].SetIsSynthetic(true); 4607 sym[sym_idx].SetExternal(true); 4608 sym[sym_idx].GetAddressRef() = symbol_addr; 4609 add_symbol_addr(symbol_addr.GetFileAddress()); 4610 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) 4611 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); 4612 ++sym_idx; 4613 } 4614 } 4615 } 4616 4617 if (function_starts_count > 0) { 4618 uint32_t num_synthetic_function_symbols = 0; 4619 for (i = 0; i < function_starts_count; ++i) { 4620 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr)) 4621 ++num_synthetic_function_symbols; 4622 } 4623 4624 if (num_synthetic_function_symbols > 0) { 4625 if (num_syms < sym_idx + num_synthetic_function_symbols) { 4626 num_syms = sym_idx + num_synthetic_function_symbols; 4627 sym = symtab.Resize(num_syms); 4628 } 4629 for (i = 0; i < function_starts_count; ++i) { 4630 const FunctionStarts::Entry *func_start_entry = 4631 function_starts.GetEntryAtIndex(i); 4632 if (!symbols_added.contains(func_start_entry->addr)) { 4633 addr_t symbol_file_addr = func_start_entry->addr; 4634 uint32_t symbol_flags = 0; 4635 if (func_start_entry->data) 4636 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; 4637 Address symbol_addr; 4638 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { 4639 SectionSP symbol_section(symbol_addr.GetSection()); 4640 uint32_t symbol_byte_size = 0; 4641 if (symbol_section) { 4642 const addr_t section_file_addr = symbol_section->GetFileAddress(); 4643 const FunctionStarts::Entry *next_func_start_entry = 4644 function_starts.FindNextEntry(func_start_entry); 4645 const addr_t section_end_file_addr = 4646 section_file_addr + symbol_section->GetByteSize(); 4647 if (next_func_start_entry) { 4648 addr_t next_symbol_file_addr = next_func_start_entry->addr; 4649 if (is_arm) 4650 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; 4651 symbol_byte_size = std::min<lldb::addr_t>( 4652 next_symbol_file_addr - symbol_file_addr, 4653 section_end_file_addr - symbol_file_addr); 4654 } else { 4655 symbol_byte_size = section_end_file_addr - symbol_file_addr; 4656 } 4657 sym[sym_idx].SetID(synthetic_sym_id++); 4658 // Don't set the name for any synthetic symbols, the Symbol 4659 // object will generate one if needed when the name is accessed 4660 // via accessors. 4661 sym[sym_idx].GetMangled().SetDemangledName(ConstString()); 4662 sym[sym_idx].SetType(eSymbolTypeCode); 4663 sym[sym_idx].SetIsSynthetic(true); 4664 sym[sym_idx].GetAddressRef() = symbol_addr; 4665 add_symbol_addr(symbol_addr.GetFileAddress()); 4666 if (symbol_flags) 4667 sym[sym_idx].SetFlags(symbol_flags); 4668 if (symbol_byte_size) 4669 sym[sym_idx].SetByteSize(symbol_byte_size); 4670 ++sym_idx; 4671 } 4672 } 4673 } 4674 } 4675 } 4676 } 4677 4678 // Trim our symbols down to just what we ended up with after removing any 4679 // symbols. 4680 if (sym_idx < num_syms) { 4681 num_syms = sym_idx; 4682 sym = symtab.Resize(num_syms); 4683 } 4684 4685 // Now synthesize indirect symbols 4686 if (m_dysymtab.nindirectsyms != 0) { 4687 if (indirect_symbol_index_data.GetByteSize()) { 4688 NListIndexToSymbolIndexMap::const_iterator end_index_pos = 4689 m_nlist_idx_to_sym_idx.end(); 4690 4691 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size(); 4692 ++sect_idx) { 4693 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) == 4694 S_SYMBOL_STUBS) { 4695 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2; 4696 if (symbol_stub_byte_size == 0) 4697 continue; 4698 4699 const uint32_t num_symbol_stubs = 4700 m_mach_sections[sect_idx].size / symbol_stub_byte_size; 4701 4702 if (num_symbol_stubs == 0) 4703 continue; 4704 4705 const uint32_t symbol_stub_index_offset = 4706 m_mach_sections[sect_idx].reserved1; 4707 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) { 4708 const uint32_t symbol_stub_index = 4709 symbol_stub_index_offset + stub_idx; 4710 const lldb::addr_t symbol_stub_addr = 4711 m_mach_sections[sect_idx].addr + 4712 (stub_idx * symbol_stub_byte_size); 4713 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4; 4714 if (indirect_symbol_index_data.ValidOffsetForDataOfSize( 4715 symbol_stub_offset, 4)) { 4716 const uint32_t stub_sym_id = 4717 indirect_symbol_index_data.GetU32(&symbol_stub_offset); 4718 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL)) 4719 continue; 4720 4721 NListIndexToSymbolIndexMap::const_iterator index_pos = 4722 m_nlist_idx_to_sym_idx.find(stub_sym_id); 4723 Symbol *stub_symbol = nullptr; 4724 if (index_pos != end_index_pos) { 4725 // We have a remapping from the original nlist index to a 4726 // current symbol index, so just look this up by index 4727 stub_symbol = symtab.SymbolAtIndex(index_pos->second); 4728 } else { 4729 // We need to lookup a symbol using the original nlist symbol 4730 // index since this index is coming from the S_SYMBOL_STUBS 4731 stub_symbol = symtab.FindSymbolByID(stub_sym_id); 4732 } 4733 4734 if (stub_symbol) { 4735 Address so_addr(symbol_stub_addr, section_list); 4736 4737 if (stub_symbol->GetType() == eSymbolTypeUndefined) { 4738 // Change the external symbol into a trampoline that makes 4739 // sense These symbols were N_UNDF N_EXT, and are useless 4740 // to us, so we can re-use them so we don't have to make up 4741 // a synthetic symbol for no good reason. 4742 if (resolver_addresses.find(symbol_stub_addr) == 4743 resolver_addresses.end()) 4744 stub_symbol->SetType(eSymbolTypeTrampoline); 4745 else 4746 stub_symbol->SetType(eSymbolTypeResolver); 4747 stub_symbol->SetExternal(false); 4748 stub_symbol->GetAddressRef() = so_addr; 4749 stub_symbol->SetByteSize(symbol_stub_byte_size); 4750 } else { 4751 // Make a synthetic symbol to describe the trampoline stub 4752 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled()); 4753 if (sym_idx >= num_syms) { 4754 sym = symtab.Resize(++num_syms); 4755 stub_symbol = nullptr; // this pointer no longer valid 4756 } 4757 sym[sym_idx].SetID(synthetic_sym_id++); 4758 sym[sym_idx].GetMangled() = stub_symbol_mangled_name; 4759 if (resolver_addresses.find(symbol_stub_addr) == 4760 resolver_addresses.end()) 4761 sym[sym_idx].SetType(eSymbolTypeTrampoline); 4762 else 4763 sym[sym_idx].SetType(eSymbolTypeResolver); 4764 sym[sym_idx].SetIsSynthetic(true); 4765 sym[sym_idx].GetAddressRef() = so_addr; 4766 add_symbol_addr(so_addr.GetFileAddress()); 4767 sym[sym_idx].SetByteSize(symbol_stub_byte_size); 4768 ++sym_idx; 4769 } 4770 } else { 4771 if (log) 4772 log->Warning("symbol stub referencing symbol table symbol " 4773 "%u that isn't in our minimal symbol table, " 4774 "fix this!!!", 4775 stub_sym_id); 4776 } 4777 } 4778 } 4779 } 4780 } 4781 } 4782 } 4783 4784 if (!reexport_trie_entries.empty()) { 4785 for (const auto &e : reexport_trie_entries) { 4786 if (e.entry.import_name) { 4787 // Only add indirect symbols from the Trie entries if we didn't have 4788 // a N_INDR nlist entry for this already 4789 if (indirect_symbol_names.find(e.entry.name) == 4790 indirect_symbol_names.end()) { 4791 // Make a synthetic symbol to describe re-exported symbol. 4792 if (sym_idx >= num_syms) 4793 sym = symtab.Resize(++num_syms); 4794 sym[sym_idx].SetID(synthetic_sym_id++); 4795 sym[sym_idx].GetMangled() = Mangled(e.entry.name); 4796 sym[sym_idx].SetType(eSymbolTypeReExported); 4797 sym[sym_idx].SetIsSynthetic(true); 4798 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name); 4799 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) { 4800 sym[sym_idx].SetReExportedSymbolSharedLibrary( 4801 dylib_files.GetFileSpecAtIndex(e.entry.other - 1)); 4802 } 4803 ++sym_idx; 4804 } 4805 } 4806 } 4807 } 4808 } 4809 4810 void ObjectFileMachO::Dump(Stream *s) { 4811 ModuleSP module_sp(GetModule()); 4812 if (module_sp) { 4813 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 4814 s->Printf("%p: ", static_cast<void *>(this)); 4815 s->Indent(); 4816 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64) 4817 s->PutCString("ObjectFileMachO64"); 4818 else 4819 s->PutCString("ObjectFileMachO32"); 4820 4821 *s << ", file = '" << m_file; 4822 ModuleSpecList all_specs; 4823 ModuleSpec base_spec; 4824 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic), 4825 base_spec, all_specs); 4826 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 4827 *s << "', triple"; 4828 if (e) 4829 s->Printf("[%d]", i); 4830 *s << " = "; 4831 *s << all_specs.GetModuleSpecRefAtIndex(i) 4832 .GetArchitecture() 4833 .GetTriple() 4834 .getTriple(); 4835 } 4836 *s << "\n"; 4837 SectionList *sections = GetSectionList(); 4838 if (sections) 4839 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, 4840 UINT32_MAX); 4841 4842 if (m_symtab_up) 4843 m_symtab_up->Dump(s, nullptr, eSortOrderNone); 4844 } 4845 } 4846 4847 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header, 4848 const lldb_private::DataExtractor &data, 4849 lldb::offset_t lc_offset) { 4850 uint32_t i; 4851 llvm::MachO::uuid_command load_cmd; 4852 4853 lldb::offset_t offset = lc_offset; 4854 for (i = 0; i < header.ncmds; ++i) { 4855 const lldb::offset_t cmd_offset = offset; 4856 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 4857 break; 4858 4859 if (load_cmd.cmd == LC_UUID) { 4860 const uint8_t *uuid_bytes = data.PeekData(offset, 16); 4861 4862 if (uuid_bytes) { 4863 // OpenCL on Mac OS X uses the same UUID for each of its object files. 4864 // We pretend these object files have no UUID to prevent crashing. 4865 4866 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8, 4867 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63, 4868 0xbb, 0x14, 0xf0, 0x0d}; 4869 4870 if (!memcmp(uuid_bytes, opencl_uuid, 16)) 4871 return UUID(); 4872 4873 return UUID(uuid_bytes, 16); 4874 } 4875 return UUID(); 4876 } 4877 offset = cmd_offset + load_cmd.cmdsize; 4878 } 4879 return UUID(); 4880 } 4881 4882 static llvm::StringRef GetOSName(uint32_t cmd) { 4883 switch (cmd) { 4884 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 4885 return llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4886 case llvm::MachO::LC_VERSION_MIN_MACOSX: 4887 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4888 case llvm::MachO::LC_VERSION_MIN_TVOS: 4889 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4890 case llvm::MachO::LC_VERSION_MIN_WATCHOS: 4891 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4892 default: 4893 llvm_unreachable("unexpected LC_VERSION load command"); 4894 } 4895 } 4896 4897 namespace { 4898 struct OSEnv { 4899 llvm::StringRef os_type; 4900 llvm::StringRef environment; 4901 OSEnv(uint32_t cmd) { 4902 switch (cmd) { 4903 case llvm::MachO::PLATFORM_MACOS: 4904 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX); 4905 return; 4906 case llvm::MachO::PLATFORM_IOS: 4907 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4908 return; 4909 case llvm::MachO::PLATFORM_TVOS: 4910 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4911 return; 4912 case llvm::MachO::PLATFORM_WATCHOS: 4913 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4914 return; 4915 case llvm::MachO::PLATFORM_BRIDGEOS: 4916 os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS); 4917 return; 4918 case llvm::MachO::PLATFORM_DRIVERKIT: 4919 os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit); 4920 return; 4921 case llvm::MachO::PLATFORM_MACCATALYST: 4922 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4923 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI); 4924 return; 4925 case llvm::MachO::PLATFORM_IOSSIMULATOR: 4926 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS); 4927 environment = 4928 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4929 return; 4930 case llvm::MachO::PLATFORM_TVOSSIMULATOR: 4931 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS); 4932 environment = 4933 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4934 return; 4935 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR: 4936 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS); 4937 environment = 4938 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4939 return; 4940 case llvm::MachO::PLATFORM_XROS: 4941 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4942 return; 4943 case llvm::MachO::PLATFORM_XROS_SIMULATOR: 4944 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS); 4945 environment = 4946 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator); 4947 return; 4948 default: { 4949 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 4950 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION"); 4951 } 4952 } 4953 } 4954 }; 4955 4956 struct MinOS { 4957 uint32_t major_version, minor_version, patch_version; 4958 MinOS(uint32_t version) 4959 : major_version(version >> 16), minor_version((version >> 8) & 0xffu), 4960 patch_version(version & 0xffu) {} 4961 }; 4962 } // namespace 4963 4964 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, 4965 const lldb_private::DataExtractor &data, 4966 lldb::offset_t lc_offset, 4967 ModuleSpec &base_spec, 4968 lldb_private::ModuleSpecList &all_specs) { 4969 auto &base_arch = base_spec.GetArchitecture(); 4970 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype); 4971 if (!base_arch.IsValid()) 4972 return; 4973 4974 bool found_any = false; 4975 auto add_triple = [&](const llvm::Triple &triple) { 4976 auto spec = base_spec; 4977 spec.GetArchitecture().GetTriple() = triple; 4978 if (spec.GetArchitecture().IsValid()) { 4979 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset); 4980 all_specs.Append(spec); 4981 found_any = true; 4982 } 4983 }; 4984 4985 // Set OS to an unspecified unknown or a "*" so it can match any OS 4986 llvm::Triple base_triple = base_arch.GetTriple(); 4987 base_triple.setOS(llvm::Triple::UnknownOS); 4988 base_triple.setOSName(llvm::StringRef()); 4989 4990 if (header.filetype == MH_PRELOAD) { 4991 if (header.cputype == CPU_TYPE_ARM) { 4992 // If this is a 32-bit arm binary, and it's a standalone binary, force 4993 // the Vendor to Apple so we don't accidentally pick up the generic 4994 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the 4995 // frame pointer register; most other armv7 ABIs use a combination of 4996 // r7 and r11. 4997 base_triple.setVendor(llvm::Triple::Apple); 4998 } else { 4999 // Set vendor to an unspecified unknown or a "*" so it can match any 5000 // vendor This is required for correct behavior of EFI debugging on 5001 // x86_64 5002 base_triple.setVendor(llvm::Triple::UnknownVendor); 5003 base_triple.setVendorName(llvm::StringRef()); 5004 } 5005 return add_triple(base_triple); 5006 } 5007 5008 llvm::MachO::load_command load_cmd; 5009 5010 // See if there is an LC_VERSION_MIN_* load command that can give 5011 // us the OS type. 5012 lldb::offset_t offset = lc_offset; 5013 for (uint32_t i = 0; i < header.ncmds; ++i) { 5014 const lldb::offset_t cmd_offset = offset; 5015 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 5016 break; 5017 5018 llvm::MachO::version_min_command version_min; 5019 switch (load_cmd.cmd) { 5020 case llvm::MachO::LC_VERSION_MIN_MACOSX: 5021 case llvm::MachO::LC_VERSION_MIN_IPHONEOS: 5022 case llvm::MachO::LC_VERSION_MIN_TVOS: 5023 case llvm::MachO::LC_VERSION_MIN_WATCHOS: { 5024 if (load_cmd.cmdsize != sizeof(version_min)) 5025 break; 5026 if (data.ExtractBytes(cmd_offset, sizeof(version_min), 5027 data.GetByteOrder(), &version_min) == 0) 5028 break; 5029 MinOS min_os(version_min.version); 5030 llvm::SmallString<32> os_name; 5031 llvm::raw_svector_ostream os(os_name); 5032 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.' 5033 << min_os.minor_version << '.' << min_os.patch_version; 5034 5035 auto triple = base_triple; 5036 triple.setOSName(os.str()); 5037 5038 // Disambiguate legacy simulator platforms. 5039 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX && 5040 (base_triple.getArch() == llvm::Triple::x86_64 || 5041 base_triple.getArch() == llvm::Triple::x86)) { 5042 // The combination of legacy LC_VERSION_MIN load command and 5043 // x86 architecture always indicates a simulator environment. 5044 // The combination of LC_VERSION_MIN and arm architecture only 5045 // appears for native binaries. Back-deploying simulator 5046 // binaries on Apple Silicon Macs use the modern unambigous 5047 // LC_BUILD_VERSION load commands; no special handling required. 5048 triple.setEnvironment(llvm::Triple::Simulator); 5049 } 5050 add_triple(triple); 5051 break; 5052 } 5053 default: 5054 break; 5055 } 5056 5057 offset = cmd_offset + load_cmd.cmdsize; 5058 } 5059 5060 // See if there are LC_BUILD_VERSION load commands that can give 5061 // us the OS type. 5062 offset = lc_offset; 5063 for (uint32_t i = 0; i < header.ncmds; ++i) { 5064 const lldb::offset_t cmd_offset = offset; 5065 if (data.GetU32(&offset, &load_cmd, 2) == nullptr) 5066 break; 5067 5068 do { 5069 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) { 5070 llvm::MachO::build_version_command build_version; 5071 if (load_cmd.cmdsize < sizeof(build_version)) { 5072 // Malformed load command. 5073 break; 5074 } 5075 if (data.ExtractBytes(cmd_offset, sizeof(build_version), 5076 data.GetByteOrder(), &build_version) == 0) 5077 break; 5078 MinOS min_os(build_version.minos); 5079 OSEnv os_env(build_version.platform); 5080 llvm::SmallString<16> os_name; 5081 llvm::raw_svector_ostream os(os_name); 5082 os << os_env.os_type << min_os.major_version << '.' 5083 << min_os.minor_version << '.' << min_os.patch_version; 5084 auto triple = base_triple; 5085 triple.setOSName(os.str()); 5086 os_name.clear(); 5087 if (!os_env.environment.empty()) 5088 triple.setEnvironmentName(os_env.environment); 5089 add_triple(triple); 5090 } 5091 } while (false); 5092 offset = cmd_offset + load_cmd.cmdsize; 5093 } 5094 5095 if (!found_any) { 5096 add_triple(base_triple); 5097 } 5098 } 5099 5100 ArchSpec ObjectFileMachO::GetArchitecture( 5101 ModuleSP module_sp, const llvm::MachO::mach_header &header, 5102 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) { 5103 ModuleSpecList all_specs; 5104 ModuleSpec base_spec; 5105 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic), 5106 base_spec, all_specs); 5107 5108 // If the object file offers multiple alternative load commands, 5109 // pick the one that matches the module. 5110 if (module_sp) { 5111 const ArchSpec &module_arch = module_sp->GetArchitecture(); 5112 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) { 5113 ArchSpec mach_arch = 5114 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture(); 5115 if (module_arch.IsCompatibleMatch(mach_arch)) 5116 return mach_arch; 5117 } 5118 } 5119 5120 // Return the first arch we found. 5121 if (all_specs.GetSize() == 0) 5122 return {}; 5123 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture(); 5124 } 5125 5126 UUID ObjectFileMachO::GetUUID() { 5127 ModuleSP module_sp(GetModule()); 5128 if (module_sp) { 5129 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5130 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5131 return GetUUID(m_header, m_data, offset); 5132 } 5133 return UUID(); 5134 } 5135 5136 uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) { 5137 ModuleSP module_sp = GetModule(); 5138 if (!module_sp) 5139 return 0; 5140 5141 uint32_t count = 0; 5142 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5143 llvm::MachO::load_command load_cmd; 5144 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5145 std::vector<std::string> rpath_paths; 5146 std::vector<std::string> rpath_relative_paths; 5147 std::vector<std::string> at_exec_relative_paths; 5148 uint32_t i; 5149 for (i = 0; i < m_header.ncmds; ++i) { 5150 const uint32_t cmd_offset = offset; 5151 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5152 break; 5153 5154 switch (load_cmd.cmd) { 5155 case LC_RPATH: 5156 case LC_LOAD_DYLIB: 5157 case LC_LOAD_WEAK_DYLIB: 5158 case LC_REEXPORT_DYLIB: 5159 case LC_LOAD_DYLINKER: 5160 case LC_LOADFVMLIB: 5161 case LC_LOAD_UPWARD_DYLIB: { 5162 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset); 5163 // For LC_LOAD_DYLIB there is an alternate encoding 5164 // which adds a uint32_t `flags` field for `DYLD_USE_*` 5165 // flags. This can be detected by a timestamp field with 5166 // the `DYLIB_USE_MARKER` constant value. 5167 bool is_delayed_init = false; 5168 uint32_t use_command_marker = m_data.GetU32(&offset); 5169 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) { 5170 offset += 4; /* uint32_t current_version */ 5171 offset += 4; /* uint32_t compat_version */ 5172 uint32_t flags = m_data.GetU32(&offset); 5173 // If this LC_LOAD_DYLIB is marked delay-init, 5174 // don't report it as a dependent library -- it 5175 // may be loaded in the process at some point, 5176 // but will most likely not be load at launch. 5177 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */) 5178 is_delayed_init = true; 5179 } 5180 const char *path = m_data.PeekCStr(name_offset); 5181 if (path && !is_delayed_init) { 5182 if (load_cmd.cmd == LC_RPATH) 5183 rpath_paths.push_back(path); 5184 else { 5185 if (path[0] == '@') { 5186 if (strncmp(path, "@rpath", strlen("@rpath")) == 0) 5187 rpath_relative_paths.push_back(path + strlen("@rpath")); 5188 else if (strncmp(path, "@executable_path", 5189 strlen("@executable_path")) == 0) 5190 at_exec_relative_paths.push_back(path + 5191 strlen("@executable_path")); 5192 } else { 5193 FileSpec file_spec(path); 5194 if (files.AppendIfUnique(file_spec)) 5195 count++; 5196 } 5197 } 5198 } 5199 } break; 5200 5201 default: 5202 break; 5203 } 5204 offset = cmd_offset + load_cmd.cmdsize; 5205 } 5206 5207 FileSpec this_file_spec(m_file); 5208 FileSystem::Instance().Resolve(this_file_spec); 5209 5210 if (!rpath_paths.empty()) { 5211 // Fixup all LC_RPATH values to be absolute paths. 5212 const std::string this_directory = 5213 this_file_spec.GetDirectory().GetString(); 5214 for (auto &rpath : rpath_paths) { 5215 if (llvm::StringRef(rpath).starts_with(g_loader_path)) 5216 rpath = this_directory + rpath.substr(g_loader_path.size()); 5217 else if (llvm::StringRef(rpath).starts_with(g_executable_path)) 5218 rpath = this_directory + rpath.substr(g_executable_path.size()); 5219 } 5220 5221 for (const auto &rpath_relative_path : rpath_relative_paths) { 5222 for (const auto &rpath : rpath_paths) { 5223 std::string path = rpath; 5224 path += rpath_relative_path; 5225 // It is OK to resolve this path because we must find a file on disk 5226 // for us to accept it anyway if it is rpath relative. 5227 FileSpec file_spec(path); 5228 FileSystem::Instance().Resolve(file_spec); 5229 if (FileSystem::Instance().Exists(file_spec) && 5230 files.AppendIfUnique(file_spec)) { 5231 count++; 5232 break; 5233 } 5234 } 5235 } 5236 } 5237 5238 // We may have @executable_paths but no RPATHS. Figure those out here. 5239 // Only do this if this object file is the executable. We have no way to 5240 // get back to the actual executable otherwise, so we won't get the right 5241 // path. 5242 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) { 5243 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent(); 5244 for (const auto &at_exec_relative_path : at_exec_relative_paths) { 5245 FileSpec file_spec = 5246 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path); 5247 if (FileSystem::Instance().Exists(file_spec) && 5248 files.AppendIfUnique(file_spec)) 5249 count++; 5250 } 5251 } 5252 return count; 5253 } 5254 5255 lldb_private::Address ObjectFileMachO::GetEntryPointAddress() { 5256 // If the object file is not an executable it can't hold the entry point. 5257 // m_entry_point_address is initialized to an invalid address, so we can just 5258 // return that. If m_entry_point_address is valid it means we've found it 5259 // already, so return the cached value. 5260 5261 if ((!IsExecutable() && !IsDynamicLoader()) || 5262 m_entry_point_address.IsValid()) { 5263 return m_entry_point_address; 5264 } 5265 5266 // Otherwise, look for the UnixThread or Thread command. The data for the 5267 // Thread command is given in /usr/include/mach-o.h, but it is basically: 5268 // 5269 // uint32_t flavor - this is the flavor argument you would pass to 5270 // thread_get_state 5271 // uint32_t count - this is the count of longs in the thread state data 5272 // struct XXX_thread_state state - this is the structure from 5273 // <machine/thread_status.h> corresponding to the flavor. 5274 // <repeat this trio> 5275 // 5276 // So we just keep reading the various register flavors till we find the GPR 5277 // one, then read the PC out of there. 5278 // FIXME: We will need to have a "RegisterContext data provider" class at some 5279 // point that can get all the registers 5280 // out of data in this form & attach them to a given thread. That should 5281 // underlie the MacOS X User process plugin, and we'll also need it for the 5282 // MacOS X Core File process plugin. When we have that we can also use it 5283 // here. 5284 // 5285 // For now we hard-code the offsets and flavors we need: 5286 // 5287 // 5288 5289 ModuleSP module_sp(GetModule()); 5290 if (module_sp) { 5291 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5292 llvm::MachO::load_command load_cmd; 5293 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5294 uint32_t i; 5295 lldb::addr_t start_address = LLDB_INVALID_ADDRESS; 5296 bool done = false; 5297 5298 for (i = 0; i < m_header.ncmds; ++i) { 5299 const lldb::offset_t cmd_offset = offset; 5300 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5301 break; 5302 5303 switch (load_cmd.cmd) { 5304 case LC_UNIXTHREAD: 5305 case LC_THREAD: { 5306 while (offset < cmd_offset + load_cmd.cmdsize) { 5307 uint32_t flavor = m_data.GetU32(&offset); 5308 uint32_t count = m_data.GetU32(&offset); 5309 if (count == 0) { 5310 // We've gotten off somehow, log and exit; 5311 return m_entry_point_address; 5312 } 5313 5314 switch (m_header.cputype) { 5315 case llvm::MachO::CPU_TYPE_ARM: 5316 if (flavor == 1 || 5317 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32 5318 // from mach/arm/thread_status.h 5319 { 5320 offset += 60; // This is the offset of pc in the GPR thread state 5321 // data structure. 5322 start_address = m_data.GetU32(&offset); 5323 done = true; 5324 } 5325 break; 5326 case llvm::MachO::CPU_TYPE_ARM64: 5327 case llvm::MachO::CPU_TYPE_ARM64_32: 5328 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h 5329 { 5330 offset += 256; // This is the offset of pc in the GPR thread state 5331 // data structure. 5332 start_address = m_data.GetU64(&offset); 5333 done = true; 5334 } 5335 break; 5336 case llvm::MachO::CPU_TYPE_I386: 5337 if (flavor == 5338 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h 5339 { 5340 offset += 40; // This is the offset of eip in the GPR thread state 5341 // data structure. 5342 start_address = m_data.GetU32(&offset); 5343 done = true; 5344 } 5345 break; 5346 case llvm::MachO::CPU_TYPE_X86_64: 5347 if (flavor == 5348 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h 5349 { 5350 offset += 16 * 8; // This is the offset of rip in the GPR thread 5351 // state data structure. 5352 start_address = m_data.GetU64(&offset); 5353 done = true; 5354 } 5355 break; 5356 default: 5357 return m_entry_point_address; 5358 } 5359 // Haven't found the GPR flavor yet, skip over the data for this 5360 // flavor: 5361 if (done) 5362 break; 5363 offset += count * 4; 5364 } 5365 } break; 5366 case LC_MAIN: { 5367 uint64_t entryoffset = m_data.GetU64(&offset); 5368 SectionSP text_segment_sp = 5369 GetSectionList()->FindSectionByName(GetSegmentNameTEXT()); 5370 if (text_segment_sp) { 5371 done = true; 5372 start_address = text_segment_sp->GetFileAddress() + entryoffset; 5373 } 5374 } break; 5375 5376 default: 5377 break; 5378 } 5379 if (done) 5380 break; 5381 5382 // Go to the next load command: 5383 offset = cmd_offset + load_cmd.cmdsize; 5384 } 5385 5386 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) { 5387 if (GetSymtab()) { 5388 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType( 5389 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode, 5390 Symtab::eDebugAny, Symtab::eVisibilityAny); 5391 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) { 5392 start_address = dyld_start_sym->GetAddress().GetFileAddress(); 5393 } 5394 } 5395 } 5396 5397 if (start_address != LLDB_INVALID_ADDRESS) { 5398 // We got the start address from the load commands, so now resolve that 5399 // address in the sections of this ObjectFile: 5400 if (!m_entry_point_address.ResolveAddressUsingFileSections( 5401 start_address, GetSectionList())) { 5402 m_entry_point_address.Clear(); 5403 } 5404 } else { 5405 // We couldn't read the UnixThread load command - maybe it wasn't there. 5406 // As a fallback look for the "start" symbol in the main executable. 5407 5408 ModuleSP module_sp(GetModule()); 5409 5410 if (module_sp) { 5411 SymbolContextList contexts; 5412 SymbolContext context; 5413 module_sp->FindSymbolsWithNameAndType(ConstString("start"), 5414 eSymbolTypeCode, contexts); 5415 if (contexts.GetSize()) { 5416 if (contexts.GetContextAtIndex(0, context)) 5417 m_entry_point_address = context.symbol->GetAddress(); 5418 } 5419 } 5420 } 5421 } 5422 5423 return m_entry_point_address; 5424 } 5425 5426 lldb_private::Address ObjectFileMachO::GetBaseAddress() { 5427 lldb_private::Address header_addr; 5428 SectionList *section_list = GetSectionList(); 5429 if (section_list) { 5430 SectionSP text_segment_sp( 5431 section_list->FindSectionByName(GetSegmentNameTEXT())); 5432 if (text_segment_sp) { 5433 header_addr.SetSection(text_segment_sp); 5434 header_addr.SetOffset(0); 5435 } 5436 } 5437 return header_addr; 5438 } 5439 5440 uint32_t ObjectFileMachO::GetNumThreadContexts() { 5441 ModuleSP module_sp(GetModule()); 5442 if (module_sp) { 5443 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5444 if (!m_thread_context_offsets_valid) { 5445 m_thread_context_offsets_valid = true; 5446 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5447 FileRangeArray::Entry file_range; 5448 llvm::MachO::thread_command thread_cmd; 5449 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5450 const uint32_t cmd_offset = offset; 5451 if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr) 5452 break; 5453 5454 if (thread_cmd.cmd == LC_THREAD) { 5455 file_range.SetRangeBase(offset); 5456 file_range.SetByteSize(thread_cmd.cmdsize - 8); 5457 m_thread_context_offsets.Append(file_range); 5458 } 5459 offset = cmd_offset + thread_cmd.cmdsize; 5460 } 5461 } 5462 } 5463 return m_thread_context_offsets.GetSize(); 5464 } 5465 5466 std::vector<std::tuple<offset_t, offset_t>> 5467 ObjectFileMachO::FindLC_NOTEByName(std::string name) { 5468 std::vector<std::tuple<offset_t, offset_t>> results; 5469 ModuleSP module_sp(GetModule()); 5470 if (module_sp) { 5471 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5472 5473 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5474 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5475 const uint32_t cmd_offset = offset; 5476 llvm::MachO::load_command lc = {}; 5477 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr) 5478 break; 5479 if (lc.cmd == LC_NOTE) { 5480 char data_owner[17]; 5481 m_data.CopyData(offset, 16, data_owner); 5482 data_owner[16] = '\0'; 5483 offset += 16; 5484 5485 if (name == data_owner) { 5486 offset_t payload_offset = m_data.GetU64_unchecked(&offset); 5487 offset_t payload_size = m_data.GetU64_unchecked(&offset); 5488 results.push_back({payload_offset, payload_size}); 5489 } 5490 } 5491 offset = cmd_offset + lc.cmdsize; 5492 } 5493 } 5494 return results; 5495 } 5496 5497 std::string ObjectFileMachO::GetIdentifierString() { 5498 Log *log( 5499 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5500 ModuleSP module_sp(GetModule()); 5501 if (module_sp) { 5502 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5503 5504 auto lc_notes = FindLC_NOTEByName("kern ver str"); 5505 for (auto lc_note : lc_notes) { 5506 offset_t payload_offset = std::get<0>(lc_note); 5507 offset_t payload_size = std::get<1>(lc_note); 5508 uint32_t version; 5509 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5510 if (version == 1) { 5511 uint32_t strsize = payload_size - sizeof(uint32_t); 5512 std::string result(strsize, '\0'); 5513 m_data.CopyData(payload_offset, strsize, result.data()); 5514 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'", 5515 result.c_str()); 5516 return result; 5517 } 5518 } 5519 } 5520 5521 // Second, make a pass over the load commands looking for an obsolete 5522 // LC_IDENT load command. 5523 offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5524 for (uint32_t i = 0; i < m_header.ncmds; ++i) { 5525 const uint32_t cmd_offset = offset; 5526 llvm::MachO::ident_command ident_command; 5527 if (m_data.GetU32(&offset, &ident_command, 2) == nullptr) 5528 break; 5529 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) { 5530 std::string result(ident_command.cmdsize, '\0'); 5531 if (m_data.CopyData(offset, ident_command.cmdsize, result.data()) == 5532 ident_command.cmdsize) { 5533 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str()); 5534 return result; 5535 } 5536 } 5537 offset = cmd_offset + ident_command.cmdsize; 5538 } 5539 } 5540 return {}; 5541 } 5542 5543 AddressableBits ObjectFileMachO::GetAddressableBits() { 5544 AddressableBits addressable_bits; 5545 5546 Log *log(GetLog(LLDBLog::Process)); 5547 ModuleSP module_sp(GetModule()); 5548 if (module_sp) { 5549 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5550 auto lc_notes = FindLC_NOTEByName("addrable bits"); 5551 for (auto lc_note : lc_notes) { 5552 offset_t payload_offset = std::get<0>(lc_note); 5553 uint32_t version; 5554 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) { 5555 if (version == 3) { 5556 uint32_t num_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5557 addressable_bits.SetAddressableBits(num_addr_bits); 5558 LLDB_LOGF(log, 5559 "LC_NOTE 'addrable bits' v3 found, value %d " 5560 "bits", 5561 num_addr_bits); 5562 } 5563 if (version == 4) { 5564 uint32_t lo_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5565 uint32_t hi_addr_bits = m_data.GetU32_unchecked(&payload_offset); 5566 5567 if (lo_addr_bits == hi_addr_bits) 5568 addressable_bits.SetAddressableBits(lo_addr_bits); 5569 else 5570 addressable_bits.SetAddressableBits(lo_addr_bits, hi_addr_bits); 5571 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits", 5572 lo_addr_bits, hi_addr_bits); 5573 } 5574 } 5575 } 5576 } 5577 return addressable_bits; 5578 } 5579 5580 bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, 5581 bool &value_is_offset, 5582 UUID &uuid, 5583 ObjectFile::BinaryType &type) { 5584 Log *log( 5585 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader)); 5586 value = LLDB_INVALID_ADDRESS; 5587 value_is_offset = false; 5588 uuid.Clear(); 5589 uint32_t log2_pagesize = 0; // not currently passed up to caller 5590 uint32_t platform = 0; // not currently passed up to caller 5591 ModuleSP module_sp(GetModule()); 5592 if (module_sp) { 5593 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5594 5595 auto lc_notes = FindLC_NOTEByName("main bin spec"); 5596 for (auto lc_note : lc_notes) { 5597 offset_t payload_offset = std::get<0>(lc_note); 5598 5599 // struct main_bin_spec 5600 // { 5601 // uint32_t version; // currently 2 5602 // uint32_t type; // 0 == unspecified, 1 == kernel, 5603 // // 2 == user process, 5604 // // 3 == standalone binary 5605 // uint64_t address; // UINT64_MAX if address not specified 5606 // uint64_t slide; // slide, UINT64_MAX if unspecified 5607 // // 0 if no slide needs to be applied to 5608 // // file address 5609 // uuid_t uuid; // all zero's if uuid not specified 5610 // uint32_t log2_pagesize; // process page size in log base 2, 5611 // // e.g. 4k pages are 12. 5612 // // 0 for unspecified 5613 // uint32_t platform; // The Mach-O platform for this corefile. 5614 // // 0 for unspecified. 5615 // // The values are defined in 5616 // // <mach-o/loader.h>, PLATFORM_*. 5617 // } __attribute((packed)); 5618 5619 // "main bin spec" (main binary specification) data payload is 5620 // formatted: 5621 // uint32_t version [currently 1] 5622 // uint32_t type [0 == unspecified, 1 == kernel, 5623 // 2 == user process, 3 == firmware ] 5624 // uint64_t address [ UINT64_MAX if address not specified ] 5625 // uuid_t uuid [ all zero's if uuid not specified ] 5626 // uint32_t log2_pagesize [ process page size in log base 5627 // 2, e.g. 4k pages are 12. 5628 // 0 for unspecified ] 5629 // uint32_t unused [ for alignment ] 5630 5631 uint32_t version; 5632 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr && 5633 version <= 2) { 5634 uint32_t binspec_type = 0; 5635 uuid_t raw_uuid; 5636 memset(raw_uuid, 0, sizeof(uuid_t)); 5637 5638 if (!m_data.GetU32(&payload_offset, &binspec_type, 1)) 5639 return false; 5640 if (!m_data.GetU64(&payload_offset, &value, 1)) 5641 return false; 5642 uint64_t slide = LLDB_INVALID_ADDRESS; 5643 if (version > 1 && !m_data.GetU64(&payload_offset, &slide, 1)) 5644 return false; 5645 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) { 5646 value = slide; 5647 value_is_offset = true; 5648 } 5649 5650 if (m_data.CopyData(payload_offset, sizeof(uuid_t), raw_uuid) != 0) { 5651 uuid = UUID(raw_uuid, sizeof(uuid_t)); 5652 // convert the "main bin spec" type into our 5653 // ObjectFile::BinaryType enum 5654 const char *typestr = "unrecognized type"; 5655 switch (binspec_type) { 5656 case 0: 5657 type = eBinaryTypeUnknown; 5658 typestr = "uknown"; 5659 break; 5660 case 1: 5661 type = eBinaryTypeKernel; 5662 typestr = "xnu kernel"; 5663 break; 5664 case 2: 5665 type = eBinaryTypeUser; 5666 typestr = "userland dyld"; 5667 break; 5668 case 3: 5669 type = eBinaryTypeStandalone; 5670 typestr = "standalone"; 5671 break; 5672 } 5673 LLDB_LOGF(log, 5674 "LC_NOTE 'main bin spec' found, version %d type %d " 5675 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s", 5676 version, type, typestr, value, 5677 value_is_offset ? "true" : "false", 5678 uuid.GetAsString().c_str()); 5679 if (!m_data.GetU32(&payload_offset, &log2_pagesize, 1)) 5680 return false; 5681 if (version > 1 && !m_data.GetU32(&payload_offset, &platform, 1)) 5682 return false; 5683 return true; 5684 } 5685 } 5686 } 5687 } 5688 return false; 5689 } 5690 5691 bool ObjectFileMachO::GetCorefileThreadExtraInfos( 5692 std::vector<lldb::tid_t> &tids) { 5693 tids.clear(); 5694 ModuleSP module_sp(GetModule()); 5695 if (module_sp) { 5696 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5697 5698 Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread)); 5699 auto lc_notes = FindLC_NOTEByName("process metadata"); 5700 for (auto lc_note : lc_notes) { 5701 offset_t payload_offset = std::get<0>(lc_note); 5702 offset_t strsize = std::get<1>(lc_note); 5703 std::string buf(strsize, '\0'); 5704 if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) { 5705 LLDB_LOGF(log, 5706 "Unable to read %" PRIu64 5707 " bytes of 'process metadata' LC_NOTE JSON contents", 5708 strsize); 5709 return false; 5710 } 5711 while (buf.back() == '\0') 5712 buf.resize(buf.size() - 1); 5713 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf); 5714 StructuredData::Dictionary *dict = object_sp->GetAsDictionary(); 5715 if (!dict) { 5716 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not " 5717 "get a dictionary."); 5718 return false; 5719 } 5720 StructuredData::Array *threads; 5721 if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) { 5722 LLDB_LOGF(log, 5723 "'process metadata' LC_NOTE does not have a 'threads' key"); 5724 return false; 5725 } 5726 if (threads->GetSize() != GetNumThreadContexts()) { 5727 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of " 5728 "threads does not match number of LC_THREADS."); 5729 return false; 5730 } 5731 const size_t num_threads = threads->GetSize(); 5732 for (size_t i = 0; i < num_threads; i++) { 5733 std::optional<StructuredData::Dictionary *> maybe_thread = 5734 threads->GetItemAtIndexAsDictionary(i); 5735 if (!maybe_thread) { 5736 LLDB_LOGF(log, 5737 "Unable to read 'process metadata' LC_NOTE, threads " 5738 "array does not have a dictionary at index %zu.", 5739 i); 5740 return false; 5741 } 5742 StructuredData::Dictionary *thread = *maybe_thread; 5743 lldb::tid_t tid = LLDB_INVALID_THREAD_ID; 5744 if (thread->GetValueForKeyAsInteger<lldb::tid_t>("thread_id", tid)) 5745 if (tid == 0) 5746 tid = LLDB_INVALID_THREAD_ID; 5747 tids.push_back(tid); 5748 } 5749 5750 if (log) { 5751 StreamString logmsg; 5752 logmsg.Printf("LC_NOTE 'process metadata' found: "); 5753 dict->Dump(logmsg, /* pretty_print */ false); 5754 LLDB_LOGF(log, "%s", logmsg.GetData()); 5755 } 5756 return true; 5757 } 5758 } 5759 return false; 5760 } 5761 5762 lldb::RegisterContextSP 5763 ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx, 5764 lldb_private::Thread &thread) { 5765 lldb::RegisterContextSP reg_ctx_sp; 5766 5767 ModuleSP module_sp(GetModule()); 5768 if (module_sp) { 5769 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5770 if (!m_thread_context_offsets_valid) 5771 GetNumThreadContexts(); 5772 5773 const FileRangeArray::Entry *thread_context_file_range = 5774 m_thread_context_offsets.GetEntryAtIndex(idx); 5775 if (thread_context_file_range) { 5776 5777 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(), 5778 thread_context_file_range->GetByteSize()); 5779 5780 switch (m_header.cputype) { 5781 case llvm::MachO::CPU_TYPE_ARM64: 5782 case llvm::MachO::CPU_TYPE_ARM64_32: 5783 reg_ctx_sp = 5784 std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data); 5785 break; 5786 5787 case llvm::MachO::CPU_TYPE_ARM: 5788 reg_ctx_sp = 5789 std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data); 5790 break; 5791 5792 case llvm::MachO::CPU_TYPE_I386: 5793 reg_ctx_sp = 5794 std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data); 5795 break; 5796 5797 case llvm::MachO::CPU_TYPE_X86_64: 5798 reg_ctx_sp = 5799 std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data); 5800 break; 5801 } 5802 } 5803 } 5804 return reg_ctx_sp; 5805 } 5806 5807 ObjectFile::Type ObjectFileMachO::CalculateType() { 5808 switch (m_header.filetype) { 5809 case MH_OBJECT: // 0x1u 5810 if (GetAddressByteSize() == 4) { 5811 // 32 bit kexts are just object files, but they do have a valid 5812 // UUID load command. 5813 if (GetUUID()) { 5814 // this checking for the UUID load command is not enough we could 5815 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5816 // this is required of kexts 5817 if (m_strata == eStrataInvalid) 5818 m_strata = eStrataKernel; 5819 return eTypeSharedLibrary; 5820 } 5821 } 5822 return eTypeObjectFile; 5823 5824 case MH_EXECUTE: 5825 return eTypeExecutable; // 0x2u 5826 case MH_FVMLIB: 5827 return eTypeSharedLibrary; // 0x3u 5828 case MH_CORE: 5829 return eTypeCoreFile; // 0x4u 5830 case MH_PRELOAD: 5831 return eTypeSharedLibrary; // 0x5u 5832 case MH_DYLIB: 5833 return eTypeSharedLibrary; // 0x6u 5834 case MH_DYLINKER: 5835 return eTypeDynamicLinker; // 0x7u 5836 case MH_BUNDLE: 5837 return eTypeSharedLibrary; // 0x8u 5838 case MH_DYLIB_STUB: 5839 return eTypeStubLibrary; // 0x9u 5840 case MH_DSYM: 5841 return eTypeDebugInfo; // 0xAu 5842 case MH_KEXT_BUNDLE: 5843 return eTypeSharedLibrary; // 0xBu 5844 default: 5845 break; 5846 } 5847 return eTypeUnknown; 5848 } 5849 5850 ObjectFile::Strata ObjectFileMachO::CalculateStrata() { 5851 switch (m_header.filetype) { 5852 case MH_OBJECT: // 0x1u 5853 { 5854 // 32 bit kexts are just object files, but they do have a valid 5855 // UUID load command. 5856 if (GetUUID()) { 5857 // this checking for the UUID load command is not enough we could 5858 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as 5859 // this is required of kexts 5860 if (m_type == eTypeInvalid) 5861 m_type = eTypeSharedLibrary; 5862 5863 return eStrataKernel; 5864 } 5865 } 5866 return eStrataUnknown; 5867 5868 case MH_EXECUTE: // 0x2u 5869 // Check for the MH_DYLDLINK bit in the flags 5870 if (m_header.flags & MH_DYLDLINK) { 5871 return eStrataUser; 5872 } else { 5873 SectionList *section_list = GetSectionList(); 5874 if (section_list) { 5875 static ConstString g_kld_section_name("__KLD"); 5876 if (section_list->FindSectionByName(g_kld_section_name)) 5877 return eStrataKernel; 5878 } 5879 } 5880 return eStrataRawImage; 5881 5882 case MH_FVMLIB: 5883 return eStrataUser; // 0x3u 5884 case MH_CORE: 5885 return eStrataUnknown; // 0x4u 5886 case MH_PRELOAD: 5887 return eStrataRawImage; // 0x5u 5888 case MH_DYLIB: 5889 return eStrataUser; // 0x6u 5890 case MH_DYLINKER: 5891 return eStrataUser; // 0x7u 5892 case MH_BUNDLE: 5893 return eStrataUser; // 0x8u 5894 case MH_DYLIB_STUB: 5895 return eStrataUser; // 0x9u 5896 case MH_DSYM: 5897 return eStrataUnknown; // 0xAu 5898 case MH_KEXT_BUNDLE: 5899 return eStrataKernel; // 0xBu 5900 default: 5901 break; 5902 } 5903 return eStrataUnknown; 5904 } 5905 5906 llvm::VersionTuple ObjectFileMachO::GetVersion() { 5907 ModuleSP module_sp(GetModule()); 5908 if (module_sp) { 5909 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5910 llvm::MachO::dylib_command load_cmd; 5911 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); 5912 uint32_t version_cmd = 0; 5913 uint64_t version = 0; 5914 uint32_t i; 5915 for (i = 0; i < m_header.ncmds; ++i) { 5916 const lldb::offset_t cmd_offset = offset; 5917 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr) 5918 break; 5919 5920 if (load_cmd.cmd == LC_ID_DYLIB) { 5921 if (version_cmd == 0) { 5922 version_cmd = load_cmd.cmd; 5923 if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr) 5924 break; 5925 version = load_cmd.dylib.current_version; 5926 } 5927 break; // Break for now unless there is another more complete version 5928 // number load command in the future. 5929 } 5930 offset = cmd_offset + load_cmd.cmdsize; 5931 } 5932 5933 if (version_cmd == LC_ID_DYLIB) { 5934 unsigned major = (version & 0xFFFF0000ull) >> 16; 5935 unsigned minor = (version & 0x0000FF00ull) >> 8; 5936 unsigned subminor = (version & 0x000000FFull); 5937 return llvm::VersionTuple(major, minor, subminor); 5938 } 5939 } 5940 return llvm::VersionTuple(); 5941 } 5942 5943 ArchSpec ObjectFileMachO::GetArchitecture() { 5944 ModuleSP module_sp(GetModule()); 5945 ArchSpec arch; 5946 if (module_sp) { 5947 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); 5948 5949 return GetArchitecture(module_sp, m_header, m_data, 5950 MachHeaderSizeFromMagic(m_header.magic)); 5951 } 5952 return arch; 5953 } 5954 5955 void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process, 5956 addr_t &base_addr, UUID &uuid) { 5957 uuid.Clear(); 5958 base_addr = LLDB_INVALID_ADDRESS; 5959 if (process && process->GetDynamicLoader()) { 5960 DynamicLoader *dl = process->GetDynamicLoader(); 5961 LazyBool using_shared_cache; 5962 LazyBool private_shared_cache; 5963 dl->GetSharedCacheInformation(base_addr, uuid, using_shared_cache, 5964 private_shared_cache); 5965 } 5966 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 5967 LLDB_LOGF( 5968 log, 5969 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64, 5970 uuid.GetAsString().c_str(), base_addr); 5971 } 5972 5973 // From dyld SPI header dyld_process_info.h 5974 typedef void *dyld_process_info; 5975 struct lldb_copy__dyld_process_cache_info { 5976 uuid_t cacheUUID; // UUID of cache used by process 5977 uint64_t cacheBaseAddress; // load address of dyld shared cache 5978 bool noCache; // process is running without a dyld cache 5979 bool privateCache; // process is using a private copy of its dyld cache 5980 }; 5981 5982 // #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with 5983 // llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile 5984 // errors. So we need to use the actual underlying types of task_t and 5985 // kern_return_t below. 5986 extern "C" unsigned int /*task_t*/ mach_task_self(); 5987 5988 void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) { 5989 uuid.Clear(); 5990 base_addr = LLDB_INVALID_ADDRESS; 5991 5992 #if defined(__APPLE__) 5993 uint8_t *(*dyld_get_all_image_infos)(void); 5994 dyld_get_all_image_infos = 5995 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos"); 5996 if (dyld_get_all_image_infos) { 5997 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos(); 5998 if (dyld_all_image_infos_address) { 5999 uint32_t *version = (uint32_t *) 6000 dyld_all_image_infos_address; // version <mach-o/dyld_images.h> 6001 if (*version >= 13) { 6002 uuid_t *sharedCacheUUID_address = 0; 6003 int wordsize = sizeof(uint8_t *); 6004 if (wordsize == 8) { 6005 sharedCacheUUID_address = 6006 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 6007 160); // sharedCacheUUID <mach-o/dyld_images.h> 6008 if (*version >= 15) 6009 base_addr = 6010 *(uint64_t 6011 *)((uint8_t *)dyld_all_image_infos_address + 6012 176); // sharedCacheBaseAddress <mach-o/dyld_images.h> 6013 } else { 6014 sharedCacheUUID_address = 6015 (uuid_t *)((uint8_t *)dyld_all_image_infos_address + 6016 84); // sharedCacheUUID <mach-o/dyld_images.h> 6017 if (*version >= 15) { 6018 base_addr = 0; 6019 base_addr = 6020 *(uint32_t 6021 *)((uint8_t *)dyld_all_image_infos_address + 6022 100); // sharedCacheBaseAddress <mach-o/dyld_images.h> 6023 } 6024 } 6025 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t)); 6026 } 6027 } 6028 } else { 6029 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI 6030 dyld_process_info (*dyld_process_info_create)( 6031 unsigned int /* task_t */ task, uint64_t timestamp, 6032 unsigned int /*kern_return_t*/ *kernelError); 6033 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo); 6034 void (*dyld_process_info_release)(dyld_process_info info); 6035 6036 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t, 6037 unsigned int /*kern_return_t*/ *)) 6038 dlsym(RTLD_DEFAULT, "_dyld_process_info_create"); 6039 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym( 6040 RTLD_DEFAULT, "_dyld_process_info_get_cache"); 6041 dyld_process_info_release = 6042 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release"); 6043 6044 if (dyld_process_info_create && dyld_process_info_get_cache) { 6045 unsigned int /*kern_return_t */ kern_ret; 6046 dyld_process_info process_info = 6047 dyld_process_info_create(::mach_task_self(), 0, &kern_ret); 6048 if (process_info) { 6049 struct lldb_copy__dyld_process_cache_info sc_info; 6050 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info)); 6051 dyld_process_info_get_cache(process_info, &sc_info); 6052 if (sc_info.cacheBaseAddress != 0) { 6053 base_addr = sc_info.cacheBaseAddress; 6054 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t)); 6055 } 6056 dyld_process_info_release(process_info); 6057 } 6058 } 6059 } 6060 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process)); 6061 if (log && uuid.IsValid()) 6062 LLDB_LOGF(log, 6063 "lldb's in-memory shared cache has a UUID of %s base address of " 6064 "0x%" PRIx64, 6065 uuid.GetAsString().c_str(), base_addr); 6066 #endif 6067 } 6068 6069 static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data, 6070 lldb::offset_t offset, 6071 size_t ncmds) { 6072 for (size_t i = 0; i < ncmds; i++) { 6073 const lldb::offset_t load_cmd_offset = offset; 6074 llvm::MachO::load_command lc = {}; 6075 if (data.GetU32(&offset, &lc.cmd, 2) == nullptr) 6076 break; 6077 6078 uint32_t version = 0; 6079 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX || 6080 lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS || 6081 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS || 6082 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) { 6083 // struct version_min_command { 6084 // uint32_t cmd; // LC_VERSION_MIN_* 6085 // uint32_t cmdsize; 6086 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz 6087 // uint32_t sdk; 6088 // }; 6089 // We want to read version. 6090 version = data.GetU32(&offset); 6091 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) { 6092 // struct build_version_command { 6093 // uint32_t cmd; // LC_BUILD_VERSION 6094 // uint32_t cmdsize; 6095 // uint32_t platform; 6096 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz 6097 // uint32_t sdk; 6098 // uint32_t ntools; 6099 // }; 6100 // We want to read minos. 6101 offset += sizeof(uint32_t); // Skip over platform 6102 version = data.GetU32(&offset); // Extract minos 6103 } 6104 6105 if (version) { 6106 const uint32_t xxxx = version >> 16; 6107 const uint32_t yy = (version >> 8) & 0xffu; 6108 const uint32_t zz = version & 0xffu; 6109 if (xxxx) 6110 return llvm::VersionTuple(xxxx, yy, zz); 6111 } 6112 offset = load_cmd_offset + lc.cmdsize; 6113 } 6114 return llvm::VersionTuple(); 6115 } 6116 6117 llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() { 6118 if (!m_min_os_version) 6119 m_min_os_version = FindMinimumVersionInfo( 6120 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6121 return *m_min_os_version; 6122 } 6123 6124 llvm::VersionTuple ObjectFileMachO::GetSDKVersion() { 6125 if (!m_sdk_versions) 6126 m_sdk_versions = FindMinimumVersionInfo( 6127 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds); 6128 return *m_sdk_versions; 6129 } 6130 6131 bool ObjectFileMachO::GetIsDynamicLinkEditor() { 6132 return m_header.filetype == llvm::MachO::MH_DYLINKER; 6133 } 6134 6135 bool ObjectFileMachO::CanTrustAddressRanges() { 6136 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can 6137 // be trusted by LLDB. 6138 return m_header.filetype == llvm::MachO::MH_DSYM; 6139 } 6140 6141 bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() { 6142 return m_allow_assembly_emulation_unwind_plans; 6143 } 6144 6145 Section *ObjectFileMachO::GetMachHeaderSection() { 6146 // Find the first address of the mach header which is the first non-zero file 6147 // sized section whose file offset is zero. This is the base file address of 6148 // the mach-o file which can be subtracted from the vmaddr of the other 6149 // segments found in memory and added to the load address 6150 ModuleSP module_sp = GetModule(); 6151 if (!module_sp) 6152 return nullptr; 6153 SectionList *section_list = GetSectionList(); 6154 if (!section_list) 6155 return nullptr; 6156 6157 // Some binaries can have a TEXT segment with a non-zero file offset. 6158 // Binaries in the shared cache are one example. Some hand-generated 6159 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order 6160 // in the file, even though they're laid out correctly in vmaddr terms. 6161 SectionSP text_segment_sp = 6162 section_list->FindSectionByName(GetSegmentNameTEXT()); 6163 if (text_segment_sp.get() && SectionIsLoadable(text_segment_sp.get())) 6164 return text_segment_sp.get(); 6165 6166 const size_t num_sections = section_list->GetSize(); 6167 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6168 Section *section = section_list->GetSectionAtIndex(sect_idx).get(); 6169 if (section->GetFileOffset() == 0 && SectionIsLoadable(section)) 6170 return section; 6171 } 6172 6173 return nullptr; 6174 } 6175 6176 bool ObjectFileMachO::SectionIsLoadable(const Section *section) { 6177 if (!section) 6178 return false; 6179 if (section->IsThreadSpecific()) 6180 return false; 6181 if (GetModule().get() != section->GetModule().get()) 6182 return false; 6183 // firmware style binaries with llvm gcov segment do 6184 // not have that segment mapped into memory. 6185 if (section->GetName() == GetSegmentNameLLVM_COV()) { 6186 const Strata strata = GetStrata(); 6187 if (strata == eStrataKernel || strata == eStrataRawImage) 6188 return false; 6189 } 6190 // Be careful with __LINKEDIT and __DWARF segments 6191 if (section->GetName() == GetSegmentNameLINKEDIT() || 6192 section->GetName() == GetSegmentNameDWARF()) { 6193 // Only map __LINKEDIT and __DWARF if we have an in memory image and 6194 // this isn't a kernel binary like a kext or mach_kernel. 6195 const bool is_memory_image = (bool)m_process_wp.lock(); 6196 const Strata strata = GetStrata(); 6197 if (is_memory_image == false || strata == eStrataKernel) 6198 return false; 6199 } 6200 return true; 6201 } 6202 6203 lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage( 6204 lldb::addr_t header_load_address, const Section *header_section, 6205 const Section *section) { 6206 ModuleSP module_sp = GetModule(); 6207 if (module_sp && header_section && section && 6208 header_load_address != LLDB_INVALID_ADDRESS) { 6209 lldb::addr_t file_addr = header_section->GetFileAddress(); 6210 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section)) 6211 return section->GetFileAddress() - file_addr + header_load_address; 6212 } 6213 return LLDB_INVALID_ADDRESS; 6214 } 6215 6216 bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value, 6217 bool value_is_offset) { 6218 Log *log(GetLog(LLDBLog::DynamicLoader)); 6219 ModuleSP module_sp = GetModule(); 6220 if (!module_sp) 6221 return false; 6222 6223 SectionList *section_list = GetSectionList(); 6224 if (!section_list) 6225 return false; 6226 6227 size_t num_loaded_sections = 0; 6228 const size_t num_sections = section_list->GetSize(); 6229 6230 // Warn if some top-level segments map to the same address. The binary may be 6231 // malformed. 6232 const bool warn_multiple = true; 6233 6234 if (log) { 6235 StreamString logmsg; 6236 logmsg << "ObjectFileMachO::SetLoadAddress "; 6237 if (GetFileSpec()) 6238 logmsg << "path='" << GetFileSpec().GetPath() << "' "; 6239 if (GetUUID()) { 6240 logmsg << "uuid=" << GetUUID().GetAsString(); 6241 } 6242 LLDB_LOGF(log, "%s", logmsg.GetData()); 6243 } 6244 if (value_is_offset) { 6245 // "value" is an offset to apply to each top level segment 6246 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6247 // Iterate through the object file sections to find all of the 6248 // sections that size on disk (to avoid __PAGEZERO) and load them 6249 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6250 if (SectionIsLoadable(section_sp.get())) { 6251 LLDB_LOGF(log, 6252 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6253 "0x%" PRIx64, 6254 section_sp->GetName().AsCString(), 6255 section_sp->GetFileAddress() + value); 6256 if (target.SetSectionLoadAddress(section_sp, 6257 section_sp->GetFileAddress() + value, 6258 warn_multiple)) 6259 ++num_loaded_sections; 6260 } 6261 } 6262 } else { 6263 // "value" is the new base address of the mach_header, adjust each 6264 // section accordingly 6265 6266 Section *mach_header_section = GetMachHeaderSection(); 6267 if (mach_header_section) { 6268 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { 6269 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); 6270 6271 lldb::addr_t section_load_addr = 6272 CalculateSectionLoadAddressForMemoryImage( 6273 value, mach_header_section, section_sp.get()); 6274 if (section_load_addr != LLDB_INVALID_ADDRESS) { 6275 LLDB_LOGF(log, 6276 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is " 6277 "0x%" PRIx64, 6278 section_sp->GetName().AsCString(), section_load_addr); 6279 if (target.SetSectionLoadAddress(section_sp, section_load_addr, 6280 warn_multiple)) 6281 ++num_loaded_sections; 6282 } 6283 } 6284 } 6285 } 6286 return num_loaded_sections > 0; 6287 } 6288 6289 struct all_image_infos_header { 6290 uint32_t version; // currently 1 6291 uint32_t imgcount; // number of binary images 6292 uint64_t entries_fileoff; // file offset in the corefile of where the array of 6293 // struct entry's begin. 6294 uint32_t entries_size; // size of 'struct entry'. 6295 uint32_t unused; 6296 }; 6297 6298 struct image_entry { 6299 uint64_t filepath_offset; // offset in corefile to c-string of the file path, 6300 // UINT64_MAX if unavailable. 6301 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if 6302 // uuid is unknown. 6303 uint64_t load_address; // UINT64_MAX if unknown. 6304 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's. 6305 uint32_t segment_count; // The number of segments for this binary. 6306 uint32_t unused; 6307 6308 image_entry() { 6309 filepath_offset = UINT64_MAX; 6310 memset(&uuid, 0, sizeof(uuid_t)); 6311 segment_count = 0; 6312 load_address = UINT64_MAX; 6313 seg_addrs_offset = UINT64_MAX; 6314 unused = 0; 6315 } 6316 image_entry(const image_entry &rhs) { 6317 filepath_offset = rhs.filepath_offset; 6318 memcpy(&uuid, &rhs.uuid, sizeof(uuid_t)); 6319 segment_count = rhs.segment_count; 6320 seg_addrs_offset = rhs.seg_addrs_offset; 6321 load_address = rhs.load_address; 6322 unused = rhs.unused; 6323 } 6324 }; 6325 6326 struct segment_vmaddr { 6327 char segname[16]; 6328 uint64_t vmaddr; 6329 uint64_t unused; 6330 6331 segment_vmaddr() { 6332 memset(&segname, 0, 16); 6333 vmaddr = UINT64_MAX; 6334 unused = 0; 6335 } 6336 segment_vmaddr(const segment_vmaddr &rhs) { 6337 memcpy(&segname, &rhs.segname, 16); 6338 vmaddr = rhs.vmaddr; 6339 unused = rhs.unused; 6340 } 6341 }; 6342 6343 // Write the payload for the "all image infos" LC_NOTE into 6344 // the supplied all_image_infos_payload, assuming that this 6345 // will be written into the corefile starting at 6346 // initial_file_offset. 6347 // 6348 // The placement of this payload is a little tricky. We're 6349 // laying this out as 6350 // 6351 // 1. header (struct all_image_info_header) 6352 // 2. Array of fixed-size (struct image_entry)'s, one 6353 // per binary image present in the process. 6354 // 3. Arrays of (struct segment_vmaddr)'s, a varying number 6355 // for each binary image. 6356 // 4. Variable length c-strings of binary image filepaths, 6357 // one per binary. 6358 // 6359 // To compute where everything will be laid out in the 6360 // payload, we need to iterate over the images and calculate 6361 // how many segment_vmaddr structures each image will need, 6362 // and how long each image's filepath c-string is. There 6363 // are some multiple passes over the image list while calculating 6364 // everything. 6365 6366 static offset_t 6367 CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp, 6368 offset_t initial_file_offset, 6369 StreamString &all_image_infos_payload, 6370 lldb_private::SaveCoreOptions &options) { 6371 Target &target = process_sp->GetTarget(); 6372 ModuleList modules = target.GetImages(); 6373 6374 // stack-only corefiles have no reason to include binaries that 6375 // are not executing; we're trying to make the smallest corefile 6376 // we can, so leave the rest out. 6377 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly) 6378 modules.Clear(); 6379 6380 std::set<std::string> executing_uuids; 6381 std::vector<ThreadSP> thread_list = 6382 process_sp->CalculateCoreFileThreadList(options); 6383 for (const ThreadSP &thread_sp : thread_list) { 6384 uint32_t stack_frame_count = thread_sp->GetStackFrameCount(); 6385 for (uint32_t j = 0; j < stack_frame_count; j++) { 6386 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(j); 6387 Address pc = stack_frame_sp->GetFrameCodeAddress(); 6388 ModuleSP module_sp = pc.GetModule(); 6389 if (module_sp) { 6390 UUID uuid = module_sp->GetUUID(); 6391 if (uuid.IsValid()) { 6392 executing_uuids.insert(uuid.GetAsString()); 6393 modules.AppendIfNeeded(module_sp); 6394 } 6395 } 6396 } 6397 } 6398 size_t modules_count = modules.GetSize(); 6399 6400 struct all_image_infos_header infos; 6401 infos.version = 1; 6402 infos.imgcount = modules_count; 6403 infos.entries_size = sizeof(image_entry); 6404 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header); 6405 infos.unused = 0; 6406 6407 all_image_infos_payload.PutHex32(infos.version); 6408 all_image_infos_payload.PutHex32(infos.imgcount); 6409 all_image_infos_payload.PutHex64(infos.entries_fileoff); 6410 all_image_infos_payload.PutHex32(infos.entries_size); 6411 all_image_infos_payload.PutHex32(infos.unused); 6412 6413 // First create the structures for all of the segment name+vmaddr vectors 6414 // for each module, so we will know the size of them as we add the 6415 // module entries. 6416 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs; 6417 for (size_t i = 0; i < modules_count; i++) { 6418 ModuleSP module = modules.GetModuleAtIndex(i); 6419 6420 SectionList *sections = module->GetSectionList(); 6421 size_t sections_count = sections->GetSize(); 6422 std::vector<segment_vmaddr> segment_vmaddrs; 6423 for (size_t j = 0; j < sections_count; j++) { 6424 SectionSP section = sections->GetSectionAtIndex(j); 6425 if (!section->GetParent().get()) { 6426 addr_t vmaddr = section->GetLoadBaseAddress(&target); 6427 if (vmaddr == LLDB_INVALID_ADDRESS) 6428 continue; 6429 ConstString name = section->GetName(); 6430 segment_vmaddr seg_vmaddr; 6431 // This is the uncommon case where strncpy is exactly 6432 // the right one, doesn't need to be nul terminated. 6433 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and 6434 // is not guaranteed to be nul-terminated if all 16 characters are 6435 // used. 6436 // coverity[buffer_size_warning] 6437 strncpy(seg_vmaddr.segname, name.AsCString(), 6438 sizeof(seg_vmaddr.segname)); 6439 seg_vmaddr.vmaddr = vmaddr; 6440 seg_vmaddr.unused = 0; 6441 segment_vmaddrs.push_back(seg_vmaddr); 6442 } 6443 } 6444 modules_segment_vmaddrs.push_back(segment_vmaddrs); 6445 } 6446 6447 offset_t size_of_vmaddr_structs = 0; 6448 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6449 size_of_vmaddr_structs += 6450 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr); 6451 } 6452 6453 offset_t size_of_filepath_cstrings = 0; 6454 for (size_t i = 0; i < modules_count; i++) { 6455 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6456 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1; 6457 } 6458 6459 // Calculate the file offsets of our "all image infos" payload in the 6460 // corefile. initial_file_offset the original value passed in to this method. 6461 6462 offset_t start_of_entries = 6463 initial_file_offset + sizeof(all_image_infos_header); 6464 offset_t start_of_seg_vmaddrs = 6465 start_of_entries + sizeof(image_entry) * modules_count; 6466 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs; 6467 6468 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings; 6469 6470 // Now write the one-per-module 'struct image_entry' into the 6471 // StringStream; keep track of where the struct segment_vmaddr 6472 // entries for each module will end up in the corefile. 6473 6474 offset_t current_string_offset = start_of_filenames; 6475 offset_t current_segaddrs_offset = start_of_seg_vmaddrs; 6476 std::vector<struct image_entry> image_entries; 6477 for (size_t i = 0; i < modules_count; i++) { 6478 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6479 6480 struct image_entry ent; 6481 memcpy(&ent.uuid, module_sp->GetUUID().GetBytes().data(), sizeof(ent.uuid)); 6482 if (modules_segment_vmaddrs[i].size() > 0) { 6483 ent.segment_count = modules_segment_vmaddrs[i].size(); 6484 ent.seg_addrs_offset = current_segaddrs_offset; 6485 } 6486 ent.filepath_offset = current_string_offset; 6487 ObjectFile *objfile = module_sp->GetObjectFile(); 6488 if (objfile) { 6489 Address base_addr(objfile->GetBaseAddress()); 6490 if (base_addr.IsValid()) { 6491 ent.load_address = base_addr.GetLoadAddress(&target); 6492 } 6493 } 6494 6495 all_image_infos_payload.PutHex64(ent.filepath_offset); 6496 all_image_infos_payload.PutRawBytes(ent.uuid, sizeof(ent.uuid)); 6497 all_image_infos_payload.PutHex64(ent.load_address); 6498 all_image_infos_payload.PutHex64(ent.seg_addrs_offset); 6499 all_image_infos_payload.PutHex32(ent.segment_count); 6500 6501 if (executing_uuids.find(module_sp->GetUUID().GetAsString()) != 6502 executing_uuids.end()) 6503 all_image_infos_payload.PutHex32(1); 6504 else 6505 all_image_infos_payload.PutHex32(0); 6506 6507 current_segaddrs_offset += ent.segment_count * sizeof(segment_vmaddr); 6508 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1; 6509 } 6510 6511 // Now write the struct segment_vmaddr entries into the StringStream. 6512 6513 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) { 6514 if (modules_segment_vmaddrs[i].size() == 0) 6515 continue; 6516 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) { 6517 all_image_infos_payload.PutRawBytes(segvm.segname, sizeof(segvm.segname)); 6518 all_image_infos_payload.PutHex64(segvm.vmaddr); 6519 all_image_infos_payload.PutHex64(segvm.unused); 6520 } 6521 } 6522 6523 for (size_t i = 0; i < modules_count; i++) { 6524 ModuleSP module_sp = modules.GetModuleAtIndex(i); 6525 std::string filepath = module_sp->GetFileSpec().GetPath(); 6526 all_image_infos_payload.PutRawBytes(filepath.data(), filepath.size() + 1); 6527 } 6528 6529 return final_file_offset; 6530 } 6531 6532 // Temp struct used to combine contiguous memory regions with 6533 // identical permissions. 6534 struct page_object { 6535 addr_t addr; 6536 addr_t size; 6537 uint32_t prot; 6538 }; 6539 6540 bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp, 6541 lldb_private::SaveCoreOptions &options, 6542 Status &error) { 6543 // The FileSpec and Process are already checked in PluginManager::SaveCore. 6544 assert(options.GetOutputFile().has_value()); 6545 assert(process_sp); 6546 const FileSpec outfile = options.GetOutputFile().value(); 6547 6548 // MachO defaults to dirty pages 6549 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified) 6550 options.SetStyle(eSaveCoreDirtyOnly); 6551 6552 Target &target = process_sp->GetTarget(); 6553 const ArchSpec target_arch = target.GetArchitecture(); 6554 const llvm::Triple &target_triple = target_arch.GetTriple(); 6555 if (target_triple.getVendor() == llvm::Triple::Apple && 6556 (target_triple.getOS() == llvm::Triple::MacOSX || 6557 target_triple.getOS() == llvm::Triple::IOS || 6558 target_triple.getOS() == llvm::Triple::WatchOS || 6559 target_triple.getOS() == llvm::Triple::TvOS || 6560 target_triple.getOS() == llvm::Triple::XROS)) { 6561 // NEED_BRIDGEOS_TRIPLE target_triple.getOS() == llvm::Triple::BridgeOS)) 6562 // { 6563 bool make_core = false; 6564 switch (target_arch.GetMachine()) { 6565 case llvm::Triple::aarch64: 6566 case llvm::Triple::aarch64_32: 6567 case llvm::Triple::arm: 6568 case llvm::Triple::thumb: 6569 case llvm::Triple::x86: 6570 case llvm::Triple::x86_64: 6571 make_core = true; 6572 break; 6573 default: 6574 error = Status::FromErrorStringWithFormat( 6575 "unsupported core architecture: %s", target_triple.str().c_str()); 6576 break; 6577 } 6578 6579 if (make_core) { 6580 CoreFileMemoryRanges core_ranges; 6581 error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges); 6582 if (error.Success()) { 6583 const uint32_t addr_byte_size = target_arch.GetAddressByteSize(); 6584 const ByteOrder byte_order = target_arch.GetByteOrder(); 6585 std::vector<llvm::MachO::segment_command_64> segment_load_commands; 6586 for (const auto &core_range_info : core_ranges) { 6587 // TODO: Refactor RangeDataVector to have a data iterator. 6588 const auto &core_range = core_range_info.data; 6589 uint32_t cmd_type = LC_SEGMENT_64; 6590 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64); 6591 if (addr_byte_size == 4) { 6592 cmd_type = LC_SEGMENT; 6593 segment_size = sizeof(llvm::MachO::segment_command); 6594 } 6595 // Skip any ranges with no read/write/execute permissions and empty 6596 // ranges. 6597 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0) 6598 continue; 6599 uint32_t vm_prot = 0; 6600 if (core_range.lldb_permissions & ePermissionsReadable) 6601 vm_prot |= VM_PROT_READ; 6602 if (core_range.lldb_permissions & ePermissionsWritable) 6603 vm_prot |= VM_PROT_WRITE; 6604 if (core_range.lldb_permissions & ePermissionsExecutable) 6605 vm_prot |= VM_PROT_EXECUTE; 6606 const addr_t vm_addr = core_range.range.start(); 6607 const addr_t vm_size = core_range.range.size(); 6608 llvm::MachO::segment_command_64 segment = { 6609 cmd_type, // uint32_t cmd; 6610 segment_size, // uint32_t cmdsize; 6611 {0}, // char segname[16]; 6612 vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O 6613 vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O 6614 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O 6615 vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O 6616 vm_prot, // uint32_t maxprot; 6617 vm_prot, // uint32_t initprot; 6618 0, // uint32_t nsects; 6619 0}; // uint32_t flags; 6620 segment_load_commands.push_back(segment); 6621 } 6622 6623 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order); 6624 6625 llvm::MachO::mach_header_64 mach_header; 6626 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC; 6627 mach_header.cputype = target_arch.GetMachOCPUType(); 6628 mach_header.cpusubtype = target_arch.GetMachOCPUSubType(); 6629 mach_header.filetype = MH_CORE; 6630 mach_header.ncmds = segment_load_commands.size(); 6631 mach_header.flags = 0; 6632 mach_header.reserved = 0; 6633 ThreadList &thread_list = process_sp->GetThreadList(); 6634 const uint32_t num_threads = thread_list.GetSize(); 6635 6636 // Make an array of LC_THREAD data items. Each one contains the 6637 // contents of the LC_THREAD load command. The data doesn't contain 6638 // the load command + load command size, we will add the load command 6639 // and load command size as we emit the data. 6640 std::vector<StreamString> LC_THREAD_datas(num_threads); 6641 for (auto &LC_THREAD_data : LC_THREAD_datas) { 6642 LC_THREAD_data.GetFlags().Set(Stream::eBinary); 6643 LC_THREAD_data.SetAddressByteSize(addr_byte_size); 6644 LC_THREAD_data.SetByteOrder(byte_order); 6645 } 6646 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { 6647 ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); 6648 if (thread_sp) { 6649 switch (mach_header.cputype) { 6650 case llvm::MachO::CPU_TYPE_ARM64: 6651 case llvm::MachO::CPU_TYPE_ARM64_32: 6652 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD( 6653 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6654 break; 6655 6656 case llvm::MachO::CPU_TYPE_ARM: 6657 RegisterContextDarwin_arm_Mach::Create_LC_THREAD( 6658 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6659 break; 6660 6661 case llvm::MachO::CPU_TYPE_I386: 6662 RegisterContextDarwin_i386_Mach::Create_LC_THREAD( 6663 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6664 break; 6665 6666 case llvm::MachO::CPU_TYPE_X86_64: 6667 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD( 6668 thread_sp.get(), LC_THREAD_datas[thread_idx]); 6669 break; 6670 } 6671 } 6672 } 6673 6674 // The size of the load command is the size of the segments... 6675 if (addr_byte_size == 8) { 6676 mach_header.sizeofcmds = segment_load_commands.size() * 6677 sizeof(llvm::MachO::segment_command_64); 6678 } else { 6679 mach_header.sizeofcmds = segment_load_commands.size() * 6680 sizeof(llvm::MachO::segment_command); 6681 } 6682 6683 // and the size of all LC_THREAD load command 6684 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6685 ++mach_header.ncmds; 6686 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize(); 6687 } 6688 6689 // Bits will be set to indicate which bits are NOT used in 6690 // addressing in this process or 0 for unknown. 6691 uint64_t address_mask = process_sp->GetCodeAddressMask(); 6692 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6693 // LC_NOTE "addrable bits" 6694 mach_header.ncmds++; 6695 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6696 } 6697 6698 // LC_NOTE "process metadata" 6699 mach_header.ncmds++; 6700 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6701 6702 // LC_NOTE "all image infos" 6703 mach_header.ncmds++; 6704 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command); 6705 6706 // Write the mach header 6707 buffer.PutHex32(mach_header.magic); 6708 buffer.PutHex32(mach_header.cputype); 6709 buffer.PutHex32(mach_header.cpusubtype); 6710 buffer.PutHex32(mach_header.filetype); 6711 buffer.PutHex32(mach_header.ncmds); 6712 buffer.PutHex32(mach_header.sizeofcmds); 6713 buffer.PutHex32(mach_header.flags); 6714 if (addr_byte_size == 8) { 6715 buffer.PutHex32(mach_header.reserved); 6716 } 6717 6718 // Skip the mach header and all load commands and align to the next 6719 // 0x1000 byte boundary 6720 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds; 6721 6722 file_offset = llvm::alignTo(file_offset, 16); 6723 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes; 6724 6725 // Add "addrable bits" LC_NOTE when an address mask is available 6726 if (address_mask != LLDB_INVALID_ADDRESS_MASK) { 6727 std::unique_ptr<LCNoteEntry> addrable_bits_lcnote_up( 6728 new LCNoteEntry(addr_byte_size, byte_order)); 6729 addrable_bits_lcnote_up->name = "addrable bits"; 6730 addrable_bits_lcnote_up->payload_file_offset = file_offset; 6731 int bits = std::bitset<64>(~address_mask).count(); 6732 addrable_bits_lcnote_up->payload.PutHex32(4); // version 6733 addrable_bits_lcnote_up->payload.PutHex32( 6734 bits); // # of bits used for low addresses 6735 addrable_bits_lcnote_up->payload.PutHex32( 6736 bits); // # of bits used for high addresses 6737 addrable_bits_lcnote_up->payload.PutHex32(0); // reserved 6738 6739 file_offset += addrable_bits_lcnote_up->payload.GetSize(); 6740 6741 lc_notes.push_back(std::move(addrable_bits_lcnote_up)); 6742 } 6743 6744 // Add "process metadata" LC_NOTE 6745 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up( 6746 new LCNoteEntry(addr_byte_size, byte_order)); 6747 thread_extrainfo_lcnote_up->name = "process metadata"; 6748 thread_extrainfo_lcnote_up->payload_file_offset = file_offset; 6749 6750 StructuredData::DictionarySP dict( 6751 std::make_shared<StructuredData::Dictionary>()); 6752 StructuredData::ArraySP threads( 6753 std::make_shared<StructuredData::Array>()); 6754 for (const ThreadSP &thread_sp : 6755 process_sp->CalculateCoreFileThreadList(options)) { 6756 StructuredData::DictionarySP thread( 6757 std::make_shared<StructuredData::Dictionary>()); 6758 thread->AddIntegerItem("thread_id", thread_sp->GetID()); 6759 threads->AddItem(thread); 6760 } 6761 dict->AddItem("threads", threads); 6762 StreamString strm; 6763 dict->Dump(strm, /* pretty */ false); 6764 thread_extrainfo_lcnote_up->payload.PutRawBytes(strm.GetData(), 6765 strm.GetSize()); 6766 6767 file_offset += thread_extrainfo_lcnote_up->payload.GetSize(); 6768 file_offset = llvm::alignTo(file_offset, 16); 6769 lc_notes.push_back(std::move(thread_extrainfo_lcnote_up)); 6770 6771 // Add "all image infos" LC_NOTE 6772 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up( 6773 new LCNoteEntry(addr_byte_size, byte_order)); 6774 all_image_infos_lcnote_up->name = "all image infos"; 6775 all_image_infos_lcnote_up->payload_file_offset = file_offset; 6776 file_offset = CreateAllImageInfosPayload( 6777 process_sp, file_offset, all_image_infos_lcnote_up->payload, 6778 options); 6779 lc_notes.push_back(std::move(all_image_infos_lcnote_up)); 6780 6781 // Add LC_NOTE load commands 6782 for (auto &lcnote : lc_notes) { 6783 // Add the LC_NOTE load command to the file. 6784 buffer.PutHex32(LC_NOTE); 6785 buffer.PutHex32(sizeof(llvm::MachO::note_command)); 6786 char namebuf[16]; 6787 memset(namebuf, 0, sizeof(namebuf)); 6788 // This is the uncommon case where strncpy is exactly 6789 // the right one, doesn't need to be nul terminated. 6790 // LC_NOTE name field is char[16] and is not guaranteed to be 6791 // nul-terminated. 6792 // coverity[buffer_size_warning] 6793 strncpy(namebuf, lcnote->name.c_str(), sizeof(namebuf)); 6794 buffer.PutRawBytes(namebuf, sizeof(namebuf)); 6795 buffer.PutHex64(lcnote->payload_file_offset); 6796 buffer.PutHex64(lcnote->payload.GetSize()); 6797 } 6798 6799 // Align to 4096-byte page boundary for the LC_SEGMENTs. 6800 file_offset = llvm::alignTo(file_offset, 4096); 6801 6802 for (auto &segment : segment_load_commands) { 6803 segment.fileoff = file_offset; 6804 file_offset += segment.filesize; 6805 } 6806 6807 // Write out all of the LC_THREAD load commands 6808 for (const auto &LC_THREAD_data : LC_THREAD_datas) { 6809 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize(); 6810 buffer.PutHex32(LC_THREAD); 6811 buffer.PutHex32(8 + LC_THREAD_data_size); // cmd + cmdsize + data 6812 buffer.Write(LC_THREAD_data.GetString().data(), LC_THREAD_data_size); 6813 } 6814 6815 // Write out all of the segment load commands 6816 for (const auto &segment : segment_load_commands) { 6817 buffer.PutHex32(segment.cmd); 6818 buffer.PutHex32(segment.cmdsize); 6819 buffer.PutRawBytes(segment.segname, sizeof(segment.segname)); 6820 if (addr_byte_size == 8) { 6821 buffer.PutHex64(segment.vmaddr); 6822 buffer.PutHex64(segment.vmsize); 6823 buffer.PutHex64(segment.fileoff); 6824 buffer.PutHex64(segment.filesize); 6825 } else { 6826 buffer.PutHex32(static_cast<uint32_t>(segment.vmaddr)); 6827 buffer.PutHex32(static_cast<uint32_t>(segment.vmsize)); 6828 buffer.PutHex32(static_cast<uint32_t>(segment.fileoff)); 6829 buffer.PutHex32(static_cast<uint32_t>(segment.filesize)); 6830 } 6831 buffer.PutHex32(segment.maxprot); 6832 buffer.PutHex32(segment.initprot); 6833 buffer.PutHex32(segment.nsects); 6834 buffer.PutHex32(segment.flags); 6835 } 6836 6837 std::string core_file_path(outfile.GetPath()); 6838 auto core_file = FileSystem::Instance().Open( 6839 outfile, File::eOpenOptionWriteOnly | File::eOpenOptionTruncate | 6840 File::eOpenOptionCanCreate); 6841 if (!core_file) { 6842 error = Status::FromError(core_file.takeError()); 6843 } else { 6844 // Read 1 page at a time 6845 uint8_t bytes[0x1000]; 6846 // Write the mach header and load commands out to the core file 6847 size_t bytes_written = buffer.GetString().size(); 6848 error = 6849 core_file.get()->Write(buffer.GetString().data(), bytes_written); 6850 if (error.Success()) { 6851 6852 for (auto &lcnote : lc_notes) { 6853 if (core_file.get()->SeekFromStart(lcnote->payload_file_offset) == 6854 -1) { 6855 error = Status::FromErrorStringWithFormat( 6856 "Unable to seek to corefile pos " 6857 "to write '%s' LC_NOTE payload", 6858 lcnote->name.c_str()); 6859 return false; 6860 } 6861 bytes_written = lcnote->payload.GetSize(); 6862 error = core_file.get()->Write(lcnote->payload.GetData(), 6863 bytes_written); 6864 if (!error.Success()) 6865 return false; 6866 } 6867 6868 // Now write the file data for all memory segments in the process 6869 for (const auto &segment : segment_load_commands) { 6870 if (core_file.get()->SeekFromStart(segment.fileoff) == -1) { 6871 error = Status::FromErrorStringWithFormat( 6872 "unable to seek to offset 0x%" PRIx64 " in '%s'", 6873 segment.fileoff, core_file_path.c_str()); 6874 break; 6875 } 6876 6877 target.GetDebugger().GetAsyncOutputStream()->Printf( 6878 "Saving %" PRId64 6879 " bytes of data for memory region at 0x%" PRIx64 "\n", 6880 segment.vmsize, segment.vmaddr); 6881 addr_t bytes_left = segment.vmsize; 6882 addr_t addr = segment.vmaddr; 6883 Status memory_read_error; 6884 while (bytes_left > 0 && error.Success()) { 6885 const size_t bytes_to_read = 6886 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left; 6887 6888 // In a savecore setting, we don't really care about caching, 6889 // as the data is dumped and very likely never read again, 6890 // so we call ReadMemoryFromInferior to bypass it. 6891 const size_t bytes_read = process_sp->ReadMemoryFromInferior( 6892 addr, bytes, bytes_to_read, memory_read_error); 6893 6894 if (bytes_read == bytes_to_read) { 6895 size_t bytes_written = bytes_read; 6896 error = core_file.get()->Write(bytes, bytes_written); 6897 bytes_left -= bytes_read; 6898 addr += bytes_read; 6899 } else { 6900 // Some pages within regions are not readable, those should 6901 // be zero filled 6902 memset(bytes, 0, bytes_to_read); 6903 size_t bytes_written = bytes_to_read; 6904 error = core_file.get()->Write(bytes, bytes_written); 6905 bytes_left -= bytes_to_read; 6906 addr += bytes_to_read; 6907 } 6908 } 6909 } 6910 } 6911 } 6912 } 6913 } 6914 return true; // This is the right plug to handle saving core files for 6915 // this process 6916 } 6917 return false; 6918 } 6919 6920 ObjectFileMachO::MachOCorefileAllImageInfos 6921 ObjectFileMachO::GetCorefileAllImageInfos() { 6922 MachOCorefileAllImageInfos image_infos; 6923 Log *log(GetLog(LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process | 6924 LLDBLog::DynamicLoader)); 6925 6926 auto lc_notes = FindLC_NOTEByName("all image infos"); 6927 for (auto lc_note : lc_notes) { 6928 offset_t payload_offset = std::get<0>(lc_note); 6929 // Read the struct all_image_infos_header. 6930 uint32_t version = m_data.GetU32(&payload_offset); 6931 if (version != 1) { 6932 return image_infos; 6933 } 6934 uint32_t imgcount = m_data.GetU32(&payload_offset); 6935 uint64_t entries_fileoff = m_data.GetU64(&payload_offset); 6936 // 'entries_size' is not used, nor is the 'unused' entry. 6937 // offset += 4; // uint32_t entries_size; 6938 // offset += 4; // uint32_t unused; 6939 6940 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images", 6941 version, imgcount); 6942 payload_offset = entries_fileoff; 6943 for (uint32_t i = 0; i < imgcount; i++) { 6944 // Read the struct image_entry. 6945 offset_t filepath_offset = m_data.GetU64(&payload_offset); 6946 uuid_t uuid; 6947 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6948 sizeof(uuid_t)); 6949 uint64_t load_address = m_data.GetU64(&payload_offset); 6950 offset_t seg_addrs_offset = m_data.GetU64(&payload_offset); 6951 uint32_t segment_count = m_data.GetU32(&payload_offset); 6952 uint32_t currently_executing = m_data.GetU32(&payload_offset); 6953 6954 MachOCorefileImageEntry image_entry; 6955 image_entry.filename = (const char *)m_data.GetCStr(&filepath_offset); 6956 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6957 image_entry.load_address = load_address; 6958 image_entry.currently_executing = currently_executing; 6959 6960 offset_t seg_vmaddrs_offset = seg_addrs_offset; 6961 for (uint32_t j = 0; j < segment_count; j++) { 6962 char segname[17]; 6963 m_data.CopyData(seg_vmaddrs_offset, 16, segname); 6964 segname[16] = '\0'; 6965 seg_vmaddrs_offset += 16; 6966 uint64_t vmaddr = m_data.GetU64(&seg_vmaddrs_offset); 6967 seg_vmaddrs_offset += 8; /* unused */ 6968 6969 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr}; 6970 image_entry.segment_load_addresses.push_back(new_seg); 6971 } 6972 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s", 6973 image_entry.filename.c_str(), 6974 image_entry.uuid.GetAsString().c_str(), 6975 image_entry.load_address, 6976 image_entry.currently_executing ? "currently executing" 6977 : "not currently executing"); 6978 image_infos.all_image_infos.push_back(image_entry); 6979 } 6980 } 6981 6982 lc_notes = FindLC_NOTEByName("load binary"); 6983 for (auto lc_note : lc_notes) { 6984 offset_t payload_offset = std::get<0>(lc_note); 6985 uint32_t version = m_data.GetU32(&payload_offset); 6986 if (version == 1) { 6987 uuid_t uuid; 6988 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)), 6989 sizeof(uuid_t)); 6990 uint64_t load_address = m_data.GetU64(&payload_offset); 6991 uint64_t slide = m_data.GetU64(&payload_offset); 6992 std::string filename = m_data.GetCStr(&payload_offset); 6993 6994 MachOCorefileImageEntry image_entry; 6995 image_entry.filename = filename; 6996 image_entry.uuid = UUID(uuid, sizeof(uuid_t)); 6997 image_entry.load_address = load_address; 6998 image_entry.slide = slide; 6999 image_entry.currently_executing = true; 7000 image_infos.all_image_infos.push_back(image_entry); 7001 LLDB_LOGF(log, 7002 "LC_NOTE 'load binary' found, filename %s uuid %s load " 7003 "address 0x%" PRIx64 " slide 0x%" PRIx64, 7004 filename.c_str(), 7005 image_entry.uuid.IsValid() 7006 ? image_entry.uuid.GetAsString().c_str() 7007 : "00000000-0000-0000-0000-000000000000", 7008 load_address, slide); 7009 } 7010 } 7011 7012 return image_infos; 7013 } 7014 7015 bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { 7016 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos(); 7017 Log *log = GetLog(LLDBLog::Object | LLDBLog::DynamicLoader); 7018 Status error; 7019 7020 bool found_platform_binary = false; 7021 ModuleList added_modules; 7022 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) { 7023 ModuleSP module_sp, local_filesystem_module_sp; 7024 7025 // If this is a platform binary, it has been loaded (or registered with 7026 // the DynamicLoader to be loaded), we don't need to do any further 7027 // processing. We're not going to call ModulesDidLoad on this in this 7028 // method, so notify==true. 7029 if (process.GetTarget() 7030 .GetDebugger() 7031 .GetPlatformList() 7032 .LoadPlatformBinaryAndSetup(&process, image.load_address, 7033 true /* notify */)) { 7034 LLDB_LOGF(log, 7035 "ObjectFileMachO::%s binary at 0x%" PRIx64 7036 " is a platform binary, has been handled by a Platform plugin.", 7037 __FUNCTION__, image.load_address); 7038 continue; 7039 } 7040 7041 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS; 7042 uint64_t value = value_is_offset ? image.slide : image.load_address; 7043 if (value_is_offset && value == LLDB_INVALID_ADDRESS) { 7044 // We have neither address nor slide; so we will find the binary 7045 // by UUID and load it at slide/offset 0. 7046 value = 0; 7047 } 7048 7049 // We have either a UUID, or we have a load address which 7050 // and can try to read load commands and find a UUID. 7051 if (image.uuid.IsValid() || 7052 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) { 7053 const bool set_load_address = image.segment_load_addresses.size() == 0; 7054 const bool notify = false; 7055 // Userland Darwin binaries will have segment load addresses via 7056 // the `all image infos` LC_NOTE. 7057 const bool allow_memory_image_last_resort = 7058 image.segment_load_addresses.size(); 7059 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress( 7060 &process, image.filename, image.uuid, value, value_is_offset, 7061 image.currently_executing, notify, set_load_address, 7062 allow_memory_image_last_resort); 7063 } 7064 7065 // We have a ModuleSP to load in the Target. Load it at the 7066 // correct address/slide and notify/load scripting resources. 7067 if (module_sp) { 7068 added_modules.Append(module_sp, false /* notify */); 7069 7070 // We have a list of segment load address 7071 if (image.segment_load_addresses.size() > 0) { 7072 if (log) { 7073 std::string uuidstr = image.uuid.GetAsString(); 7074 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7075 "UUID %s with section load addresses", 7076 module_sp->GetFileSpec().GetPath().c_str(), 7077 uuidstr.c_str()); 7078 } 7079 for (auto name_vmaddr_tuple : image.segment_load_addresses) { 7080 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList(); 7081 if (sectlist) { 7082 SectionSP sect_sp = 7083 sectlist->FindSectionByName(std::get<0>(name_vmaddr_tuple)); 7084 if (sect_sp) { 7085 process.GetTarget().SetSectionLoadAddress( 7086 sect_sp, std::get<1>(name_vmaddr_tuple)); 7087 } 7088 } 7089 } 7090 } else { 7091 if (log) { 7092 std::string uuidstr = image.uuid.GetAsString(); 7093 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' " 7094 "UUID %s with %s 0x%" PRIx64, 7095 module_sp->GetFileSpec().GetPath().c_str(), 7096 uuidstr.c_str(), 7097 value_is_offset ? "slide" : "load address", value); 7098 } 7099 bool changed; 7100 module_sp->SetLoadAddress(process.GetTarget(), value, value_is_offset, 7101 changed); 7102 } 7103 } 7104 } 7105 if (added_modules.GetSize() > 0) { 7106 process.GetTarget().ModulesDidLoad(added_modules); 7107 process.Flush(); 7108 return true; 7109 } 7110 // Return true if the only binary we found was the platform binary, 7111 // and it was loaded outside the scope of this method. 7112 if (found_platform_binary) 7113 return true; 7114 7115 // No binaries. 7116 return false; 7117 } 7118