xref: /netbsd-src/external/gpl3/gdb/dist/gdb/solib-darwin.c (revision e1d2c773f6fd9b543d6e7f86e37d5564696629c9)
1 /* Handle Darwin shared libraries for GDB, the GNU Debugger.
2 
3    Copyright (C) 2009-2024 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 
21 #include "bfd.h"
22 #include "extract-store-integer.h"
23 #include "objfiles.h"
24 #include "gdbcore.h"
25 #include "target.h"
26 #include "inferior.h"
27 #include "regcache.h"
28 #include "gdb_bfd.h"
29 
30 #include "solist.h"
31 #include "solib-darwin.h"
32 
33 #include "mach-o.h"
34 #include "mach-o/external.h"
35 
36 struct gdb_dyld_image_info
37 {
38   /* Base address (which corresponds to the Mach-O header).  */
39   CORE_ADDR mach_header;
40   /* Image file path.  */
41   CORE_ADDR file_path;
42   /* st.m_time of image file.  */
43   unsigned long mtime;
44 };
45 
46 /* Content of inferior dyld_all_image_infos structure.
47    See /usr/include/mach-o/dyld_images.h for the documentation.  */
48 struct gdb_dyld_all_image_infos
49 {
50   /* Version (1).  */
51   unsigned int version;
52   /* Number of images.  */
53   unsigned int count;
54   /* Image description.  */
55   CORE_ADDR info;
56   /* Notifier (function called when a library is added or removed).  */
57   CORE_ADDR notifier;
58 };
59 
/* Inclusive range of dyld_all_image_infos versions that are accepted
   (see darwin_dyld_version_ok).  */
#define DYLD_VERSION_MIN 1
#define DYLD_VERSION_MAX 15
63 
64 /* Per PSPACE specific data.  */
65 struct darwin_info
66 {
67   /* Address of structure dyld_all_image_infos in inferior.  */
68   CORE_ADDR all_image_addr = 0;
69 
70   /* Gdb copy of dyld_all_info_infos.  */
71   struct gdb_dyld_all_image_infos all_image {};
72 };
73 
74 /* Per-program-space data key.  */
75 static const registry<program_space>::key<darwin_info>
76   solib_darwin_pspace_data;
77 
78 /* Get the darwin solib data for PSPACE.  If none is found yet, add it now.  This
79    function always returns a valid object.  */
80 
81 static darwin_info *
82 get_darwin_info (program_space *pspace)
83 {
84   darwin_info *info = solib_darwin_pspace_data.get (pspace);
85   if (info != nullptr)
86     return info;
87 
88   return solib_darwin_pspace_data.emplace (pspace);
89 }
90 
91 /* Return non-zero if the version in dyld_all_image is known.  */
92 
93 static int
94 darwin_dyld_version_ok (const struct darwin_info *info)
95 {
96   return info->all_image.version >= DYLD_VERSION_MIN
97     && info->all_image.version <= DYLD_VERSION_MAX;
98 }
99 
100 /* Read dyld_all_image from inferior.  */
101 
102 static void
103 darwin_load_image_infos (struct darwin_info *info)
104 {
105   gdb_byte buf[24];
106   bfd_endian byte_order = gdbarch_byte_order (current_inferior ()->arch ());
107   type *ptr_type
108     = builtin_type (current_inferior ()->arch ())->builtin_data_ptr;
109   int len;
110 
111   /* If the structure address is not known, don't continue.  */
112   if (info->all_image_addr == 0)
113     return;
114 
115   /* The structure has 4 fields: version (4 bytes), count (4 bytes),
116      info (pointer) and notifier (pointer).  */
117   len = 4 + 4 + 2 * ptr_type->length ();
118   gdb_assert (len <= sizeof (buf));
119   memset (&info->all_image, 0, sizeof (info->all_image));
120 
121   /* Read structure raw bytes from target.  */
122   if (target_read_memory (info->all_image_addr, buf, len))
123     return;
124 
125   /* Extract the fields.  */
126   info->all_image.version = extract_unsigned_integer (buf, 4, byte_order);
127   if (!darwin_dyld_version_ok (info))
128     return;
129 
130   info->all_image.count = extract_unsigned_integer (buf + 4, 4, byte_order);
131   info->all_image.info = extract_typed_address (buf + 8, ptr_type);
132   info->all_image.notifier = extract_typed_address
133     (buf + 8 + ptr_type->length (), ptr_type);
134 }
135 
136 /* Link map info to include in an allocated so_list entry.  */
137 
138 struct lm_info_darwin final : public lm_info
139 {
140   /* The target location of lm.  */
141   CORE_ADDR lm_addr = 0;
142 };
143 
144 /* Lookup the value for a specific symbol.  */
145 
146 static CORE_ADDR
147 lookup_symbol_from_bfd (bfd *abfd, const char *symname)
148 {
149   long storage_needed;
150   asymbol **symbol_table;
151   unsigned int number_of_symbols;
152   unsigned int i;
153   CORE_ADDR symaddr = 0;
154 
155   storage_needed = bfd_get_symtab_upper_bound (abfd);
156 
157   if (storage_needed <= 0)
158     return 0;
159 
160   symbol_table = (asymbol **) xmalloc (storage_needed);
161   number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table);
162 
163   for (i = 0; i < number_of_symbols; i++)
164     {
165       asymbol *sym = symbol_table[i];
166 
167       if (strcmp (sym->name, symname) == 0
168 	  && (sym->section->flags & (SEC_CODE | SEC_DATA)) != 0)
169 	{
170 	  /* BFD symbols are section relative.  */
171 	  symaddr = sym->value + sym->section->vma;
172 	  break;
173 	}
174     }
175   xfree (symbol_table);
176 
177   return symaddr;
178 }
179 
180 /* Return program interpreter string.  */
181 
182 static char *
183 find_program_interpreter (void)
184 {
185   char *buf = NULL;
186 
187   /* If we have an current exec_bfd, get the interpreter from the load
188      commands.  */
189   if (current_program_space->exec_bfd ())
190     {
191       bfd_mach_o_load_command *cmd;
192 
193       if (bfd_mach_o_lookup_command (current_program_space->exec_bfd (),
194 				     BFD_MACH_O_LC_LOAD_DYLINKER, &cmd) == 1)
195 	return cmd->command.dylinker.name_str;
196     }
197 
198   /* If we didn't find it, read from memory.
199      FIXME: todo.  */
200   return buf;
201 }
202 
/* Not used: always return 0 without doing anything; FROM_TTY is
   ignored.  It is unclear how the main symbol file could be found
   here, since the interpreter name is required and is only known
   from the executable file.  Note that darwin-nat.c implements
   pid_to_exec_file.  */

static int
open_symbol_file_object (int from_tty)
{
  const int result = 0;

  return result;
}
212 
213 /* Build a list of currently loaded shared objects.  See solib-svr4.c.  */
214 
215 static intrusive_list<solib>
216 darwin_current_sos ()
217 {
218   type *ptr_type
219     = builtin_type (current_inferior ()->arch ())->builtin_data_ptr;
220   enum bfd_endian byte_order = type_byte_order (ptr_type);
221   int ptr_len = ptr_type->length ();
222   unsigned int image_info_size;
223   darwin_info *info = get_darwin_info (current_program_space);
224 
225   /* Be sure image infos are loaded.  */
226   darwin_load_image_infos (info);
227 
228   if (!darwin_dyld_version_ok (info))
229     return {};
230 
231   image_info_size = ptr_len * 3;
232 
233   intrusive_list<solib> sos;
234 
235   /* Read infos for each solib.
236      The first entry was rumored to be the executable itself, but this is not
237      true when a large number of shared libraries are used (table expanded ?).
238      We now check all entries, but discard executable images.  */
239   for (int i = 0; i < info->all_image.count; i++)
240     {
241       CORE_ADDR iinfo = info->all_image.info + i * image_info_size;
242       gdb_byte buf[image_info_size];
243       CORE_ADDR load_addr;
244       CORE_ADDR path_addr;
245       struct mach_o_header_external hdr;
246       unsigned long hdr_val;
247 
248       /* Read image info from inferior.  */
249       if (target_read_memory (iinfo, buf, image_info_size))
250 	break;
251 
252       load_addr = extract_typed_address (buf, ptr_type);
253       path_addr = extract_typed_address (buf + ptr_len, ptr_type);
254 
255       /* Read Mach-O header from memory.  */
256       if (target_read_memory (load_addr, (gdb_byte *) &hdr, sizeof (hdr) - 4))
257 	break;
258       /* Discard wrong magic numbers.  Shouldn't happen.  */
259       hdr_val = extract_unsigned_integer
260 	(hdr.magic, sizeof (hdr.magic), byte_order);
261       if (hdr_val != BFD_MACH_O_MH_MAGIC && hdr_val != BFD_MACH_O_MH_MAGIC_64)
262 	continue;
263       /* Discard executable.  Should happen only once.  */
264       hdr_val = extract_unsigned_integer
265 	(hdr.filetype, sizeof (hdr.filetype), byte_order);
266       if (hdr_val == BFD_MACH_O_MH_EXECUTE)
267 	continue;
268 
269       gdb::unique_xmalloc_ptr<char> file_path
270 	= target_read_string (path_addr, SO_NAME_MAX_PATH_SIZE - 1);
271       if (file_path == nullptr)
272 	break;
273 
274       /* Create and fill the new struct solib element.  */
275       solib *newobj = new solib;
276 
277       auto li = std::make_unique<lm_info_darwin> ();
278 
279       newobj->so_name = file_path.get ();
280       newobj->so_original_name = newobj->so_name;
281       li->lm_addr = load_addr;
282 
283       newobj->lm_info = std::move (li);
284       sos.push_back (*newobj);
285     }
286 
287   return sos;
288 }
289 
290 /* Check LOAD_ADDR points to a Mach-O executable header.  Return LOAD_ADDR
291    in case of success, 0 in case of failure.  */
292 
293 static CORE_ADDR
294 darwin_validate_exec_header (CORE_ADDR load_addr)
295 {
296   bfd_endian byte_order = gdbarch_byte_order (current_inferior ()->arch ());
297   struct mach_o_header_external hdr;
298   unsigned long hdr_val;
299 
300   /* Read Mach-O header from memory.  */
301   if (target_read_memory (load_addr, (gdb_byte *) &hdr, sizeof (hdr) - 4))
302     return 0;
303 
304   /* Discard wrong magic numbers.  Shouldn't happen.  */
305   hdr_val = extract_unsigned_integer
306     (hdr.magic, sizeof (hdr.magic), byte_order);
307   if (hdr_val != BFD_MACH_O_MH_MAGIC && hdr_val != BFD_MACH_O_MH_MAGIC_64)
308     return 0;
309 
310   /* Check executable.  */
311   hdr_val = extract_unsigned_integer
312     (hdr.filetype, sizeof (hdr.filetype), byte_order);
313   if (hdr_val == BFD_MACH_O_MH_EXECUTE)
314     return load_addr;
315 
316   return 0;
317 }
318 
319 /* Get the load address of the executable using dyld list of images.
320    We assume that the dyld info are correct (which is wrong if the target
321    is stopped at the first instruction).  */
322 
323 static CORE_ADDR
324 darwin_read_exec_load_addr_from_dyld (struct darwin_info *info)
325 {
326   type *ptr_type
327     = builtin_type (current_inferior ()->arch ())->builtin_data_ptr;
328   int ptr_len = ptr_type->length ();
329   unsigned int image_info_size = ptr_len * 3;
330   int i;
331 
332   /* Read infos for each solib.  One of them should be the executable.  */
333   for (i = 0; i < info->all_image.count; i++)
334     {
335       CORE_ADDR iinfo = info->all_image.info + i * image_info_size;
336       gdb_byte buf[image_info_size];
337       CORE_ADDR load_addr;
338 
339       /* Read image info from inferior.  */
340       if (target_read_memory (iinfo, buf, image_info_size))
341 	break;
342 
343       load_addr = extract_typed_address (buf, ptr_type);
344       if (darwin_validate_exec_header (load_addr) == load_addr)
345 	return load_addr;
346     }
347 
348   return 0;
349 }
350 
351 /* Get the load address of the executable when the PC is at the dyld
352    entry point using parameter passed by the kernel (at SP). */
353 
354 static CORE_ADDR
355 darwin_read_exec_load_addr_at_init (struct darwin_info *info)
356 {
357   gdbarch *gdbarch = current_inferior ()->arch ();
358   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
359   int addr_size = gdbarch_addr_bit (gdbarch) / 8;
360   ULONGEST load_ptr_addr;
361   ULONGEST load_addr;
362   gdb_byte buf[8];
363 
364   /* Get SP.  */
365   if (regcache_cooked_read_unsigned (get_thread_regcache (inferior_thread ()),
366 				     gdbarch_sp_regnum (gdbarch),
367 				     &load_ptr_addr) != REG_VALID)
368     return 0;
369 
370   /* Read value at SP (image load address).  */
371   if (target_read_memory (load_ptr_addr, buf, addr_size))
372     return 0;
373 
374   load_addr = extract_unsigned_integer (buf, addr_size, byte_order);
375 
376   return darwin_validate_exec_header (load_addr);
377 }
378 
379 /* Return 1 if PC lies in the dynamic symbol resolution code of the
380    run time loader.  */
381 
382 static int
383 darwin_in_dynsym_resolve_code (CORE_ADDR pc)
384 {
385   return 0;
386 }
387 
388 /* A wrapper for bfd_mach_o_fat_extract that handles reference
389    counting properly.  This will either return NULL, or return a new
390    reference to a BFD.  */
391 
392 static gdb_bfd_ref_ptr
393 gdb_bfd_mach_o_fat_extract (bfd *abfd, bfd_format format,
394 			    const bfd_arch_info_type *arch)
395 {
396   bfd *result = bfd_mach_o_fat_extract (abfd, format, arch);
397 
398   if (result == NULL)
399     return NULL;
400 
401   if (result == abfd)
402     gdb_bfd_ref (result);
403   else
404     gdb_bfd_mark_parent (result, abfd);
405 
406   return gdb_bfd_ref_ptr (result);
407 }
408 
409 /* Return the BFD for the program interpreter.  */
410 
411 static gdb_bfd_ref_ptr
412 darwin_get_dyld_bfd ()
413 {
414   char *interp_name;
415 
416   /* This method doesn't work with an attached process.  */
417   if (current_inferior ()->attach_flag)
418     return NULL;
419 
420   /* Find the program interpreter.  */
421   interp_name = find_program_interpreter ();
422   if (!interp_name)
423     return NULL;
424 
425   /* Create a bfd for the interpreter.  */
426   gdb_bfd_ref_ptr dyld_bfd (gdb_bfd_open (interp_name, gnutarget));
427   if (dyld_bfd != NULL)
428     {
429       gdb_bfd_ref_ptr sub
430 	(gdb_bfd_mach_o_fat_extract
431 	   (dyld_bfd.get (), bfd_object,
432 	    gdbarch_bfd_arch_info (current_inferior ()->arch ())));
433       dyld_bfd = sub;
434     }
435   return dyld_bfd;
436 }
437 
438 /* Extract dyld_all_image_addr when the process was just created, assuming the
439    current PC is at the entry of the dynamic linker.  */
440 
441 static void
442 darwin_solib_get_all_image_info_addr_at_init (struct darwin_info *info)
443 {
444   CORE_ADDR load_addr = 0;
445   gdb_bfd_ref_ptr dyld_bfd = darwin_get_dyld_bfd ();
446 
447   if (dyld_bfd == NULL)
448     return;
449 
450   /* We find the dynamic linker's base address by examining
451      the current pc (which should point at the entry point for the
452      dynamic linker) and subtracting the offset of the entry point.  */
453   load_addr = (regcache_read_pc (get_thread_regcache (inferior_thread ()))
454 	       - bfd_get_start_address (dyld_bfd.get ()));
455 
456   /* Now try to set a breakpoint in the dynamic linker.  */
457   info->all_image_addr =
458     lookup_symbol_from_bfd (dyld_bfd.get (), "_dyld_all_image_infos");
459 
460   if (info->all_image_addr == 0)
461     return;
462 
463   info->all_image_addr += load_addr;
464 }
465 
466 /* Extract dyld_all_image_addr reading it from
467    TARGET_OBJECT_DARWIN_DYLD_INFO.  */
468 
469 static void
470 darwin_solib_read_all_image_info_addr (struct darwin_info *info)
471 {
472   gdb_byte buf[8];
473   LONGEST len;
474   type *ptr_type
475     = builtin_type (current_inferior ()->arch ())->builtin_data_ptr;
476 
477   /* Sanity check.  */
478   if (ptr_type->length () > sizeof (buf))
479     return;
480 
481   len = target_read (current_inferior ()->top_target (),
482 		     TARGET_OBJECT_DARWIN_DYLD_INFO,
483 		     NULL, buf, 0, ptr_type->length ());
484   if (len <= 0)
485     return;
486 
487   /* The use of BIG endian is intended, as BUF is a raw stream of bytes.  This
488       makes the support of remote protocol easier.  */
489   info->all_image_addr = extract_unsigned_integer (buf, len, BFD_ENDIAN_BIG);
490 }
491 
492 /* Shared library startup support.  See documentation in solib-svr4.c.  */
493 
494 static void
495 darwin_solib_create_inferior_hook (int from_tty)
496 {
497   /* Everything below only makes sense if we have a running inferior.  */
498   if (!target_has_execution ())
499     return;
500 
501   darwin_info *info = get_darwin_info (current_program_space);
502   CORE_ADDR load_addr;
503 
504   info->all_image_addr = 0;
505 
506   darwin_solib_read_all_image_info_addr (info);
507 
508   if (info->all_image_addr == 0)
509     darwin_solib_get_all_image_info_addr_at_init (info);
510 
511   if (info->all_image_addr == 0)
512     return;
513 
514   darwin_load_image_infos (info);
515 
516   if (!darwin_dyld_version_ok (info))
517     {
518       warning (_("unhandled dyld version (%d)"), info->all_image.version);
519       return;
520     }
521 
522   if (info->all_image.count != 0)
523     {
524       /* Possible relocate the main executable (PIE).  */
525       load_addr = darwin_read_exec_load_addr_from_dyld (info);
526     }
527   else
528     {
529       /* Possible issue:
530 	 Do not break on the notifier if dyld is not initialized (deduced from
531 	 count == 0).  In that case, dyld hasn't relocated itself and the
532 	 notifier may point to a wrong address.  */
533 
534       load_addr = darwin_read_exec_load_addr_at_init (info);
535     }
536 
537   if (load_addr != 0 && current_program_space->symfile_object_file != NULL)
538     {
539       CORE_ADDR vmaddr;
540 
541       /* Find the base address of the executable.  */
542       vmaddr = bfd_mach_o_get_base_address (current_program_space->exec_bfd ());
543 
544       /* Relocate.  */
545       if (vmaddr != load_addr)
546 	objfile_rebase (current_program_space->symfile_object_file,
547 			load_addr - vmaddr);
548     }
549 
550   /* Set solib notifier (to reload list of shared libraries).  */
551   CORE_ADDR notifier = info->all_image.notifier;
552 
553   if (info->all_image.count == 0)
554     {
555       /* Dyld hasn't yet relocated itself, so the notifier address may
556 	 be incorrect (as it has to be relocated).  */
557       CORE_ADDR start
558 	= bfd_get_start_address (current_program_space->exec_bfd ());
559       if (start == 0)
560 	notifier = 0;
561       else
562 	{
563 	  gdb_bfd_ref_ptr dyld_bfd = darwin_get_dyld_bfd ();
564 	  if (dyld_bfd != NULL)
565 	    {
566 	      CORE_ADDR dyld_bfd_start_address;
567 	      CORE_ADDR dyld_relocated_base_address;
568 	      CORE_ADDR pc;
569 
570 	      dyld_bfd_start_address = bfd_get_start_address (dyld_bfd.get());
571 
572 	      /* We find the dynamic linker's base address by examining
573 		 the current pc (which should point at the entry point
574 		 for the dynamic linker) and subtracting the offset of
575 		 the entry point.  */
576 
577 	      pc = regcache_read_pc (get_thread_regcache (inferior_thread ()));
578 	      dyld_relocated_base_address = pc - dyld_bfd_start_address;
579 
580 	      /* We get the proper notifier relocated address by
581 		 adding the dyld relocated base address to the current
582 		 notifier offset value.  */
583 
584 	      notifier += dyld_relocated_base_address;
585 	    }
586 	}
587     }
588 
589   /* Add the breakpoint which is hit by dyld when the list of solib is
590      modified.  */
591   if (notifier != 0)
592     create_solib_event_breakpoint (current_inferior ()->arch (), notifier);
593 }
594 
595 static void
596 darwin_clear_solib (program_space *pspace)
597 {
598   darwin_info *info = get_darwin_info (pspace);
599 
600   info->all_image_addr = 0;
601   info->all_image.version = 0;
602 }
603 
604 /* The section table is built from bfd sections using bfd VMAs.
605    Relocate these VMAs according to solib info.  */
606 
607 static void
608 darwin_relocate_section_addresses (solib &so, target_section *sec)
609 {
610   auto *li = gdb::checked_static_cast<lm_info_darwin *> (so.lm_info.get ());
611 
612   sec->addr += li->lm_addr;
613   sec->endaddr += li->lm_addr;
614 
615   /* Best effort to set addr_high/addr_low.  This is used only by
616      'info sharedlibary'.  */
617   if (so.addr_high == 0)
618     {
619       so.addr_low = sec->addr;
620       so.addr_high = sec->endaddr;
621     }
622   if (sec->endaddr > so.addr_high)
623     so.addr_high = sec->endaddr;
624   if (sec->addr < so.addr_low)
625     so.addr_low = sec->addr;
626 }
627 
628 static gdb_bfd_ref_ptr
629 darwin_bfd_open (const char *pathname)
630 {
631   int found_file;
632 
633   /* Search for shared library file.  */
634   gdb::unique_xmalloc_ptr<char> found_pathname
635     = solib_find (pathname, &found_file);
636   if (found_pathname == NULL)
637     perror_with_name (pathname);
638 
639   /* Open bfd for shared library.  */
640   gdb_bfd_ref_ptr abfd (solib_bfd_fopen (found_pathname.get (), found_file));
641 
642   gdb_bfd_ref_ptr res
643     (gdb_bfd_mach_o_fat_extract
644        (abfd.get (), bfd_object,
645 	gdbarch_bfd_arch_info (current_inferior ()->arch ())));
646   if (res == NULL)
647     error (_("`%s': not a shared-library: %s"),
648 	   bfd_get_filename (abfd.get ()), bfd_errmsg (bfd_get_error ()));
649 
650   /* The current filename for fat-binary BFDs is a name generated
651      by BFD, usually a string containing the name of the architecture.
652      Reset its value to the actual filename.  */
653   bfd_set_filename (res.get (), pathname);
654 
655   return res;
656 }
657 
658 const solib_ops darwin_so_ops =
659 {
660   darwin_relocate_section_addresses,
661   nullptr,
662   darwin_clear_solib,
663   darwin_solib_create_inferior_hook,
664   darwin_current_sos,
665   open_symbol_file_object,
666   darwin_in_dynsym_resolve_code,
667   darwin_bfd_open,
668 };
669