//===-- sanitizer_procmaps_mac.cc -----------------------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_MAC
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;
  const char *current_load_cmd_addr;
  u32 lc_type;
  uptr base_virt_addr;
  uptr addr_mask;
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // doing so confuses libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

// More information about Mach-O headers can be found in mach-o/loader.h
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which indicate that a part of the file is
// mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
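
// For illustration, a minimal sketch (not built as part of this file) of
// walking every image's load commands with the dyld APIs described above;
// PrintSegments and its output format are assumptions:
#if 0
static void PrintSegments() {
  for (uint32_t i = 0; i < _dyld_image_count(); i++) {
    const mach_header *hdr = _dyld_get_image_header(i);
    if (!hdr || hdr->magic != MH_MAGIC_64) continue;  // 64-bit images only
    intptr_t slide = _dyld_get_image_vmaddr_slide(i);
    // Load commands start immediately after the (64-bit) header.
    const char *cmd = (const char *)hdr + sizeof(mach_header_64);
    for (uint32_t j = 0; j < hdr->ncmds; j++) {
      const load_command *lc = (const load_command *)cmd;
      if (lc->cmd == LC_SEGMENT_64) {
        const segment_command_64 *sc = (const segment_command_64 *)lc;
        // vmaddr comes from the file as is; add the slide to get the
        // runtime address.
        Printf("%s %s %zx-%zx\n", _dyld_get_image_name(i), sc->segname,
               (uptr)(sc->vmaddr + slide),
               (uptr)(sc->vmaddr + sc->vmsize + slide));
      }
      cmd += lc->cmdsize;
    }
  }
}
#endif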

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks
  // that invalidate the MemoryMappingLayout state whenever images are
  // added or removed.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}
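
// A minimal sketch (not built as part of this file) of the callback
// registration the TODO above describes, using the dyld APIs declared in
// <mach-o/dyld.h>. The staleness flag and all names here are illustrative
// assumptions, not part of the sanitizer runtime:
#if 0
static volatile bool mappings_stale = false;  // hypothetical flag

static void OnImageAdded(const mach_header *hdr, intptr_t vmaddr_slide) {
  // A cached MemoryMappingLayout would need to be rebuilt after this.
  mappings_stale = true;
}

static void OnImageRemoved(const mach_header *hdr, intptr_t vmaddr_slide) {
  mappings_stale = true;
}

static void RegisterDyldCallbacks() {
  // dyld calls the add callback once for each image already loaded, and
  // again on every subsequent load; the remove callback fires on unload.
  _dyld_register_func_for_add_image(OnImageAdded);
  _dyld_register_func_for_remove_image(OnImageRemoved);
}
#endif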

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually recursing through the memory map
// until we hit a Mach header matching dyld instead. These
// vm_region_recurse_64() calls are expensive, but the first memory map
// generation occurs early in the process, when dyld is one of the only
// images loaded, so it will be hit after only a few iterations.
static mach_header *get_dyld_image_header() {
  unsigned depth = 1;
  vm_size_t size = 0;
  vm_address_t address = 0;
  kern_return_t err = KERN_SUCCESS;
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

  while (true) {
    struct vm_region_submap_info_64 info;
    err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                               (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && (info.protection & kProtectionRead)) {
      mach_header *hdr = (mach_header *)address;
      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
          hdr->filetype == MH_DYLINKER) {
        return hdr;
      }
    }
    address += size;
  }
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) dyld_hdr = get_dyld_image_header();

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the
// corresponding segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData &layout_data) {
  const char *lc = layout_data.current_load_cmd_addr;
  layout_data.current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data.current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
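      // For example (with purely illustrative numbers): if dyld's __TEXT
      // segment records vmaddr 0x7fff5fc48000 but dyld is actually loaded
      // at base_virt_addr 0x10c000000, then (vmaddr & 0xfffff) == 0x48000,
      // and the computed segment start below is 0x10c048000.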
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data.current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data.current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data.current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data.current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data.current_arch;
    internal_memcpy(segment->uuid, layout_data.current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
      switch (data_.current_magic) {
        // data_.current_magic can only be MH_MAGIC or MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, data_))
            return true;
          break;
        }
      }
    }
    // If we get here, none of the remaining load commands in this image
    // describe segments. Go on to the next image.
  }
  return false;
}

void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalScopedString module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), kMaxPathLength);
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}
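
// A minimal usage sketch (not built as part of this file), modeled on
// DumpListOfModules above, showing how a caller inside the sanitizer
// runtime might walk the mappings; the function name and output format
// are assumptions:
#if 0
static void PrintMappedSegments() {
  MemoryMappingLayout layout(/*cache_enabled*/ false);
  char filename[kMaxPathLength];
  MemoryMappedSegment segment(filename, sizeof(filename));
  while (layout.Next(&segment)) {
    // segment.start and segment.end already include the image's load slide.
    Printf("%zx-%zx %s\n", segment.start, segment.end, segment.filename);
  }
}
#endif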

}  // namespace __sanitizer

#endif  // SANITIZER_MAC