xref: /llvm-project/lldb/test/API/macosx/lc-note/multiple-binary-corefile/create-multibin-corefile.cpp (revision 57cbd26a68ab61631f5f4272d3c90df2eb0ce4f6)
1 #include <errno.h>
2 #include <fcntl.h>
3 #include <inttypes.h>
4 #include <mach-o/loader.h>
5 #include <mach/thread_status.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <string>
10 #include <sys/errno.h>
11 #include <sys/stat.h>
12 #include <sys/types.h>
13 #include <sys/uio.h>
14 #include <unistd.h>
15 #include <uuid/uuid.h>
16 #include <vector>
17 
18 // Given a list of binaries, and optional slides to be applied,
19 // create a corefile whose memory is those binaries laid down at
20 // their slid addresses.
21 //
22 // Add a 'main bin spec' LC_NOTE for the first binary, and
23 // 'load binary' LC_NOTEs for any additional binaries, and
24 // these LC_NOTEs will ONLY have the vmaddr of the binary - no
25 // UUID, no slide, no filename.
26 //
27 // Test that lldb can use the load addresses, find the UUIDs,
28 // and load the binaries/dSYMs and put them at the correct load
29 // address.
30 
31 struct main_bin_spec_payload {
32   uint32_t version;
33   uint32_t type;
34   uint64_t address;
35   uint64_t slide;
36   uuid_t uuid;
37   uint32_t log2_pagesize;
38   uint32_t platform;
39 };
40 
41 struct load_binary_payload {
42   uint32_t version;
43   uuid_t uuid;
44   uint64_t address;
45   uint64_t slide;
46   const char name[4];
47 };
48 
// Overlay that exposes the storage of a 32-bit integer as individual
// bytes, in host byte order.
union uint32_buf {
  uint8_t bytebuf[sizeof(uint32_t)]; // byte-wise view
  uint32_t val;                      // integer view
};
53 
// Overlay that exposes the storage of a 64-bit integer as individual
// bytes, in host byte order.
union uint64_buf {
  uint8_t bytebuf[sizeof(uint64_t)]; // byte-wise view
  uint64_t val;                      // integer view
};
58 
// Append the 8 bytes of `val`, in host byte order, to the end of `buf`.
//
// The bytes are obtained with memcpy rather than by writing one union
// member and reading another, which is undefined behavior in C++.
void add_uint64(std::vector<uint8_t> &buf, uint64_t val) {
  uint8_t bytes[sizeof(val)];
  memcpy(bytes, &val, sizeof(val));
  buf.insert(buf.end(), bytes, bytes + sizeof(val));
}
65 
// Append the 4 bytes of `val`, in host byte order, to the end of `buf`.
//
// The bytes are obtained with memcpy rather than by writing one union
// member and reading another, which is undefined behavior in C++.
void add_uint32(std::vector<uint8_t> &buf, uint32_t val) {
  uint8_t bytes[sizeof(val)];
  memcpy(bytes, &val, sizeof(val));
  buf.insert(buf.end(), bytes, bytes + sizeof(val));
}
72 
lc_thread_load_command(cpu_type_t cputype)73 std::vector<uint8_t> lc_thread_load_command(cpu_type_t cputype) {
74   std::vector<uint8_t> data;
75   // Emit an LC_THREAD register context appropriate for the cputype
76   // of the binary we're embedded.  The tests in this case do not
77   // use the register values, so 0's are fine, lldb needs to see at
78   // least one LC_THREAD in the corefile.
79 #if defined(__x86_64__)
80   if (cputype == CPU_TYPE_X86_64) {
81     add_uint32(data, LC_THREAD); // thread_command.cmd
82     add_uint32(data,
83                16 + (x86_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize
84     add_uint32(data, x86_THREAD_STATE64);            // thread_command.flavor
85     add_uint32(data, x86_THREAD_STATE64_COUNT);      // thread_command.count
86     for (int i = 0; i < x86_THREAD_STATE64_COUNT; i++) {
87       add_uint32(data, 0); // whatever, just some empty register values
88     }
89   }
90 #endif
91 #if defined(__arm64__) || defined(__aarch64__)
92   if (cputype == CPU_TYPE_ARM64) {
93     add_uint32(data, LC_THREAD); // thread_command.cmd
94     add_uint32(data,
95                16 + (ARM_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize
96     add_uint32(data, ARM_THREAD_STATE64);            // thread_command.flavor
97     add_uint32(data, ARM_THREAD_STATE64_COUNT);      // thread_command.count
98     for (int i = 0; i < ARM_THREAD_STATE64_COUNT; i++) {
99       add_uint32(data, 0); // whatever, just some empty register values
100     }
101   }
102 #endif
103   return data;
104 }
105 
add_lc_note_main_bin_spec_load_command(std::vector<std::vector<uint8_t>> & loadcmds,std::vector<uint8_t> & payload,int payload_file_offset,std::string uuidstr,uint64_t address,uint64_t slide)106 void add_lc_note_main_bin_spec_load_command(
107     std::vector<std::vector<uint8_t>> &loadcmds, std::vector<uint8_t> &payload,
108     int payload_file_offset, std::string uuidstr, uint64_t address,
109     uint64_t slide) {
110   std::vector<uint8_t> loadcmd_data;
111 
112   add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd
113   add_uint32(loadcmd_data, 40);      // note_command.cmdsize
114   char lc_note_name[16];
115   memset(lc_note_name, 0, 16);
116   strcpy(lc_note_name, "main bin spec");
117 
118   // lc_note.data_owner
119   for (int i = 0; i < 16; i++)
120     loadcmd_data.push_back(lc_note_name[i]);
121 
122   // we start writing the payload at payload_file_offset to leave
123   // room at the start for the header & the load commands.
124   uint64_t current_payload_offset = payload.size() + payload_file_offset;
125 
126   add_uint64(loadcmd_data, current_payload_offset); // note_command.offset
127   add_uint64(loadcmd_data,
128              sizeof(struct main_bin_spec_payload)); // note_command.size
129 
130   loadcmds.push_back(loadcmd_data);
131 
132   // Now write the "main bin spec" payload.
133   add_uint32(payload, 2);       // version
134   add_uint32(payload, 3);       // type == 3 [ firmware, standalone, etc ]
135   add_uint64(payload, address); // load address
136   add_uint64(payload, slide);   // slide
137   uuid_t uuid;
138   uuid_parse(uuidstr.c_str(), uuid);
139   for (int i = 0; i < sizeof(uuid_t); i++)
140     payload.push_back(uuid[i]);
141   add_uint32(payload, 0); // log2_pagesize unspecified
142   add_uint32(payload, 0); // platform unspecified
143 }
144 
add_lc_note_load_binary_load_command(std::vector<std::vector<uint8_t>> & loadcmds,std::vector<uint8_t> & payload,int payload_file_offset,std::string uuidstr,uint64_t address,uint64_t slide)145 void add_lc_note_load_binary_load_command(
146     std::vector<std::vector<uint8_t>> &loadcmds, std::vector<uint8_t> &payload,
147     int payload_file_offset, std::string uuidstr, uint64_t address,
148     uint64_t slide) {
149   std::vector<uint8_t> loadcmd_data;
150 
151   add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd
152   add_uint32(loadcmd_data, 40);      // note_command.cmdsize
153   char lc_note_name[16];
154   memset(lc_note_name, 0, 16);
155   strcpy(lc_note_name, "load binary");
156 
157   // lc_note.data_owner
158   for (int i = 0; i < 16; i++)
159     loadcmd_data.push_back(lc_note_name[i]);
160 
161   // we start writing the payload at payload_file_offset to leave
162   // room at the start for the header & the load commands.
163   uint64_t current_payload_offset = payload.size() + payload_file_offset;
164 
165   add_uint64(loadcmd_data, current_payload_offset); // note_command.offset
166   add_uint64(loadcmd_data,
167              sizeof(struct load_binary_payload)); // note_command.size
168 
169   loadcmds.push_back(loadcmd_data);
170 
171   // Now write the "load binary" payload.
172   add_uint32(payload, 1); // version
173   uuid_t uuid;
174   uuid_parse(uuidstr.c_str(), uuid);
175   for (int i = 0; i < sizeof(uuid_t); i++)
176     payload.push_back(uuid[i]);
177   add_uint64(payload, address); // load address
178   add_uint64(payload, slide);   // slide
179   add_uint32(payload, 0);       // name
180 }
181 
add_lc_segment(std::vector<std::vector<uint8_t>> & loadcmds,std::vector<uint8_t> & payload,int payload_file_offset,uint64_t vmaddr,uint64_t size)182 void add_lc_segment(std::vector<std::vector<uint8_t>> &loadcmds,
183                     std::vector<uint8_t> &payload, int payload_file_offset,
184                     uint64_t vmaddr, uint64_t size) {
185   std::vector<uint8_t> loadcmd_data;
186   struct segment_command_64 seg;
187   seg.cmd = LC_SEGMENT_64;
188   seg.cmdsize = sizeof(struct segment_command_64); // no sections
189   memset(seg.segname, 0, 16);
190   seg.vmaddr = vmaddr;
191   seg.vmsize = size;
192   seg.fileoff = payload.size() + payload_file_offset;
193   seg.filesize = size;
194   seg.maxprot = 1;
195   seg.initprot = 1;
196   seg.nsects = 0;
197   seg.flags = 0;
198 
199   uint8_t *p = (uint8_t *)&seg;
200   for (int i = 0; i < sizeof(struct segment_command_64); i++) {
201     loadcmd_data.push_back(*(p + i));
202   }
203   loadcmds.push_back(loadcmd_data);
204 }
205 
scan_binary(const char * fn,uint64_t & vmaddr,cpu_type_t & cputype,cpu_subtype_t & cpusubtype)206 std::string scan_binary(const char *fn, uint64_t &vmaddr, cpu_type_t &cputype,
207                         cpu_subtype_t &cpusubtype) {
208   FILE *f = fopen(fn, "r");
209   if (f == nullptr) {
210     fprintf(stderr, "Unable to open binary '%s' to get uuid\n", fn);
211     exit(1);
212   }
213   uint32_t num_of_load_cmds = 0;
214   uint32_t size_of_load_cmds = 0;
215   std::string uuid;
216   off_t file_offset = 0;
217   vmaddr = UINT64_MAX;
218 
219   uint8_t magic[4];
220   if (::fread(magic, 1, 4, f) != 4) {
221     fprintf(stderr, "Failed to read magic number from input file %s\n", fn);
222     exit(1);
223   }
224   uint8_t magic_32_be[] = {0xfe, 0xed, 0xfa, 0xce};
225   uint8_t magic_32_le[] = {0xce, 0xfa, 0xed, 0xfe};
226   uint8_t magic_64_be[] = {0xfe, 0xed, 0xfa, 0xcf};
227   uint8_t magic_64_le[] = {0xcf, 0xfa, 0xed, 0xfe};
228 
229   if (memcmp(magic, magic_32_be, 4) == 0 ||
230       memcmp(magic, magic_64_be, 4) == 0) {
231     fprintf(stderr, "big endian corefiles not supported\n");
232     exit(1);
233   }
234 
235   ::fseeko(f, 0, SEEK_SET);
236   if (memcmp(magic, magic_32_le, 4) == 0) {
237     struct mach_header mh;
238     if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) {
239       fprintf(stderr, "error reading mach header from input file\n");
240       exit(1);
241     }
242     if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) {
243       fprintf(stderr,
244               "This tool creates an x86_64/arm64 corefile but "
245               "the supplied binary '%s' is cputype 0x%x\n",
246               fn, (uint32_t)mh.cputype);
247       exit(1);
248     }
249     num_of_load_cmds = mh.ncmds;
250     size_of_load_cmds = mh.sizeofcmds;
251     file_offset += sizeof(struct mach_header);
252     cputype = mh.cputype;
253     cpusubtype = mh.cpusubtype;
254   } else {
255     struct mach_header_64 mh;
256     if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) {
257       fprintf(stderr, "error reading mach header from input file\n");
258       exit(1);
259     }
260     if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) {
261       fprintf(stderr,
262               "This tool creates an x86_64/arm64 corefile but "
263               "the supplied binary '%s' is cputype 0x%x\n",
264               fn, (uint32_t)mh.cputype);
265       exit(1);
266     }
267     num_of_load_cmds = mh.ncmds;
268     size_of_load_cmds = mh.sizeofcmds;
269     file_offset += sizeof(struct mach_header_64);
270     cputype = mh.cputype;
271     cpusubtype = mh.cpusubtype;
272   }
273 
274   off_t load_cmds_offset = file_offset;
275 
276   for (int i = 0; i < num_of_load_cmds &&
277                   (file_offset - load_cmds_offset) < size_of_load_cmds;
278        i++) {
279     ::fseeko(f, file_offset, SEEK_SET);
280     uint32_t cmd;
281     uint32_t cmdsize;
282     ::fread(&cmd, sizeof(uint32_t), 1, f);
283     ::fread(&cmdsize, sizeof(uint32_t), 1, f);
284     if (vmaddr == UINT64_MAX && cmd == LC_SEGMENT_64) {
285       struct segment_command_64 segcmd;
286       ::fseeko(f, file_offset, SEEK_SET);
287       if (::fread(&segcmd, 1, sizeof(segcmd), f) != sizeof(segcmd)) {
288         fprintf(stderr, "Unable to read LC_SEGMENT_64 load command.\n");
289         exit(1);
290       }
291       if (strcmp("__TEXT", segcmd.segname) == 0)
292         vmaddr = segcmd.vmaddr;
293     }
294     if (cmd == LC_UUID) {
295       struct uuid_command uuidcmd;
296       ::fseeko(f, file_offset, SEEK_SET);
297       if (::fread(&uuidcmd, 1, sizeof(uuidcmd), f) != sizeof(uuidcmd)) {
298         fprintf(stderr, "Unable to read LC_UUID load command.\n");
299         exit(1);
300       }
301       uuid_string_t uuidstr;
302       uuid_unparse(uuidcmd.uuid, uuidstr);
303       uuid = uuidstr;
304     }
305     file_offset += cmdsize;
306   }
307   return uuid;
308 }
309 
slide_macho_binary(std::vector<uint8_t> & image,uint64_t slide)310 void slide_macho_binary(std::vector<uint8_t> &image, uint64_t slide) {
311   uint8_t *p = image.data();
312   struct mach_header_64 *mh = (struct mach_header_64 *)p;
313   p += sizeof(struct mach_header_64);
314   for (int lc_idx = 0; lc_idx < mh->ncmds; lc_idx++) {
315     struct load_command *lc = (struct load_command *)p;
316     if (lc->cmd == LC_SEGMENT_64) {
317       struct segment_command_64 *seg = (struct segment_command_64 *)p;
318       if (seg->maxprot != 0 && seg->nsects > 0) {
319         seg->vmaddr += slide;
320         uint8_t *j = p + sizeof(segment_command_64);
321         for (int sect_idx = 0; sect_idx < seg->nsects; sect_idx++) {
322           struct section_64 *sect = (struct section_64 *)j;
323           sect->addr += slide;
324           j += sizeof(struct section_64);
325         }
326       }
327     }
328     p += lc->cmdsize;
329   }
330 }
331 
main(int argc,char ** argv)332 int main(int argc, char **argv) {
333   if (argc < 3) {
334     fprintf(stderr,
335             "usage: output-corefile binary1[@optional-slide] "
336             "[binary2[@optional-slide] [binary3[@optional-slide] ...]]\n");
337     exit(1);
338   }
339 
340   // An array of load commands (in the form of byte arrays)
341   std::vector<std::vector<uint8_t>> load_commands;
342 
343   // An array of corefile contents (page data, lc_note data, etc)
344   std::vector<uint8_t> payload;
345 
346   std::vector<std::string> input_filenames;
347   std::vector<uint64_t> input_slides;
348   std::vector<uint64_t> input_filesizes;
349   std::vector<uint64_t> input_filevmaddrs;
350   uint64_t main_binary_cputype = CPU_TYPE_ARM64;
351   uint64_t vmaddr = UINT64_MAX;
352   cpu_type_t cputype;
353   cpu_subtype_t cpusubtype;
354   for (int i = 2; i < argc; i++) {
355     std::string filename;
356     std::string filename_and_opt_hex(argv[i]);
357     uint64_t slide = 0;
358     auto at_pos = filename_and_opt_hex.find_last_of('@');
359     if (at_pos == std::string::npos) {
360       filename = filename_and_opt_hex;
361     } else {
362       filename = filename_and_opt_hex.substr(0, at_pos);
363       std::string hexstr = filename_and_opt_hex.substr(at_pos + 1);
364       errno = 0;
365       slide = (uint64_t)strtoull(hexstr.c_str(), nullptr, 16);
366       if (errno != 0) {
367         fprintf(stderr, "Unable to parse hex slide value in %s\n", argv[i]);
368         exit(1);
369       }
370     }
371     struct stat stbuf;
372     if (stat(filename.c_str(), &stbuf) == -1) {
373       fprintf(stderr, "Unable to stat '%s', exiting.\n", filename.c_str());
374       exit(1);
375     }
376     input_filenames.push_back(filename);
377     input_slides.push_back(slide);
378     input_filesizes.push_back(stbuf.st_size);
379     scan_binary(filename.c_str(), vmaddr, cputype, cpusubtype);
380     input_filevmaddrs.push_back(vmaddr + slide);
381     if (i == 2) {
382       main_binary_cputype = cputype;
383     }
384   }
385 
386   const char *output_corefile_name = argv[1];
387   std::string empty_uuidstr = "00000000-0000-0000-0000-000000000000";
388 
389   // First add all the load commands / payload so we can figure out how large
390   // the load commands will actually be.
391   load_commands.push_back(lc_thread_load_command(cputype));
392 
393   add_lc_note_main_bin_spec_load_command(load_commands, payload, 0,
394                                          empty_uuidstr, 0, UINT64_MAX);
395   for (int i = 1; i < input_filenames.size(); i++) {
396     add_lc_note_load_binary_load_command(load_commands, payload, 0,
397                                          empty_uuidstr, 0, UINT64_MAX);
398   }
399 
400   for (int i = 0; i < input_filenames.size(); i++) {
401     add_lc_segment(load_commands, payload, 0, 0, 0);
402   }
403 
404   int size_of_load_commands = 0;
405   for (const auto &lc : load_commands)
406     size_of_load_commands += lc.size();
407 
408   int size_of_header_and_load_cmds =
409       sizeof(struct mach_header_64) + size_of_load_commands;
410 
411   // Erase the load commands / payload now that we know how much space is
412   // needed, redo it.
413   load_commands.clear();
414   payload.clear();
415 
416   // Push the LC_THREAD load command.
417   load_commands.push_back(lc_thread_load_command(main_binary_cputype));
418 
419   const off_t payload_offset = size_of_header_and_load_cmds;
420 
421   add_lc_note_main_bin_spec_load_command(load_commands, payload, payload_offset,
422                                          empty_uuidstr, input_filevmaddrs[0],
423                                          UINT64_MAX);
424 
425   for (int i = 1; i < input_filenames.size(); i++) {
426     add_lc_note_load_binary_load_command(load_commands, payload, payload_offset,
427                                          empty_uuidstr, input_filevmaddrs[i],
428                                          UINT64_MAX);
429   }
430 
431   for (int i = 0; i < input_filenames.size(); i++) {
432     add_lc_segment(load_commands, payload, payload_offset, input_filevmaddrs[i],
433                    input_filesizes[i]);
434 
435     // Copy the contents of the binary into payload.
436     int fd = open(input_filenames[i].c_str(), O_RDONLY);
437     if (fd == -1) {
438       fprintf(stderr, "Unable to open %s for reading\n",
439               input_filenames[i].c_str());
440       exit(1);
441     }
442     std::vector<uint8_t> binary_contents;
443     for (int j = 0; j < input_filesizes[i]; j++) {
444       uint8_t byte;
445       read(fd, &byte, 1);
446       binary_contents.push_back(byte);
447     }
448     close(fd);
449 
450     size_t cur_payload_size = payload.size();
451     payload.resize(cur_payload_size + binary_contents.size());
452     slide_macho_binary(binary_contents, input_slides[i]);
453     memcpy(payload.data() + cur_payload_size, binary_contents.data(),
454            binary_contents.size());
455   }
456 
457   struct mach_header_64 mh;
458   mh.magic = MH_MAGIC_64;
459   mh.cputype = cputype;
460 
461   mh.cpusubtype = cpusubtype;
462   mh.filetype = MH_CORE;
463   mh.ncmds = load_commands.size();
464   mh.sizeofcmds = size_of_load_commands;
465   mh.flags = 0;
466   mh.reserved = 0;
467 
468   FILE *f = fopen(output_corefile_name, "w");
469 
470   if (f == nullptr) {
471     fprintf(stderr, "Unable to open file %s for writing\n",
472             output_corefile_name);
473     exit(1);
474   }
475 
476   fwrite(&mh, sizeof(mh), 1, f);
477 
478   for (const auto &lc : load_commands)
479     fwrite(lc.data(), lc.size(), 1, f);
480 
481   fwrite(payload.data(), payload.size(), 1, f);
482 
483   fclose(f);
484 }
485