/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $
 */

#include <sys/param.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/nlookup.h>
#include <sys/pioctl.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <sys/lock.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>
#include <sys/mount.h>
#include <sys/ckpt.h>

#define OLD_EI_BRAND	8
#define truncps(va,ps)	((va) & ~(ps - 1))
#define aligned(a,t)	(truncps((u_long)(a), sizeof(t)) == (u_long)(a))
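/*
 * Worked example (added for illustration, not in the original source):
 * truncps() masks an address down to the previous ps-aligned boundary,
 * and aligned() tests natural alignment for a type.  With a 0x1000-byte
 * page and an 8-byte Elf_Addr (ELF64):
 *
 *	truncps(0x12345, 0x1000)  == 0x12000
 *	aligned(0x12340, Elf_Addr) is true  (0x12340 & ~7 == 0x12340)
 *	aligned(0x12344, Elf_Addr) is false (0x12344 & ~7 == 0x12340)
 */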
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry);
static int __elfN(load_section)(struct proc *p,
    struct vmspace *vmspace, struct vnode *vp,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(bsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);
static boolean_t check_PT_NOTE(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel, const Elf_Phdr *pnote);
static boolean_t extract_interpreter(struct image_params *imgp,
    const Elf_Phdr *pinterpreter, char *data);

static int elf_legacy_coredump = 0;
static int __elfN(fallback_brand) = -1;
#if defined(__x86_64__)
SYSCTL_NODE(_kern, OID_AUTO, elf64, CTLFLAG_RW, 0, "");
SYSCTL_INT(_debug, OID_AUTO, elf64_legacy_coredump, CTLFLAG_RW,
    &elf_legacy_coredump, 0, "legacy coredump mode");
SYSCTL_INT(_kern_elf64, OID_AUTO, fallback_brand, CTLFLAG_RW,
    &elf64_fallback_brand, 0, "ELF64 brand of last resort");
TUNABLE_INT("kern.elf64.fallback_brand", &elf64_fallback_brand);
#else /* i386 assumed */
SYSCTL_NODE(_kern, OID_AUTO, elf32, CTLFLAG_RW, 0, "");
SYSCTL_INT(_debug, OID_AUTO, elf32_legacy_coredump, CTLFLAG_RW,
    &elf_legacy_coredump, 0, "legacy coredump mode");
SYSCTL_INT(_kern_elf32, OID_AUTO, fallback_brand, CTLFLAG_RW,
    &elf32_fallback_brand, 0, "ELF32 brand of last resort");
TUNABLE_INT("kern.elf32.fallback_brand", &elf32_fallback_brand);
#endif

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

static const char DRAGONFLY_ABI_VENDOR[] = "DragonFly";
static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

Elf_Brandnote __elfN(dragonfly_brandnote) = {
	.hdr.n_namesz	= sizeof(DRAGONFLY_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= DRAGONFLY_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(bsd_trans_osrel),
};

Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(bsd_trans_osrel),
};

int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS) {
		uprintf("WARNING: %s: could not insert brandinfo entry: %p\n",
		    __func__, entry);
		return (-1);
	}
	return (0);
}

int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}
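/*
 * Illustrative sketch (not part of the original file): how an emulation
 * module might use the brand API above.  "example_sysvec" and the field
 * values are hypothetical; a real module supplies its own sysentvec and
 * paths.  A slot in elf_brand_list[] is claimed on load and released on
 * unload, with __elfN(brand_inuse)() (below) consulted first so a brand
 * that some process is still executing is not yanked away.
 */
#if 0
static Elf_Brandinfo example_brand = {
	.brand		= ELFOSABI_FREEBSD,	/* matched against EI_OSABI */
	.machine	= ELF_TARG_MACH,	/* assumption: current arch */
	.compat_3_brand	= "FreeBSD",
	.emul_path	= NULL,
	.interp_path	= "/usr/libexec/ld-elf.so.2",
	.sysvec		= &example_sysvec,	/* hypothetical sysentvec */
	.interp_newpath	= NULL,
	.flags		= BI_BRAND_NOTE,	/* also match via brand_note */
	.brand_note	= &__elfN(freebsd_brandnote),
};

static int
example_mod_load(void)
{
	return (__elfN(insert_brand_entry)(&example_brand));
}

static int
example_mod_unload(void)
{
	if (__elfN(brand_inuse)(&example_brand))
		return (EBUSY);		/* a process still runs this brand */
	return (__elfN(remove_brand_entry)(&example_brand));
}
#endif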
/*
 * Check if an elf brand is being used anywhere in the system.
 *
 * Used by the linux emulation module unloader.  This isn't safe from
 * races.
 */
struct elf_brand_inuse_info {
	int rval;
	Elf_Brandinfo *entry;
};

static int elf_brand_inuse_callback(struct proc *p, void *data);

int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct elf_brand_inuse_info info;

	info.rval = FALSE;
	info.entry = entry;
	allproc_scan(elf_brand_inuse_callback, &info);
	return (info.rval);
}

static
int
elf_brand_inuse_callback(struct proc *p, void *data)
{
	struct elf_brand_inuse_info *info = data;

	if (p->p_sysent == info->entry->sysvec) {
		info->rval = TRUE;
		return (-1);
	}
	return (0);
}

static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	Elf_Brandinfo *bi;
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
	    hdr->e_ehsize != sizeof(Elf_Ehdr) ||
	    hdr->e_version != ELF_TARG_VER)
		return (ENOEXEC);

	/*
	 * Make sure we have at least one brand for this machine.
	 */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && bi->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return (ENOEXEC);

	return (0);
}
static int
__elfN(load_section)(struct proc *p, struct vmspace *vmspace, struct vnode *vp,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz,
    size_t filsz, vm_prot_t prot)
{
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	int count;
	size_t copy_len;
	vm_object_t object;
	vm_offset_t file_addr;

	object = vp->v_object;
	error = 0;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > vp->v_filesize || filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page((vm_offset_t)vmaddr);
	file_addr = trunc_page(offset);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page(offset + filsz) - file_addr;
	else
		map_len = round_page(offset + filsz) - file_addr;

	if (map_len != 0) {
		vm_object_reference(object);

		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
		vm_map_lock(&vmspace->vm_map);
		rv = vm_map_insert(&vmspace->vm_map, &count,
		    object,
		    file_addr,		/* file offset */
		    map_addr,		/* virtual start */
		    map_addr + map_len,	/* virtual end */
		    VM_MAPTYPE_NORMAL,
		    prot, VM_PROT_ALL,
		    cow);
		vm_map_unlock(&vmspace->vm_map);
		vm_map_entry_release(count);
		if (rv != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (EINVAL);
		}

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}

	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page(offset + filsz);
	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
		vm_map_lock(&vmspace->vm_map);
		rv = vm_map_insert(&vmspace->vm_map, &count,
		    NULL, 0,
		    map_addr, map_addr + map_len,
		    VM_MAPTYPE_NORMAL,
		    VM_PROT_ALL, VM_PROT_ALL,
		    0);
		vm_map_unlock(&vmspace->vm_map);
		vm_map_entry_release(count);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_page_t m;
		struct lwbuf *lwb;
		struct lwbuf lwb_cache;

		m = vm_fault_object_page(object, trunc_page(offset + filsz),
		    VM_PROT_READ, 0, &error);
		if (m) {
			lwb = lwbuf_alloc(m, &lwb_cache);
			error = copyout((caddr_t)lwbuf_kva(lwb),
			    (caddr_t)map_addr, copy_len);
			lwbuf_free(lwb);
			vm_page_unhold(m);
		}
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection
	 */
	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot,
	    FALSE);

	return (error);
}
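/*
 * Worked example for the mapping logic above (added for illustration,
 * not in the original).  Assume PAGE_SIZE 0x1000 and a PT_LOAD with
 * offset 0x10234, vmaddr 0x400234, filsz 0x1000, memsz 0x3000 (so
 * 0x2000 bytes of bss).  First pass (memsz > filsz):
 *
 *	map_addr  = trunc_page(0x400234)                   = 0x400000
 *	file_addr = trunc_page(0x10234)                    = 0x10000
 *	map_len   = trunc_page(0x10234 + 0x1000) - 0x10000 = 0x1000
 *
 * Only the fully file-backed page is mapped copy-on-write from the
 * vnode.  The straddling page plus the rest of the bss:
 *
 *	copy_len = 0x11234 - trunc_page(0x11234)           = 0x234
 *	map_addr = trunc_page(0x400234 + 0x1000)           = 0x401000
 *	map_len  = round_page(0x400234 + 0x3000) - 0x401000 = 0x3000
 *
 * is mapped anonymous zero-fill, and the 0x234 initialized bytes are
 * copied into its first page via the lwbuf copyout above.
 */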
/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry)
{
	struct {
		struct nlookupdata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nlookupdata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	struct mount *topmnt;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int error, i, numsegs;

	tempdata = kmalloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->vp = NULL;

	error = nlookup_init(nd, file, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(nd);
	if (error == 0)
		error = cache_vget(&nd->nl_nch, nd->nl_cred,
		    LK_EXCLUSIVE, &imgp->vp);
	topmnt = nd->nl_nch.mount;
	nlookup_done(nd);
	if (error)
		goto fail;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp, topmnt);
	if (error) {
		vn_unlock(imgp->vp);
		goto fail;
	}

	error = exec_map_first_page(imgp);
	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VTEXT flag, too.
	 */
	if (error == 0)
		vsetflags(imgp->vp, VTEXT);
	vn_unlock(imgp->vp);
	if (error)
		goto fail;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now */
	/* (multiplication of two Elf_Half fields will not overflow) */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			error = __elfN(load_section)(
			    p, vmspace, imgp->vp,
			    phdr[i].p_offset,
			    (caddr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz,
			    phdr[i].p_filesz, prot);
			if (error != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr =
				    trunc_page(phdr[i].p_vaddr + rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);
	if (imgp->vp) {
		vrele(imgp->vp);
		imgp->vp = NULL;
	}
	kfree(tempdata, M_TEMP);

	return (error);
}
static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int32_t *osrel)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi;
	boolean_t ret;
	int i;

	/*
	 * We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding within the ELF header, (3) path of the `interp_path'
	 * field, and (4) the ".note.ABI-tag" ELF section.
	 */

	/* Look for an ".note.ABI-tag" ELF section */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL)
			continue;
		if (hdr->e_machine == bi->machine && (bi->flags &
		    (BI_BRAND_NOTE | BI_BRAND_NOTE_MANDATORY)) != 0) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
			if (ret)
				return (bi);
		}
	}

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];

			if (bi == NULL ||
			    bi->flags & BI_BRAND_NOTE_MANDATORY)
				continue;
			if (hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}
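/*
 * Illustrative trace (added; the concrete values are hypothetical): a
 * static FreeBSD binary carrying a ".note.ABI-tag" note with vendor
 * "FreeBSD", type 1, and a 4-byte descriptor of, say, 1002000 is matched
 * by the first loop, which also sets *osrel = 1002000 via trans_osrel().
 * A binary with no note but EI_OSABI == ELFOSABI_FREEBSD falls through
 * to the second loop; one identified only by its PT_INTERP path (e.g.
 * "/usr/libexec/ld-elf.so.2") to the third; and a completely unbranded
 * binary is accepted only if the kern.elf64.fallback_brand (or
 * kern.elf32.fallback_brand) tunable names a registered brand.
 */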
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
	int32_t osrel = 0;
	int error = 0, i, n;
	boolean_t failure;
	char *interp = NULL;
	const char *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later if a
	 * particular brand doesn't support it.  Both DragonFly platforms
	 * do by default.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	n = 0;
	baddr = 0;
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			n++;
			continue;
		}
		if (phdr[i].p_type == PT_INTERP) {
			/*
			 * If interp is already defined, more than one
			 * PT_INTERP program header is present.  Take only
			 * the first one and ignore the rest.
			 */
			if (interp != NULL)
				continue;

			if (phdr[i].p_filesz == 0 ||
			    phdr[i].p_filesz > PAGE_SIZE ||
			    phdr[i].p_filesz > MAXPATHLEN)
				return (ENOEXEC);

			interp = kmalloc(phdr[i].p_filesz, M_TEMP, M_WAITOK);
			failure = extract_interpreter(imgp, &phdr[i], interp);
			if (failure) {
				kfree(interp, M_TEMP);
				return (ENOEXEC);
			}
			continue;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		if (interp != NULL)
			kfree(interp, M_TEMP);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN) {
		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
			if (interp != NULL)
				kfree(interp, M_TEMP);
			return (ENOEXEC);
		}
		/*
		 * Honour the base load address from the dso if it is
		 * non-zero for some reason.
		 */
		if (baddr == 0)
			et_dyn_addr = ET_DYN_LOAD_ADDR;
		else
			et_dyn_addr = 0;
	} else
		et_dyn_addr = 0;

	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	exec_new_vmspace(imgp, NULL);

	/*
	 * Yeah, I'm paranoid.  There is every reason in the world to get
	 * VTEXT now since from here on out, there are places we can have
	 * a context switch.  Better safe than sorry; I really don't want
	 * the file to change while it's being loaded.
	 */
	vsetflags(imgp->vp, VTEXT);

	vmspace = imgp->proc->p_vmspace;
	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			if (phdr[i].p_memsz == 0)
				break;
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(
			    imgp->proc,
			    vmspace,
			    imgp->vp,
			    phdr[i].p_offset,
			    (caddr_t)phdr[i].p_vaddr + et_dyn_addr,
			    phdr[i].p_memsz,
			    phdr[i].p_filesz,
			    prot)) != 0) {
				if (interp != NULL)
					kfree(interp, M_TEMP);
				return (error);
			}

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry.  Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
			    <= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
				    et_dyn_addr;

			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);

			/*
			 * Is this .text or .data?  We can't use
			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
			 * alpha terribly and possibly does other bad
			 * things so we stick to the old way of figuring
			 * it out:  If the segment contains the program
			 * entry point, it's a text segment, otherwise it
			 * is a data segment.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry + et_dyn_addr;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;

			/*
			 * Check limits.  It should be safe to check the
			 * limits after loading the segment since we do
			 * not actually fault in all the segment's pages.
			 */
			if (data_size >
			    imgp->proc->p_rlimit[RLIMIT_DATA].rlim_cur ||
			    text_size > maxtsiz ||
			    total_size >
			    imgp->proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
				if (interp != NULL)
					kfree(interp, M_TEMP);
				error = ENOMEM;
				return (error);
			}
			break;
		case PT_PHDR:	/* Program header table info */
			proghdr = phdr[i].p_vaddr + et_dyn_addr;
			break;
		default:
			break;
		}
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	addr = ELF_RTLD_ADDR(vmspace);

	imgp->entry_addr = entry;

	imgp->proc->p_sysent = brand_info->sysvec;
	EVENTHANDLER_INVOKE(process_exec, imgp);

	if (interp != NULL) {
		int have_interp = FALSE;

		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			ksnprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr);
			kfree(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp,
			    &addr, &imgp->entry_addr);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr);
		}
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			kfree(interp, M_TEMP);
			return (error);
		}
		kfree(interp, M_TEMP);
	} else
		addr = et_dyn_addr;

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = kmalloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;
	imgp->proc->p_osrel = osrel;

	return (error);
}
int
__elfN(dragonfly_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	if (imgp->execpathp != 0)
		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
	AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	kfree(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}
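/*
 * Sketch of the resulting user stack image (added for illustration;
 * lower addresses at the top):
 *
 *	new *stack_base → argc                     (prepended by base--)
 *	                  argv[0..argc-1], NULL
 *	                  envp[0..envc-1], NULL
 *	pos            →  [AT_EXECFD,] AT_PHDR, AT_PHENT, AT_PHNUM,
 *	                  AT_PAGESZ, AT_FLAGS, AT_ENTRY, AT_BASE,
 *	                  [AT_EXECPATH,] AT_OSRELDATE, AT_NULL
 *
 * pos = base + argc + envc + 2 skips the argv and envp pointer arrays
 * plus their two NULL terminators, which is where the auxiliary vector
 * consumed by the runtime linker begins.
 */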
/*
 * Code for generating ELF core dumps.
 */

typedef int (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in (incremented) */
	Elf_Phdr *phdr_max;	/* Pointer bound for error check */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t vsize;		/* Total size of all writable segments. */
};

/* Closure for cb_put_fp(). */
struct fp_closure {
	struct vn_hdr *vnh;
	struct vn_hdr *vnh_max;
	int count;
	struct stat *sb;
};

typedef struct elf_buf {
	char *buf;
	size_t off;
	size_t off_max;
} *elf_buf_t;

static void *target_reserve(elf_buf_t target, size_t bytes, int *error);

static int cb_put_phdr(vm_map_entry_t, void *);
static int cb_size_segment(vm_map_entry_t, void *);
static int cb_fpcount_segment(vm_map_entry_t, void *);
static int cb_put_fp(vm_map_entry_t, void *);

static int each_segment(struct proc *, segment_callback, void *, int);
static int __elfN(corehdr)(struct lwp *, int, struct file *, struct ucred *,
    int, elf_buf_t);
enum putmode { WRITE, DRYRUN };
static int __elfN(puthdr)(struct lwp *, elf_buf_t, int sig, enum putmode,
    int, struct file *);
static int elf_putallnotes(struct lwp *, elf_buf_t, int, enum putmode);
static int __elfN(putnote)(elf_buf_t, const char *, int, const void *, size_t);

static int elf_putsigs(struct lwp *, elf_buf_t);
static int elf_puttextvp(struct proc *, elf_buf_t);
static int elf_putfiles(struct proc *, elf_buf_t, struct file *);

int
__elfN(coredump)(struct lwp *lp, int sig, struct vnode *vp, off_t limit)
{
	struct file *fp;
	int error;

	if ((error = falloc(NULL, &fp, NULL)) != 0)
		return (error);
	fsetcred(fp, lp->lwp_proc->p_ucred);

	/*
	 * XXX fixme.
	 */
	fp->f_type = DTYPE_VNODE;
	fp->f_flag = O_CREAT | O_WRONLY | O_NOFOLLOW;
	fp->f_ops = &vnode_fileops;
	fp->f_data = vp;
	vn_unlock(vp);

	error = generic_elf_coredump(lp, sig, fp, limit);

	fp->f_type = 0;
	fp->f_flag = 0;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp);
	return (error);
}

int
generic_elf_coredump(struct lwp *lp, int sig, struct file *fp, off_t limit)
{
	struct proc *p = lp->lwp_proc;
	struct ucred *cred = p->p_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	struct elf_buf target;

	if (!fp)
		kprintf("can't dump core - null fp\n");

	/*
	 * Size the program segments
	 */
	seginfo.count = 0;
	seginfo.vsize = 0;
	each_segment(p, cb_size_segment, &seginfo, 1);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	bzero(&target, sizeof(target));
	__elfN(puthdr)(lp, &target, sig, DRYRUN, seginfo.count, fp);

	if (target.off + seginfo.vsize >= limit)
		return (EFAULT);

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	target.off_max = target.off;
	target.off = 0;
	target.buf = kmalloc(target.off_max, M_TEMP, M_WAITOK | M_ZERO);

	error = __elfN(corehdr)(lp, sig, fp, cred, seginfo.count, &target);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		int i;
		ssize_t nbytes;

		php = (Elf_Phdr *)(target.buf + sizeof(Elf_Ehdr)) + 1;
		for (i = 0; i < seginfo.count; i++) {
			error = fp_write(fp, (caddr_t)php->p_vaddr,
			    php->p_filesz, &nbytes, UIO_USERSPACE);
			if (error != 0)
				break;
			php++;
		}
	}
	kfree(target.buf, M_TEMP);

	return (error);
}
/*
 * A callback for each_segment() to write out the segment's
 * program header entry.
 */
static int
cb_put_phdr(vm_map_entry_t entry, void *closure)
{
	struct phdr_closure *phc = closure;
	Elf_Phdr *phdr = phc->phdr;

	if (phc->phdr == phc->phdr_max)
		return (EINVAL);

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = 0;
	if (entry->protection & VM_PROT_READ)
		phdr->p_flags |= PF_R;
	if (entry->protection & VM_PROT_WRITE)
		phdr->p_flags |= PF_W;
	if (entry->protection & VM_PROT_EXECUTE)
		phdr->p_flags |= PF_X;

	phc->offset += phdr->p_filesz;
	++phc->phdr;
	return (0);
}

/*
 * A callback for each_segment() to gather information about
 * the number of segments and their total size.
 */
static int
cb_size_segment(vm_map_entry_t entry, void *closure)
{
	struct sseg_closure *ssc = closure;

	++ssc->count;
	ssc->vsize += entry->end - entry->start;
	return (0);
}

/*
 * A callback for each_segment() to gather information about
 * the number of vnode-backed segments (for the file handle table).
 */
static int
cb_fpcount_segment(vm_map_entry_t entry, void *closure)
{
	int *count = closure;
	struct vnode *vp;

	if (entry->object.vm_object->type == OBJT_VNODE) {
		vp = (struct vnode *)entry->object.vm_object->handle;
		if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp)
			return (0);
		++*count;
	}
	return (0);
}

static int
cb_put_fp(vm_map_entry_t entry, void *closure)
{
	struct fp_closure *fpc = closure;
	struct vn_hdr *vnh = fpc->vnh;
	Elf_Phdr *phdr = &vnh->vnh_phdr;
	struct vnode *vp;
	int error;

	/*
	 * If an entry represents a vnode then write out a file handle.
	 *
	 * If we are checkpointing a checkpoint-restored program we do
	 * NOT record the filehandle for the old checkpoint vnode (which
	 * is mapped all over the place).  Instead we rely on the fact
	 * that a checkpoint-restored program does not mmap() the checkpt
	 * vnode NOCORE, so its contents will be written out to the
	 * new checkpoint file.  This is necessary because the 'old'
	 * checkpoint file is typically destroyed when a new one is created
	 * and thus cannot be used to restore the new checkpoint.
	 *
	 * Theoretically we could create a chain of checkpoint files and
	 * operate the checkpointing operation kinda like an incremental
	 * checkpoint, but a checkpoint restore would then likely wind up
	 * referencing many prior checkpoint files and that is a bit over
	 * the top for the purpose of the checkpoint API.
	 */
	if (entry->object.vm_object->type == OBJT_VNODE) {
		vp = (struct vnode *)entry->object.vm_object->handle;
		if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp)
			return (0);
		if (vnh == fpc->vnh_max)
			return (EINVAL);

		if (vp->v_mount)
			vnh->vnh_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &vnh->vnh_fh.fh_fid);
		if (error) {
			char *freepath, *fullpath;

			if (vn_fullpath(curproc, vp, &fullpath,
			    &freepath, 0)) {
				kprintf("Warning: coredump, error %d: "
				    "cannot store file handle for vnode %p\n",
				    error, vp);
			} else {
				kprintf("Warning: coredump, error %d: "
				    "cannot store file handle for %s\n",
				    error, fullpath);
				kfree(freepath, M_TEMP);
			}
			error = 0;
		}

		phdr->p_type = PT_LOAD;
		phdr->p_offset = 0;	/* not written to core */
		phdr->p_vaddr = entry->start;
		phdr->p_paddr = 0;
		phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
		phdr->p_align = PAGE_SIZE;
		phdr->p_flags = 0;
		if (entry->protection & VM_PROT_READ)
			phdr->p_flags |= PF_R;
		if (entry->protection & VM_PROT_WRITE)
			phdr->p_flags |= PF_W;
		if (entry->protection & VM_PROT_EXECUTE)
			phdr->p_flags |= PF_X;
		++fpc->vnh;
		++fpc->count;
	}
	return (0);
}
/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static int
each_segment(struct proc *p, segment_callback func, void *closure,
    int writable)
{
	int error = 0;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;

	for (entry = map->header.next; error == 0 && entry != &map->header;
	    entry = entry->next) {
		vm_object_t obj;

		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if (writable &&
			    (entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if (writable &&
			    (entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include a memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).
		 *
		 * Currently we only dump normal VM object maps.  We do
		 * not dump submaps or virtual page tables.
		 */
		if (writable && (entry->eflags & MAP_ENTRY_NOCOREDUMP))
			continue;
		if (entry->maptype != VM_MAPTYPE_NORMAL)
			continue;
		if ((obj = entry->object.vm_object) == NULL)
			continue;

		/* Find the deepest backing object. */
		while (obj->backing_object != NULL)
			obj = obj->backing_object;

		/* Ignore memory-mapped devices and such things. */
		if (obj->type != OBJT_DEFAULT &&
		    obj->type != OBJT_SWAP &&
		    obj->type != OBJT_VNODE)
			continue;

		error = (*func)(entry, closure);
	}
	return (error);
}

static
void *
target_reserve(elf_buf_t target, size_t bytes, int *error)
{
	void *res = NULL;

	if (target->buf) {
		if (target->off + bytes > target->off_max)
			*error = EINVAL;
		else
			res = target->buf + target->off;
	}
	target->off += bytes;
	return (res);
}
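/*
 * Sketch of the two-pass pattern built on target_reserve() (added for
 * illustration; it condenses what generic_elf_coredump() above already
 * does, with "nsegs" standing in for seginfo.count).  A DRYRUN pass runs
 * with target->buf == NULL, so target_reserve() only advances
 * target->off; the caller then allocates exactly that many bytes and
 * repeats the pass in WRITE mode, where the same reservations hand back
 * real pointers into the buffer:
 */
#if 0
	struct elf_buf target;

	bzero(&target, sizeof(target));
	__elfN(puthdr)(lp, &target, sig, DRYRUN, nsegs, fp); /* sizes only */
	target.off_max = target.off;
	target.off = 0;
	target.buf = kmalloc(target.off_max, M_TEMP, M_WAITOK | M_ZERO);
	__elfN(puthdr)(lp, &target, sig, WRITE, nsegs, fp);  /* fills buf */
#endif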
The 1310 * actual data will be written to the outptu file, not to elf_buf_t, 1311 * so we do not have to do any further bounds checking. 1312 */ 1313 target->off = round_page(target->off); 1314 if (error == 0 && ehdr != NULL) { 1315 /* 1316 * Fill in the ELF header. 1317 */ 1318 ehdr->e_ident[EI_MAG0] = ELFMAG0; 1319 ehdr->e_ident[EI_MAG1] = ELFMAG1; 1320 ehdr->e_ident[EI_MAG2] = ELFMAG2; 1321 ehdr->e_ident[EI_MAG3] = ELFMAG3; 1322 ehdr->e_ident[EI_CLASS] = ELF_CLASS; 1323 ehdr->e_ident[EI_DATA] = ELF_DATA; 1324 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 1325 ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE; 1326 ehdr->e_ident[EI_ABIVERSION] = 0; 1327 ehdr->e_ident[EI_PAD] = 0; 1328 ehdr->e_type = ET_CORE; 1329 ehdr->e_machine = ELF_ARCH; 1330 ehdr->e_version = EV_CURRENT; 1331 ehdr->e_entry = 0; 1332 ehdr->e_phoff = phoff; 1333 ehdr->e_flags = 0; 1334 ehdr->e_ehsize = sizeof(Elf_Ehdr); 1335 ehdr->e_phentsize = sizeof(Elf_Phdr); 1336 ehdr->e_phnum = numsegs + 1; 1337 ehdr->e_shentsize = sizeof(Elf_Shdr); 1338 ehdr->e_shnum = 0; 1339 ehdr->e_shstrndx = SHN_UNDEF; 1340 } 1341 if (error == 0 && phdr != NULL) { 1342 /* 1343 * Fill in the program header entries. 1344 */ 1345 struct phdr_closure phc; 1346 1347 /* The note segement. */ 1348 phdr->p_type = PT_NOTE; 1349 phdr->p_offset = noteoff; 1350 phdr->p_vaddr = 0; 1351 phdr->p_paddr = 0; 1352 phdr->p_filesz = notesz; 1353 phdr->p_memsz = 0; 1354 phdr->p_flags = 0; 1355 phdr->p_align = 0; 1356 ++phdr; 1357 1358 /* All the writable segments from the program. */ 1359 phc.phdr = phdr; 1360 phc.phdr_max = phdr + numsegs; 1361 phc.offset = target->off; 1362 each_segment(p, cb_put_phdr, &phc, 1); 1363 } 1364 return (error); 1365 } 1366 1367 /* 1368 * Append core dump notes to target ELF buffer or simply update target size 1369 * if dryrun selected. 1370 */ 1371 static int 1372 elf_putallnotes(struct lwp *corelp, elf_buf_t target, int sig, 1373 enum putmode mode) 1374 { 1375 struct proc *p = corelp->lwp_proc; 1376 int error; 1377 struct { 1378 prstatus_t status; 1379 prfpregset_t fpregs; 1380 prpsinfo_t psinfo; 1381 } *tmpdata; 1382 prstatus_t *status; 1383 prfpregset_t *fpregs; 1384 prpsinfo_t *psinfo; 1385 struct lwp *lp; 1386 1387 /* 1388 * Allocate temporary storage for notes on heap to avoid stack overflow. 1389 */ 1390 if (mode != DRYRUN) { 1391 tmpdata = kmalloc(sizeof(*tmpdata), M_TEMP, M_ZERO | M_WAITOK); 1392 status = &tmpdata->status; 1393 fpregs = &tmpdata->fpregs; 1394 psinfo = &tmpdata->psinfo; 1395 } else { 1396 tmpdata = NULL; 1397 status = NULL; 1398 fpregs = NULL; 1399 psinfo = NULL; 1400 } 1401 1402 /* 1403 * Append LWP-agnostic note. 1404 */ 1405 if (mode != DRYRUN) { 1406 psinfo->pr_version = PRPSINFO_VERSION; 1407 psinfo->pr_psinfosz = sizeof(prpsinfo_t); 1408 strlcpy(psinfo->pr_fname, p->p_comm, 1409 sizeof(psinfo->pr_fname)); 1410 /* 1411 * XXX - We don't fill in the command line arguments 1412 * properly yet. 1413 */ 1414 strlcpy(psinfo->pr_psargs, p->p_comm, 1415 sizeof(psinfo->pr_psargs)); 1416 } 1417 error = 1418 __elfN(putnote)(target, "CORE", NT_PRPSINFO, psinfo, sizeof *psinfo); 1419 if (error) 1420 goto exit; 1421 1422 /* 1423 * Append first note for LWP that triggered core so that it is 1424 * the selected one when the debugger starts. 
/*
 * Append core dump notes to target ELF buffer or simply update target size
 * if dryrun selected.
 */
static int
elf_putallnotes(struct lwp *corelp, elf_buf_t target, int sig,
    enum putmode mode)
{
	struct proc *p = corelp->lwp_proc;
	int error;
	struct {
		prstatus_t status;
		prfpregset_t fpregs;
		prpsinfo_t psinfo;
	} *tmpdata;
	prstatus_t *status;
	prfpregset_t *fpregs;
	prpsinfo_t *psinfo;
	struct lwp *lp;

	/*
	 * Allocate temporary storage for notes on heap to avoid stack
	 * overflow.
	 */
	if (mode != DRYRUN) {
		tmpdata = kmalloc(sizeof(*tmpdata), M_TEMP,
		    M_ZERO | M_WAITOK);
		status = &tmpdata->status;
		fpregs = &tmpdata->fpregs;
		psinfo = &tmpdata->psinfo;
	} else {
		tmpdata = NULL;
		status = NULL;
		fpregs = NULL;
		psinfo = NULL;
	}

	/*
	 * Append LWP-agnostic note.
	 */
	if (mode != DRYRUN) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm,
		    sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments
		 * properly yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	error = __elfN(putnote)(target, "CORE", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);
	if (error)
		goto exit;

	/*
	 * Append first note for LWP that triggered core so that it is
	 * the selected one when the debugger starts.
	 */
	if (mode != DRYRUN) {
		status->pr_version = PRSTATUS_VERSION;
		status->pr_statussz = sizeof(prstatus_t);
		status->pr_gregsetsz = sizeof(gregset_t);
		status->pr_fpregsetsz = sizeof(fpregset_t);
		status->pr_osreldate = osreldate;
		status->pr_cursig = sig;
		/*
		 * XXX GDB needs unique pr_pid for each LWP and does not
		 * support pr_pid==0 but lwp_tid can be 0, so hack unique
		 * value.
		 */
		status->pr_pid = corelp->lwp_tid;
		fill_regs(corelp, &status->pr_reg);
		fill_fpregs(corelp, fpregs);
	}
	error = __elfN(putnote)(target, "CORE", NT_PRSTATUS, status,
	    sizeof *status);
	if (error)
		goto exit;
	error = __elfN(putnote)(target, "CORE", NT_FPREGSET, fpregs,
	    sizeof *fpregs);
	if (error)
		goto exit;

	/*
	 * Then append notes for other LWPs.
	 */
	FOREACH_LWP_IN_PROC(lp, p) {
		if (lp == corelp)
			continue;
		/* skip lwps being created */
		if (lp->lwp_thread == NULL)
			continue;
		if (mode != DRYRUN) {
			status->pr_pid = lp->lwp_tid;
			fill_regs(lp, &status->pr_reg);
			fill_fpregs(lp, fpregs);
		}
		error = __elfN(putnote)(target, "CORE", NT_PRSTATUS,
		    status, sizeof *status);
		if (error)
			goto exit;
		error = __elfN(putnote)(target, "CORE", NT_FPREGSET,
		    fpregs, sizeof *fpregs);
		if (error)
			goto exit;
	}

exit:
	if (tmpdata != NULL)
		kfree(tmpdata, M_TEMP);
	return (error);
}

/*
 * Generate a note sub-structure.
 *
 * NOTE: 4-byte alignment.
 */
static int
__elfN(putnote)(elf_buf_t target, const char *name, int type,
    const void *desc, size_t descsz)
{
	int error = 0;
	char *dst;
	Elf_Note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = descsz;
	note.n_type = type;
	dst = target_reserve(target, sizeof(note), &error);
	if (dst != NULL)
		bcopy(&note, dst, sizeof note);
	dst = target_reserve(target, note.n_namesz, &error);
	if (dst != NULL)
		bcopy(name, dst, note.n_namesz);
	target->off = roundup2(target->off, sizeof(Elf_Word));
	dst = target_reserve(target, note.n_descsz, &error);
	if (dst != NULL)
		bcopy(desc, dst, note.n_descsz);
	target->off = roundup2(target->off, sizeof(Elf_Word));
	return (error);
}
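/*
 * On-disk layout of one note emitted above (worked example added for
 * illustration; name and descriptor are both padded to 4 bytes per
 * roundup2(..., sizeof(Elf_Word))):
 *
 *	n_namesz = 5			("CORE" plus NUL)
 *	n_descsz = sizeof(prstatus_t)
 *	n_type   = NT_PRSTATUS
 *	"CORE\0" + 3 pad bytes		(5 rounded up to 8)
 *	prstatus_t payload		(+ pad to 4 bytes if needed)
 */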
static int
elf_putsigs(struct lwp *lp, elf_buf_t target)
{
	/* XXX lwp handle more than one lwp */
	struct proc *p = lp->lwp_proc;
	int error = 0;
	struct ckpt_siginfo *csi;

	csi = target_reserve(target, sizeof(struct ckpt_siginfo), &error);
	if (csi) {
		csi->csi_ckptpisz = sizeof(struct ckpt_siginfo);
		bcopy(p->p_sigacts, &csi->csi_sigacts, sizeof(*p->p_sigacts));
		bcopy(&p->p_realtimer, &csi->csi_itimerval,
		    sizeof(struct itimerval));
		bcopy(&lp->lwp_sigmask, &csi->csi_sigmask,
		    sizeof(sigset_t));
		csi->csi_sigparent = p->p_sigparent;
	}
	return (error);
}

static int
elf_putfiles(struct proc *p, elf_buf_t target, struct file *ckfp)
{
	int error = 0;
	int i;
	struct ckpt_filehdr *cfh = NULL;
	struct ckpt_fileinfo *cfi;
	struct file *fp;
	struct vnode *vp;

	/*
	 * the duplicated loop is gross, but it was the only way
	 * to eliminate uninitialized variable warnings
	 */
	cfh = target_reserve(target, sizeof(struct ckpt_filehdr), &error);
	if (cfh) {
		cfh->cfh_nfiles = 0;
	}

	/*
	 * ignore STDIN/STDERR/STDOUT.
	 */
	for (i = 3; error == 0 && i < p->p_fd->fd_nfiles; i++) {
		fp = holdfp(p->p_fd, i, -1);
		if (fp == NULL)
			continue;
		/*
		 * XXX Only checkpoint vnodes for now.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			fdrop(fp);
			continue;
		}
		cfi = target_reserve(target, sizeof(struct ckpt_fileinfo),
		    &error);
		if (cfi == NULL) {
			fdrop(fp);
			continue;
		}
		cfi->cfi_index = -1;
		cfi->cfi_type = fp->f_type;
		cfi->cfi_flags = fp->f_flag;
		cfi->cfi_offset = fp->f_offset;
		cfi->cfi_ckflags = 0;

		if (fp == ckfp)
			cfi->cfi_ckflags |= CKFIF_ISCKPTFD;
		/* f_count and f_msgcount should not be saved/restored */
		/* XXX save cred info */

		switch (fp->f_type) {
		case DTYPE_VNODE:
			vp = (struct vnode *)fp->f_data;
			/*
			 * it looks like a bug in ptrace is marking
			 * a non-vnode as a vnode - until we find the
			 * root cause this will at least prevent
			 * further panics from truss
			 */
			if (vp == NULL || vp->v_mount == NULL)
				break;
			cfh->cfh_nfiles++;
			cfi->cfi_index = i;
			cfi->cfi_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
			error = VFS_VPTOFH(vp, &cfi->cfi_fh.fh_fid);
			break;
		default:
			break;
		}
		fdrop(fp);
	}
	return (error);
}

static int
elf_puttextvp(struct proc *p, elf_buf_t target)
{
	int error = 0;
	int *vn_count;
	struct fp_closure fpc;
	struct ckpt_vminfo *vminfo;

	vminfo = target_reserve(target, sizeof(struct ckpt_vminfo), &error);
	if (vminfo != NULL) {
		vminfo->cvm_dsize = p->p_vmspace->vm_dsize;
		vminfo->cvm_tsize = p->p_vmspace->vm_tsize;
		vminfo->cvm_daddr = p->p_vmspace->vm_daddr;
		vminfo->cvm_taddr = p->p_vmspace->vm_taddr;
	}

	fpc.count = 0;
	vn_count = target_reserve(target, sizeof(int), &error);
	if (target->buf != NULL) {
		fpc.vnh = (struct vn_hdr *)(target->buf + target->off);
		fpc.vnh_max = fpc.vnh +
		    (target->off_max - target->off) / sizeof(struct vn_hdr);
		error = each_segment(p, cb_put_fp, &fpc, 0);
		if (vn_count)
			*vn_count = fpc.count;
	} else {
		error = each_segment(p, cb_fpcount_segment, &fpc.count, 0);
	}
	target->off += fpc.count * sizeof(struct vn_hdr);
	return (error);
}
/*
 * Try to find the appropriate ABI-note section for checknote.  The entire
 * image is searched if necessary, not only the first page.
 */
static boolean_t
__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel)
{
	boolean_t valid_note_found;
	const Elf_Phdr *phdr, *pnote;
	const Elf_Ehdr *hdr;
	int i;

	valid_note_found = FALSE;
	hdr = (const Elf_Ehdr *)imgp->image_header;
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_NOTE) {
			pnote = &phdr[i];
			valid_note_found = check_PT_NOTE(imgp, checknote,
			    osrel, pnote);
			if (valid_note_found)
				break;
		}
	}
	return valid_note_found;
}

static boolean_t
check_PT_NOTE(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel, const Elf_Phdr *pnote)
{
	boolean_t limited_to_first_page;
	boolean_t found = FALSE;
	const Elf_Note *note, *note0, *note_end;
	const char *note_name;
	__ElfN(Off) noteloc, firstloc;
	__ElfN(Size) notesz, firstlen, endbyte;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	const char *page;
	char *data = NULL;
	int n;

	notesz = pnote->p_filesz;
	noteloc = pnote->p_offset;
	endbyte = noteloc + notesz;
	limited_to_first_page = noteloc < PAGE_SIZE && endbyte < PAGE_SIZE;

	if (limited_to_first_page) {
		note = (const Elf_Note *)(imgp->image_header + noteloc);
		note_end = (const Elf_Note *)(imgp->image_header + endbyte);
		note0 = note;
	} else {
		firstloc = noteloc & PAGE_MASK;
		firstlen = PAGE_SIZE - firstloc;
		if (notesz < sizeof(Elf_Note) || notesz > PAGE_SIZE)
			return (FALSE);

		lwb = &lwb_cache;
		if (exec_map_page(imgp, noteloc >> PAGE_SHIFT, &lwb, &page))
			return (FALSE);
		if (firstlen < notesz) {	/* crosses page boundary */
			data = kmalloc(notesz, M_TEMP, M_WAITOK);
			bcopy(page + firstloc, data, firstlen);

			exec_unmap_page(lwb);
			lwb = &lwb_cache;
			if (exec_map_page(imgp, (noteloc >> PAGE_SHIFT) + 1,
			    &lwb, &page)) {
				kfree(data, M_TEMP);
				return (FALSE);
			}
			bcopy(page, data + firstlen, notesz - firstlen);
			note = note0 = (const Elf_Note *)(data);
			note_end = (const Elf_Note *)(data + notesz);
		} else {
			note = note0 = (const Elf_Note *)(page + firstloc);
			note_end = (const Elf_Note *)(page + firstloc +
			    firstlen);
		}
	}

	for (n = 0; n < 100 && note >= note0 && note < note_end; n++) {
		if (!aligned(note, Elf32_Addr))
			break;
		note_name = (const char *)(note + 1);

		if (note->n_namesz == checknote->hdr.n_namesz &&
		    note->n_descsz == checknote->hdr.n_descsz &&
		    note->n_type == checknote->hdr.n_type &&
		    (strncmp(checknote->vendor, note_name,
		    checknote->hdr.n_namesz) == 0)) {
			/* Fetch osreldata from the ".note.ABI-tag" note */
			if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
			    checknote->trans_osrel != NULL)
				checknote->trans_osrel(note, osrel);
			found = TRUE;
			break;
		}
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
	}

	if (!limited_to_first_page) {
		if (data != NULL)
			kfree(data, M_TEMP);
		exec_unmap_page(lwb);
	}
	return (found);
}
/*
 * The interpreter program header may be located beyond the first page, so
 * regardless of its location, a copy of the interpreter path is created so
 * that it may be safely referenced by the calling function in all cases.
 * The memory is allocated by the calling function, and the copying is done
 * here.
 */
static boolean_t
extract_interpreter(struct image_params *imgp, const Elf_Phdr *pinterpreter,
    char *data)
{
	boolean_t limited_to_first_page;
	const boolean_t result_success = FALSE;
	const boolean_t result_failure = TRUE;
	__ElfN(Off) pathloc, firstloc;
	__ElfN(Size) pathsz, firstlen, endbyte;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	const char *page;

	pathsz = pinterpreter->p_filesz;
	pathloc = pinterpreter->p_offset;
	endbyte = pathloc + pathsz;

	limited_to_first_page = pathloc < PAGE_SIZE && endbyte < PAGE_SIZE;
	if (limited_to_first_page) {
		bcopy(imgp->image_header + pathloc, data, pathsz);
		return (result_success);
	}

	firstloc = pathloc & PAGE_MASK;
	firstlen = PAGE_SIZE - firstloc;

	lwb = &lwb_cache;
	if (exec_map_page(imgp, pathloc >> PAGE_SHIFT, &lwb, &page))
		return (result_failure);

	if (firstlen < pathsz) {	/* crosses page boundary */
		bcopy(page + firstloc, data, firstlen);

		exec_unmap_page(lwb);
		lwb = &lwb_cache;
		if (exec_map_page(imgp, (pathloc >> PAGE_SHIFT) + 1, &lwb,
		    &page))
			return (result_failure);
		bcopy(page, data + firstlen, pathsz - firstlen);
	} else
		bcopy(page + firstloc, data, pathsz);

	exec_unmap_page(lwb);
	return (result_success);
}

static boolean_t
__elfN(bsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
{
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
	*osrel = *(const int32_t *)(p);

	return (TRUE);
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
#if defined(__x86_64__)
static struct execsw elf_execsw = {exec_elf64_imgact, "ELF64"};
EXEC_SET_ORDERED(elf64, elf_execsw, SI_ORDER_FIRST);
#else /* i386 assumed */
static struct execsw elf_execsw = {exec_elf32_imgact, "ELF32"};
EXEC_SET_ORDERED(elf32, elf_execsw, SI_ORDER_FIRST);
#endif