xref: /openbsd-src/sys/kern/exec_elf.c (revision b702d7953f41d531d27bc0c8382640e12d81ddb6)
1 /*	$OpenBSD: exec_elf.c,v 1.162 2021/12/07 04:19:24 guenther Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 Per Fogelstrom
5  * All rights reserved.
6  *
7  * Copyright (c) 1994 Christos Zoulas
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  */
33 
34 /*
35  * Copyright (c) 2001 Wasabi Systems, Inc.
36  * All rights reserved.
37  *
38  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed for the NetBSD Project by
51  *	Wasabi Systems, Inc.
52  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
53  *    or promote products derived from this software without specific prior
54  *    written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/proc.h>
73 #include <sys/malloc.h>
74 #include <sys/pool.h>
75 #include <sys/mount.h>
76 #include <sys/namei.h>
77 #include <sys/vnode.h>
78 #include <sys/core.h>
79 #include <sys/syslog.h>
80 #include <sys/exec.h>
81 #include <sys/exec_elf.h>
82 #include <sys/fcntl.h>
83 #include <sys/ptrace.h>
84 #include <sys/syscall.h>
85 #include <sys/signalvar.h>
86 #include <sys/stat.h>
87 #include <sys/pledge.h>
88 
89 #include <sys/mman.h>
90 
91 #include <uvm/uvm_extern.h>
92 
93 #include <machine/reg.h>
94 #include <machine/exec.h>
95 
/*
 * Forward declarations for the ELF exec and coredump routines
 * defined (or stubbed) in this file.
 */
int	elf_load_file(struct proc *, char *, struct exec_package *,
	    struct elf_args *);
int	elf_check_header(Elf_Ehdr *);
int	elf_read_from(struct proc *, struct vnode *, u_long, void *, int);
void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
	    Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
int	coredump_elf(struct proc *, void *);
int	exec_elf_fixup(struct proc *, struct exec_package *);
int	elf_os_pt_note_name(Elf_Note *);
int	elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);

/* Symbols defined elsewhere; they are only referenced from emul_elf below. */
extern char sigcode[], esigcode[], sigcoderet[];
#ifdef SYSCALL_DEBUG
/* Only referenced (from emul_elf) when SYSCALL_DEBUG is defined. */
extern char *syscallnames[];
#endif
111 
/*
 * Round a up (ELF_ROUND) or down (ELF_TRUNC) to a multiple of b.
 * Both rely on mask arithmetic, so the result is only a multiple of b
 * when b is a power of two.  Arguments are evaluated more than once.
 */
#define ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
#define ELF_TRUNC(a, b)		((a) & ~((b) - 1))

/*
 * We limit the number of program headers to 32; this should
 * be a reasonable limit for ELF.  The most we have seen so far is 12.
 * The limit also bounds the on-stack loadmap[] array in elf_load_file().
 */
#define ELF_MAX_VALID_PHDR 32
121 
/*
 * This is the OpenBSD ELF emul.
 *
 * The initializer is positional, so the order of the values must match
 * the member order of struct emul (declared elsewhere; not visible here).
 * exec_elf_makecmds() installs this structure as epp->ep_emul.
 */
struct emul emul_elf = {
	"native",
	NULL,
	SYS_syscall,
	SYS_MAXSYSCALL,
	sysent,
#ifdef SYSCALL_DEBUG
	syscallnames,
#else
	NULL,
#endif
	setregs,
	exec_elf_fixup,		/* defined below in this file */
	coredump_elf,		/* defined below in this file */
	sigcode,
	esigcode,
	sigcoderet
};
143 
/* Bit reported by elf_os_pt_note_name() for a note named "OpenBSD". */
#define ELF_NOTE_NAME_OPENBSD	0x01

/*
 * Table mapping PT_NOTE name strings to identifier bits; the bits are
 * OR'ed together by elf_os_pt_note().
 */
struct elf_note_name {
	char *name;	/* NUL-terminated note name to match */
	int id;		/* ELF_NOTE_NAME_* bit returned on a match */
} elf_note_names[] = {
	{ "OpenBSD",	ELF_NOTE_NAME_OPENBSD },
};

/* Note name and descriptor fields are padded to a multiple of this size. */
#define	ELFROUNDSIZE	sizeof(Elf_Word)
#define	elfround(x)	roundup((x), ELFROUNDSIZE)
155 
156 
157 /*
158  * Check header for validity; return 0 for ok, ENOEXEC if error
159  */
160 int
161 elf_check_header(Elf_Ehdr *ehdr)
162 {
163 	/*
164 	 * We need to check magic, class size, endianness, and version before
165 	 * we look at the rest of the Elf_Ehdr structure. These few elements
166 	 * are represented in a machine independent fashion.
167 	 */
168 	if (!IS_ELF(*ehdr) ||
169 	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
170 	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
171 	    ehdr->e_ident[EI_VERSION] != ELF_TARG_VER)
172 		return (ENOEXEC);
173 
174 	/* Now check the machine dependent header */
175 	if (ehdr->e_machine != ELF_TARG_MACH ||
176 	    ehdr->e_version != ELF_TARG_VER)
177 		return (ENOEXEC);
178 
179 	/* Don't allow an insane amount of sections. */
180 	if (ehdr->e_phnum > ELF_MAX_VALID_PHDR)
181 		return (ENOEXEC);
182 
183 	return (0);
184 }
185 
/*
 * Load a psection at the appropriate address.
 *
 * Queues the vmcmds needed to map one program header onto vcset; nothing
 * is mapped here.
 *
 *	vcset	vmcmd set the mapping commands are appended to
 *	vp	vnode the file-backed commands refer to
 *	ph	program header describing the segment
 *	addr	in: requested load address, or ELF_NO_ADDR to use
 *		ph->p_vaddr; out: that address truncated to ph->p_align
 *		(when p_align > 1)
 *	size	out: ph->p_memsz plus the distance between *addr and the
 *		segment start
 *	prot	in/out: PROT_* bits derived from ph->p_flags are OR'ed in
 *	flags	passed through unchanged to every queued vmcmd
 */
void
elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
    Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags)
{
	u_long msize, lsize, psize, rm, rf;
	long diff, offset, bdiff;
	Elf_Addr base;

	/*
	 * If the user specified an address, then we load there.
	 *
	 * diff is the offset of p_vaddr within its p_align-sized unit and
	 * base is the page-aligned address at which the mapping starts.
	 */
	if (*addr != ELF_NO_ADDR) {
		if (ph->p_align > 1) {
			*addr = ELF_TRUNC(*addr, ph->p_align);
			diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
			/* page align vaddr */
			base = *addr + trunc_page(ph->p_vaddr)
			    - ELF_TRUNC(ph->p_vaddr, ph->p_align);
		} else {
			diff = 0;
			base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr;
		}
	} else {
		/* No address requested: load at the segment's own p_vaddr. */
		*addr = ph->p_vaddr;
		if (ph->p_align > 1)
			*addr = ELF_TRUNC(*addr, ph->p_align);
		base = trunc_page(ph->p_vaddr);
		diff = ph->p_vaddr - *addr;
	}
	/* bdiff is the offset of p_vaddr within its page. */
	bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr);

	/*
	 * Enforce W^X and map W|X segments without X permission
	 * initially.  The dynamic linker will make these read-only
	 * and add back X permission after relocation processing.
	 * Static executables with W|X segments will probably crash.
	 */
	*prot |= (ph->p_flags & PF_R) ? PROT_READ : 0;
	*prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0;
	if ((ph->p_flags & PF_W) == 0)
		*prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0;

	/*
	 * Back the file offset and length up by bdiff so that the mapping
	 * starts on the page boundary at base.
	 */
	msize = ph->p_memsz + diff;
	offset = ph->p_offset - bdiff;
	lsize = ph->p_filesz + bdiff;
	psize = round_page(lsize);

	/*
	 * Because the pagedvn pager can't handle zero fill of the last
	 * data page if it's not page aligned we map the last page readvn.
	 */
	if (ph->p_flags & PF_W) {
		/* Whole pages are paged from the vnode, the tail is read in. */
		psize = trunc_page(lsize);
		if (psize > 0)
			NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp,
			    offset, *prot, flags);
		if (psize != lsize) {
			NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize,
			    base + psize, vp, offset + psize, *prot, flags);
		}
	} else {
		NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset,
		    *prot, flags);
	}

	/*
	 * Check if we need to extend the size of the segment: when the
	 * in-memory image ends on a later page than the file-backed part,
	 * queue a zero-fill mapping for the pages in between.
	 */
	rm = round_page(*addr + ph->p_memsz + diff);
	rf = round_page(*addr + ph->p_filesz + diff);

	if (rm != rf) {
		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0,
		    *prot, flags);
	}
	*size = msize;
}
266 
267 /*
268  * Read from vnode into buffer at offset.
269  */
270 int
271 elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf,
272     int size)
273 {
274 	int error;
275 	size_t resid;
276 
277 	if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE,
278 	    0, p->p_ucred, &resid, p)) != 0)
279 		return error;
280 	/*
281 	 * See if we got all of it
282 	 */
283 	if (resid != 0)
284 		return (ENOEXEC);
285 	return (0);
286 }
287 
288 /*
289  * Load a file (interpreter/library) pointed to by path [stolen from
290  * coff_load_shlib()]. Made slightly generic so it might be used externally.
291  */
292 int
293 elf_load_file(struct proc *p, char *path, struct exec_package *epp,
294     struct elf_args *ap)
295 {
296 	int error, i;
297 	struct nameidata nd;
298 	Elf_Ehdr eh;
299 	Elf_Phdr *ph = NULL;
300 	u_long phsize = 0;
301 	Elf_Addr addr;
302 	struct vnode *vp;
303 	Elf_Phdr *base_ph = NULL;
304 	struct interp_ld_sec {
305 		Elf_Addr vaddr;
306 		u_long memsz;
307 	} loadmap[ELF_MAX_VALID_PHDR];
308 	int nload, idx = 0;
309 	Elf_Addr pos;
310 	int file_align;
311 	int loop;
312 	size_t randomizequota = ELF_RANDOMIZE_LIMIT;
313 
314 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
315 	nd.ni_pledge = PLEDGE_RPATH;
316 	nd.ni_unveil = UNVEIL_READ;
317 	if ((error = namei(&nd)) != 0) {
318 		return (error);
319 	}
320 	vp = nd.ni_vp;
321 	if (vp->v_type != VREG) {
322 		error = EACCES;
323 		goto bad;
324 	}
325 	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
326 		goto bad;
327 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
328 		error = EACCES;
329 		goto bad;
330 	}
331 	if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)
332 		goto bad1;
333 	if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0)
334 		goto bad1;
335 
336 	if (elf_check_header(&eh) || eh.e_type != ET_DYN) {
337 		error = ENOEXEC;
338 		goto bad1;
339 	}
340 
341 	ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
342 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
343 
344 	if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0)
345 		goto bad1;
346 
347 	for (i = 0; i < eh.e_phnum; i++) {
348 		if (ph[i].p_type == PT_LOAD) {
349 			if (ph[i].p_filesz > ph[i].p_memsz ||
350 			    ph[i].p_memsz == 0) {
351 				error = EINVAL;
352 				goto bad1;
353 			}
354 			loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr);
355 			loadmap[idx].memsz = round_page (ph[i].p_vaddr +
356 			    ph[i].p_memsz - loadmap[idx].vaddr);
357 			file_align = ph[i].p_align;
358 			idx++;
359 		}
360 	}
361 	nload = idx;
362 
363 	/*
364 	 * Load the interpreter where a non-fixed mmap(NULL, ...)
365 	 * would (i.e. something safely out of the way).
366 	 */
367 	pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS,
368 	    VM_MAXUSER_ADDRESS);
369 	pos = ELF_ROUND(pos, file_align);
370 
371 	loop = 0;
372 	for (i = 0; i < nload;/**/) {
373 		vaddr_t	addr;
374 		struct	uvm_object *uobj;
375 		off_t	uoff;
376 		size_t	size;
377 
378 #ifdef this_needs_fixing
379 		if (i == 0) {
380 			uobj = &vp->v_uvm.u_obj;
381 			/* need to fix uoff */
382 		} else {
383 #endif
384 			uobj = NULL;
385 			uoff = 0;
386 #ifdef this_needs_fixing
387 		}
388 #endif
389 
390 		addr = trunc_page(pos + loadmap[i].vaddr);
391 		size =  round_page(addr + loadmap[i].memsz) - addr;
392 
393 		/* CRAP - map_findspace does not avoid daddr+BRKSIZ */
394 		if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
395 		    (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ))
396 			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
397 			    BRKSIZ);
398 
399 		if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
400 		    (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
401 			if (loop == 0) {
402 				loop = 1;
403 				i = 0;
404 				pos = 0;
405 				continue;
406 			}
407 			error = ENOMEM;
408 			goto bad1;
409 		}
410 		if (addr != pos + loadmap[i].vaddr) {
411 			/* base changed. */
412 			pos = addr - trunc_page(loadmap[i].vaddr);
413 			pos = ELF_ROUND(pos,file_align);
414 			i = 0;
415 			continue;
416 		}
417 
418 		i++;
419 	}
420 
421 	/*
422 	 * Load all the necessary sections
423 	 */
424 	for (i = 0; i < eh.e_phnum; i++) {
425 		Elf_Addr size = 0;
426 		int prot = 0;
427 		int flags;
428 
429 		switch (ph[i].p_type) {
430 		case PT_LOAD:
431 			if (base_ph == NULL) {
432 				flags = VMCMD_BASE;
433 				addr = pos;
434 				base_ph = &ph[i];
435 			} else {
436 				flags = VMCMD_RELATIVE;
437 				addr = ph[i].p_vaddr - base_ph->p_vaddr;
438 			}
439 			elf_load_psection(&epp->ep_vmcmds, nd.ni_vp,
440 			    &ph[i], &addr, &size, &prot, flags | VMCMD_SYSCALL);
441 			/* If entry is within this section it must be text */
442 			if (eh.e_entry >= ph[i].p_vaddr &&
443 			    eh.e_entry < (ph[i].p_vaddr + size)) {
444  				epp->ep_entry = addr + eh.e_entry -
445 				    ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align);
446 				if (flags == VMCMD_RELATIVE)
447 					epp->ep_entry += pos;
448 				ap->arg_interp = pos;
449 			}
450 			addr += size;
451 			break;
452 
453 		case PT_DYNAMIC:
454 		case PT_PHDR:
455 		case PT_NOTE:
456 			break;
457 
458 		case PT_OPENBSD_RANDOMIZE:
459 			if (ph[i].p_memsz > randomizequota) {
460 				error = ENOMEM;
461 				goto bad1;
462 			}
463 			randomizequota -= ph[i].p_memsz;
464 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
465 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
466 			break;
467 
468 		default:
469 			break;
470 		}
471 	}
472 
473 	vn_marktext(nd.ni_vp);
474 
475 bad1:
476 	VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p);
477 bad:
478 	free(ph, M_TEMP, phsize);
479 
480 	vput(nd.ni_vp);
481 	return (error);
482 }
483 
/*
 * Prepare an Elf binary's exec package
 *
 * First, set up the various offsets/lengths in the exec package.
 *
 * Then, mark the text image busy (so it can be demand paged) or error out if
 * this is not possible.  Finally, set up vmcmds for the text, data, bss, and
 * stack segments.
 *
 * Returns ENOEXEC when the header is too short, invalid, or not ET_EXEC or
 * ET_DYN, and ETXTBSY when the vnode is open for writing.  Other failures
 * return the underlying error, or ENOEXEC when no specific error was
 * recorded.  On success the result of exec_setup_stack() is returned.
 */
int
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
	Elf_Ehdr *eh = epp->ep_hdr;
	Elf_Phdr *ph, *pp, *base_ph = NULL;
	Elf_Addr phdr = 0, exe_base = 0;
	int error, i, has_phdr = 0, names = 0;
	char *interp = NULL;
	u_long phsize;
	size_t randomizequota = ELF_RANDOMIZE_LIMIT;

	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
		return (ENOEXEC);

	if (elf_check_header(eh) ||
	   (eh->e_type != ET_EXEC && eh->e_type != ET_DYN))
		return (ENOEXEC);

	/*
	 * check if vnode is in open for writing, because we want to demand-
	 * page out of it.  if it is, don't do it, for various reasons.
	 */
	if (epp->ep_vp->v_writecount != 0) {
#ifdef DIAGNOSTIC
		if (epp->ep_vp->v_flag & VTEXT)
			panic("exec: a VTEXT vnode has writecount != 0");
#endif
		return (ETXTBSY);
	}
	/*
	 * Allocate space to hold all the program headers, and read them
	 * from the file
	 */
	ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);

	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph,
	    phsize)) != 0)
		goto bad;

	/* ELF_NO_ADDR marks "no text/data segment seen yet". */
	epp->ep_tsize = ELF_NO_ADDR;
	epp->ep_dsize = ELF_NO_ADDR;

	/*
	 * First pass: read the interpreter path (first PT_INTERP only),
	 * sanity check the PT_LOAD sizes, remember the first PT_LOAD and
	 * note whether a PT_PHDR is present.  A `goto bad' taken while
	 * error is still 0 makes the function return ENOEXEC.
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		if (pp->p_type == PT_INTERP && !interp) {
			if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN)
				goto bad;
			interp = pool_get(&namei_pool, PR_WAITOK);
			if ((error = elf_read_from(p, epp->ep_vp,
			    pp->p_offset, interp, pp->p_filesz)) != 0) {
				goto bad;
			}
			/* The path must be NUL terminated within p_filesz. */
			if (interp[pp->p_filesz - 1] != '\0')
				goto bad;
		} else if (pp->p_type == PT_LOAD) {
			if (pp->p_filesz > pp->p_memsz ||
			    pp->p_memsz == 0) {
				error = EINVAL;
				goto bad;
			}
			if (base_ph == NULL)
				base_ph = pp;
		} else if (pp->p_type == PT_PHDR) {
			has_phdr = 1;
		}
	}

	if (eh->e_type == ET_DYN) {
		/* need phdr and load sections for PIE */
		if (!has_phdr || base_ph == NULL) {
			error = EINVAL;
			goto bad;
		}
		/* randomize exe_base for PIE */
		exe_base = uvm_map_pie(base_ph->p_align);
	}

	/*
	 * OK, we want a slightly different twist of the
	 * standard emulation package for "real" elf.
	 */
	epp->ep_emul = &emul_elf;

	/*
	 * Verify this is an OpenBSD executable.  If it's marked that way
	 * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment.
	 */
	if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0)
		goto bad;
	if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD)
		names |= ELF_NOTE_NAME_OPENBSD;

	/*
	 * Load all the necessary sections
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		Elf_Addr addr, size = 0;
		int prot = 0;
		int flags = 0;

		switch (pp->p_type) {
		case PT_LOAD:
			/*
			 * With a non-zero exe_base the first PT_LOAD goes
			 * at exe_base and the others relative to it;
			 * otherwise each segment loads at its own p_vaddr.
			 */
			if (exe_base != 0) {
				if (pp == base_ph) {
					flags = VMCMD_BASE;
					addr = exe_base;
				} else {
					flags = VMCMD_RELATIVE;
					addr = pp->p_vaddr - base_ph->p_vaddr;
				}
			} else
				addr = ELF_NO_ADDR;

			/* Permit system calls in specific main-programs */
			if (interp == NULL) {
				/* statics. Also block the ld.so syscall-grant */
				flags |= VMCMD_SYSCALL;
				p->p_vmspace->vm_map.flags |= VM_MAP_SYSCALL_ONCE;
			}

			/*
			 * Calculates size of text and data segments
			 * by starting at first and going to end of last.
			 * 'rwx' sections are treated as data.
			 * this is correct for BSS_PLT, but may not be
			 * for DATA_PLT, is fine for TEXT_PLT.
			 */
			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
			    pp, &addr, &size, &prot, flags);

			/*
			 * Update exe_base in case alignment was off.
			 * For PIE, addr is relative to exe_base so
			 * adjust it (non PIE exe_base is 0 so no change).
			 *
			 * Note the exact comparison: when VMCMD_SYSCALL was
			 * OR'ed into flags above, flags != VMCMD_BASE and
			 * the else branch is taken.
			 */
			if (flags == VMCMD_BASE)
				exe_base = addr;
			else
				addr += exe_base;

			/*
			 * Decide whether it's text or data by looking
			 * at the protection of the section
			 */
			if (prot & PROT_WRITE) {
				/* data section */
				if (epp->ep_dsize == ELF_NO_ADDR) {
					epp->ep_daddr = addr;
					epp->ep_dsize = size;
				} else {
					/* Grow the range to cover this one too. */
					if (addr < epp->ep_daddr) {
						epp->ep_dsize =
						    epp->ep_dsize +
						    epp->ep_daddr -
						    addr;
						epp->ep_daddr = addr;
					} else
						epp->ep_dsize = addr+size -
						    epp->ep_daddr;
				}
			} else if (prot & PROT_EXEC) {
				/* text section */
				if (epp->ep_tsize == ELF_NO_ADDR) {
					epp->ep_taddr = addr;
					epp->ep_tsize = size;
				} else {
					/* Grow the range to cover this one too. */
					if (addr < epp->ep_taddr) {
						epp->ep_tsize =
						    epp->ep_tsize +
						    epp->ep_taddr -
						    addr;
						epp->ep_taddr = addr;
					} else
						epp->ep_tsize = addr+size -
						    epp->ep_taddr;
				}
			}
			break;

		case PT_SHLIB:
			error = ENOEXEC;
			goto bad;

		case PT_INTERP:
			/* Already did this one */
		case PT_DYNAMIC:
		case PT_NOTE:
			break;

		case PT_PHDR:
			/* Note address of program headers (in text segment) */
			phdr = pp->p_vaddr;
			break;

		case PT_OPENBSD_RANDOMIZE:
			/* Cap the total amount of memory filled with random data. */
			if (ph[i].p_memsz > randomizequota) {
				error = ENOMEM;
				goto bad;
			}
			randomizequota -= ph[i].p_memsz;
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		default:
			/*
			 * Not fatal, we don't need to understand everything
			 * :-)
			 */
			break;
		}
	}

	/* Relocate the program header address by the load base. */
	phdr += exe_base;

	/*
	 * Strangely some linux programs may have all load sections marked
	 * writeable, in this case, textsize is not -1, but rather 0;
	 */
	if (epp->ep_tsize == ELF_NO_ADDR)
		epp->ep_tsize = 0;
	/*
	 * Another possibility is that it has all load sections marked
	 * read-only.  Fake a zero-sized data segment right after the
	 * text segment.
	 */
	if (epp->ep_dsize == ELF_NO_ADDR) {
		epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize);
		epp->ep_dsize = 0;
	}

	/* From here on the interp buffer is reachable through the package. */
	epp->ep_interp = interp;
	epp->ep_entry = eh->e_entry + exe_base;

	/*
	 * Check if we found a dynamically linked binary and arrange to load
	 * its interpreter when the exec file is released.
	 */
	if (interp || eh->e_type == ET_DYN) {
		struct elf_args *ap;

		ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK);

		ap->arg_phaddr = phdr;
		ap->arg_phentsize = eh->e_phentsize;
		ap->arg_phnum = eh->e_phnum;
		ap->arg_entry = eh->e_entry + exe_base;
		ap->arg_interp = exe_base;

		/* Consumed and freed by exec_elf_fixup(). */
		epp->ep_args = ap;
	}

	free(ph, M_TEMP, phsize);
	vn_marktext(epp->ep_vp);
	return (exec_setup_stack(p, epp));

bad:
	/* Undo the allocations and discard any vmcmds queued so far. */
	if (interp)
		pool_put(&namei_pool, interp);
	free(ph, M_TEMP, phsize);
	kill_vmcmds(&epp->ep_vmcmds);
	if (error == 0)
		return (ENOEXEC);
	return (error);
}
758 
759 /*
760  * Phase II of load. It is now safe to load the interpreter. Info collected
761  * when loading the program is available for setup of the interpreter.
762  */
763 int
764 exec_elf_fixup(struct proc *p, struct exec_package *epp)
765 {
766 	char	*interp;
767 	int	error = 0;
768 	struct	elf_args *ap;
769 	AuxInfo ai[ELF_AUX_ENTRIES], *a;
770 
771 	ap = epp->ep_args;
772 	if (ap == NULL) {
773 		return (0);
774 	}
775 
776 	interp = epp->ep_interp;
777 
778 	if (interp &&
779 	    (error = elf_load_file(p, interp, epp, ap)) != 0) {
780 		free(ap, M_TEMP, sizeof *ap);
781 		pool_put(&namei_pool, interp);
782 		kill_vmcmds(&epp->ep_vmcmds);
783 		return (error);
784 	}
785 	/*
786 	 * We have to do this ourselves...
787 	 */
788 	error = exec_process_vmcmds(p, epp);
789 
790 	/*
791 	 * Push extra arguments on the stack needed by dynamically
792 	 * linked binaries
793 	 */
794 	if (error == 0) {
795 		memset(&ai, 0, sizeof ai);
796 		a = ai;
797 
798 		a->au_id = AUX_phdr;
799 		a->au_v = ap->arg_phaddr;
800 		a++;
801 
802 		a->au_id = AUX_phent;
803 		a->au_v = ap->arg_phentsize;
804 		a++;
805 
806 		a->au_id = AUX_phnum;
807 		a->au_v = ap->arg_phnum;
808 		a++;
809 
810 		a->au_id = AUX_pagesz;
811 		a->au_v = PAGE_SIZE;
812 		a++;
813 
814 		a->au_id = AUX_base;
815 		a->au_v = ap->arg_interp;
816 		a++;
817 
818 		a->au_id = AUX_flags;
819 		a->au_v = 0;
820 		a++;
821 
822 		a->au_id = AUX_entry;
823 		a->au_v = ap->arg_entry;
824 		a++;
825 
826 		a->au_id = AUX_openbsd_timekeep;
827 		a->au_v = p->p_p->ps_timekeep;
828 		a++;
829 
830 		a->au_id = AUX_null;
831 		a->au_v = 0;
832 		a++;
833 
834 		error = copyout(ai, epp->ep_auxinfo, sizeof ai);
835 	}
836 	free(ap, M_TEMP, sizeof *ap);
837 	if (interp)
838 		pool_put(&namei_pool, interp);
839 	return (error);
840 }
841 
842 int
843 elf_os_pt_note_name(Elf_Note *np)
844 {
845 	int i, j;
846 
847 	for (i = 0; i < nitems(elf_note_names); i++) {
848 		size_t namlen = strlen(elf_note_names[i].name);
849 		if (np->namesz < namlen)
850 			continue;
851 		/* verify name padding (after the NUL) is NUL */
852 		for (j = namlen + 1; j < elfround(np->namesz); j++)
853 			if (((char *)(np + 1))[j] != '\0')
854 				continue;
855 		/* verify desc padding is NUL */
856 		for (j = np->descsz; j < elfround(np->descsz); j++)
857 			if (((char *)(np + 1))[j] != '\0')
858 				continue;
859 		if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0)
860 			return elf_note_names[i].id;
861 	}
862 	return (0);
863 }
864 
/*
 * Scan the program headers of the executable in epp for OS
 * identification notes.
 *
 * A PT_OPENBSD_WXNEEDED header sets EXEC_WXNEEDED in epp->ep_flags.
 * Every PT_NOTE segment of at most 1024 bytes is read and each note in
 * it is passed to elf_os_pt_note_name(); the returned bits are OR'ed
 * together and stored in *namesp.
 *
 * Returns 0 if a note identified the file as an OpenBSD executable and
 * ENOEXEC otherwise.  A read error stops the scan but is not returned
 * itself: the result depends only on the bits collected up to then.
 */
int
elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp)
{
	Elf_Phdr *hph, *ph;
	Elf_Note *np = NULL;
	size_t phsize, offset, pfilesz = 0, total;
	int error, names = 0;

	hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);
	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff,
	    hph, phsize)) != 0)
		goto out1;

	for (ph = hph;  ph < &hph[eh->e_phnum]; ph++) {
		if (ph->p_type == PT_OPENBSD_WXNEEDED) {
			epp->ep_flags |= EXEC_WXNEEDED;
			continue;
		}

		/* Only reasonably small note segments are examined. */
		if (ph->p_type != PT_NOTE || ph->p_filesz > 1024)
			continue;

		/*
		 * The buffer from the previous PT_NOTE is reused when the
		 * size is the same; otherwise it is freed and reallocated.
		 */
		if (np && ph->p_filesz != pfilesz) {
			free(np, M_TEMP, pfilesz);
			np = NULL;
		}
		if (!np)
			np = malloc(ph->p_filesz, M_TEMP, M_WAITOK);
		pfilesz = ph->p_filesz;
		if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset,
		    np, ph->p_filesz)) != 0)
			goto out2;

		/*
		 * Walk the notes in the segment.  Stop as soon as a note
		 * header, or the padded name and descriptor following it,
		 * would extend past the end of the data read.
		 */
		for (offset = 0; offset < ph->p_filesz; offset += total) {
			Elf_Note *np2 = (Elf_Note *)((char *)np + offset);

			if (offset + sizeof(Elf_Note) > ph->p_filesz)
				break;
			total = sizeof(Elf_Note) + elfround(np2->namesz) +
			    elfround(np2->descsz);
			if (offset + total > ph->p_filesz)
				break;
			names |= elf_os_pt_note_name(np2);
		}
	}

out2:
	free(np, M_TEMP, pfilesz);
out1:
	free(hph, M_TEMP, phsize);
	*namesp = names;
	return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC);
}
919 
920 /*
921  * Start of routines related to dumping core
922  */
923 
924 #ifdef SMALL_KERNEL
/*
 * SMALL_KERNEL stub: core dumps are not compiled in, so every request
 * fails with EPERM.  Both arguments are unused.
 */
int
coredump_elf(struct proc *p, void *cookie)
{
	return EPERM;
}
930 #else /* !SMALL_KERNEL */
931 
/*
 * State shared between coredump_elf() and the callbacks it hands to
 * uvm_coredump_walkmap().
 */
struct writesegs_state {
	off_t	notestart;	/* file offset where the notes begin */
	off_t	secstart;	/* notestart + notesize */
	off_t	secoff;		/* NOTE(review): not set in the visible code;
				 * presumably a running segment offset */
	struct	proc *p;	/* process being dumped */
	void	*iocookie;	/* cookie passed to coredump_write() */
	Elf_Phdr *psections;	/* array of npsections program headers */
	size_t	psectionslen;	/* size of psections[] in bytes */
	size_t	notesize;	/* size reported by coredump_notes_elf() */
	int	npsections;	/* segment count plus one for the PT_NOTE */
};
943 
/* Callbacks handed to uvm_coredump_walkmap() by coredump_elf(). */
uvm_coredump_setup_cb	coredump_setup_elf;
uvm_coredump_walk_cb	coredump_walk_elf;

/* Helpers that size and write the note data of a core file. */
int	coredump_notes_elf(struct proc *, void *, size_t *);
int	coredump_note_elf(struct proc *, void *, size_t *);
int	coredump_writenote_elf(struct proc *, void *, Elf_Note *,
	    const char *, void *);
951 
952 int
953 coredump_elf(struct proc *p, void *cookie)
954 {
955 #ifdef DIAGNOSTIC
956 	off_t offset;
957 #endif
958 	struct writesegs_state ws;
959 	size_t notesize;
960 	int error, i;
961 
962 	ws.p = p;
963 	ws.iocookie = cookie;
964 	ws.psections = NULL;
965 
966 	/*
967 	 * Walk the map to get all the segment offsets and lengths,
968 	 * write out the ELF header.
969 	 */
970 	error = uvm_coredump_walkmap(p, coredump_setup_elf,
971 	    coredump_walk_elf, &ws);
972 	if (error)
973 		goto out;
974 
975 	error = coredump_write(cookie, UIO_SYSSPACE, ws.psections,
976 	    ws.psectionslen);
977 	if (error)
978 		goto out;
979 
980 	/* Write out the notes. */
981 	error = coredump_notes_elf(p, cookie, &notesize);
982 	if (error)
983 		goto out;
984 
985 #ifdef DIAGNOSTIC
986 	if (notesize != ws.notesize)
987 		panic("coredump: notesize changed: %zu != %zu",
988 		    ws.notesize, notesize);
989 	offset = ws.notestart + notesize;
990 	if (offset != ws.secstart)
991 		panic("coredump: offset %lld != secstart %lld",
992 		    (long long) offset, (long long) ws.secstart);
993 #endif
994 
995 	/* Pass 3: finally, write the sections themselves. */
996 	for (i = 0; i < ws.npsections - 1; i++) {
997 		Elf_Phdr *pent = &ws.psections[i];
998 		if (pent->p_filesz == 0)
999 			continue;
1000 
1001 #ifdef DIAGNOSTIC
1002 		if (offset != pent->p_offset)
1003 			panic("coredump: offset %lld != p_offset[%d] %lld",
1004 			    (long long) offset, i,
1005 			    (long long) pent->p_filesz);
1006 #endif
1007 
1008 		error = coredump_write(cookie, UIO_USERSPACE,
1009 		    (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz);
1010 		if (error)
1011 			goto out;
1012 
1013 		coredump_unmap(cookie, (vaddr_t)pent->p_vaddr,
1014 		    (vaddr_t)pent->p_vaddr + pent->p_filesz);
1015 
1016 #ifdef DIAGNOSTIC
1017 		offset += ws.psections[i].p_filesz;
1018 #endif
1019 	}
1020 
1021 out:
1022 	free(ws.psections, M_TEMP, ws.psectionslen);
1023 	return (error);
1024 }
1025 
1026 
1027 /*
1028  * Normally we lay out core files like this:
1029  *	[ELF Header] [Program headers] [Notes] [data for PT_LOAD segments]
1030  *
1031  * However, if there's >= 65535 segments then it overflows the field
1032  * in the ELF header, so the standard specifies putting a magic
1033  * number there and saving the real count in the .sh_info field of
1034  * the first *section* header...which requires generating a section
1035  * header.  To avoid confusing tools, we include an .shstrtab section
1036  * as well so all the indexes look valid.  So in this case we lay
1037  * out the core file like this:
1038  *	[ELF Header] [Section Headers] [.shstrtab] [Program headers] \
1039  *	[Notes] [data for PT_LOAD segments]
1040  *
1041  * The 'shstrtab' structure below is data for the second of the two
1042  * section headers, plus the .shstrtab itself, in one const buffer.
1043  */
/*
 * Second section header plus the string table it describes, written
 * out as one contiguous buffer by coredump_setup_elf().
 */
static const struct {
    Elf_Shdr	shdr;
    char	shstrtab[sizeof(ELF_SHSTRTAB) + 1];	/* +1 for the leading NUL */
} shstrtab = {
    .shdr = {
	.sh_name = 1,			/* offset in .shstrtab below */
	.sh_type = SHT_STRTAB,
	/* The table follows the ELF header and the two section headers. */
	.sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr),
	.sh_size = sizeof(ELF_SHSTRTAB) + 1,
	.sh_addralign = 1,
    },
    /* A leading NUL byte, then the section name at offset 1. */
    .shstrtab = "\0" ELF_SHSTRTAB,
};
1057 
1058 int
1059 coredump_setup_elf(int segment_count, void *cookie)
1060 {
1061 	Elf_Ehdr ehdr;
1062 	struct writesegs_state *ws = cookie;
1063 	Elf_Phdr *note;
1064 	int error;
1065 
1066 	/* Get the count of segments, plus one for the PT_NOTE */
1067 	ws->npsections = segment_count + 1;
1068 
1069 	/* Get the size of the notes. */
1070 	error = coredump_notes_elf(ws->p, NULL, &ws->notesize);
1071 	if (error)
1072 		return error;
1073 
1074 	/* Setup the ELF header */
1075 	memset(&ehdr, 0, sizeof(ehdr));
1076 	memcpy(ehdr.e_ident, ELFMAG, SELFMAG);
1077 	ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS;
1078 	ehdr.e_ident[EI_DATA] = ELF_TARG_DATA;
1079 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
1080 	/* XXX Should be the OSABI/ABI version of the executable. */
1081 	ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
1082 	ehdr.e_ident[EI_ABIVERSION] = 0;
1083 	ehdr.e_type = ET_CORE;
1084 	/* XXX This should be the e_machine of the executable. */
1085 	ehdr.e_machine = ELF_TARG_MACH;
1086 	ehdr.e_version = EV_CURRENT;
1087 	ehdr.e_entry = 0;
1088 	ehdr.e_flags = 0;
1089 	ehdr.e_ehsize = sizeof(ehdr);
1090 	ehdr.e_phentsize = sizeof(Elf_Phdr);
1091 
1092 	if (ws->npsections < PN_XNUM) {
1093 		ehdr.e_phoff = sizeof(ehdr);
1094 		ehdr.e_shoff = 0;
1095 		ehdr.e_phnum = ws->npsections;
1096 		ehdr.e_shentsize = 0;
1097 		ehdr.e_shnum = 0;
1098 		ehdr.e_shstrndx = 0;
1099 	} else {
1100 		/* too many segments, use extension setup */
1101 		ehdr.e_shoff = sizeof(ehdr);
1102 		ehdr.e_phnum = PN_XNUM;
1103 		ehdr.e_shentsize = sizeof(Elf_Shdr);
1104 		ehdr.e_shnum = 2;
1105 		ehdr.e_shstrndx = 1;
1106 		ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size;
1107 	}
1108 
1109 	/* Write out the ELF header. */
1110 	error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr));
1111 	if (error)
1112 		return error;
1113 
1114 	/*
1115 	 * If an section header is needed to store extension info, write
1116 	 * it out after the ELF header and before the program header.
1117 	 */
1118 	if (ehdr.e_shnum != 0) {
1119 		Elf_Shdr shdr = { .sh_info = ws->npsections };
1120 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr,
1121 		    sizeof shdr);
1122 		if (error)
1123 			return error;
1124 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab,
1125 		    sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab));
1126 		if (error)
1127 			return error;
1128 	}
1129 
1130 	/*
1131 	 * Allocate the segment header array and setup to collect
1132 	 * the section sizes and offsets
1133 	 */
1134 	ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr),
1135 	    M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO);
1136 	if (ws->psections == NULL)
1137 		return ENOMEM;
1138 	ws->psectionslen = ws->npsections * sizeof(Elf_Phdr);
1139 
1140 	ws->notestart = ehdr.e_phoff + ws->psectionslen;
1141 	ws->secstart = ws->notestart + ws->notesize;
1142 	ws->secoff = ws->secstart;
1143 
1144 	/* Fill in the PT_NOTE segment header in the last slot */
1145 	note = &ws->psections[ws->npsections - 1];
1146 	note->p_type = PT_NOTE;
1147 	note->p_offset = ws->notestart;
1148 	note->p_vaddr = 0;
1149 	note->p_paddr = 0;
1150 	note->p_filesz = ws->notesize;
1151 	note->p_memsz = 0;
1152 	note->p_flags = PF_R;
1153 	note->p_align = ELFROUNDSIZE;
1154 
1155 	return (0);
1156 }
1157 
1158 int
1159 coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot,
1160     int nsegment, void *cookie)
1161 {
1162 	struct writesegs_state *ws = cookie;
1163 	Elf_Phdr phdr;
1164 	vsize_t size, realsize;
1165 
1166 	size = end - start;
1167 	realsize = realend - start;
1168 
1169 	phdr.p_type = PT_LOAD;
1170 	phdr.p_offset = ws->secoff;
1171 	phdr.p_vaddr = start;
1172 	phdr.p_paddr = 0;
1173 	phdr.p_filesz = realsize;
1174 	phdr.p_memsz = size;
1175 	phdr.p_flags = 0;
1176 	if (prot & PROT_READ)
1177 		phdr.p_flags |= PF_R;
1178 	if (prot & PROT_WRITE)
1179 		phdr.p_flags |= PF_W;
1180 	if (prot & PROT_EXEC)
1181 		phdr.p_flags |= PF_X;
1182 	phdr.p_align = PAGE_SIZE;
1183 
1184 	ws->secoff += phdr.p_filesz;
1185 	ws->psections[nsegment] = phdr;
1186 
1187 	return (0);
1188 }
1189 
1190 int
1191 coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
1192 {
1193 	struct ps_strings pss;
1194 	struct iovec iov;
1195 	struct uio uio;
1196 	struct elfcore_procinfo cpi;
1197 	Elf_Note nhdr;
1198 	struct process *pr = p->p_p;
1199 	struct proc *q;
1200 	size_t size, notesize;
1201 	int error;
1202 
1203 	size = 0;
1204 
1205 	/* First, write an elfcore_procinfo. */
1206 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1207 	    elfround(sizeof(cpi));
1208 	if (iocookie) {
1209 		memset(&cpi, 0, sizeof(cpi));
1210 
1211 		cpi.cpi_version = ELFCORE_PROCINFO_VERSION;
1212 		cpi.cpi_cpisize = sizeof(cpi);
1213 		cpi.cpi_signo = p->p_sisig;
1214 		cpi.cpi_sigcode = p->p_sicode;
1215 
1216 		cpi.cpi_sigpend = p->p_siglist | pr->ps_siglist;
1217 		cpi.cpi_sigmask = p->p_sigmask;
1218 		cpi.cpi_sigignore = pr->ps_sigacts->ps_sigignore;
1219 		cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
1220 
1221 		cpi.cpi_pid = pr->ps_pid;
1222 		cpi.cpi_ppid = pr->ps_ppid;
1223 		cpi.cpi_pgrp = pr->ps_pgid;
1224 		if (pr->ps_session->s_leader)
1225 			cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
1226 		else
1227 			cpi.cpi_sid = 0;
1228 
1229 		cpi.cpi_ruid = p->p_ucred->cr_ruid;
1230 		cpi.cpi_euid = p->p_ucred->cr_uid;
1231 		cpi.cpi_svuid = p->p_ucred->cr_svuid;
1232 
1233 		cpi.cpi_rgid = p->p_ucred->cr_rgid;
1234 		cpi.cpi_egid = p->p_ucred->cr_gid;
1235 		cpi.cpi_svgid = p->p_ucred->cr_svgid;
1236 
1237 		(void)strlcpy(cpi.cpi_name, pr->ps_comm, sizeof(cpi.cpi_name));
1238 
1239 		nhdr.namesz = sizeof("OpenBSD");
1240 		nhdr.descsz = sizeof(cpi);
1241 		nhdr.type = NT_OPENBSD_PROCINFO;
1242 
1243 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1244 		    "OpenBSD", &cpi);
1245 		if (error)
1246 			return (error);
1247 	}
1248 	size += notesize;
1249 
1250 	/* Second, write an NT_OPENBSD_AUXV note. */
1251 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1252 	    elfround(ELF_AUX_WORDS * sizeof(char *));
1253 	if (iocookie) {
1254 		iov.iov_base = &pss;
1255 		iov.iov_len = sizeof(pss);
1256 		uio.uio_iov = &iov;
1257 		uio.uio_iovcnt = 1;
1258 		uio.uio_offset = (off_t)pr->ps_strings;
1259 		uio.uio_resid = sizeof(pss);
1260 		uio.uio_segflg = UIO_SYSSPACE;
1261 		uio.uio_rw = UIO_READ;
1262 		uio.uio_procp = NULL;
1263 
1264 		error = uvm_io(&p->p_vmspace->vm_map, &uio, 0);
1265 		if (error)
1266 			return (error);
1267 
1268 		if (pss.ps_envstr == NULL)
1269 			return (EIO);
1270 
1271 		nhdr.namesz = sizeof("OpenBSD");
1272 		nhdr.descsz = ELF_AUX_WORDS * sizeof(char *);
1273 		nhdr.type = NT_OPENBSD_AUXV;
1274 
1275 		error = coredump_write(iocookie, UIO_SYSSPACE,
1276 		    &nhdr, sizeof(nhdr));
1277 		if (error)
1278 			return (error);
1279 
1280 		error = coredump_write(iocookie, UIO_SYSSPACE,
1281 		    "OpenBSD", elfround(nhdr.namesz));
1282 		if (error)
1283 			return (error);
1284 
1285 		error = coredump_write(iocookie, UIO_USERSPACE,
1286 		    pss.ps_envstr + pss.ps_nenvstr + 1, nhdr.descsz);
1287 		if (error)
1288 			return (error);
1289 	}
1290 	size += notesize;
1291 
1292 #ifdef PT_WCOOKIE
1293 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1294 	    elfround(sizeof(register_t));
1295 	if (iocookie) {
1296 		register_t wcookie;
1297 
1298 		nhdr.namesz = sizeof("OpenBSD");
1299 		nhdr.descsz = sizeof(register_t);
1300 		nhdr.type = NT_OPENBSD_WCOOKIE;
1301 
1302 		wcookie = process_get_wcookie(p);
1303 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1304 		    "OpenBSD", &wcookie);
1305 		if (error)
1306 			return (error);
1307 	}
1308 	size += notesize;
1309 #endif
1310 
1311 	/*
1312 	 * Now write the register info for the thread that caused the
1313 	 * coredump.
1314 	 */
1315 	error = coredump_note_elf(p, iocookie, &notesize);
1316 	if (error)
1317 		return (error);
1318 	size += notesize;
1319 
1320 	/*
1321 	 * Now, for each thread, write the register info and any other
1322 	 * per-thread notes.  Since we're dumping core, all the other
1323 	 * threads in the process have been stopped and the list can't
1324 	 * change.
1325 	 */
1326 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
1327 		if (q == p)		/* we've taken care of this thread */
1328 			continue;
1329 		error = coredump_note_elf(q, iocookie, &notesize);
1330 		if (error)
1331 			return (error);
1332 		size += notesize;
1333 	}
1334 
1335 	*sizep = size;
1336 	return (0);
1337 }
1338 
1339 int
1340 coredump_note_elf(struct proc *p, void *iocookie, size_t *sizep)
1341 {
1342 	Elf_Note nhdr;
1343 	int size, notesize, error;
1344 	int namesize;
1345 	char name[64+ELFROUNDSIZE];
1346 	struct reg intreg;
1347 #ifdef PT_GETFPREGS
1348 	struct fpreg freg;
1349 #endif
1350 
1351 	size = 0;
1352 
1353 	snprintf(name, sizeof(name)-ELFROUNDSIZE, "%s@%d",
1354 	    "OpenBSD", p->p_tid + THREAD_PID_OFFSET);
1355 	namesize = strlen(name) + 1;
1356 	memset(name + namesize, 0, elfround(namesize) - namesize);
1357 
1358 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(intreg));
1359 	if (iocookie) {
1360 		error = process_read_regs(p, &intreg);
1361 		if (error)
1362 			return (error);
1363 
1364 		nhdr.namesz = namesize;
1365 		nhdr.descsz = sizeof(intreg);
1366 		nhdr.type = NT_OPENBSD_REGS;
1367 
1368 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1369 		    name, &intreg);
1370 		if (error)
1371 			return (error);
1372 
1373 	}
1374 	size += notesize;
1375 
1376 #ifdef PT_GETFPREGS
1377 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(freg));
1378 	if (iocookie) {
1379 		error = process_read_fpregs(p, &freg);
1380 		if (error)
1381 			return (error);
1382 
1383 		nhdr.namesz = namesize;
1384 		nhdr.descsz = sizeof(freg);
1385 		nhdr.type = NT_OPENBSD_FPREGS;
1386 
1387 		error = coredump_writenote_elf(p, iocookie, &nhdr, name, &freg);
1388 		if (error)
1389 			return (error);
1390 	}
1391 	size += notesize;
1392 #endif
1393 
1394 	*sizep = size;
1395 	/* XXX Add hook for machdep per-LWP notes. */
1396 	return (0);
1397 }
1398 
1399 int
1400 coredump_writenote_elf(struct proc *p, void *cookie, Elf_Note *nhdr,
1401     const char *name, void *data)
1402 {
1403 	int error;
1404 
1405 	error = coredump_write(cookie, UIO_SYSSPACE, nhdr, sizeof(*nhdr));
1406 	if (error)
1407 		return error;
1408 
1409 	error = coredump_write(cookie, UIO_SYSSPACE, name,
1410 	    elfround(nhdr->namesz));
1411 	if (error)
1412 		return error;
1413 
1414 	return coredump_write(cookie, UIO_SYSSPACE, data, nhdr->descsz);
1415 }
1416 #endif /* !SMALL_KERNEL */
1417