xref: /openbsd-src/sys/kern/exec_elf.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: exec_elf.c,v 1.183 2023/07/12 19:34:14 jasper Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 Per Fogelstrom
5  * All rights reserved.
6  *
7  * Copyright (c) 1994 Christos Zoulas
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  */
33 
34 /*
35  * Copyright (c) 2001 Wasabi Systems, Inc.
36  * All rights reserved.
37  *
38  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed for the NetBSD Project by
51  *	Wasabi Systems, Inc.
52  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
53  *    or promote products derived from this software without specific prior
54  *    written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/malloc.h>
73 #include <sys/pool.h>
74 #include <sys/mount.h>
75 #include <sys/namei.h>
76 #include <sys/vnode.h>
77 #include <sys/core.h>
78 #include <sys/exec.h>
79 #include <sys/exec_elf.h>
80 #include <sys/fcntl.h>
81 #include <sys/ptrace.h>
82 #include <sys/signalvar.h>
83 #include <sys/pledge.h>
84 
85 #include <sys/mman.h>
86 
87 #include <uvm/uvm_extern.h>
88 
89 #include <machine/reg.h>
90 #include <machine/exec.h>
91 
92 int	elf_load_file(struct proc *, char *, struct exec_package *,
93 	    struct elf_args *);
94 int	elf_check_header(Elf_Ehdr *);
95 int	elf_read_from(struct proc *, struct vnode *, u_long, void *, int);
96 void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
97 	    Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
98 int	elf_os_pt_note_name(Elf_Note *);
99 int	elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);
100 
/*
 * round up and down to page boundaries.
 * Both macros use mask arithmetic, so `b' has to be a power of two;
 * `a' and `b' are each evaluated more than once.
 */
#define ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
#define ELF_TRUNC(a, b)		((a) & ~((b) - 1))

/*
 * We limit the number of program headers to 32, this should
 * be a reasonable limit for ELF, the most we have seen so far is 12
 */
#define ELF_MAX_VALID_PHDR 32

/* Bit reported for a PT_NOTE entry whose name is "OpenBSD". */
#define ELF_NOTE_NAME_OPENBSD	0x01

/*
 * Table mapping the name string of an ELF note to the ELF_NOTE_NAME_*
 * bit that elf_os_pt_note_name() returns for it.
 */
struct elf_note_name {
	char *name;	/* NUL-terminated note name to match */
	int id;		/* ELF_NOTE_NAME_* bit for that name */
} elf_note_names[] = {
	{ "OpenBSD",	ELF_NOTE_NAME_OPENBSD },
};

/* Note name and desc fields are padded to a multiple of sizeof(Elf_Word). */
#define	ELFROUNDSIZE	sizeof(Elf_Word)
#define	elfround(x)	roundup((x), ELFROUNDSIZE)
122 
123 
124 /*
125  * Check header for validity; return 0 for ok, ENOEXEC if error
126  */
127 int
128 elf_check_header(Elf_Ehdr *ehdr)
129 {
130 	/*
131 	 * We need to check magic, class size, endianness, and version before
132 	 * we look at the rest of the Elf_Ehdr structure. These few elements
133 	 * are represented in a machine independent fashion.
134 	 */
135 	if (!IS_ELF(*ehdr) ||
136 	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
137 	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
138 	    ehdr->e_ident[EI_VERSION] != ELF_TARG_VER)
139 		return (ENOEXEC);
140 
141 	/* Now check the machine dependent header */
142 	if (ehdr->e_machine != ELF_TARG_MACH ||
143 	    ehdr->e_version != ELF_TARG_VER)
144 		return (ENOEXEC);
145 
146 	/* Don't allow an insane amount of sections. */
147 	if (ehdr->e_phnum > ELF_MAX_VALID_PHDR)
148 		return (ENOEXEC);
149 
150 	return (0);
151 }
152 
/*
 * Load a psection at the appropriate address
 *
 * Issues the NEW_VMCMD2() commands that map the segment described by
 * `ph' from vnode `vp' onto the command set `vcset'.
 *
 *	vcset	command set handed to every NEW_VMCMD2() call
 *	vp	vnode handed to the file-backed mapping commands
 *	ph	program header of the segment to load
 *	addr	in: requested load address, or ELF_NO_ADDR to use
 *		ph->p_vaddr; out: that address truncated to ph->p_align
 *		(only when p_align > 1)
 *	size	out: ph->p_memsz plus the alignment slack `diff'
 *	prot	in/out: PROT_* bits derived from ph->p_flags are OR'ed in
 *	flags	VMCMD_* flags; VMCMD_IMMUTABLE may be added or removed
 *		here before the flags are passed to the commands
 */
void
elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
    Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags)
{
	u_long msize, lsize, psize, rm, rf;
	long diff, offset, bdiff;
	Elf_Addr base;

	/*
	 * If the user specified an address, then we load there.
	 *
	 * `diff' is the distance from the (alignment-truncated) *addr to
	 * where p_vaddr lands; `base' is the page-aligned address at which
	 * the file-backed mapping starts.
	 */
	if (*addr != ELF_NO_ADDR) {
		if (ph->p_align > 1) {
			*addr = ELF_TRUNC(*addr, ph->p_align);
			diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
			/* page align vaddr */
			base = *addr + trunc_page(ph->p_vaddr)
			    - ELF_TRUNC(ph->p_vaddr, ph->p_align);
		} else {
			diff = 0;
			base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr;
		}
	} else {
		/* No address requested: load at the header's own p_vaddr. */
		*addr = ph->p_vaddr;
		if (ph->p_align > 1)
			*addr = ELF_TRUNC(*addr, ph->p_align);
		base = trunc_page(ph->p_vaddr);
		diff = ph->p_vaddr - *addr;
	}
	/* Offset of p_vaddr within its page. */
	bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr);

	/*
	 * Enforce W^X and map W|X segments without X permission
	 * initially.  The dynamic linker will make these read-only
	 * and add back X permission after relocation processing.
	 * Static executables with W|X segments will probably crash.
	 */
	*prot |= (ph->p_flags & PF_R) ? PROT_READ : 0;
	*prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0;
	if ((ph->p_flags & PF_W) == 0)
		*prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0;

	/*
	 * Apply immutability as much as possible, but not text/rodata
	 * segments of textrel binaries, or RELRO or PT_OPENBSD_MUTABLE
	 * sections, or LOADS marked PF_OPENBSD_MUTABLE, or LOADS which
	 * violate W^X.
	 * Userland (meaning crt0 or ld.so) will repair those regions.
	 */
	if ((ph->p_flags & (PF_X | PF_W)) != (PF_X | PF_W) &&
	    ((ph->p_flags & PF_OPENBSD_MUTABLE) == 0))
		flags |= VMCMD_IMMUTABLE;
	if ((flags & VMCMD_TEXTREL) && (ph->p_flags & PF_W) == 0)
		flags &= ~VMCMD_IMMUTABLE;

	/*
	 * msize: in-memory size measured from *addr.
	 * offset/lsize: file offset and length of the file-backed part,
	 * both widened so the mapping starts on a page boundary.
	 * psize: lsize rounded up to whole pages.
	 */
	msize = ph->p_memsz + diff;
	offset = ph->p_offset - bdiff;
	lsize = ph->p_filesz + bdiff;
	psize = round_page(lsize);

	/*
	 * Because the pagedvn pager can't handle zero fill of the last
	 * data page if it's not page aligned we map the last page readvn.
	 */
	if (ph->p_flags & PF_W) {
		/* Whole pages go through pagedvn, the partial tail via readvn. */
		psize = trunc_page(lsize);
		if (psize > 0)
			NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp,
			    offset, *prot, flags);
		if (psize != lsize) {
			NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize,
			    base + psize, vp, offset + psize, *prot, flags);
		}
	} else {
		NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset,
		    *prot, flags);
	}

	/*
	 * Check if we need to extend the size of the segment
	 *
	 * rm/rf are the page-rounded ends of the in-memory and the
	 * file-backed extents; any gap between them is mapped zero-filled.
	 */
	rm = round_page(*addr + ph->p_memsz + diff);
	rf = round_page(*addr + ph->p_filesz + diff);

	if (rm != rf) {
		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0,
		    *prot, flags);
	}
	*size = msize;
}
246 
247 /*
248  * Read from vnode into buffer at offset.
249  */
250 int
251 elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf,
252     int size)
253 {
254 	int error;
255 	size_t resid;
256 
257 	if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE,
258 	    0, p->p_ucred, &resid, p)) != 0)
259 		return error;
260 	/*
261 	 * See if we got all of it
262 	 */
263 	if (resid != 0)
264 		return (ENOEXEC);
265 	return (0);
266 }
267 
268 /*
269  * Load a file (interpreter/library) pointed to by path [stolen from
270  * coff_load_shlib()]. Made slightly generic so it might be used externally.
271  */
272 int
273 elf_load_file(struct proc *p, char *path, struct exec_package *epp,
274     struct elf_args *ap)
275 {
276 	int error, i;
277 	struct nameidata nd;
278 	Elf_Ehdr eh;
279 	Elf_Phdr *ph = NULL;
280 	u_long phsize = 0;
281 	Elf_Addr addr;
282 	struct vnode *vp;
283 	Elf_Phdr *base_ph = NULL;
284 	struct interp_ld_sec {
285 		Elf_Addr vaddr;
286 		u_long memsz;
287 	} loadmap[ELF_MAX_VALID_PHDR];
288 	int nload, idx = 0;
289 	Elf_Addr pos;
290 	int file_align;
291 	int loop;
292 	size_t randomizequota = ELF_RANDOMIZE_LIMIT;
293 
294 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
295 	nd.ni_pledge = PLEDGE_RPATH;
296 	nd.ni_unveil = UNVEIL_READ;
297 	if ((error = namei(&nd)) != 0) {
298 		return (error);
299 	}
300 	vp = nd.ni_vp;
301 	if (vp->v_type != VREG) {
302 		error = EACCES;
303 		goto bad;
304 	}
305 	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
306 		goto bad;
307 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
308 		error = EACCES;
309 		goto bad;
310 	}
311 	if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)
312 		goto bad1;
313 	if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0)
314 		goto bad1;
315 
316 	if (elf_check_header(&eh) || eh.e_type != ET_DYN) {
317 		error = ENOEXEC;
318 		goto bad1;
319 	}
320 
321 	ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
322 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
323 
324 	if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0)
325 		goto bad1;
326 
327 	for (i = 0; i < eh.e_phnum; i++) {
328 		if ((ph[i].p_align > 1) && !powerof2(ph[i].p_align)) {
329 			error = EINVAL;
330 			goto bad1;
331 		}
332 
333 		if (ph[i].p_type == PT_LOAD) {
334 			if (ph[i].p_filesz > ph[i].p_memsz ||
335 			    ph[i].p_memsz == 0) {
336 				error = EINVAL;
337 				goto bad1;
338 			}
339 			loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr);
340 			loadmap[idx].memsz = round_page (ph[i].p_vaddr +
341 			    ph[i].p_memsz - loadmap[idx].vaddr);
342 			file_align = ph[i].p_align;
343 			idx++;
344 		}
345 	}
346 	nload = idx;
347 
348 	/*
349 	 * Load the interpreter where a non-fixed mmap(NULL, ...)
350 	 * would (i.e. something safely out of the way).
351 	 */
352 	pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS,
353 	    VM_MAXUSER_ADDRESS);
354 	pos = ELF_ROUND(pos, file_align);
355 
356 	loop = 0;
357 	for (i = 0; i < nload;/**/) {
358 		vaddr_t	addr;
359 		struct	uvm_object *uobj;
360 		off_t	uoff;
361 		size_t	size;
362 
363 #ifdef this_needs_fixing
364 		if (i == 0) {
365 			uobj = &vp->v_uvm.u_obj;
366 			/* need to fix uoff */
367 		} else {
368 #endif
369 			uobj = NULL;
370 			uoff = 0;
371 #ifdef this_needs_fixing
372 		}
373 #endif
374 
375 		addr = trunc_page(pos + loadmap[i].vaddr);
376 		size =  round_page(addr + loadmap[i].memsz) - addr;
377 
378 		/* CRAP - map_findspace does not avoid daddr+BRKSIZ */
379 		if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
380 		    (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ))
381 			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
382 			    BRKSIZ);
383 
384 		if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
385 		    (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
386 			if (loop == 0) {
387 				loop = 1;
388 				i = 0;
389 				pos = 0;
390 				continue;
391 			}
392 			error = ENOMEM;
393 			goto bad1;
394 		}
395 		if (addr != pos + loadmap[i].vaddr) {
396 			/* base changed. */
397 			pos = addr - trunc_page(loadmap[i].vaddr);
398 			pos = ELF_ROUND(pos,file_align);
399 			i = 0;
400 			continue;
401 		}
402 
403 		i++;
404 	}
405 
406 	/*
407 	 * Load all the necessary sections
408 	 */
409 	for (i = 0; i < eh.e_phnum; i++) {
410 		Elf_Addr size = 0;
411 		int prot = 0;
412 		int flags;
413 
414 		switch (ph[i].p_type) {
415 		case PT_LOAD:
416 			if (base_ph == NULL) {
417 				flags = VMCMD_BASE;
418 				addr = pos;
419 				base_ph = &ph[i];
420 			} else {
421 				flags = VMCMD_RELATIVE;
422 				addr = ph[i].p_vaddr - base_ph->p_vaddr;
423 			}
424 			elf_load_psection(&epp->ep_vmcmds, nd.ni_vp,
425 			    &ph[i], &addr, &size, &prot, flags | VMCMD_SYSCALL);
426 			/* If entry is within this section it must be text */
427 			if (eh.e_entry >= ph[i].p_vaddr &&
428 			    eh.e_entry < (ph[i].p_vaddr + size)) {
429  				epp->ep_entry = addr + eh.e_entry -
430 				    ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align);
431 				if (flags == VMCMD_RELATIVE)
432 					epp->ep_entry += pos;
433 				ap->arg_interp = pos;
434 			}
435 			addr += size;
436 			break;
437 
438 		case PT_PHDR:
439 		case PT_NOTE:
440 			break;
441 
442 		case PT_OPENBSD_RANDOMIZE:
443 			if (ph[i].p_memsz > randomizequota) {
444 				error = ENOMEM;
445 				goto bad1;
446 			}
447 			randomizequota -= ph[i].p_memsz;
448 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
449 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
450 			break;
451 
452 		case PT_DYNAMIC:
453 #if defined (__mips__)
454 			/* DT_DEBUG is not ready on mips */
455 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
456 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
457 #endif
458 			break;
459 		case PT_GNU_RELRO:
460 		case PT_OPENBSD_MUTABLE:
461 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
462 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
463 			break;
464 
465 		default:
466 			break;
467 		}
468 	}
469 
470 	vn_marktext(nd.ni_vp);
471 
472 bad1:
473 	VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p);
474 bad:
475 	free(ph, M_TEMP, phsize);
476 
477 	vput(nd.ni_vp);
478 	return (error);
479 }
480 
/*
 * Prepare an Elf binary's exec package
 *
 * First, set up the various offsets/lengths in the exec package.
 *
 * Then, mark the text image busy (so it can be demand paged) or error out if
 * this is not possible.  Finally, set up vmcmds for the text, data, bss, and
 * stack segments.
 *
 *	p	process performing the exec
 *	epp	exec package; epp->ep_hdr holds the start of the file and
 *		epp->ep_vp its vnode
 *
 * Returns the result of exec_setup_stack() on success.  On failure the
 * queued vmcmds are discarded and an errno value is returned; a failure
 * path that did not set a specific error yields ENOEXEC.
 */
int
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
	Elf_Ehdr *eh = epp->ep_hdr;
	Elf_Phdr *ph, *pp, *base_ph = NULL;
	Elf_Addr phdr = 0, exe_base = 0;
	int error, i, has_phdr = 0, names = 0, textrel = 0;
	char *interp = NULL;
	u_long phsize;
	size_t randomizequota = ELF_RANDOMIZE_LIMIT;

	/* The header buffer must hold at least a complete Elf_Ehdr. */
	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
		return (ENOEXEC);

	if (elf_check_header(eh) ||
	   (eh->e_type != ET_EXEC && eh->e_type != ET_DYN))
		return (ENOEXEC);

	/*
	 * check if vnode is in open for writing, because we want to demand-
	 * page out of it.  if it is, don't do it, for various reasons.
	 */
	if (epp->ep_vp->v_writecount != 0) {
#ifdef DIAGNOSTIC
		if (epp->ep_vp->v_flag & VTEXT)
			panic("exec: a VTEXT vnode has writecount != 0");
#endif
		return (ETXTBSY);
	}
	/*
	 * Allocate space to hold all the program headers, and read them
	 * from the file
	 */
	ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);

	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph,
	    phsize)) != 0)
		goto bad;

	/* ELF_NO_ADDR marks "no text/data segment seen yet". */
	epp->ep_tsize = ELF_NO_ADDR;
	epp->ep_dsize = ELF_NO_ADDR;

	/*
	 * First pass: validate the headers, fetch the interpreter path
	 * (first PT_INTERP only), and remember the first PT_LOAD and
	 * whether a PT_PHDR is present.
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		if ((pp->p_align > 1) && !powerof2(pp->p_align)) {
			error = EINVAL;
			goto bad;
		}

		if (pp->p_type == PT_INTERP && !interp) {
			/*
			 * The bare "goto bad"s here leave error at 0, which
			 * the bad: label turns into ENOEXEC.
			 */
			if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN)
				goto bad;
			interp = pool_get(&namei_pool, PR_WAITOK);
			if ((error = elf_read_from(p, epp->ep_vp,
			    pp->p_offset, interp, pp->p_filesz)) != 0) {
				goto bad;
			}
			/* The path read from the file must be NUL-terminated. */
			if (interp[pp->p_filesz - 1] != '\0')
				goto bad;
		} else if (pp->p_type == PT_LOAD) {
			if (pp->p_filesz > pp->p_memsz ||
			    pp->p_memsz == 0) {
				error = EINVAL;
				goto bad;
			}
			if (base_ph == NULL)
				base_ph = pp;
		} else if (pp->p_type == PT_PHDR) {
			has_phdr = 1;
		}
	}

	/*
	 * Verify this is an OpenBSD executable.  If it's marked that way
	 * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment.
	 */
	if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0)
		goto bad;
	if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD)
		names |= ELF_NOTE_NAME_OPENBSD;

	if (eh->e_type == ET_DYN) {
		/* need phdr and load sections for PIE */
		if (!has_phdr || base_ph == NULL || base_ph->p_vaddr != 0) {
			error = EINVAL;
			goto bad;
		}
		/* randomize exe_base for PIE */
		exe_base = uvm_map_pie(base_ph->p_align);

		/*
		 * Check if DYNAMIC contains DT_TEXTREL
		 *
		 * A PT_DYNAMIC larger than 64KB, or one that cannot be
		 * read, is simply not scanned; neither case fails the exec.
		 */
		for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
			Elf_Dyn *dt;
			int j;

			switch (pp->p_type) {
			case PT_DYNAMIC:
				if (pp->p_filesz > 64*1024)
					break;
				dt = malloc(pp->p_filesz, M_TEMP, M_WAITOK);
				error = vn_rdwr(UIO_READ, epp->ep_vp,
				    (caddr_t)dt, pp->p_filesz, pp->p_offset,
				    UIO_SYSSPACE, IO_UNIT, p->p_ucred, NULL, p);
				if (error) {
					free(dt, M_TEMP, pp->p_filesz);
					break;
				}
				for (j = 0; j < pp->p_filesz / sizeof(*dt); j++) {
					if (dt[j].d_tag == DT_TEXTREL) {
						textrel = VMCMD_TEXTREL;
						break;
					}
				}
				free(dt, M_TEMP, pp->p_filesz);
				break;
			default:
				break;
			}
		}
	}

	/*
	 * Load all the necessary sections
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		Elf_Addr addr, size = 0;
		int prot = 0, syscall = 0;
		int flags = 0;

		switch (pp->p_type) {
		case PT_LOAD:
			/*
			 * With a nonzero exe_base (PIE) the first PT_LOAD is
			 * placed at exe_base and the others relative to it;
			 * otherwise each segment loads at its own p_vaddr.
			 */
			if (exe_base != 0) {
				if (pp == base_ph) {
					flags = VMCMD_BASE;
					addr = exe_base;
				} else {
					flags = VMCMD_RELATIVE;
					addr = pp->p_vaddr - base_ph->p_vaddr;
				}
			} else
				addr = ELF_NO_ADDR;

			/*
			 * Permit system calls in main-text static binaries.
			 * Also block the ld.so syscall-grant
			 */
			if (interp == NULL) {
				syscall = VMCMD_SYSCALL;
				p->p_vmspace->vm_map.flags |= VM_MAP_SYSCALL_ONCE;
			}

			/*
			 * Calculates size of text and data segments
			 * by starting at first and going to end of last.
			 * 'rwx' sections are treated as data.
			 * this is correct for BSS_PLT, but may not be
			 * for DATA_PLT, is fine for TEXT_PLT.
			 */
			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
			    pp, &addr, &size, &prot, flags | textrel | syscall);

			/*
			 * Update exe_base in case alignment was off.
			 * For PIE, addr is relative to exe_base so
			 * adjust it (non PIE exe_base is 0 so no change).
			 */
			if (flags == VMCMD_BASE)
				exe_base = addr;
			else
				addr += exe_base;

			/*
			 * Decide whether it's text or data by looking
			 * at the protection of the section
			 */
			if (prot & PROT_WRITE) {
				/* data section */
				if (epp->ep_dsize == ELF_NO_ADDR) {
					epp->ep_daddr = addr;
					epp->ep_dsize = size;
				} else {
					/* Grow the recorded range to cover this one. */
					if (addr < epp->ep_daddr) {
						epp->ep_dsize =
						    epp->ep_dsize +
						    epp->ep_daddr -
						    addr;
						epp->ep_daddr = addr;
					} else
						epp->ep_dsize = addr+size -
						    epp->ep_daddr;
				}
			} else if (prot & PROT_EXEC) {
				/* text section */
				if (epp->ep_tsize == ELF_NO_ADDR) {
					epp->ep_taddr = addr;
					epp->ep_tsize = size;
				} else {
					/* Grow the recorded range to cover this one. */
					if (addr < epp->ep_taddr) {
						epp->ep_tsize =
						    epp->ep_tsize +
						    epp->ep_taddr -
						    addr;
						epp->ep_taddr = addr;
					} else
						epp->ep_tsize = addr+size -
						    epp->ep_taddr;
				}
			}
			break;

		case PT_SHLIB:
			error = ENOEXEC;
			goto bad;

		case PT_INTERP:
			/* Already did this one */
		case PT_NOTE:
			break;

		case PT_PHDR:
			/* Note address of program headers (in text segment) */
			phdr = pp->p_vaddr;
			break;

		case PT_OPENBSD_RANDOMIZE:
			/* Cap the total amount of randomized data. */
			if (ph[i].p_memsz > randomizequota) {
				error = ENOMEM;
				goto bad;
			}
			randomizequota -= ph[i].p_memsz;
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		case PT_DYNAMIC:
#if defined (__mips__)
			/* DT_DEBUG is not ready on mips */
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
#endif
			break;
		case PT_GNU_RELRO:
		case PT_OPENBSD_MUTABLE:
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		default:
			/*
			 * Not fatal, we don't need to understand everything
			 * :-)
			 */
			break;
		}
	}

	/* Turn the PT_PHDR vaddr into an address in the loaded image. */
	phdr += exe_base;

	/*
	 * Strangely some linux programs may have all load sections marked
	 * writeable, in this case, textsize is not -1, but rather 0;
	 */
	if (epp->ep_tsize == ELF_NO_ADDR)
		epp->ep_tsize = 0;
	/*
	 * Another possibility is that it has all load sections marked
	 * read-only.  Fake a zero-sized data segment right after the
	 * text segment.
	 */
	if (epp->ep_dsize == ELF_NO_ADDR) {
		epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize);
		epp->ep_dsize = 0;
	}

	/* From here on the interpreter path is owned by the exec package. */
	epp->ep_interp = interp;
	epp->ep_entry = eh->e_entry + exe_base;

	/*
	 * Check if we found a dynamically linked binary and arrange to load
	 * its interpreter when the exec file is released.
	 */
	if (interp || eh->e_type == ET_DYN) {
		struct elf_args *ap;

		ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK);

		ap->arg_phaddr = phdr;
		ap->arg_phentsize = eh->e_phentsize;
		ap->arg_phnum = eh->e_phnum;
		ap->arg_entry = eh->e_entry + exe_base;
		ap->arg_interp = exe_base;

		/* exec_elf_fixup() consumes and frees these arguments. */
		epp->ep_args = ap;
	}

	free(ph, M_TEMP, phsize);
	vn_marktext(epp->ep_vp);
	return (exec_setup_stack(p, epp));

bad:
	if (interp)
		pool_put(&namei_pool, interp);
	free(ph, M_TEMP, phsize);
	kill_vmcmds(&epp->ep_vmcmds);
	/* Failure paths that never set an error report ENOEXEC. */
	if (error == 0)
		return (ENOEXEC);
	return (error);
}
800 
801 /*
802  * Phase II of load. It is now safe to load the interpreter. Info collected
803  * when loading the program is available for setup of the interpreter.
804  */
805 int
806 exec_elf_fixup(struct proc *p, struct exec_package *epp)
807 {
808 	char	*interp;
809 	int	error = 0;
810 	struct	elf_args *ap;
811 	AuxInfo ai[ELF_AUX_ENTRIES], *a;
812 
813 	ap = epp->ep_args;
814 	if (ap == NULL) {
815 		return (0);
816 	}
817 
818 	interp = epp->ep_interp;
819 
820 	/* disable kbind in programs that don't use ld.so */
821 	if (interp == NULL)
822 		p->p_p->ps_kbind_addr = BOGO_PC;
823 
824 	if (interp &&
825 	    (error = elf_load_file(p, interp, epp, ap)) != 0) {
826 		uprintf("execve: cannot load %s\n", interp);
827 		free(ap, M_TEMP, sizeof *ap);
828 		pool_put(&namei_pool, interp);
829 		kill_vmcmds(&epp->ep_vmcmds);
830 		return (error);
831 	}
832 	/*
833 	 * We have to do this ourselves...
834 	 */
835 	error = exec_process_vmcmds(p, epp);
836 
837 	/*
838 	 * Push extra arguments on the stack needed by dynamically
839 	 * linked binaries
840 	 */
841 	if (error == 0) {
842 		memset(&ai, 0, sizeof ai);
843 		a = ai;
844 
845 		a->au_id = AUX_phdr;
846 		a->au_v = ap->arg_phaddr;
847 		a++;
848 
849 		a->au_id = AUX_phent;
850 		a->au_v = ap->arg_phentsize;
851 		a++;
852 
853 		a->au_id = AUX_phnum;
854 		a->au_v = ap->arg_phnum;
855 		a++;
856 
857 		a->au_id = AUX_pagesz;
858 		a->au_v = PAGE_SIZE;
859 		a++;
860 
861 		a->au_id = AUX_base;
862 		a->au_v = ap->arg_interp;
863 		a++;
864 
865 		a->au_id = AUX_flags;
866 		a->au_v = 0;
867 		a++;
868 
869 		a->au_id = AUX_entry;
870 		a->au_v = ap->arg_entry;
871 		a++;
872 
873 		a->au_id = AUX_openbsd_timekeep;
874 		a->au_v = p->p_p->ps_timekeep;
875 		a++;
876 
877 		a->au_id = AUX_null;
878 		a->au_v = 0;
879 		a++;
880 
881 		error = copyout(ai, epp->ep_auxinfo, sizeof ai);
882 	}
883 	free(ap, M_TEMP, sizeof *ap);
884 	if (interp)
885 		pool_put(&namei_pool, interp);
886 	return (error);
887 }
888 
889 int
890 elf_os_pt_note_name(Elf_Note *np)
891 {
892 	int i, j;
893 
894 	for (i = 0; i < nitems(elf_note_names); i++) {
895 		size_t namlen = strlen(elf_note_names[i].name);
896 		if (np->namesz < namlen)
897 			continue;
898 		/* verify name padding (after the NUL) is NUL */
899 		for (j = namlen + 1; j < elfround(np->namesz); j++)
900 			if (((char *)(np + 1))[j] != '\0')
901 				continue;
902 		/* verify desc padding is NUL */
903 		for (j = np->descsz; j < elfround(np->descsz); j++)
904 			if (((char *)(np + 1))[j] != '\0')
905 				continue;
906 		if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0)
907 			return elf_note_names[i].id;
908 	}
909 	return (0);
910 }
911 
912 int
913 elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp)
914 {
915 	Elf_Phdr *hph, *ph;
916 	Elf_Note *np = NULL;
917 	size_t phsize, offset, pfilesz = 0, total;
918 	int error, names = 0;
919 
920 	hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
921 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
922 	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff,
923 	    hph, phsize)) != 0)
924 		goto out1;
925 
926 	for (ph = hph;  ph < &hph[eh->e_phnum]; ph++) {
927 		if (ph->p_type == PT_OPENBSD_WXNEEDED) {
928 			epp->ep_flags |= EXEC_WXNEEDED;
929 			continue;
930 		}
931 		if (ph->p_type == PT_OPENBSD_NOBTCFI) {
932 			epp->ep_flags |= EXEC_NOBTCFI;
933 			continue;
934 		}
935 
936 		if (ph->p_type != PT_NOTE || ph->p_filesz > 1024)
937 			continue;
938 
939 		if (np && ph->p_filesz != pfilesz) {
940 			free(np, M_TEMP, pfilesz);
941 			np = NULL;
942 		}
943 		if (!np)
944 			np = malloc(ph->p_filesz, M_TEMP, M_WAITOK);
945 		pfilesz = ph->p_filesz;
946 		if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset,
947 		    np, ph->p_filesz)) != 0)
948 			goto out2;
949 
950 		for (offset = 0; offset < ph->p_filesz; offset += total) {
951 			Elf_Note *np2 = (Elf_Note *)((char *)np + offset);
952 
953 			if (offset + sizeof(Elf_Note) > ph->p_filesz)
954 				break;
955 			total = sizeof(Elf_Note) + elfround(np2->namesz) +
956 			    elfround(np2->descsz);
957 			if (offset + total > ph->p_filesz)
958 				break;
959 			names |= elf_os_pt_note_name(np2);
960 		}
961 	}
962 
963 out2:
964 	free(np, M_TEMP, pfilesz);
965 out1:
966 	free(hph, M_TEMP, phsize);
967 	*namesp = names;
968 	return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC);
969 }
970 
971 /*
972  * Start of routines related to dumping core
973  */
974 
975 #ifdef SMALL_KERNEL
/*
 * SMALL_KERNEL variant: no core file is written, the request is
 * always refused with EPERM.
 */
int
coredump_elf(struct proc *p, void *cookie)
{
	return (EPERM);
}
981 #else /* !SMALL_KERNEL */
982 
/*
 * State shared between coredump_elf() and the callbacks it hands to
 * uvm_coredump_walkmap().
 */
struct writesegs_state {
	/*
	 * NOTE(review): the three offsets below are presumably file
	 * offsets in the core file; coredump_elf() only checks that
	 * notestart + notesize == secstart.  Confirm against the
	 * setup/walk callbacks.
	 */
	off_t	notestart;
	off_t	secstart;
	off_t	secoff;
	struct	proc *p;	/* process being dumped */
	void	*iocookie;	/* cookie given to coredump_elf() */
	Elf_Phdr *psections;	/* program headers; written to the core */
	size_t	psectionslen;	/* size of psections in bytes */
	size_t	notesize;	/* note size computed during setup */
	int	npsections;	/* segment count plus one for PT_NOTE */
};
994 
995 uvm_coredump_setup_cb	coredump_setup_elf;
996 uvm_coredump_walk_cb	coredump_walk_elf;
997 
998 int	coredump_notes_elf(struct proc *, void *, size_t *);
999 int	coredump_note_elf(struct proc *, void *, size_t *);
1000 int	coredump_writenote_elf(struct proc *, void *, Elf_Note *,
1001 	    const char *, void *);
1002 
1003 extern vaddr_t sigcode_va;
1004 extern vsize_t sigcode_sz;
1005 
1006 int
1007 coredump_elf(struct proc *p, void *cookie)
1008 {
1009 #ifdef DIAGNOSTIC
1010 	off_t offset;
1011 #endif
1012 	struct writesegs_state ws;
1013 	size_t notesize;
1014 	int error, i;
1015 
1016 	ws.p = p;
1017 	ws.iocookie = cookie;
1018 	ws.psections = NULL;
1019 
1020 	/*
1021 	 * Walk the map to get all the segment offsets and lengths,
1022 	 * write out the ELF header.
1023 	 */
1024 	error = uvm_coredump_walkmap(p, coredump_setup_elf,
1025 	    coredump_walk_elf, &ws);
1026 	if (error)
1027 		goto out;
1028 
1029 	error = coredump_write(cookie, UIO_SYSSPACE, ws.psections,
1030 	    ws.psectionslen);
1031 	if (error)
1032 		goto out;
1033 
1034 	/* Write out the notes. */
1035 	error = coredump_notes_elf(p, cookie, &notesize);
1036 	if (error)
1037 		goto out;
1038 
1039 #ifdef DIAGNOSTIC
1040 	if (notesize != ws.notesize)
1041 		panic("coredump: notesize changed: %zu != %zu",
1042 		    ws.notesize, notesize);
1043 	offset = ws.notestart + notesize;
1044 	if (offset != ws.secstart)
1045 		panic("coredump: offset %lld != secstart %lld",
1046 		    (long long) offset, (long long) ws.secstart);
1047 #endif
1048 
1049 	/* Pass 3: finally, write the sections themselves. */
1050 	for (i = 0; i < ws.npsections - 1; i++) {
1051 		Elf_Phdr *pent = &ws.psections[i];
1052 		if (pent->p_filesz == 0)
1053 			continue;
1054 
1055 #ifdef DIAGNOSTIC
1056 		if (offset != pent->p_offset)
1057 			panic("coredump: offset %lld != p_offset[%d] %lld",
1058 			    (long long) offset, i,
1059 			    (long long) pent->p_filesz);
1060 #endif
1061 
1062 		/*
1063 		 * Since the sigcode is mapped execute-only, we can't
1064 		 * read it.  So use the kernel mapping for it instead.
1065 		 */
1066 		if (pent->p_vaddr == p->p_p->ps_sigcode &&
1067 		    pent->p_filesz == sigcode_sz) {
1068 			error = coredump_write(cookie, UIO_SYSSPACE,
1069 			    (void *)sigcode_va, sigcode_sz);
1070 		} else {
1071 			error = coredump_write(cookie, UIO_USERSPACE,
1072 			    (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz);
1073 		}
1074 		if (error)
1075 			goto out;
1076 
1077 		coredump_unmap(cookie, (vaddr_t)pent->p_vaddr,
1078 		    (vaddr_t)pent->p_vaddr + pent->p_filesz);
1079 
1080 #ifdef DIAGNOSTIC
1081 		offset += ws.psections[i].p_filesz;
1082 #endif
1083 	}
1084 
1085 out:
1086 	free(ws.psections, M_TEMP, ws.psectionslen);
1087 	return (error);
1088 }
1089 
1090 
1091 /*
1092  * Normally we lay out core files like this:
1093  *	[ELF Header] [Program headers] [Notes] [data for PT_LOAD segments]
1094  *
1095  * However, if there's >= 65535 segments then it overflows the field
1096  * in the ELF header, so the standard specifies putting a magic
1097  * number there and saving the real count in the .sh_info field of
1098  * the first *section* header...which requires generating a section
1099  * header.  To avoid confusing tools, we include an .shstrtab section
1100  * as well so all the indexes look valid.  So in this case we lay
1101  * out the core file like this:
1102  *	[ELF Header] [Section Headers] [.shstrtab] [Program headers] \
1103  *	[Notes] [data for PT_LOAD segments]
1104  *
1105  * The 'shstrtab' structure below is data for the second of the two
1106  * section headers, plus the .shstrtab itself, in one const buffer.
1107  */
1108 static const struct {
1109     Elf_Shdr	shdr;
1110     char	shstrtab[sizeof(ELF_SHSTRTAB) + 1];
1111 } shstrtab = {
1112     .shdr = {
1113 	.sh_name = 1,			/* offset in .shstrtab below */
1114 	.sh_type = SHT_STRTAB,
1115 	.sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr),
1116 	.sh_size = sizeof(ELF_SHSTRTAB) + 1,
1117 	.sh_addralign = 1,
1118     },
1119     .shstrtab = "\0" ELF_SHSTRTAB,
1120 };
1121 
1122 int
1123 coredump_setup_elf(int segment_count, void *cookie)
1124 {
1125 	Elf_Ehdr ehdr;
1126 	struct writesegs_state *ws = cookie;
1127 	Elf_Phdr *note;
1128 	int error;
1129 
1130 	/* Get the count of segments, plus one for the PT_NOTE */
1131 	ws->npsections = segment_count + 1;
1132 
1133 	/* Get the size of the notes. */
1134 	error = coredump_notes_elf(ws->p, NULL, &ws->notesize);
1135 	if (error)
1136 		return error;
1137 
1138 	/* Setup the ELF header */
1139 	memset(&ehdr, 0, sizeof(ehdr));
1140 	memcpy(ehdr.e_ident, ELFMAG, SELFMAG);
1141 	ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS;
1142 	ehdr.e_ident[EI_DATA] = ELF_TARG_DATA;
1143 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
1144 	/* XXX Should be the OSABI/ABI version of the executable. */
1145 	ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
1146 	ehdr.e_ident[EI_ABIVERSION] = 0;
1147 	ehdr.e_type = ET_CORE;
1148 	/* XXX This should be the e_machine of the executable. */
1149 	ehdr.e_machine = ELF_TARG_MACH;
1150 	ehdr.e_version = EV_CURRENT;
1151 	ehdr.e_entry = 0;
1152 	ehdr.e_flags = 0;
1153 	ehdr.e_ehsize = sizeof(ehdr);
1154 	ehdr.e_phentsize = sizeof(Elf_Phdr);
1155 
1156 	if (ws->npsections < PN_XNUM) {
1157 		ehdr.e_phoff = sizeof(ehdr);
1158 		ehdr.e_shoff = 0;
1159 		ehdr.e_phnum = ws->npsections;
1160 		ehdr.e_shentsize = 0;
1161 		ehdr.e_shnum = 0;
1162 		ehdr.e_shstrndx = 0;
1163 	} else {
1164 		/* too many segments, use extension setup */
1165 		ehdr.e_shoff = sizeof(ehdr);
1166 		ehdr.e_phnum = PN_XNUM;
1167 		ehdr.e_shentsize = sizeof(Elf_Shdr);
1168 		ehdr.e_shnum = 2;
1169 		ehdr.e_shstrndx = 1;
1170 		ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size;
1171 	}
1172 
1173 	/* Write out the ELF header. */
1174 	error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr));
1175 	if (error)
1176 		return error;
1177 
1178 	/*
1179 	 * If an section header is needed to store extension info, write
1180 	 * it out after the ELF header and before the program header.
1181 	 */
1182 	if (ehdr.e_shnum != 0) {
1183 		Elf_Shdr shdr = { .sh_info = ws->npsections };
1184 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr,
1185 		    sizeof shdr);
1186 		if (error)
1187 			return error;
1188 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab,
1189 		    sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab));
1190 		if (error)
1191 			return error;
1192 	}
1193 
1194 	/*
1195 	 * Allocate the segment header array and setup to collect
1196 	 * the section sizes and offsets
1197 	 */
1198 	ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr),
1199 	    M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO);
1200 	if (ws->psections == NULL)
1201 		return ENOMEM;
1202 	ws->psectionslen = ws->npsections * sizeof(Elf_Phdr);
1203 
1204 	ws->notestart = ehdr.e_phoff + ws->psectionslen;
1205 	ws->secstart = ws->notestart + ws->notesize;
1206 	ws->secoff = ws->secstart;
1207 
1208 	/* Fill in the PT_NOTE segment header in the last slot */
1209 	note = &ws->psections[ws->npsections - 1];
1210 	note->p_type = PT_NOTE;
1211 	note->p_offset = ws->notestart;
1212 	note->p_vaddr = 0;
1213 	note->p_paddr = 0;
1214 	note->p_filesz = ws->notesize;
1215 	note->p_memsz = 0;
1216 	note->p_flags = PF_R;
1217 	note->p_align = ELFROUNDSIZE;
1218 
1219 	return (0);
1220 }
1221 
1222 int
1223 coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot,
1224     int nsegment, void *cookie)
1225 {
1226 	struct writesegs_state *ws = cookie;
1227 	Elf_Phdr phdr;
1228 	vsize_t size, realsize;
1229 
1230 	size = end - start;
1231 	realsize = realend - start;
1232 
1233 	phdr.p_type = PT_LOAD;
1234 	phdr.p_offset = ws->secoff;
1235 	phdr.p_vaddr = start;
1236 	phdr.p_paddr = 0;
1237 	phdr.p_filesz = realsize;
1238 	phdr.p_memsz = size;
1239 	phdr.p_flags = 0;
1240 	if (prot & PROT_READ)
1241 		phdr.p_flags |= PF_R;
1242 	if (prot & PROT_WRITE)
1243 		phdr.p_flags |= PF_W;
1244 	if (prot & PROT_EXEC)
1245 		phdr.p_flags |= PF_X;
1246 	phdr.p_align = PAGE_SIZE;
1247 
1248 	ws->secoff += phdr.p_filesz;
1249 	ws->psections[nsegment] = phdr;
1250 
1251 	return (0);
1252 }
1253 
1254 int
1255 coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
1256 {
1257 	struct elfcore_procinfo cpi;
1258 	Elf_Note nhdr;
1259 	struct process *pr = p->p_p;
1260 	struct proc *q;
1261 	size_t size, notesize;
1262 	int error;
1263 
1264 	KASSERT(!P_HASSIBLING(p) || pr->ps_single != NULL);
1265 	size = 0;
1266 
1267 	/* First, write an elfcore_procinfo. */
1268 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1269 	    elfround(sizeof(cpi));
1270 	if (iocookie) {
1271 		memset(&cpi, 0, sizeof(cpi));
1272 
1273 		cpi.cpi_version = ELFCORE_PROCINFO_VERSION;
1274 		cpi.cpi_cpisize = sizeof(cpi);
1275 		cpi.cpi_signo = p->p_sisig;
1276 		cpi.cpi_sigcode = p->p_sicode;
1277 
1278 		cpi.cpi_sigpend = p->p_siglist | pr->ps_siglist;
1279 		cpi.cpi_sigmask = p->p_sigmask;
1280 		cpi.cpi_sigignore = pr->ps_sigacts->ps_sigignore;
1281 		cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
1282 
1283 		cpi.cpi_pid = pr->ps_pid;
1284 		cpi.cpi_ppid = pr->ps_ppid;
1285 		cpi.cpi_pgrp = pr->ps_pgid;
1286 		if (pr->ps_session->s_leader)
1287 			cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
1288 		else
1289 			cpi.cpi_sid = 0;
1290 
1291 		cpi.cpi_ruid = p->p_ucred->cr_ruid;
1292 		cpi.cpi_euid = p->p_ucred->cr_uid;
1293 		cpi.cpi_svuid = p->p_ucred->cr_svuid;
1294 
1295 		cpi.cpi_rgid = p->p_ucred->cr_rgid;
1296 		cpi.cpi_egid = p->p_ucred->cr_gid;
1297 		cpi.cpi_svgid = p->p_ucred->cr_svgid;
1298 
1299 		(void)strlcpy(cpi.cpi_name, pr->ps_comm, sizeof(cpi.cpi_name));
1300 
1301 		nhdr.namesz = sizeof("OpenBSD");
1302 		nhdr.descsz = sizeof(cpi);
1303 		nhdr.type = NT_OPENBSD_PROCINFO;
1304 
1305 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1306 		    "OpenBSD", &cpi);
1307 		if (error)
1308 			return (error);
1309 	}
1310 	size += notesize;
1311 
1312 	/* Second, write an NT_OPENBSD_AUXV note. */
1313 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1314 	    elfround(ELF_AUX_WORDS * sizeof(char *));
1315 	if (iocookie && pr->ps_auxinfo) {
1316 
1317 		nhdr.namesz = sizeof("OpenBSD");
1318 		nhdr.descsz = ELF_AUX_WORDS * sizeof(char *);
1319 		nhdr.type = NT_OPENBSD_AUXV;
1320 
1321 		error = coredump_write(iocookie, UIO_SYSSPACE,
1322 		    &nhdr, sizeof(nhdr));
1323 		if (error)
1324 			return (error);
1325 
1326 		error = coredump_write(iocookie, UIO_SYSSPACE,
1327 		    "OpenBSD", elfround(nhdr.namesz));
1328 		if (error)
1329 			return (error);
1330 
1331 		error = coredump_write(iocookie, UIO_USERSPACE,
1332 		    (caddr_t)pr->ps_auxinfo, nhdr.descsz);
1333 		if (error)
1334 			return (error);
1335 	}
1336 	size += notesize;
1337 
1338 #ifdef PT_WCOOKIE
1339 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1340 	    elfround(sizeof(register_t));
1341 	if (iocookie) {
1342 		register_t wcookie;
1343 
1344 		nhdr.namesz = sizeof("OpenBSD");
1345 		nhdr.descsz = sizeof(register_t);
1346 		nhdr.type = NT_OPENBSD_WCOOKIE;
1347 
1348 		wcookie = process_get_wcookie(p);
1349 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1350 		    "OpenBSD", &wcookie);
1351 		if (error)
1352 			return (error);
1353 	}
1354 	size += notesize;
1355 #endif
1356 
1357 	/*
1358 	 * Now write the register info for the thread that caused the
1359 	 * coredump.
1360 	 */
1361 	error = coredump_note_elf(p, iocookie, &notesize);
1362 	if (error)
1363 		return (error);
1364 	size += notesize;
1365 
1366 	/*
1367 	 * Now, for each thread, write the register info and any other
1368 	 * per-thread notes.  Since we're dumping core, all the other
1369 	 * threads in the process have been stopped and the list can't
1370 	 * change.
1371 	 */
1372 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
1373 		if (q == p)		/* we've taken care of this thread */
1374 			continue;
1375 		error = coredump_note_elf(q, iocookie, &notesize);
1376 		if (error)
1377 			return (error);
1378 		size += notesize;
1379 	}
1380 
1381 	*sizep = size;
1382 	return (0);
1383 }
1384 
1385 int
1386 coredump_note_elf(struct proc *p, void *iocookie, size_t *sizep)
1387 {
1388 	Elf_Note nhdr;
1389 	int size, notesize, error;
1390 	int namesize;
1391 	char name[64+ELFROUNDSIZE];
1392 	struct reg intreg;
1393 #ifdef PT_GETFPREGS
1394 	struct fpreg freg;
1395 #endif
1396 #ifdef PT_PACMASK
1397 	register_t pacmask[2];
1398 #endif
1399 
1400 	size = 0;
1401 
1402 	snprintf(name, sizeof(name)-ELFROUNDSIZE, "%s@%d",
1403 	    "OpenBSD", p->p_tid + THREAD_PID_OFFSET);
1404 	namesize = strlen(name) + 1;
1405 	memset(name + namesize, 0, elfround(namesize) - namesize);
1406 
1407 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(intreg));
1408 	if (iocookie) {
1409 		error = process_read_regs(p, &intreg);
1410 		if (error)
1411 			return (error);
1412 
1413 		nhdr.namesz = namesize;
1414 		nhdr.descsz = sizeof(intreg);
1415 		nhdr.type = NT_OPENBSD_REGS;
1416 
1417 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1418 		    name, &intreg);
1419 		if (error)
1420 			return (error);
1421 
1422 	}
1423 	size += notesize;
1424 
1425 #ifdef PT_GETFPREGS
1426 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(freg));
1427 	if (iocookie) {
1428 		error = process_read_fpregs(p, &freg);
1429 		if (error)
1430 			return (error);
1431 
1432 		nhdr.namesz = namesize;
1433 		nhdr.descsz = sizeof(freg);
1434 		nhdr.type = NT_OPENBSD_FPREGS;
1435 
1436 		error = coredump_writenote_elf(p, iocookie, &nhdr, name, &freg);
1437 		if (error)
1438 			return (error);
1439 	}
1440 	size += notesize;
1441 #endif
1442 
1443 #ifdef PT_PACMASK
1444 	notesize = sizeof(nhdr) + elfround(namesize) +
1445 	    elfround(sizeof(pacmask));
1446 	if (iocookie) {
1447 		pacmask[0] = pacmask[1] = process_get_pacmask(p);
1448 
1449 		nhdr.namesz = namesize;
1450 		nhdr.descsz = sizeof(pacmask);
1451 		nhdr.type = NT_OPENBSD_PACMASK;
1452 
1453 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1454 		    name, &pacmask);
1455 		if (error)
1456 			return (error);
1457 	}
1458 	size += notesize;
1459 #endif
1460 
1461 	*sizep = size;
1462 	/* XXX Add hook for machdep per-LWP notes. */
1463 	return (0);
1464 }
1465 
1466 int
1467 coredump_writenote_elf(struct proc *p, void *cookie, Elf_Note *nhdr,
1468     const char *name, void *data)
1469 {
1470 	int error;
1471 
1472 	error = coredump_write(cookie, UIO_SYSSPACE, nhdr, sizeof(*nhdr));
1473 	if (error)
1474 		return error;
1475 
1476 	error = coredump_write(cookie, UIO_SYSSPACE, name,
1477 	    elfround(nhdr->namesz));
1478 	if (error)
1479 		return error;
1480 
1481 	return coredump_write(cookie, UIO_SYSSPACE, data, nhdr->descsz);
1482 }
1483 #endif /* !SMALL_KERNEL */
1484