xref: /openbsd-src/sys/kern/exec_elf.c (revision dcc91c2622318df8f66a9bca2d2864253df1bfc3)
1 /*	$OpenBSD: exec_elf.c,v 1.190 2024/08/21 03:16:25 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 Per Fogelstrom
5  * All rights reserved.
6  *
7  * Copyright (c) 1994 Christos Zoulas
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  */
33 
34 /*
35  * Copyright (c) 2001 Wasabi Systems, Inc.
36  * All rights reserved.
37  *
38  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed for the NetBSD Project by
51  *	Wasabi Systems, Inc.
52  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
53  *    or promote products derived from this software without specific prior
54  *    written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/malloc.h>
73 #include <sys/pool.h>
74 #include <sys/mount.h>
75 #include <sys/namei.h>
76 #include <sys/vnode.h>
77 #include <sys/core.h>
78 #include <sys/exec.h>
79 #include <sys/exec_elf.h>
80 #include <sys/fcntl.h>
81 #include <sys/ptrace.h>
82 #include <sys/signalvar.h>
83 #include <sys/pledge.h>
84 #include <sys/syscall.h>
85 
86 #include <sys/mman.h>
87 
88 #include <uvm/uvm_extern.h>
89 
90 #include <machine/reg.h>
91 #include <machine/exec.h>
92 #include <machine/elf.h>
93 
/*
 * Prototypes for the ELF exec helpers that are defined further down in
 * this file.
 */
int	elf_load_file(struct proc *, char *, struct exec_package *,
	    struct elf_args *);
int	elf_check_header(Elf_Ehdr *);
int	elf_read_from(struct proc *, struct vnode *, u_long, void *, int);
void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
	    Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
int	elf_os_pt_note_name(Elf_Note *);
int	elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);
int	elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
	    u_int **pinp, int is_ldso, size_t len);
104 
/*
 * Round a up (ELF_ROUND) or down (ELF_TRUNC) to a multiple of b.
 * Both rely on mask arithmetic, so b has to be a power of two; the
 * callers in this file pass p_align values that were checked with
 * powerof2() or the page size.
 */
#define ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
#define ELF_TRUNC(a, b)		((a) & ~((b) - 1))

/*
 * We limit the number of program headers to 32, this should
 * be a reasonable limit for ELF, the most we have seen so far is 12.
 * elf_check_header() enforces the limit, and elf_load_file() sizes its
 * on-stack load map with it.
 */
#define ELF_MAX_VALID_PHDR 32

/* Bit reported by elf_os_pt_note_name() for an "OpenBSD" note. */
#define ELF_NOTE_NAME_OPENBSD	0x01

/*
 * Table of recognised PT_NOTE vendor names and the bit each one maps to.
 */
struct elf_note_name {
	char *name;	/* NUL terminated note name to match */
	int id;		/* ELF_NOTE_NAME_* bit returned on a match */
} elf_note_names[] = {
	{ "OpenBSD",	ELF_NOTE_NAME_OPENBSD },
};

/* Note name and descriptor fields are padded to Elf_Word multiples. */
#define	ELFROUNDSIZE	sizeof(Elf_Word)
#define	elfround(x)	roundup((x), ELFROUNDSIZE)
126 
127 
128 /*
129  * Check header for validity; return 0 for ok, ENOEXEC if error
130  */
131 int
132 elf_check_header(Elf_Ehdr *ehdr)
133 {
134 	/*
135 	 * We need to check magic, class size, endianness, and version before
136 	 * we look at the rest of the Elf_Ehdr structure. These few elements
137 	 * are represented in a machine independent fashion.
138 	 */
139 	if (!IS_ELF(*ehdr) ||
140 	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
141 	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
142 	    ehdr->e_ident[EI_VERSION] != ELF_TARG_VER)
143 		return (ENOEXEC);
144 
145 	/* Now check the machine dependent header */
146 	if (ehdr->e_machine != ELF_TARG_MACH ||
147 	    ehdr->e_version != ELF_TARG_VER)
148 		return (ENOEXEC);
149 
150 	/* Don't allow an insane amount of sections. */
151 	if (ehdr->e_phnum > ELF_MAX_VALID_PHDR)
152 		return (ENOEXEC);
153 
154 	return (0);
155 }
156 
/*
 * Load a psection at the appropriate address
 *
 * Queues on vcset the vmcmds needed to map program header ph of vnode
 * vp; nothing is mapped by this function itself.
 *
 *	addr	in: requested load address, or ELF_NO_ADDR to load at
 *		ph->p_vaddr.  out: the address used, truncated down to
 *		ph->p_align when p_align > 1.
 *	size	out: ph->p_memsz plus the distance (diff) between the
 *		aligned start and p_vaddr.
 *	prot	in/out: PROT_* bits derived from ph->p_flags are OR'ed in.
 *	flags	VMCMD_* flags attached to every queued command, after the
 *		VMCMD_IMMUTABLE adjustment made below.
 */
void
elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
    Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags)
{
	u_long msize, lsize, psize, rm, rf;
	long diff, offset, bdiff;
	Elf_Addr base;

	/*
	 * If the user specified an address, then we load there.
	 */
	if (*addr != ELF_NO_ADDR) {
		if (ph->p_align > 1) {
			*addr = ELF_TRUNC(*addr, ph->p_align);
			diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
			/* page align vaddr */
			base = *addr + trunc_page(ph->p_vaddr)
			    - ELF_TRUNC(ph->p_vaddr, ph->p_align);
		} else {
			diff = 0;
			base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr;
		}
	} else {
		/* No address requested: use the one recorded in the file. */
		*addr = ph->p_vaddr;
		if (ph->p_align > 1)
			*addr = ELF_TRUNC(*addr, ph->p_align);
		base = trunc_page(ph->p_vaddr);
		diff = ph->p_vaddr - *addr;
	}
	/* bdiff: offset of p_vaddr inside its page. */
	bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr);

	/*
	 * Enforce W^X and map W|X segments without X permission
	 * initially.  The dynamic linker will make these read-only
	 * and add back X permission after relocation processing.
	 * Static executables with W|X segments will probably crash.
	 */
	*prot |= (ph->p_flags & PF_R) ? PROT_READ : 0;
	*prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0;
	if ((ph->p_flags & PF_W) == 0)
		*prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0;

	/*
	 * Apply immutability as much as possible, but not text/rodata
	 * segments of textrel binaries, or RELRO or PT_OPENBSD_MUTABLE
	 * sections, or LOADS marked PF_OPENBSD_MUTABLE, or LOADS which
	 * violate W^X.
	 * Userland (meaning crt0 or ld.so) will repair those regions.
	 */
	if ((ph->p_flags & (PF_X | PF_W)) != (PF_X | PF_W) &&
	    ((ph->p_flags & PF_OPENBSD_MUTABLE) == 0))
		flags |= VMCMD_IMMUTABLE;
	if ((flags & VMCMD_TEXTREL) && (ph->p_flags & PF_W) == 0)
		flags &= ~VMCMD_IMMUTABLE;

	/*
	 * msize: in-memory size counted from the aligned start.
	 * offset/lsize: file offset and length of the file-backed part,
	 * both widened so that the mapping starts on a page boundary.
	 */
	msize = ph->p_memsz + diff;
	offset = ph->p_offset - bdiff;
	lsize = ph->p_filesz + bdiff;
	psize = round_page(lsize);

	/*
	 * Because the pagedvn pager can't handle zero fill of the last
	 * data page if it's not page aligned we map the last page readvn.
	 */
	if (ph->p_flags & PF_W) {
		/* Whole pages are paged in; the partial tail is read in. */
		psize = trunc_page(lsize);
		if (psize > 0)
			NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp,
			    offset, *prot, flags);
		if (psize != lsize) {
			NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize,
			    base + psize, vp, offset + psize, *prot, flags);
		}
	} else {
		NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset,
		    *prot, flags);
	}

	/*
	 * Check if we need to extend the size of the segment
	 */
	/* rm/rf: page rounded ends of the memory and file images. */
	rm = round_page(*addr + ph->p_memsz + diff);
	rf = round_page(*addr + ph->p_filesz + diff);

	if (rm != rf) {
		/* Memory image is larger than the file image: zero fill. */
		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0,
		    *prot, flags);
	}
	*size = msize;
}
250 
251 /*
252  * Read from vnode into buffer at offset.
253  */
254 int
255 elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf,
256     int size)
257 {
258 	int error;
259 	size_t resid;
260 
261 	if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE,
262 	    0, p->p_ucred, &resid, p)) != 0)
263 		return error;
264 	/*
265 	 * See if we got all of it
266 	 */
267 	if (resid != 0)
268 		return (ENOEXEC);
269 	return (0);
270 }
271 
272 /*
273  * rebase the pin offsets inside a base,len window for the text segment only.
274  */
275 void
276 elf_adjustpins(vaddr_t *basep, size_t *lenp, u_int *pins, int npins, u_int offset)
277 {
278 	int i;
279 
280 	/* Adjust offsets, base, len */
281 	for (i = 0; i < npins; i++) {
282 		if (pins[i] == -1 || pins[i] == 0)
283 			continue;
284 		pins[i] -= offset;
285 	}
286 	*basep += offset;
287 	*lenp -= offset;
288 }
289 
290 int
291 elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
292     u_int **pinp, int is_ldso, size_t len)
293 {
294 	struct pinsyscalls {
295 		u_int offset;
296 		u_int sysno;
297 	} *syscalls = NULL;
298 	int i, nsyscalls = 0, npins = 0;
299 	u_int *pins = NULL;
300 
301 	if (pp->p_filesz > SYS_MAXSYSCALL * 2 * sizeof(*syscalls) ||
302 	    pp->p_filesz % sizeof(*syscalls) != 0)
303 		goto bad;
304 	nsyscalls = pp->p_filesz / sizeof(*syscalls);
305 	syscalls = malloc(pp->p_filesz, M_PINSYSCALL, M_WAITOK);
306 	if (elf_read_from(p, vp, pp->p_offset, syscalls,
307 	    pp->p_filesz) != 0)
308 		goto bad;
309 
310 	/* Validate, and calculate pintable size */
311 	for (i = 0; i < nsyscalls; i++) {
312 		if (syscalls[i].sysno <= 0 ||
313 		    syscalls[i].sysno >= SYS_MAXSYSCALL ||
314 		    syscalls[i].offset > len)
315 			goto bad;
316 		npins = MAX(npins, syscalls[i].sysno);
317 	}
318 	if (is_ldso)
319 		npins = MAX(npins, SYS_kbind);	/* XXX see ld.so/loader.c */
320 	npins++;
321 
322 	/* Fill pintable: 0 = invalid, -1 = allowed, else offset from base */
323 	pins = mallocarray(npins, sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
324 	for (i = 0; i < nsyscalls; i++) {
325 		if (pins[syscalls[i].sysno])
326 			pins[syscalls[i].sysno] = -1;	/* duplicated */
327 		else
328 			pins[syscalls[i].sysno] = syscalls[i].offset;
329 	}
330 	if (is_ldso)
331 		pins[SYS_kbind] = -1;	/* XXX see ld.so/loader.c */
332 	*pinp = pins;
333 	pins = NULL;
334 bad:
335 	free(syscalls, M_PINSYSCALL, nsyscalls * sizeof(*syscalls));
336 	free(pins, M_PINSYSCALL, npins * sizeof(u_int));
337 	return npins;
338 }
339 
340 /*
341  * Load a file (interpreter/library) pointed to by path [stolen from
342  * coff_load_shlib()]. Made slightly generic so it might be used externally.
343  */
344 int
345 elf_load_file(struct proc *p, char *path, struct exec_package *epp,
346     struct elf_args *ap)
347 {
348 	int error, i;
349 	struct nameidata nd;
350 	Elf_Ehdr eh;
351 	Elf_Phdr *ph = NULL, *syscall_ph = NULL;
352 	u_long phsize = 0;
353 	Elf_Addr addr;
354 	struct vnode *vp;
355 	Elf_Phdr *base_ph = NULL;
356 	struct interp_ld_sec {
357 		Elf_Addr vaddr;
358 		u_long memsz;
359 	} loadmap[ELF_MAX_VALID_PHDR];
360 	int nload, idx = 0;
361 	Elf_Addr pos;
362 	int file_align;
363 	int loop;
364 	size_t randomizequota = ELF_RANDOMIZE_LIMIT;
365 	vaddr_t text_start = -1, text_end = 0;
366 
367 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
368 	nd.ni_pledge = PLEDGE_RPATH;
369 	nd.ni_unveil = UNVEIL_READ;
370 	if ((error = namei(&nd)) != 0) {
371 		return (error);
372 	}
373 	vp = nd.ni_vp;
374 	if (vp->v_type != VREG) {
375 		error = EACCES;
376 		goto bad;
377 	}
378 	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
379 		goto bad;
380 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
381 		error = EACCES;
382 		goto bad;
383 	}
384 	if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)
385 		goto bad1;
386 	if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0)
387 		goto bad1;
388 
389 	if (elf_check_header(&eh) || eh.e_type != ET_DYN) {
390 		error = ENOEXEC;
391 		goto bad1;
392 	}
393 
394 	ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
395 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
396 
397 	if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0)
398 		goto bad1;
399 
400 	for (i = 0; i < eh.e_phnum; i++) {
401 		if ((ph[i].p_align > 1) && !powerof2(ph[i].p_align)) {
402 			error = EINVAL;
403 			goto bad1;
404 		}
405 
406 		if (ph[i].p_type == PT_LOAD) {
407 			if (ph[i].p_filesz > ph[i].p_memsz ||
408 			    ph[i].p_memsz == 0) {
409 				error = EINVAL;
410 				goto bad1;
411 			}
412 			loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr);
413 			loadmap[idx].memsz = round_page (ph[i].p_vaddr +
414 			    ph[i].p_memsz - loadmap[idx].vaddr);
415 			file_align = ph[i].p_align;
416 			idx++;
417 		}
418 	}
419 	nload = idx;
420 
421 	/*
422 	 * Load the interpreter where a non-fixed mmap(NULL, ...)
423 	 * would (i.e. something safely out of the way).
424 	 */
425 	pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS,
426 	    VM_MAXUSER_ADDRESS);
427 	pos = ELF_ROUND(pos, file_align);
428 
429 	loop = 0;
430 	for (i = 0; i < nload;/**/) {
431 		vaddr_t	addr;
432 		struct	uvm_object *uobj;
433 		off_t	uoff;
434 		size_t	size;
435 
436 #ifdef this_needs_fixing
437 		if (i == 0) {
438 			uobj = &vp->v_uvm.u_obj;
439 			/* need to fix uoff */
440 		} else {
441 #endif
442 			uobj = NULL;
443 			uoff = 0;
444 #ifdef this_needs_fixing
445 		}
446 #endif
447 
448 		addr = trunc_page(pos + loadmap[i].vaddr);
449 		size =  round_page(addr + loadmap[i].memsz) - addr;
450 
451 		/* CRAP - map_findspace does not avoid daddr+BRKSIZ */
452 		if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
453 		    (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ))
454 			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
455 			    BRKSIZ);
456 
457 		if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
458 		    (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
459 			if (loop == 0) {
460 				loop = 1;
461 				i = 0;
462 				pos = 0;
463 				continue;
464 			}
465 			error = ENOMEM;
466 			goto bad1;
467 		}
468 		if (addr != pos + loadmap[i].vaddr) {
469 			/* base changed. */
470 			pos = addr - trunc_page(loadmap[i].vaddr);
471 			pos = ELF_ROUND(pos,file_align);
472 			i = 0;
473 			continue;
474 		}
475 
476 		i++;
477 	}
478 
479 	/*
480 	 * Load all the necessary sections
481 	 */
482 	for (i = 0; i < eh.e_phnum; i++) {
483 		Elf_Addr size = 0;
484 		int prot = 0;
485 		int flags;
486 
487 		switch (ph[i].p_type) {
488 		case PT_LOAD:
489 			if (base_ph == NULL) {
490 				flags = VMCMD_BASE;
491 				addr = pos;
492 				base_ph = &ph[i];
493 			} else {
494 				flags = VMCMD_RELATIVE;
495 				addr = ph[i].p_vaddr - base_ph->p_vaddr;
496 			}
497 			elf_load_psection(&epp->ep_vmcmds, nd.ni_vp,
498 			    &ph[i], &addr, &size, &prot, flags);
499 			/* If entry is within this section it must be text */
500 			if (eh.e_entry >= ph[i].p_vaddr &&
501 			    eh.e_entry < (ph[i].p_vaddr + size)) {
502 				/* LOAD containing e_entry may not be writable */
503 				if (prot & PROT_WRITE) {
504 					error = ENOEXEC;
505 					goto bad1;
506 				}
507  				epp->ep_entry = addr + eh.e_entry -
508 				    ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align);
509 				if (flags == VMCMD_RELATIVE)
510 					epp->ep_entry += pos;
511 				ap->arg_interp = pos;
512 			}
513 			if (prot & PROT_EXEC) {
514 				if (addr < text_start)
515 					text_start = addr;
516 				if (addr+size >= text_end)
517 					text_end = addr + size;
518 			}
519 			addr += size;
520 			break;
521 
522 		case PT_PHDR:
523 		case PT_NOTE:
524 			break;
525 
526 		case PT_OPENBSD_RANDOMIZE:
527 			if (ph[i].p_memsz > randomizequota) {
528 				error = ENOMEM;
529 				goto bad1;
530 			}
531 			randomizequota -= ph[i].p_memsz;
532 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
533 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
534 			break;
535 
536 		case PT_DYNAMIC:
537 #if defined (__mips__)
538 			/* DT_DEBUG is not ready on mips */
539 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
540 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
541 #endif
542 			break;
543 		case PT_GNU_RELRO:
544 		case PT_OPENBSD_MUTABLE:
545 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
546 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
547 			break;
548 		case PT_OPENBSD_SYSCALLS:
549 			syscall_ph = &ph[i];
550 			break;
551 		default:
552 			break;
553 		}
554 	}
555 
556 	if (syscall_ph) {
557 		struct process *pr = p->p_p;
558 		vaddr_t base = pos;
559 		size_t len = text_end;
560 		u_int *pins;
561 		int npins;
562 
563 		npins = elf_read_pintable(p, nd.ni_vp, syscall_ph,
564 		    &pins, 1, len);
565 		if (npins) {
566 			elf_adjustpins(&base, &len, pins, npins,
567 			    text_start);
568 			pr->ps_pin.pn_start = base;
569 			pr->ps_pin.pn_end = base + len;
570 			pr->ps_pin.pn_pins = pins;
571 			pr->ps_pin.pn_npins = npins;
572 		}
573 	} else {
574 		error = EINVAL;	/* no pin table */
575 		goto bad1;
576 	}
577 
578 	vn_marktext(nd.ni_vp);
579 
580 bad1:
581 	VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p);
582 bad:
583 	free(ph, M_TEMP, phsize);
584 
585 	vput(nd.ni_vp);
586 	return (error);
587 }
588 
/*
 * Prepare an Elf binary's exec package
 *
 * First, set up the various offsets/lengths in the exec package.
 *
 * Then, mark the text image busy (so it can be demand paged) or error out if
 * this is not possible.  Finally, set up vmcmds for the text, data, bss, and
 * stack segments.
 *
 * Returns the result of exec_setup_stack() on success.  On failure the
 * queued vmcmds are discarded and an errno value is returned; failure
 * paths that did not set an error return ENOEXEC.
 */
int
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
	Elf_Ehdr *eh = epp->ep_hdr;
	Elf_Phdr *ph, *pp, *base_ph = NULL, *syscall_ph = NULL;
	Elf_Addr phdr = 0, exe_base = 0, exe_end = 0;
	int error, i, has_phdr = 0, names = 0, textrel = 0;
	char *interp = NULL;
	u_long phsize;
	size_t randomizequota = ELF_RANDOMIZE_LIMIT;

	/* The header buffer must hold at least a complete ELF header. */
	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
		return (ENOEXEC);

	if (elf_check_header(eh) ||
	   (eh->e_type != ET_EXEC && eh->e_type != ET_DYN))
		return (ENOEXEC);

	/*
	 * check if vnode is in open for writing, because we want to demand-
	 * page out of it.  if it is, don't do it, for various reasons.
	 */
	if (epp->ep_vp->v_writecount != 0) {
#ifdef DIAGNOSTIC
		if (epp->ep_vp->v_flag & VTEXT)
			panic("exec: a VTEXT vnode has writecount != 0");
#endif
		return (ETXTBSY);
	}
	/*
	 * Allocate space to hold all the program headers, and read them
	 * from the file
	 */
	ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);

	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph,
	    phsize)) != 0)
		goto bad;

	/* ELF_NO_ADDR marks "no text/data segment seen yet". */
	epp->ep_tsize = ELF_NO_ADDR;
	epp->ep_dsize = ELF_NO_ADDR;

	/*
	 * First pass: validate the headers, fetch the interpreter path
	 * and remember the first PT_LOAD segment.
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		if ((pp->p_align > 1) && !powerof2(pp->p_align)) {
			error = EINVAL;
			goto bad;
		}

		if (pp->p_type == PT_INTERP && !interp) {
			/*
			 * The gotos below leave error at 0, which the bad:
			 * label turns into ENOEXEC.
			 */
			if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN)
				goto bad;
			interp = pool_get(&namei_pool, PR_WAITOK);
			if ((error = elf_read_from(p, epp->ep_vp,
			    pp->p_offset, interp, pp->p_filesz)) != 0) {
				goto bad;
			}
			/* The path must be NUL terminated inside the segment. */
			if (interp[pp->p_filesz - 1] != '\0')
				goto bad;
		} else if (pp->p_type == PT_LOAD) {
			if (pp->p_filesz > pp->p_memsz ||
			    pp->p_memsz == 0) {
				error = EINVAL;
				goto bad;
			}
			if (base_ph == NULL)
				base_ph = pp;
		} else if (pp->p_type == PT_PHDR) {
			has_phdr = 1;
		}
	}

	/*
	 * Verify this is an OpenBSD executable.  If it's marked that way
	 * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment.
	 */
	if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0)
		goto bad;
	if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD)
		names |= ELF_NOTE_NAME_OPENBSD;

	if (eh->e_type == ET_DYN) {
		/* need phdr and load sections for PIE */
		if (!has_phdr || base_ph == NULL || base_ph->p_vaddr != 0) {
			error = EINVAL;
			goto bad;
		}
		/* randomize exe_base for PIE */
		exe_base = uvm_map_pie(base_ph->p_align);

		/*
		 * Check if DYNAMIC contains DT_TEXTREL
		 */
		for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
			Elf_Dyn *dt;
			int j;

			switch (pp->p_type) {
			case PT_DYNAMIC:
				/* Oversized dynamic sections are not scanned. */
				if (pp->p_filesz > 64*1024)
					break;
				dt = malloc(pp->p_filesz, M_TEMP, M_WAITOK);
				error = vn_rdwr(UIO_READ, epp->ep_vp,
				    (caddr_t)dt, pp->p_filesz, pp->p_offset,
				    UIO_SYSSPACE, IO_UNIT, p->p_ucred, NULL, p);
				/*
				 * A read failure only skips the scan; the
				 * exec carries on without VMCMD_TEXTREL.
				 */
				if (error) {
					free(dt, M_TEMP, pp->p_filesz);
					break;
				}
				for (j = 0; j < pp->p_filesz / sizeof(*dt); j++) {
					if (dt[j].d_tag == DT_TEXTREL) {
						textrel = VMCMD_TEXTREL;
						break;
					}
				}
				free(dt, M_TEMP, pp->p_filesz);
				break;
			default:
				break;
			}
		}
	}

	/*
	 * Load all the necessary sections
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		Elf_Addr addr, size = 0;
		int prot = 0;
		int flags = 0;

		switch (pp->p_type) {
		case PT_LOAD:
			/*
			 * PIE: the first PT_LOAD goes to exe_base and the
			 * rest are placed relative to it.  Otherwise the
			 * segment is loaded at the address in the file.
			 */
			if (exe_base != 0) {
				if (pp == base_ph) {
					flags = VMCMD_BASE;
					addr = exe_base;
				} else {
					flags = VMCMD_RELATIVE;
					addr = pp->p_vaddr - base_ph->p_vaddr;
				}
			} else
				addr = ELF_NO_ADDR;

			/* Static binaries may not call pinsyscalls() */
			if (interp == NULL)
				p->p_vmspace->vm_map.flags |= VM_MAP_PINSYSCALL_ONCE;

			/*
			 * Calculates size of text and data segments
			 * by starting at first and going to end of last.
			 * 'rwx' sections are treated as data.
			 * this is correct for BSS_PLT, but may not be
			 * for DATA_PLT, is fine for TEXT_PLT.
			 */
			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
			    pp, &addr, &size, &prot, flags | textrel);

			/*
			 * Update exe_base in case alignment was off.
			 * For PIE, addr is relative to exe_base so
			 * adjust it (non PIE exe_base is 0 so no change).
			 */
			if (flags == VMCMD_BASE)
				exe_base = addr;
			else
				addr += exe_base;

			/*
			 * Decide whether it's text or data by looking
			 * at the protection of the section
			 */
			if (prot & PROT_WRITE) {
				/* data section */
				if (epp->ep_dsize == ELF_NO_ADDR) {
					epp->ep_daddr = addr;
					epp->ep_dsize = size;
				} else {
					/* Grow the recorded data range. */
					if (addr < epp->ep_daddr) {
						epp->ep_dsize =
						    epp->ep_dsize +
						    epp->ep_daddr -
						    addr;
						epp->ep_daddr = addr;
					} else
						epp->ep_dsize = addr+size -
						    epp->ep_daddr;
				}
			} else if (prot & PROT_EXEC) {
				/* text section */
				if (epp->ep_tsize == ELF_NO_ADDR) {
					epp->ep_taddr = addr;
					epp->ep_tsize = size;
				} else {
					/* Grow the recorded text range. */
					if (addr < epp->ep_taddr) {
						epp->ep_tsize =
						    epp->ep_tsize +
						    epp->ep_taddr -
						    addr;
						epp->ep_taddr = addr;
					} else
						epp->ep_tsize = addr+size -
						    epp->ep_taddr;
				}
				if (interp == NULL)
					exe_end = epp->ep_taddr +
					    epp->ep_tsize;	/* end of TEXT */
			}
			break;

		case PT_SHLIB:
			error = ENOEXEC;
			goto bad;

		case PT_INTERP:
			/* Already did this one */
		case PT_NOTE:
			break;

		case PT_PHDR:
			/* Note address of program headers (in text segment) */
			phdr = pp->p_vaddr;
			break;

		case PT_OPENBSD_RANDOMIZE:
			/* Cap the total amount of random data per file. */
			if (ph[i].p_memsz > randomizequota) {
				error = ENOMEM;
				goto bad;
			}
			randomizequota -= ph[i].p_memsz;
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		case PT_DYNAMIC:
#if defined (__mips__)
			/* DT_DEBUG is not ready on mips */
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
#endif
			break;
		case PT_GNU_RELRO:
		case PT_OPENBSD_MUTABLE:
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;
		case PT_OPENBSD_SYSCALLS:
			/* Only used for binaries without an interpreter. */
			if (interp == NULL)
				syscall_ph = &ph[i];
			break;
		default:
			/*
			 * Not fatal, we don't need to understand everything
			 * :-)
			 */
			break;
		}
	}

	/*
	 * Binaries without an interpreter carry their own pin table;
	 * store it in the exec package.
	 */
	if (syscall_ph) {
		vaddr_t base = exe_base;
		size_t len = exe_end - exe_base;
		u_int *pins;
		int npins;

		npins = elf_read_pintable(p, epp->ep_vp, syscall_ph,
		    &pins, 0, len);
		if (npins) {
			elf_adjustpins(&base, &len, pins, npins,
			    epp->ep_taddr - exe_base);
			epp->ep_pinstart = base;
			epp->ep_pinend = base + len;
			epp->ep_pins = pins;
			epp->ep_npins = npins;
		}
	}

	/* Turn the PT_PHDR address into a run-time address. */
	phdr += exe_base;

	/*
	 * Strangely some linux programs may have all load sections marked
	 * writeable, in this case, textsize is not -1, but rather 0;
	 */
	if (epp->ep_tsize == ELF_NO_ADDR)
		epp->ep_tsize = 0;
	/*
	 * Another possibility is that it has all load sections marked
	 * read-only.  Fake a zero-sized data segment right after the
	 * text segment.
	 */
	if (epp->ep_dsize == ELF_NO_ADDR) {
		epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize);
		epp->ep_dsize = 0;
	}

	epp->ep_interp = interp;
	epp->ep_entry = eh->e_entry + exe_base;

	/*
	 * Check if we found a dynamically linked binary and arrange to load
	 * its interpreter when the exec file is released.
	 */
	if (interp || eh->e_type == ET_DYN) {
		struct elf_args *ap;

		/* Consumed and freed by exec_elf_fixup(). */
		ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK);

		ap->arg_phaddr = phdr;
		ap->arg_phentsize = eh->e_phentsize;
		ap->arg_phnum = eh->e_phnum;
		ap->arg_entry = eh->e_entry + exe_base;
		ap->arg_interp = exe_base;

		epp->ep_args = ap;
	}

	free(ph, M_TEMP, phsize);
	vn_marktext(epp->ep_vp);
	return (exec_setup_stack(p, epp));

bad:
	if (interp)
		pool_put(&namei_pool, interp);
	free(ph, M_TEMP, phsize);
	kill_vmcmds(&epp->ep_vmcmds);
	/* Failure paths that did not pick an error report ENOEXEC. */
	if (error == 0)
		return (ENOEXEC);
	return (error);
}
927 
/*
 * CPU capability words handed to userland by exec_elf_fixup() through
 * the AUX_hwcap and AUX_hwcap2 auxiliary vector entries.
 * NOTE(review): no assignment is visible in this part of the file;
 * presumably machine-dependent code fills them in -- confirm.
 */
#ifdef __HAVE_CPU_HWCAP
unsigned long hwcap;
#endif /* __HAVE_CPU_HWCAP */

#ifdef __HAVE_CPU_HWCAP2
unsigned long hwcap2;
#endif /* __HAVE_CPU_HWCAP2 */
935 
936 /*
937  * Phase II of load. It is now safe to load the interpreter. Info collected
938  * when loading the program is available for setup of the interpreter.
939  */
940 int
941 exec_elf_fixup(struct proc *p, struct exec_package *epp)
942 {
943 	char	*interp;
944 	int	error = 0;
945 	struct	elf_args *ap;
946 	AuxInfo ai[ELF_AUX_ENTRIES], *a;
947 
948 	ap = epp->ep_args;
949 	if (ap == NULL) {
950 		return (0);
951 	}
952 
953 	interp = epp->ep_interp;
954 
955 	/* disable kbind in programs that don't use ld.so */
956 	if (interp == NULL)
957 		p->p_p->ps_kbind_addr = BOGO_PC;
958 
959 	if (interp &&
960 	    (error = elf_load_file(p, interp, epp, ap)) != 0) {
961 		uprintf("execve: cannot load %s\n", interp);
962 		free(ap, M_TEMP, sizeof *ap);
963 		pool_put(&namei_pool, interp);
964 		kill_vmcmds(&epp->ep_vmcmds);
965 		return (error);
966 	}
967 	/*
968 	 * We have to do this ourselves...
969 	 */
970 	error = exec_process_vmcmds(p, epp);
971 
972 	/*
973 	 * Push extra arguments on the stack needed by dynamically
974 	 * linked binaries
975 	 */
976 	if (error == 0) {
977 		memset(&ai, 0, sizeof ai);
978 		a = ai;
979 
980 		a->au_id = AUX_phdr;
981 		a->au_v = ap->arg_phaddr;
982 		a++;
983 
984 		a->au_id = AUX_phent;
985 		a->au_v = ap->arg_phentsize;
986 		a++;
987 
988 		a->au_id = AUX_phnum;
989 		a->au_v = ap->arg_phnum;
990 		a++;
991 
992 		a->au_id = AUX_pagesz;
993 		a->au_v = PAGE_SIZE;
994 		a++;
995 
996 		a->au_id = AUX_base;
997 		a->au_v = ap->arg_interp;
998 		a++;
999 
1000 		a->au_id = AUX_flags;
1001 		a->au_v = 0;
1002 		a++;
1003 
1004 		a->au_id = AUX_entry;
1005 		a->au_v = ap->arg_entry;
1006 		a++;
1007 
1008 #ifdef __HAVE_CPU_HWCAP
1009 		a->au_id = AUX_hwcap;
1010 		a->au_v = hwcap;
1011 		a++;
1012 #endif /* __HAVE_CPU_HWCAP */
1013 
1014 #ifdef __HAVE_CPU_HWCAP2
1015 		a->au_id = AUX_hwcap2;
1016 		a->au_v = hwcap2;
1017 		a++;
1018 #endif /* __HAVE_CPU_HWCAP2 */
1019 
1020 		a->au_id = AUX_openbsd_timekeep;
1021 		a->au_v = p->p_p->ps_timekeep;
1022 		a++;
1023 
1024 		a->au_id = AUX_null;
1025 		a->au_v = 0;
1026 		a++;
1027 
1028 		error = copyout(ai, epp->ep_auxinfo, sizeof ai);
1029 	}
1030 	free(ap, M_TEMP, sizeof *ap);
1031 	if (interp)
1032 		pool_put(&namei_pool, interp);
1033 	return (error);
1034 }
1035 
1036 int
1037 elf_os_pt_note_name(Elf_Note *np)
1038 {
1039 	int i, j;
1040 
1041 	for (i = 0; i < nitems(elf_note_names); i++) {
1042 		size_t namlen = strlen(elf_note_names[i].name);
1043 		if (np->namesz < namlen)
1044 			continue;
1045 		/* verify name padding (after the NUL) is NUL */
1046 		for (j = namlen + 1; j < elfround(np->namesz); j++)
1047 			if (((char *)(np + 1))[j] != '\0')
1048 				continue;
1049 		/* verify desc padding is NUL */
1050 		for (j = np->descsz; j < elfround(np->descsz); j++)
1051 			if (((char *)(np + 1))[j] != '\0')
1052 				continue;
1053 		if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0)
1054 			return elf_note_names[i].id;
1055 	}
1056 	return (0);
1057 }
1058 
/*
 * Scan the program headers of the executable in epp for OS marker
 * segments and vendor notes.
 *
 * PT_OPENBSD_WXNEEDED and PT_OPENBSD_NOBTCFI set the matching EXEC_*
 * bit in epp->ep_flags.  Every PT_NOTE segment of at most 1024 bytes is
 * read and each note in it is run through elf_os_pt_note_name(); the
 * resulting bits are collected and stored in *namesp.
 *
 * Returns 0 if an OpenBSD note was found and ENOEXEC otherwise.  Read
 * errors are not reported as such: they end the scan, and the result
 * depends only on the notes seen up to that point.
 */
int
elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp)
{
	Elf_Phdr *hph, *ph;
	Elf_Note *np = NULL;
	size_t phsize, offset, pfilesz = 0, total;
	int error, names = 0;

	/* Fetch a private copy of the program headers. */
	hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);
	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff,
	    hph, phsize)) != 0)
		goto out1;

	for (ph = hph;  ph < &hph[eh->e_phnum]; ph++) {
		if (ph->p_type == PT_OPENBSD_WXNEEDED) {
			epp->ep_flags |= EXEC_WXNEEDED;
			continue;
		}
		if (ph->p_type == PT_OPENBSD_NOBTCFI) {
			epp->ep_flags |= EXEC_NOBTCFI;
			continue;
		}

		/* Only reasonably small note segments are examined. */
		if (ph->p_type != PT_NOTE || ph->p_filesz > 1024)
			continue;

		/*
		 * The buffer from the previous note segment is reused when
		 * the sizes match; otherwise it is replaced.
		 */
		if (np && ph->p_filesz != pfilesz) {
			free(np, M_TEMP, pfilesz);
			np = NULL;
		}
		if (!np)
			np = malloc(ph->p_filesz, M_TEMP, M_WAITOK);
		pfilesz = ph->p_filesz;
		if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset,
		    np, ph->p_filesz)) != 0)
			goto out2;

		/*
		 * Walk the notes in this segment.  The walk stops at the
		 * first note whose header or padded payload would run past
		 * the end of the segment.
		 */
		for (offset = 0; offset < ph->p_filesz; offset += total) {
			Elf_Note *np2 = (Elf_Note *)((char *)np + offset);

			if (offset + sizeof(Elf_Note) > ph->p_filesz)
				break;
			total = sizeof(Elf_Note) + elfround(np2->namesz) +
			    elfround(np2->descsz);
			if (offset + total > ph->p_filesz)
				break;
			names |= elf_os_pt_note_name(np2);
		}
	}

out2:
	free(np, M_TEMP, pfilesz);
out1:
	free(hph, M_TEMP, phsize);
	/* *namesp is written on every path, including read failures. */
	*namesp = names;
	return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC);
}
1117 
1118 /*
1119  * Start of routines related to dumping core
1120  */
1121 
1122 #ifdef SMALL_KERNEL
/*
 * SMALL_KERNEL variant: no core file is written; both arguments are
 * ignored and EPERM is always returned.
 */
int
coredump_elf(struct proc *p, void *cookie)
{
	return (EPERM);
}
1128 #else /* !SMALL_KERNEL */
1129 
1130 struct writesegs_state {
1131 	off_t	notestart;
1132 	off_t	secstart;
1133 	off_t	secoff;
1134 	struct	proc *p;
1135 	void	*iocookie;
1136 	Elf_Phdr *psections;
1137 	size_t	psectionslen;
1138 	size_t	notesize;
1139 	int	npsections;
1140 };
1141 
1142 uvm_coredump_setup_cb	coredump_setup_elf;
1143 uvm_coredump_walk_cb	coredump_walk_elf;
1144 
1145 int	coredump_notes_elf(struct proc *, void *, size_t *);
1146 int	coredump_note_elf(struct proc *, void *, size_t *);
1147 int	coredump_writenote_elf(struct proc *, void *, Elf_Note *,
1148 	    const char *, void *);
1149 
1150 extern vaddr_t sigcode_va;
1151 extern vsize_t sigcode_sz;
1152 
1153 int
1154 coredump_elf(struct proc *p, void *cookie)
1155 {
1156 #ifdef DIAGNOSTIC
1157 	off_t offset;
1158 #endif
1159 	struct writesegs_state ws;
1160 	size_t notesize;
1161 	int error, i;
1162 
1163 	ws.p = p;
1164 	ws.iocookie = cookie;
1165 	ws.psections = NULL;
1166 
1167 	/*
1168 	 * Walk the map to get all the segment offsets and lengths,
1169 	 * write out the ELF header.
1170 	 */
1171 	error = uvm_coredump_walkmap(p, coredump_setup_elf,
1172 	    coredump_walk_elf, &ws);
1173 	if (error)
1174 		goto out;
1175 
1176 	error = coredump_write(cookie, UIO_SYSSPACE, ws.psections,
1177 	    ws.psectionslen, 0);
1178 	if (error)
1179 		goto out;
1180 
1181 	/* Write out the notes. */
1182 	error = coredump_notes_elf(p, cookie, &notesize);
1183 	if (error)
1184 		goto out;
1185 
1186 #ifdef DIAGNOSTIC
1187 	if (notesize != ws.notesize)
1188 		panic("coredump: notesize changed: %zu != %zu",
1189 		    ws.notesize, notesize);
1190 	offset = ws.notestart + notesize;
1191 	if (offset != ws.secstart)
1192 		panic("coredump: offset %lld != secstart %lld",
1193 		    (long long) offset, (long long) ws.secstart);
1194 #endif
1195 
1196 	/* Pass 3: finally, write the sections themselves. */
1197 	for (i = 0; i < ws.npsections - 1; i++) {
1198 		Elf_Phdr *pent = &ws.psections[i];
1199 		if (pent->p_filesz == 0)
1200 			continue;
1201 
1202 #ifdef DIAGNOSTIC
1203 		if (offset != pent->p_offset)
1204 			panic("coredump: offset %lld != p_offset[%d] %lld",
1205 			    (long long) offset, i,
1206 			    (long long) pent->p_filesz);
1207 #endif
1208 
1209 		/*
1210 		 * Since the sigcode is mapped execute-only, we can't
1211 		 * read it.  So use the kernel mapping for it instead.
1212 		 */
1213 		if (pent->p_vaddr == p->p_p->ps_sigcode &&
1214 		    pent->p_filesz == sigcode_sz) {
1215 			error = coredump_write(cookie, UIO_SYSSPACE,
1216 			    (void *)sigcode_va, sigcode_sz, 0);
1217 		} else {
1218 			error = coredump_write(cookie, UIO_USERSPACE,
1219 			    (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz,
1220 			    (pent->p_flags & PF_ISVNODE));
1221 		}
1222 		if (error)
1223 			goto out;
1224 
1225 		coredump_unmap(cookie, (vaddr_t)pent->p_vaddr,
1226 		    (vaddr_t)pent->p_vaddr + pent->p_filesz);
1227 
1228 #ifdef DIAGNOSTIC
1229 		offset += ws.psections[i].p_filesz;
1230 #endif
1231 	}
1232 
1233 out:
1234 	free(ws.psections, M_TEMP, ws.psectionslen);
1235 	return (error);
1236 }
1237 
1238 
1239 /*
1240  * Normally we lay out core files like this:
1241  *	[ELF Header] [Program headers] [Notes] [data for PT_LOAD segments]
1242  *
1243  * However, if there's >= 65535 segments then it overflows the field
1244  * in the ELF header, so the standard specifies putting a magic
1245  * number there and saving the real count in the .sh_info field of
1246  * the first *section* header...which requires generating a section
1247  * header.  To avoid confusing tools, we include an .shstrtab section
1248  * as well so all the indexes look valid.  So in this case we lay
1249  * out the core file like this:
1250  *	[ELF Header] [Section Headers] [.shstrtab] [Program headers] \
1251  *	[Notes] [data for PT_LOAD segments]
1252  *
1253  * The 'shstrtab' structure below is data for the second of the two
1254  * section headers, plus the .shstrtab itself, in one const buffer.
1255  */
1256 static const struct {
1257     Elf_Shdr	shdr;
1258     char	shstrtab[sizeof(ELF_SHSTRTAB) + 1];
1259 } shstrtab = {
1260     .shdr = {
1261 	.sh_name = 1,			/* offset in .shstrtab below */
1262 	.sh_type = SHT_STRTAB,
1263 	.sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr),
1264 	.sh_size = sizeof(ELF_SHSTRTAB) + 1,
1265 	.sh_addralign = 1,
1266     },
1267     .shstrtab = "\0" ELF_SHSTRTAB,
1268 };
1269 
1270 int
1271 coredump_setup_elf(int segment_count, void *cookie)
1272 {
1273 	Elf_Ehdr ehdr;
1274 	struct writesegs_state *ws = cookie;
1275 	Elf_Phdr *note;
1276 	int error;
1277 
1278 	/* Get the count of segments, plus one for the PT_NOTE */
1279 	ws->npsections = segment_count + 1;
1280 
1281 	/* Get the size of the notes. */
1282 	error = coredump_notes_elf(ws->p, NULL, &ws->notesize);
1283 	if (error)
1284 		return error;
1285 
1286 	/* Setup the ELF header */
1287 	memset(&ehdr, 0, sizeof(ehdr));
1288 	memcpy(ehdr.e_ident, ELFMAG, SELFMAG);
1289 	ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS;
1290 	ehdr.e_ident[EI_DATA] = ELF_TARG_DATA;
1291 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
1292 	/* XXX Should be the OSABI/ABI version of the executable. */
1293 	ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
1294 	ehdr.e_ident[EI_ABIVERSION] = 0;
1295 	ehdr.e_type = ET_CORE;
1296 	/* XXX This should be the e_machine of the executable. */
1297 	ehdr.e_machine = ELF_TARG_MACH;
1298 	ehdr.e_version = EV_CURRENT;
1299 	ehdr.e_entry = 0;
1300 	ehdr.e_flags = 0;
1301 	ehdr.e_ehsize = sizeof(ehdr);
1302 	ehdr.e_phentsize = sizeof(Elf_Phdr);
1303 
1304 	if (ws->npsections < PN_XNUM) {
1305 		ehdr.e_phoff = sizeof(ehdr);
1306 		ehdr.e_shoff = 0;
1307 		ehdr.e_phnum = ws->npsections;
1308 		ehdr.e_shentsize = 0;
1309 		ehdr.e_shnum = 0;
1310 		ehdr.e_shstrndx = 0;
1311 	} else {
1312 		/* too many segments, use extension setup */
1313 		ehdr.e_shoff = sizeof(ehdr);
1314 		ehdr.e_phnum = PN_XNUM;
1315 		ehdr.e_shentsize = sizeof(Elf_Shdr);
1316 		ehdr.e_shnum = 2;
1317 		ehdr.e_shstrndx = 1;
1318 		ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size;
1319 	}
1320 
1321 	/* Write out the ELF header. */
1322 	error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr), 0);
1323 	if (error)
1324 		return error;
1325 
1326 	/*
1327 	 * If an section header is needed to store extension info, write
1328 	 * it out after the ELF header and before the program header.
1329 	 */
1330 	if (ehdr.e_shnum != 0) {
1331 		Elf_Shdr shdr = { .sh_info = ws->npsections };
1332 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr,
1333 		    sizeof shdr, 0);
1334 		if (error)
1335 			return error;
1336 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab,
1337 		    sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab), 0);
1338 		if (error)
1339 			return error;
1340 	}
1341 
1342 	/*
1343 	 * Allocate the segment header array and setup to collect
1344 	 * the section sizes and offsets
1345 	 */
1346 	ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr),
1347 	    M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO);
1348 	if (ws->psections == NULL)
1349 		return ENOMEM;
1350 	ws->psectionslen = ws->npsections * sizeof(Elf_Phdr);
1351 
1352 	ws->notestart = ehdr.e_phoff + ws->psectionslen;
1353 	ws->secstart = ws->notestart + ws->notesize;
1354 	ws->secoff = ws->secstart;
1355 
1356 	/* Fill in the PT_NOTE segment header in the last slot */
1357 	note = &ws->psections[ws->npsections - 1];
1358 	note->p_type = PT_NOTE;
1359 	note->p_offset = ws->notestart;
1360 	note->p_vaddr = 0;
1361 	note->p_paddr = 0;
1362 	note->p_filesz = ws->notesize;
1363 	note->p_memsz = 0;
1364 	note->p_flags = PF_R;
1365 	note->p_align = ELFROUNDSIZE;
1366 
1367 	return (0);
1368 }
1369 
1370 int
1371 coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot,
1372     int isvnode, int nsegment, void *cookie)
1373 {
1374 	struct writesegs_state *ws = cookie;
1375 	Elf_Phdr phdr;
1376 	vsize_t size, realsize;
1377 
1378 	size = end - start;
1379 	realsize = realend - start;
1380 
1381 	phdr.p_type = PT_LOAD;
1382 	phdr.p_offset = ws->secoff;
1383 	phdr.p_vaddr = start;
1384 	phdr.p_paddr = 0;
1385 	phdr.p_filesz = realsize;
1386 	phdr.p_memsz = size;
1387 	phdr.p_flags = 0;
1388 	if (prot & PROT_READ)
1389 		phdr.p_flags |= PF_R;
1390 	if (prot & PROT_WRITE)
1391 		phdr.p_flags |= PF_W;
1392 	if (prot & PROT_EXEC)
1393 		phdr.p_flags |= PF_X;
1394 	if (isvnode)
1395 		phdr.p_flags |= PF_ISVNODE;
1396 	phdr.p_align = PAGE_SIZE;
1397 
1398 	ws->secoff += phdr.p_filesz;
1399 	ws->psections[nsegment] = phdr;
1400 
1401 	return (0);
1402 }
1403 
1404 int
1405 coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
1406 {
1407 	struct elfcore_procinfo cpi;
1408 	Elf_Note nhdr;
1409 	struct process *pr = p->p_p;
1410 	struct proc *q;
1411 	size_t size, notesize;
1412 	int error;
1413 
1414 	KASSERT(!P_HASSIBLING(p) || pr->ps_single != NULL);
1415 	size = 0;
1416 
1417 	/* First, write an elfcore_procinfo. */
1418 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1419 	    elfround(sizeof(cpi));
1420 	if (iocookie) {
1421 		memset(&cpi, 0, sizeof(cpi));
1422 
1423 		cpi.cpi_version = ELFCORE_PROCINFO_VERSION;
1424 		cpi.cpi_cpisize = sizeof(cpi);
1425 		cpi.cpi_signo = p->p_sisig;
1426 		cpi.cpi_sigcode = p->p_sicode;
1427 
1428 		cpi.cpi_sigpend = p->p_siglist | pr->ps_siglist;
1429 		cpi.cpi_sigmask = p->p_sigmask;
1430 		cpi.cpi_sigignore = pr->ps_sigacts->ps_sigignore;
1431 		cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
1432 
1433 		cpi.cpi_pid = pr->ps_pid;
1434 		cpi.cpi_ppid = pr->ps_ppid;
1435 		cpi.cpi_pgrp = pr->ps_pgid;
1436 		if (pr->ps_session->s_leader)
1437 			cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
1438 		else
1439 			cpi.cpi_sid = 0;
1440 
1441 		cpi.cpi_ruid = p->p_ucred->cr_ruid;
1442 		cpi.cpi_euid = p->p_ucred->cr_uid;
1443 		cpi.cpi_svuid = p->p_ucred->cr_svuid;
1444 
1445 		cpi.cpi_rgid = p->p_ucred->cr_rgid;
1446 		cpi.cpi_egid = p->p_ucred->cr_gid;
1447 		cpi.cpi_svgid = p->p_ucred->cr_svgid;
1448 
1449 		(void)strlcpy(cpi.cpi_name, pr->ps_comm, sizeof(cpi.cpi_name));
1450 
1451 		nhdr.namesz = sizeof("OpenBSD");
1452 		nhdr.descsz = sizeof(cpi);
1453 		nhdr.type = NT_OPENBSD_PROCINFO;
1454 
1455 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1456 		    "OpenBSD", &cpi);
1457 		if (error)
1458 			return (error);
1459 	}
1460 	size += notesize;
1461 
1462 	/* Second, write an NT_OPENBSD_AUXV note. */
1463 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1464 	    elfround(ELF_AUX_WORDS * sizeof(char *));
1465 	if (iocookie && pr->ps_auxinfo) {
1466 
1467 		nhdr.namesz = sizeof("OpenBSD");
1468 		nhdr.descsz = ELF_AUX_WORDS * sizeof(char *);
1469 		nhdr.type = NT_OPENBSD_AUXV;
1470 
1471 		error = coredump_write(iocookie, UIO_SYSSPACE,
1472 		    &nhdr, sizeof(nhdr), 0);
1473 		if (error)
1474 			return (error);
1475 
1476 		error = coredump_write(iocookie, UIO_SYSSPACE,
1477 		    "OpenBSD", elfround(nhdr.namesz), 0);
1478 		if (error)
1479 			return (error);
1480 
1481 		error = coredump_write(iocookie, UIO_USERSPACE,
1482 		    (caddr_t)pr->ps_auxinfo, nhdr.descsz, 0);
1483 		if (error)
1484 			return (error);
1485 	}
1486 	size += notesize;
1487 
1488 #ifdef PT_WCOOKIE
1489 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1490 	    elfround(sizeof(register_t));
1491 	if (iocookie) {
1492 		register_t wcookie;
1493 
1494 		nhdr.namesz = sizeof("OpenBSD");
1495 		nhdr.descsz = sizeof(register_t);
1496 		nhdr.type = NT_OPENBSD_WCOOKIE;
1497 
1498 		wcookie = process_get_wcookie(p);
1499 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1500 		    "OpenBSD", &wcookie);
1501 		if (error)
1502 			return (error);
1503 	}
1504 	size += notesize;
1505 #endif
1506 
1507 	/*
1508 	 * Now write the register info for the thread that caused the
1509 	 * coredump.
1510 	 */
1511 	error = coredump_note_elf(p, iocookie, &notesize);
1512 	if (error)
1513 		return (error);
1514 	size += notesize;
1515 
1516 	/*
1517 	 * Now, for each thread, write the register info and any other
1518 	 * per-thread notes.  Since we're dumping core, all the other
1519 	 * threads in the process have been stopped and the list can't
1520 	 * change.
1521 	 */
1522 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
1523 		if (q == p)		/* we've taken care of this thread */
1524 			continue;
1525 		error = coredump_note_elf(q, iocookie, &notesize);
1526 		if (error)
1527 			return (error);
1528 		size += notesize;
1529 	}
1530 
1531 	*sizep = size;
1532 	return (0);
1533 }
1534 
1535 int
1536 coredump_note_elf(struct proc *p, void *iocookie, size_t *sizep)
1537 {
1538 	Elf_Note nhdr;
1539 	int size, notesize, error;
1540 	int namesize;
1541 	char name[64+ELFROUNDSIZE];
1542 	struct reg intreg;
1543 #ifdef PT_GETFPREGS
1544 	struct fpreg freg;
1545 #endif
1546 #ifdef PT_PACMASK
1547 	register_t pacmask[2];
1548 #endif
1549 
1550 	size = 0;
1551 
1552 	snprintf(name, sizeof(name)-ELFROUNDSIZE, "%s@%d",
1553 	    "OpenBSD", p->p_tid + THREAD_PID_OFFSET);
1554 	namesize = strlen(name) + 1;
1555 	memset(name + namesize, 0, elfround(namesize) - namesize);
1556 
1557 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(intreg));
1558 	if (iocookie) {
1559 		error = process_read_regs(p, &intreg);
1560 		if (error)
1561 			return (error);
1562 
1563 		nhdr.namesz = namesize;
1564 		nhdr.descsz = sizeof(intreg);
1565 		nhdr.type = NT_OPENBSD_REGS;
1566 
1567 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1568 		    name, &intreg);
1569 		if (error)
1570 			return (error);
1571 
1572 	}
1573 	size += notesize;
1574 
1575 #ifdef PT_GETFPREGS
1576 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(freg));
1577 	if (iocookie) {
1578 		error = process_read_fpregs(p, &freg);
1579 		if (error)
1580 			return (error);
1581 
1582 		nhdr.namesz = namesize;
1583 		nhdr.descsz = sizeof(freg);
1584 		nhdr.type = NT_OPENBSD_FPREGS;
1585 
1586 		error = coredump_writenote_elf(p, iocookie, &nhdr, name, &freg);
1587 		if (error)
1588 			return (error);
1589 	}
1590 	size += notesize;
1591 #endif
1592 
1593 #ifdef PT_PACMASK
1594 	notesize = sizeof(nhdr) + elfround(namesize) +
1595 	    elfround(sizeof(pacmask));
1596 	if (iocookie) {
1597 		pacmask[0] = pacmask[1] = process_get_pacmask(p);
1598 
1599 		nhdr.namesz = namesize;
1600 		nhdr.descsz = sizeof(pacmask);
1601 		nhdr.type = NT_OPENBSD_PACMASK;
1602 
1603 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1604 		    name, &pacmask);
1605 		if (error)
1606 			return (error);
1607 	}
1608 	size += notesize;
1609 #endif
1610 
1611 	*sizep = size;
1612 	/* XXX Add hook for machdep per-LWP notes. */
1613 	return (0);
1614 }
1615 
1616 int
1617 coredump_writenote_elf(struct proc *p, void *cookie, Elf_Note *nhdr,
1618     const char *name, void *data)
1619 {
1620 	int error;
1621 
1622 	error = coredump_write(cookie, UIO_SYSSPACE, nhdr, sizeof(*nhdr), 0);
1623 	if (error)
1624 		return error;
1625 
1626 	error = coredump_write(cookie, UIO_SYSSPACE, name,
1627 	    elfround(nhdr->namesz), 0);
1628 	if (error)
1629 		return error;
1630 
1631 	return coredump_write(cookie, UIO_SYSSPACE, data, nhdr->descsz, 0);
1632 }
1633 #endif /* !SMALL_KERNEL */
1634