xref: /openbsd-src/sys/kern/exec_elf.c (revision 68dd5bb1859285b71cb62a10bf107b8ad54064d9)
1 /*	$OpenBSD: exec_elf.c,v 1.185 2024/01/17 22:22:25 kurt Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 Per Fogelstrom
5  * All rights reserved.
6  *
7  * Copyright (c) 1994 Christos Zoulas
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  */
33 
34 /*
35  * Copyright (c) 2001 Wasabi Systems, Inc.
36  * All rights reserved.
37  *
38  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed for the NetBSD Project by
51  *	Wasabi Systems, Inc.
52  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
53  *    or promote products derived from this software without specific prior
54  *    written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/malloc.h>
73 #include <sys/pool.h>
74 #include <sys/mount.h>
75 #include <sys/namei.h>
76 #include <sys/vnode.h>
77 #include <sys/core.h>
78 #include <sys/exec.h>
79 #include <sys/exec_elf.h>
80 #include <sys/fcntl.h>
81 #include <sys/ptrace.h>
82 #include <sys/signalvar.h>
83 #include <sys/pledge.h>
84 #include <sys/syscall.h>
85 
86 #include <sys/mman.h>
87 
88 #include <uvm/uvm_extern.h>
89 
90 #include <machine/reg.h>
91 #include <machine/exec.h>
92 
93 int	elf_load_file(struct proc *, char *, struct exec_package *,
94 	    struct elf_args *);
95 int	elf_check_header(Elf_Ehdr *);
96 int	elf_read_from(struct proc *, struct vnode *, u_long, void *, int);
97 void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
98 	    Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
99 int	elf_os_pt_note_name(Elf_Note *);
100 int	elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);
101 int	elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
102 	    u_int **pinp, int is_ldso, size_t len);
103 
/*
 * Alignment helpers.  ELF_TRUNC() masks off the low bits of `a' to reach
 * the previous multiple of `b'; ELF_ROUND() moves `a' up to the next
 * multiple.  The mask arithmetic only gives multiples of `b' when `b' is
 * a power of two.
 */
#define ELF_TRUNC(a, b)		((a) & ~((b) - 1))
#define ELF_ROUND(a, b)		ELF_TRUNC((a) + (b) - 1, (b))
107 
/*
 * Upper bound on the number of program headers we accept in one image.
 * 32 should be a reasonable limit for ELF; the most we have seen so far
 * is 12.  The limit is enforced by elf_check_header() and also sizes the
 * on-stack load map in elf_load_file().
 */
#define ELF_MAX_VALID_PHDR 32
113 
/* Bit reported for a note whose owner name is "OpenBSD". */
#define ELF_NOTE_NAME_OPENBSD	0x01

/*
 * Owner names recognized in PT_NOTE segments, each paired with the bit
 * that elf_os_pt_note_name() returns for it.
 */
struct elf_note_name {
	char *name;
	int id;
} elf_note_names[] = {
	{ .name = "OpenBSD", .id = ELF_NOTE_NAME_OPENBSD },
};
122 
/*
 * Note name and descriptor sizes are padded to a multiple of
 * sizeof(Elf_Word); elfround() applies that padding to a size.
 */
#define	ELFROUNDSIZE	sizeof(Elf_Word)
#define	elfround(x)	roundup((x), ELFROUNDSIZE)
125 
126 
127 /*
128  * Check header for validity; return 0 for ok, ENOEXEC if error
129  */
130 int
131 elf_check_header(Elf_Ehdr *ehdr)
132 {
133 	/*
134 	 * We need to check magic, class size, endianness, and version before
135 	 * we look at the rest of the Elf_Ehdr structure. These few elements
136 	 * are represented in a machine independent fashion.
137 	 */
138 	if (!IS_ELF(*ehdr) ||
139 	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
140 	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
141 	    ehdr->e_ident[EI_VERSION] != ELF_TARG_VER)
142 		return (ENOEXEC);
143 
144 	/* Now check the machine dependent header */
145 	if (ehdr->e_machine != ELF_TARG_MACH ||
146 	    ehdr->e_version != ELF_TARG_VER)
147 		return (ENOEXEC);
148 
149 	/* Don't allow an insane amount of sections. */
150 	if (ehdr->e_phnum > ELF_MAX_VALID_PHDR)
151 		return (ENOEXEC);
152 
153 	return (0);
154 }
155 
156 /*
157  * Load a psection at the appropriate address
158  */
159 void
160 elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
161     Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags)
162 {
163 	u_long msize, lsize, psize, rm, rf;
164 	long diff, offset, bdiff;
165 	Elf_Addr base;
166 
167 	/*
168 	 * If the user specified an address, then we load there.
169 	 */
170 	if (*addr != ELF_NO_ADDR) {
171 		if (ph->p_align > 1) {
172 			*addr = ELF_TRUNC(*addr, ph->p_align);
173 			diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
174 			/* page align vaddr */
175 			base = *addr + trunc_page(ph->p_vaddr)
176 			    - ELF_TRUNC(ph->p_vaddr, ph->p_align);
177 		} else {
178 			diff = 0;
179 			base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr;
180 		}
181 	} else {
182 		*addr = ph->p_vaddr;
183 		if (ph->p_align > 1)
184 			*addr = ELF_TRUNC(*addr, ph->p_align);
185 		base = trunc_page(ph->p_vaddr);
186 		diff = ph->p_vaddr - *addr;
187 	}
188 	bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr);
189 
190 	/*
191 	 * Enforce W^X and map W|X segments without X permission
192 	 * initially.  The dynamic linker will make these read-only
193 	 * and add back X permission after relocation processing.
194 	 * Static executables with W|X segments will probably crash.
195 	 */
196 	*prot |= (ph->p_flags & PF_R) ? PROT_READ : 0;
197 	*prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0;
198 	if ((ph->p_flags & PF_W) == 0)
199 		*prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0;
200 
201 	/*
202 	 * Apply immutability as much as possible, but not text/rodata
203 	 * segments of textrel binaries, or RELRO or PT_OPENBSD_MUTABLE
204 	 * sections, or LOADS marked PF_OPENBSD_MUTABLE, or LOADS which
205 	 * violate W^X.
206 	 * Userland (meaning crt0 or ld.so) will repair those regions.
207 	 */
208 	if ((ph->p_flags & (PF_X | PF_W)) != (PF_X | PF_W) &&
209 	    ((ph->p_flags & PF_OPENBSD_MUTABLE) == 0))
210 		flags |= VMCMD_IMMUTABLE;
211 	if ((flags & VMCMD_TEXTREL) && (ph->p_flags & PF_W) == 0)
212 		flags &= ~VMCMD_IMMUTABLE;
213 
214 	msize = ph->p_memsz + diff;
215 	offset = ph->p_offset - bdiff;
216 	lsize = ph->p_filesz + bdiff;
217 	psize = round_page(lsize);
218 
219 	/*
220 	 * Because the pagedvn pager can't handle zero fill of the last
221 	 * data page if it's not page aligned we map the last page readvn.
222 	 */
223 	if (ph->p_flags & PF_W) {
224 		psize = trunc_page(lsize);
225 		if (psize > 0)
226 			NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp,
227 			    offset, *prot, flags);
228 		if (psize != lsize) {
229 			NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize,
230 			    base + psize, vp, offset + psize, *prot, flags);
231 		}
232 	} else {
233 		NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset,
234 		    *prot, flags);
235 	}
236 
237 	/*
238 	 * Check if we need to extend the size of the segment
239 	 */
240 	rm = round_page(*addr + ph->p_memsz + diff);
241 	rf = round_page(*addr + ph->p_filesz + diff);
242 
243 	if (rm != rf) {
244 		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0,
245 		    *prot, flags);
246 	}
247 	*size = msize;
248 }
249 
250 /*
251  * Read from vnode into buffer at offset.
252  */
253 int
254 elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf,
255     int size)
256 {
257 	int error;
258 	size_t resid;
259 
260 	if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE,
261 	    0, p->p_ucred, &resid, p)) != 0)
262 		return error;
263 	/*
264 	 * See if we got all of it
265 	 */
266 	if (resid != 0)
267 		return (ENOEXEC);
268 	return (0);
269 }
270 
271 /*
272  * rebase the pin offsets inside a base,len window for the text segment only.
273  */
274 void
275 elf_adjustpins(vaddr_t *basep, size_t *lenp, u_int *pins, int npins, u_int offset)
276 {
277 	int i;
278 
279 	/* Adjust offsets, base, len */
280 	for (i = 0; i < npins; i++) {
281 		if (pins[i] == -1 || pins[i] == 0)
282 			continue;
283 		pins[i] -= offset;
284 	}
285 	*basep += offset;
286 	*lenp -= offset;
287 }
288 
289 int
290 elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
291     u_int **pinp, int is_ldso, size_t len)
292 {
293 	struct pinsyscalls {
294 		u_int offset;
295 		u_int sysno;
296 	} *syscalls = NULL;
297 	int i, nsyscalls = 0, npins = 0;
298 	u_int *pins = NULL;
299 
300 	if (pp->p_filesz > SYS_MAXSYSCALL * 2 * sizeof(*syscalls) ||
301 	    pp->p_filesz % sizeof(*syscalls) != 0)
302 		goto bad;
303 	nsyscalls = pp->p_filesz / sizeof(*syscalls);
304 	syscalls = malloc(pp->p_filesz, M_PINSYSCALL, M_WAITOK);
305 	if (elf_read_from(p, vp, pp->p_offset, syscalls,
306 	    pp->p_filesz) != 0)
307 		goto bad;
308 
309 	/* Validate, and calculate pintable size */
310 	for (i = 0; i < nsyscalls; i++) {
311 		if (syscalls[i].sysno <= 0 ||
312 		    syscalls[i].sysno >= SYS_MAXSYSCALL ||
313 		    syscalls[i].offset > len)
314 			goto bad;
315 		npins = MAX(npins, syscalls[i].sysno);
316 	}
317 	if (is_ldso)
318 		npins = MAX(npins, SYS_kbind);	/* XXX see ld.so/loader.c */
319 	npins++;
320 
321 	/* Fill pintable: 0 = invalid, -1 = allowed, else offset from base */
322 	pins = mallocarray(npins, sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
323 	for (i = 0; i < nsyscalls; i++) {
324 		if (pins[syscalls[i].sysno])
325 			pins[syscalls[i].sysno] = -1;	/* duplicated */
326 		else
327 			pins[syscalls[i].sysno] = syscalls[i].offset;
328 	}
329 	if (is_ldso)
330 		pins[SYS_kbind] = -1;	/* XXX see ld.so/loader.c */
331 	*pinp = pins;
332 	pins = NULL;
333 bad:
334 	free(syscalls, M_PINSYSCALL, nsyscalls * sizeof(*syscalls));
335 	free(pins, M_PINSYSCALL, npins * sizeof(u_int));
336 	return npins;
337 }
338 
339 /*
340  * Load a file (interpreter/library) pointed to by path [stolen from
341  * coff_load_shlib()]. Made slightly generic so it might be used externally.
342  */
343 int
344 elf_load_file(struct proc *p, char *path, struct exec_package *epp,
345     struct elf_args *ap)
346 {
347 	int error, i;
348 	struct nameidata nd;
349 	Elf_Ehdr eh;
350 	Elf_Phdr *ph = NULL, *syscall_ph = NULL;
351 	u_long phsize = 0;
352 	Elf_Addr addr;
353 	struct vnode *vp;
354 	Elf_Phdr *base_ph = NULL;
355 	struct interp_ld_sec {
356 		Elf_Addr vaddr;
357 		u_long memsz;
358 	} loadmap[ELF_MAX_VALID_PHDR];
359 	int nload, idx = 0;
360 	Elf_Addr pos;
361 	int file_align;
362 	int loop;
363 	size_t randomizequota = ELF_RANDOMIZE_LIMIT;
364 	vaddr_t text_start = -1, text_end = 0;
365 
366 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
367 	nd.ni_pledge = PLEDGE_RPATH;
368 	nd.ni_unveil = UNVEIL_READ;
369 	if ((error = namei(&nd)) != 0) {
370 		return (error);
371 	}
372 	vp = nd.ni_vp;
373 	if (vp->v_type != VREG) {
374 		error = EACCES;
375 		goto bad;
376 	}
377 	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
378 		goto bad;
379 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
380 		error = EACCES;
381 		goto bad;
382 	}
383 	if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)
384 		goto bad1;
385 	if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0)
386 		goto bad1;
387 
388 	if (elf_check_header(&eh) || eh.e_type != ET_DYN) {
389 		error = ENOEXEC;
390 		goto bad1;
391 	}
392 
393 	ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
394 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
395 
396 	if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0)
397 		goto bad1;
398 
399 	for (i = 0; i < eh.e_phnum; i++) {
400 		if ((ph[i].p_align > 1) && !powerof2(ph[i].p_align)) {
401 			error = EINVAL;
402 			goto bad1;
403 		}
404 
405 		if (ph[i].p_type == PT_LOAD) {
406 			if (ph[i].p_filesz > ph[i].p_memsz ||
407 			    ph[i].p_memsz == 0) {
408 				error = EINVAL;
409 				goto bad1;
410 			}
411 			loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr);
412 			loadmap[idx].memsz = round_page (ph[i].p_vaddr +
413 			    ph[i].p_memsz - loadmap[idx].vaddr);
414 			file_align = ph[i].p_align;
415 			idx++;
416 		}
417 	}
418 	nload = idx;
419 
420 	/*
421 	 * Load the interpreter where a non-fixed mmap(NULL, ...)
422 	 * would (i.e. something safely out of the way).
423 	 */
424 	pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS,
425 	    VM_MAXUSER_ADDRESS);
426 	pos = ELF_ROUND(pos, file_align);
427 
428 	loop = 0;
429 	for (i = 0; i < nload;/**/) {
430 		vaddr_t	addr;
431 		struct	uvm_object *uobj;
432 		off_t	uoff;
433 		size_t	size;
434 
435 #ifdef this_needs_fixing
436 		if (i == 0) {
437 			uobj = &vp->v_uvm.u_obj;
438 			/* need to fix uoff */
439 		} else {
440 #endif
441 			uobj = NULL;
442 			uoff = 0;
443 #ifdef this_needs_fixing
444 		}
445 #endif
446 
447 		addr = trunc_page(pos + loadmap[i].vaddr);
448 		size =  round_page(addr + loadmap[i].memsz) - addr;
449 
450 		/* CRAP - map_findspace does not avoid daddr+BRKSIZ */
451 		if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
452 		    (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ))
453 			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
454 			    BRKSIZ);
455 
456 		if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
457 		    (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
458 			if (loop == 0) {
459 				loop = 1;
460 				i = 0;
461 				pos = 0;
462 				continue;
463 			}
464 			error = ENOMEM;
465 			goto bad1;
466 		}
467 		if (addr != pos + loadmap[i].vaddr) {
468 			/* base changed. */
469 			pos = addr - trunc_page(loadmap[i].vaddr);
470 			pos = ELF_ROUND(pos,file_align);
471 			i = 0;
472 			continue;
473 		}
474 
475 		i++;
476 	}
477 
478 	/*
479 	 * Load all the necessary sections
480 	 */
481 	for (i = 0; i < eh.e_phnum; i++) {
482 		Elf_Addr size = 0;
483 		int prot = 0;
484 		int flags;
485 
486 		switch (ph[i].p_type) {
487 		case PT_LOAD:
488 			if (base_ph == NULL) {
489 				flags = VMCMD_BASE;
490 				addr = pos;
491 				base_ph = &ph[i];
492 			} else {
493 				flags = VMCMD_RELATIVE;
494 				addr = ph[i].p_vaddr - base_ph->p_vaddr;
495 			}
496 			elf_load_psection(&epp->ep_vmcmds, nd.ni_vp,
497 			    &ph[i], &addr, &size, &prot, flags | VMCMD_SYSCALL);
498 			/* If entry is within this section it must be text */
499 			if (eh.e_entry >= ph[i].p_vaddr &&
500 			    eh.e_entry < (ph[i].p_vaddr + size)) {
501  				epp->ep_entry = addr + eh.e_entry -
502 				    ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align);
503 				if (flags == VMCMD_RELATIVE)
504 					epp->ep_entry += pos;
505 				ap->arg_interp = pos;
506 			}
507 			if (prot & PROT_EXEC) {
508 				if (addr < text_start)
509 					text_start = addr;
510 				if (addr+size >= text_end)
511 					text_end = addr + size;
512 			}
513 			addr += size;
514 			break;
515 
516 		case PT_PHDR:
517 		case PT_NOTE:
518 			break;
519 
520 		case PT_OPENBSD_RANDOMIZE:
521 			if (ph[i].p_memsz > randomizequota) {
522 				error = ENOMEM;
523 				goto bad1;
524 			}
525 			randomizequota -= ph[i].p_memsz;
526 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
527 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
528 			break;
529 
530 		case PT_DYNAMIC:
531 #if defined (__mips__)
532 			/* DT_DEBUG is not ready on mips */
533 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
534 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
535 #endif
536 			break;
537 		case PT_GNU_RELRO:
538 		case PT_OPENBSD_MUTABLE:
539 			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
540 			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
541 			break;
542 		case PT_OPENBSD_SYSCALLS:
543 			syscall_ph = &ph[i];
544 			break;
545 		default:
546 			break;
547 		}
548 	}
549 
550 	if (syscall_ph) {
551 		struct process *pr = p->p_p;
552 		vaddr_t base = pos;
553 		size_t len = text_end;
554 		u_int *pins;
555 		int npins;
556 
557 		npins = elf_read_pintable(p, nd.ni_vp, syscall_ph,
558 		    &pins, 1, len);
559 		if (npins) {
560 			elf_adjustpins(&base, &len, pins, npins,
561 			    text_start);
562 			pr->ps_pin.pn_start = base;
563 			pr->ps_pin.pn_end = base + len;
564 			pr->ps_pin.pn_pins = pins;
565 			pr->ps_pin.pn_npins = npins;
566 			pr->ps_flags |= PS_PIN;
567 		}
568 	}
569 
570 	vn_marktext(nd.ni_vp);
571 
572 bad1:
573 	VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p);
574 bad:
575 	free(ph, M_TEMP, phsize);
576 
577 	vput(nd.ni_vp);
578 	return (error);
579 }
580 
/*
 * Prepare an Elf binary's exec package
 *
 * First, set up the various offsets/lengths in the exec package.
 *
 * Then, mark the text image busy (so it can be demand paged) or error out if
 * this is not possible.  Finally, set up vmcmds for the text, data, bss, and
 * stack segments.
 *
 * The header in epp->ep_hdr is validated, the program headers are read
 * from epp->ep_vp and walked several times: to find the interpreter
 * path and the base PT_LOAD, to detect DT_TEXTREL in PIE images, and to
 * queue the vmcmds.  For images that have an interpreter or are ET_DYN,
 * a struct elf_args is left in epp->ep_args.
 *
 * Returns the result of exec_setup_stack() on success.  On failure the
 * queued vmcmds are discarded and an errno value is returned; ENOEXEC is
 * used when no more specific error was recorded.
 */
int
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
	Elf_Ehdr *eh = epp->ep_hdr;
	Elf_Phdr *ph, *pp, *base_ph = NULL, *syscall_ph = NULL;
	Elf_Addr phdr = 0, exe_base = 0, exe_end = 0;
	int error, i, has_phdr = 0, names = 0, textrel = 0;
	char *interp = NULL;
	u_long phsize;
	size_t randomizequota = ELF_RANDOMIZE_LIMIT;

	/* The header buffer must hold at least a complete Elf_Ehdr. */
	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
		return (ENOEXEC);

	if (elf_check_header(eh) ||
	   (eh->e_type != ET_EXEC && eh->e_type != ET_DYN))
		return (ENOEXEC);

	/*
	 * check if vnode is in open for writing, because we want to demand-
	 * page out of it.  if it is, don't do it, for various reasons.
	 */
	if (epp->ep_vp->v_writecount != 0) {
#ifdef DIAGNOSTIC
		if (epp->ep_vp->v_flag & VTEXT)
			panic("exec: a VTEXT vnode has writecount != 0");
#endif
		return (ETXTBSY);
	}
	/*
	 * Allocate space to hold all the program headers, and read them
	 * from the file
	 */
	ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);

	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph,
	    phsize)) != 0)
		goto bad;

	/* ELF_NO_ADDR marks "no text / data segment seen yet". */
	epp->ep_tsize = ELF_NO_ADDR;
	epp->ep_dsize = ELF_NO_ADDR;

	/*
	 * First pass: validate the headers, fetch the interpreter path
	 * (only the first PT_INTERP is used), and remember the first
	 * PT_LOAD and whether a PT_PHDR exists.
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		if ((pp->p_align > 1) && !powerof2(pp->p_align)) {
			error = EINVAL;
			goto bad;
		}

		if (pp->p_type == PT_INTERP && !interp) {
			/*
			 * The `goto bad' statements in this branch that do
			 * not set `error' leave it at 0, which the bad:
			 * path turns into ENOEXEC.
			 */
			if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN)
				goto bad;
			interp = pool_get(&namei_pool, PR_WAITOK);
			if ((error = elf_read_from(p, epp->ep_vp,
			    pp->p_offset, interp, pp->p_filesz)) != 0) {
				goto bad;
			}
			/* The path must be NUL terminated within the file. */
			if (interp[pp->p_filesz - 1] != '\0')
				goto bad;
		} else if (pp->p_type == PT_LOAD) {
			if (pp->p_filesz > pp->p_memsz ||
			    pp->p_memsz == 0) {
				error = EINVAL;
				goto bad;
			}
			if (base_ph == NULL)
				base_ph = pp;
		} else if (pp->p_type == PT_PHDR) {
			has_phdr = 1;
		}
	}

	/*
	 * Verify this is an OpenBSD executable.  If it's marked that way
	 * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment.
	 */
	if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0)
		goto bad;
	if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD)
		names |= ELF_NOTE_NAME_OPENBSD;

	if (eh->e_type == ET_DYN) {
		/* need phdr and load sections for PIE */
		if (!has_phdr || base_ph == NULL || base_ph->p_vaddr != 0) {
			error = EINVAL;
			goto bad;
		}
		/* randomize exe_base for PIE */
		exe_base = uvm_map_pie(base_ph->p_align);

		/*
		 * Check if DYNAMIC contains DT_TEXTREL
		 */
		for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
			Elf_Dyn *dt;
			int j;

			switch (pp->p_type) {
			case PT_DYNAMIC:
				/* Oversized dynamic sections are not scanned. */
				if (pp->p_filesz > 64*1024)
					break;
				dt = malloc(pp->p_filesz, M_TEMP, M_WAITOK);
				error = vn_rdwr(UIO_READ, epp->ep_vp,
				    (caddr_t)dt, pp->p_filesz, pp->p_offset,
				    UIO_SYSSPACE, IO_UNIT, p->p_ucred, NULL, p);
				/*
				 * A failed read only skips the scan of this
				 * header; it does not abort the exec here.
				 */
				if (error) {
					free(dt, M_TEMP, pp->p_filesz);
					break;
				}
				for (j = 0; j < pp->p_filesz / sizeof(*dt); j++) {
					if (dt[j].d_tag == DT_TEXTREL) {
						textrel = VMCMD_TEXTREL;
						break;
					}
				}
				free(dt, M_TEMP, pp->p_filesz);
				break;
			default:
				break;
			}
		}
	}

	/*
	 * Load all the necessary sections
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		Elf_Addr addr, size = 0;
		int prot = 0, syscall = 0;
		int flags = 0;

		switch (pp->p_type) {
		case PT_LOAD:
			/*
			 * With a non-zero exe_base the first PT_LOAD is
			 * placed at exe_base and the others relative to it;
			 * otherwise every segment is loaded at the address
			 * found in its own header.
			 */
			if (exe_base != 0) {
				if (pp == base_ph) {
					flags = VMCMD_BASE;
					addr = exe_base;
				} else {
					flags = VMCMD_RELATIVE;
					addr = pp->p_vaddr - base_ph->p_vaddr;
				}
			} else
				addr = ELF_NO_ADDR;

			/*
			 * Permit system calls in main-text static binaries.
			 * static binaries may not call msyscall() or
			 * pinsyscalls()
			 */
			if (interp == NULL) {
				syscall = VMCMD_SYSCALL;
				p->p_vmspace->vm_map.flags |= VM_MAP_SYSCALL_ONCE;
				p->p_vmspace->vm_map.flags |= VM_MAP_PINSYSCALL_ONCE;
			}

			/*
			 * Calculates size of text and data segments
			 * by starting at first and going to end of last.
			 * 'rwx' sections are treated as data.
			 * this is correct for BSS_PLT, but may not be
			 * for DATA_PLT, is fine for TEXT_PLT.
			 */
			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
			    pp, &addr, &size, &prot, flags | textrel | syscall);

			/*
			 * Update exe_base in case alignment was off.
			 * For PIE, addr is relative to exe_base so
			 * adjust it (non PIE exe_base is 0 so no change).
			 */
			if (flags == VMCMD_BASE)
				exe_base = addr;
			else
				addr += exe_base;

			/*
			 * Decide whether it's text or data by looking
			 * at the protection of the section
			 */
			if (prot & PROT_WRITE) {
				/* data section */
				if (epp->ep_dsize == ELF_NO_ADDR) {
					epp->ep_daddr = addr;
					epp->ep_dsize = size;
				} else {
					/* Grow the data extent to cover this segment. */
					if (addr < epp->ep_daddr) {
						epp->ep_dsize =
						    epp->ep_dsize +
						    epp->ep_daddr -
						    addr;
						epp->ep_daddr = addr;
					} else
						epp->ep_dsize = addr+size -
						    epp->ep_daddr;
				}
			} else if (prot & PROT_EXEC) {
				/* text section */
				if (epp->ep_tsize == ELF_NO_ADDR) {
					epp->ep_taddr = addr;
					epp->ep_tsize = size;
				} else {
					/* Grow the text extent to cover this segment. */
					if (addr < epp->ep_taddr) {
						epp->ep_tsize =
						    epp->ep_tsize +
						    epp->ep_taddr -
						    addr;
						epp->ep_taddr = addr;
					} else
						epp->ep_tsize = addr+size -
						    epp->ep_taddr;
				}
				if (interp == NULL)
					exe_end = epp->ep_taddr +
					    epp->ep_tsize;	/* end of TEXT */
			}
			break;

		case PT_SHLIB:
			error = ENOEXEC;
			goto bad;

		case PT_INTERP:
			/* Already did this one */
		case PT_NOTE:
			break;

		case PT_PHDR:
			/* Note address of program headers (in text segment) */
			phdr = pp->p_vaddr;
			break;

		case PT_OPENBSD_RANDOMIZE:
			/* The total amount of random data is capped. */
			if (ph[i].p_memsz > randomizequota) {
				error = ENOMEM;
				goto bad;
			}
			randomizequota -= ph[i].p_memsz;
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		case PT_DYNAMIC:
#if defined (__mips__)
			/* DT_DEBUG is not ready on mips */
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
#endif
			break;
		case PT_GNU_RELRO:
		case PT_OPENBSD_MUTABLE:
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;
		case PT_OPENBSD_SYSCALLS:
			/* Only honoured here for images without an interpreter. */
			if (interp == NULL)
				syscall_ph = &ph[i];
			break;
		default:
			/*
			 * Not fatal, we don't need to understand everything
			 * :-)
			 */
			break;
		}
	}

	if (syscall_ph) {
		vaddr_t base = exe_base;
		size_t len = exe_end - exe_base;
		u_int *pins;
		int npins;

		/*
		 * NOTE(review): `pins' is only assigned by
		 * elf_read_pintable() when it succeeds, so this relies on
		 * a non-zero return value implying success -- confirm.
		 */
		npins = elf_read_pintable(p, epp->ep_vp, syscall_ph,
		    &pins, 0, len);
		if (npins) {
			elf_adjustpins(&base, &len, pins, npins,
			    epp->ep_taddr - exe_base);
			epp->ep_pinstart = base;
			epp->ep_pinend = base + len;
			epp->ep_pins = pins;
			epp->ep_npins = npins;
			p->p_p->ps_flags |= PS_PIN;
		}
	}

	/* Turn the PT_PHDR vaddr into its address in the loaded image. */
	phdr += exe_base;

	/*
	 * Strangely some linux programs may have all load sections marked
	 * writeable, in this case, textsize is not -1, but rather 0;
	 */
	if (epp->ep_tsize == ELF_NO_ADDR)
		epp->ep_tsize = 0;
	/*
	 * Another possibility is that it has all load sections marked
	 * read-only.  Fake a zero-sized data segment right after the
	 * text segment.
	 */
	if (epp->ep_dsize == ELF_NO_ADDR) {
		epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize);
		epp->ep_dsize = 0;
	}

	/* From here on the exec package holds the interpreter path buffer. */
	epp->ep_interp = interp;
	epp->ep_entry = eh->e_entry + exe_base;

	/*
	 * Check if we found a dynamically linked binary and arrange to load
	 * its interpreter when the exec file is released.
	 */
	if (interp || eh->e_type == ET_DYN) {
		struct elf_args *ap;

		ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK);

		ap->arg_phaddr = phdr;
		ap->arg_phentsize = eh->e_phentsize;
		ap->arg_phnum = eh->e_phnum;
		ap->arg_entry = eh->e_entry + exe_base;
		ap->arg_interp = exe_base;

		/* Consumed and freed by exec_elf_fixup(). */
		epp->ep_args = ap;
	}

	free(ph, M_TEMP, phsize);
	vn_marktext(epp->ep_vp);
	return (exec_setup_stack(p, epp));

bad:
	if (interp)
		pool_put(&namei_pool, interp);
	free(ph, M_TEMP, phsize);
	kill_vmcmds(&epp->ep_vmcmds);
	/* Failure paths that recorded no error code report ENOEXEC. */
	if (error == 0)
		return (ENOEXEC);
	return (error);
}
927 
928 /*
929  * Phase II of load. It is now safe to load the interpreter. Info collected
930  * when loading the program is available for setup of the interpreter.
931  */
932 int
933 exec_elf_fixup(struct proc *p, struct exec_package *epp)
934 {
935 	char	*interp;
936 	int	error = 0;
937 	struct	elf_args *ap;
938 	AuxInfo ai[ELF_AUX_ENTRIES], *a;
939 
940 	ap = epp->ep_args;
941 	if (ap == NULL) {
942 		return (0);
943 	}
944 
945 	interp = epp->ep_interp;
946 
947 	/* disable kbind in programs that don't use ld.so */
948 	if (interp == NULL)
949 		p->p_p->ps_kbind_addr = BOGO_PC;
950 
951 	if (interp &&
952 	    (error = elf_load_file(p, interp, epp, ap)) != 0) {
953 		uprintf("execve: cannot load %s\n", interp);
954 		free(ap, M_TEMP, sizeof *ap);
955 		pool_put(&namei_pool, interp);
956 		kill_vmcmds(&epp->ep_vmcmds);
957 		return (error);
958 	}
959 	/*
960 	 * We have to do this ourselves...
961 	 */
962 	error = exec_process_vmcmds(p, epp);
963 
964 	/*
965 	 * Push extra arguments on the stack needed by dynamically
966 	 * linked binaries
967 	 */
968 	if (error == 0) {
969 		memset(&ai, 0, sizeof ai);
970 		a = ai;
971 
972 		a->au_id = AUX_phdr;
973 		a->au_v = ap->arg_phaddr;
974 		a++;
975 
976 		a->au_id = AUX_phent;
977 		a->au_v = ap->arg_phentsize;
978 		a++;
979 
980 		a->au_id = AUX_phnum;
981 		a->au_v = ap->arg_phnum;
982 		a++;
983 
984 		a->au_id = AUX_pagesz;
985 		a->au_v = PAGE_SIZE;
986 		a++;
987 
988 		a->au_id = AUX_base;
989 		a->au_v = ap->arg_interp;
990 		a++;
991 
992 		a->au_id = AUX_flags;
993 		a->au_v = 0;
994 		a++;
995 
996 		a->au_id = AUX_entry;
997 		a->au_v = ap->arg_entry;
998 		a++;
999 
1000 		a->au_id = AUX_openbsd_timekeep;
1001 		a->au_v = p->p_p->ps_timekeep;
1002 		a++;
1003 
1004 		a->au_id = AUX_null;
1005 		a->au_v = 0;
1006 		a++;
1007 
1008 		error = copyout(ai, epp->ep_auxinfo, sizeof ai);
1009 	}
1010 	free(ap, M_TEMP, sizeof *ap);
1011 	if (interp)
1012 		pool_put(&namei_pool, interp);
1013 	return (error);
1014 }
1015 
1016 int
1017 elf_os_pt_note_name(Elf_Note *np)
1018 {
1019 	int i, j;
1020 
1021 	for (i = 0; i < nitems(elf_note_names); i++) {
1022 		size_t namlen = strlen(elf_note_names[i].name);
1023 		if (np->namesz < namlen)
1024 			continue;
1025 		/* verify name padding (after the NUL) is NUL */
1026 		for (j = namlen + 1; j < elfround(np->namesz); j++)
1027 			if (((char *)(np + 1))[j] != '\0')
1028 				continue;
1029 		/* verify desc padding is NUL */
1030 		for (j = np->descsz; j < elfround(np->descsz); j++)
1031 			if (((char *)(np + 1))[j] != '\0')
1032 				continue;
1033 		if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0)
1034 			return elf_note_names[i].id;
1035 	}
1036 	return (0);
1037 }
1038 
1039 int
1040 elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp)
1041 {
1042 	Elf_Phdr *hph, *ph;
1043 	Elf_Note *np = NULL;
1044 	size_t phsize, offset, pfilesz = 0, total;
1045 	int error, names = 0;
1046 
1047 	hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
1048 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
1049 	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff,
1050 	    hph, phsize)) != 0)
1051 		goto out1;
1052 
1053 	for (ph = hph;  ph < &hph[eh->e_phnum]; ph++) {
1054 		if (ph->p_type == PT_OPENBSD_WXNEEDED) {
1055 			epp->ep_flags |= EXEC_WXNEEDED;
1056 			continue;
1057 		}
1058 		if (ph->p_type == PT_OPENBSD_NOBTCFI) {
1059 			epp->ep_flags |= EXEC_NOBTCFI;
1060 			continue;
1061 		}
1062 
1063 		if (ph->p_type != PT_NOTE || ph->p_filesz > 1024)
1064 			continue;
1065 
1066 		if (np && ph->p_filesz != pfilesz) {
1067 			free(np, M_TEMP, pfilesz);
1068 			np = NULL;
1069 		}
1070 		if (!np)
1071 			np = malloc(ph->p_filesz, M_TEMP, M_WAITOK);
1072 		pfilesz = ph->p_filesz;
1073 		if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset,
1074 		    np, ph->p_filesz)) != 0)
1075 			goto out2;
1076 
1077 		for (offset = 0; offset < ph->p_filesz; offset += total) {
1078 			Elf_Note *np2 = (Elf_Note *)((char *)np + offset);
1079 
1080 			if (offset + sizeof(Elf_Note) > ph->p_filesz)
1081 				break;
1082 			total = sizeof(Elf_Note) + elfround(np2->namesz) +
1083 			    elfround(np2->descsz);
1084 			if (offset + total > ph->p_filesz)
1085 				break;
1086 			names |= elf_os_pt_note_name(np2);
1087 		}
1088 	}
1089 
1090 out2:
1091 	free(np, M_TEMP, pfilesz);
1092 out1:
1093 	free(hph, M_TEMP, phsize);
1094 	*namesp = names;
1095 	return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC);
1096 }
1097 
1098 /*
1099  * Start of routines related to dumping core
1100  */
1101 
1102 #ifdef SMALL_KERNEL
/*
 * SMALL_KERNEL variant: no core file is written, the request is
 * always answered with EPERM.
 */
int
coredump_elf(struct proc *p, void *cookie)
{
	return (EPERM);
}
1108 #else /* !SMALL_KERNEL */
1109 
1110 struct writesegs_state {
1111 	off_t	notestart;
1112 	off_t	secstart;
1113 	off_t	secoff;
1114 	struct	proc *p;
1115 	void	*iocookie;
1116 	Elf_Phdr *psections;
1117 	size_t	psectionslen;
1118 	size_t	notesize;
1119 	int	npsections;
1120 };
1121 
1122 uvm_coredump_setup_cb	coredump_setup_elf;
1123 uvm_coredump_walk_cb	coredump_walk_elf;
1124 
1125 int	coredump_notes_elf(struct proc *, void *, size_t *);
1126 int	coredump_note_elf(struct proc *, void *, size_t *);
1127 int	coredump_writenote_elf(struct proc *, void *, Elf_Note *,
1128 	    const char *, void *);
1129 
1130 extern vaddr_t sigcode_va;
1131 extern vsize_t sigcode_sz;
1132 
1133 int
1134 coredump_elf(struct proc *p, void *cookie)
1135 {
1136 #ifdef DIAGNOSTIC
1137 	off_t offset;
1138 #endif
1139 	struct writesegs_state ws;
1140 	size_t notesize;
1141 	int error, i;
1142 
1143 	ws.p = p;
1144 	ws.iocookie = cookie;
1145 	ws.psections = NULL;
1146 
1147 	/*
1148 	 * Walk the map to get all the segment offsets and lengths,
1149 	 * write out the ELF header.
1150 	 */
1151 	error = uvm_coredump_walkmap(p, coredump_setup_elf,
1152 	    coredump_walk_elf, &ws);
1153 	if (error)
1154 		goto out;
1155 
1156 	error = coredump_write(cookie, UIO_SYSSPACE, ws.psections,
1157 	    ws.psectionslen, 0);
1158 	if (error)
1159 		goto out;
1160 
1161 	/* Write out the notes. */
1162 	error = coredump_notes_elf(p, cookie, &notesize);
1163 	if (error)
1164 		goto out;
1165 
1166 #ifdef DIAGNOSTIC
1167 	if (notesize != ws.notesize)
1168 		panic("coredump: notesize changed: %zu != %zu",
1169 		    ws.notesize, notesize);
1170 	offset = ws.notestart + notesize;
1171 	if (offset != ws.secstart)
1172 		panic("coredump: offset %lld != secstart %lld",
1173 		    (long long) offset, (long long) ws.secstart);
1174 #endif
1175 
1176 	/* Pass 3: finally, write the sections themselves. */
1177 	for (i = 0; i < ws.npsections - 1; i++) {
1178 		Elf_Phdr *pent = &ws.psections[i];
1179 		if (pent->p_filesz == 0)
1180 			continue;
1181 
1182 #ifdef DIAGNOSTIC
1183 		if (offset != pent->p_offset)
1184 			panic("coredump: offset %lld != p_offset[%d] %lld",
1185 			    (long long) offset, i,
1186 			    (long long) pent->p_filesz);
1187 #endif
1188 
1189 		/*
1190 		 * Since the sigcode is mapped execute-only, we can't
1191 		 * read it.  So use the kernel mapping for it instead.
1192 		 */
1193 		if (pent->p_vaddr == p->p_p->ps_sigcode &&
1194 		    pent->p_filesz == sigcode_sz) {
1195 			error = coredump_write(cookie, UIO_SYSSPACE,
1196 			    (void *)sigcode_va, sigcode_sz, 0);
1197 		} else {
1198 			error = coredump_write(cookie, UIO_USERSPACE,
1199 			    (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz,
1200 			    (pent->p_flags & PF_ISVNODE));
1201 		}
1202 		if (error)
1203 			goto out;
1204 
1205 		coredump_unmap(cookie, (vaddr_t)pent->p_vaddr,
1206 		    (vaddr_t)pent->p_vaddr + pent->p_filesz);
1207 
1208 #ifdef DIAGNOSTIC
1209 		offset += ws.psections[i].p_filesz;
1210 #endif
1211 	}
1212 
1213 out:
1214 	free(ws.psections, M_TEMP, ws.psectionslen);
1215 	return (error);
1216 }
1217 
1218 
1219 /*
1220  * Normally we lay out core files like this:
1221  *	[ELF Header] [Program headers] [Notes] [data for PT_LOAD segments]
1222  *
1223  * However, if there's >= 65535 segments then it overflows the field
1224  * in the ELF header, so the standard specifies putting a magic
1225  * number there and saving the real count in the .sh_info field of
1226  * the first *section* header...which requires generating a section
1227  * header.  To avoid confusing tools, we include an .shstrtab section
1228  * as well so all the indexes look valid.  So in this case we lay
1229  * out the core file like this:
1230  *	[ELF Header] [Section Headers] [.shstrtab] [Program headers] \
1231  *	[Notes] [data for PT_LOAD segments]
1232  *
1233  * The 'shstrtab' structure below is data for the second of the two
1234  * section headers, plus the .shstrtab itself, in one const buffer.
1235  */
1236 static const struct {
1237     Elf_Shdr	shdr;
1238     char	shstrtab[sizeof(ELF_SHSTRTAB) + 1];
1239 } shstrtab = {
1240     .shdr = {
1241 	.sh_name = 1,			/* offset in .shstrtab below */
1242 	.sh_type = SHT_STRTAB,
1243 	.sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr),
1244 	.sh_size = sizeof(ELF_SHSTRTAB) + 1,
1245 	.sh_addralign = 1,
1246     },
1247     .shstrtab = "\0" ELF_SHSTRTAB,
1248 };
1249 
1250 int
1251 coredump_setup_elf(int segment_count, void *cookie)
1252 {
1253 	Elf_Ehdr ehdr;
1254 	struct writesegs_state *ws = cookie;
1255 	Elf_Phdr *note;
1256 	int error;
1257 
1258 	/* Get the count of segments, plus one for the PT_NOTE */
1259 	ws->npsections = segment_count + 1;
1260 
1261 	/* Get the size of the notes. */
1262 	error = coredump_notes_elf(ws->p, NULL, &ws->notesize);
1263 	if (error)
1264 		return error;
1265 
1266 	/* Setup the ELF header */
1267 	memset(&ehdr, 0, sizeof(ehdr));
1268 	memcpy(ehdr.e_ident, ELFMAG, SELFMAG);
1269 	ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS;
1270 	ehdr.e_ident[EI_DATA] = ELF_TARG_DATA;
1271 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
1272 	/* XXX Should be the OSABI/ABI version of the executable. */
1273 	ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
1274 	ehdr.e_ident[EI_ABIVERSION] = 0;
1275 	ehdr.e_type = ET_CORE;
1276 	/* XXX This should be the e_machine of the executable. */
1277 	ehdr.e_machine = ELF_TARG_MACH;
1278 	ehdr.e_version = EV_CURRENT;
1279 	ehdr.e_entry = 0;
1280 	ehdr.e_flags = 0;
1281 	ehdr.e_ehsize = sizeof(ehdr);
1282 	ehdr.e_phentsize = sizeof(Elf_Phdr);
1283 
1284 	if (ws->npsections < PN_XNUM) {
1285 		ehdr.e_phoff = sizeof(ehdr);
1286 		ehdr.e_shoff = 0;
1287 		ehdr.e_phnum = ws->npsections;
1288 		ehdr.e_shentsize = 0;
1289 		ehdr.e_shnum = 0;
1290 		ehdr.e_shstrndx = 0;
1291 	} else {
1292 		/* too many segments, use extension setup */
1293 		ehdr.e_shoff = sizeof(ehdr);
1294 		ehdr.e_phnum = PN_XNUM;
1295 		ehdr.e_shentsize = sizeof(Elf_Shdr);
1296 		ehdr.e_shnum = 2;
1297 		ehdr.e_shstrndx = 1;
1298 		ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size;
1299 	}
1300 
1301 	/* Write out the ELF header. */
1302 	error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr), 0);
1303 	if (error)
1304 		return error;
1305 
1306 	/*
1307 	 * If an section header is needed to store extension info, write
1308 	 * it out after the ELF header and before the program header.
1309 	 */
1310 	if (ehdr.e_shnum != 0) {
1311 		Elf_Shdr shdr = { .sh_info = ws->npsections };
1312 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr,
1313 		    sizeof shdr, 0);
1314 		if (error)
1315 			return error;
1316 		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab,
1317 		    sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab), 0);
1318 		if (error)
1319 			return error;
1320 	}
1321 
1322 	/*
1323 	 * Allocate the segment header array and setup to collect
1324 	 * the section sizes and offsets
1325 	 */
1326 	ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr),
1327 	    M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO);
1328 	if (ws->psections == NULL)
1329 		return ENOMEM;
1330 	ws->psectionslen = ws->npsections * sizeof(Elf_Phdr);
1331 
1332 	ws->notestart = ehdr.e_phoff + ws->psectionslen;
1333 	ws->secstart = ws->notestart + ws->notesize;
1334 	ws->secoff = ws->secstart;
1335 
1336 	/* Fill in the PT_NOTE segment header in the last slot */
1337 	note = &ws->psections[ws->npsections - 1];
1338 	note->p_type = PT_NOTE;
1339 	note->p_offset = ws->notestart;
1340 	note->p_vaddr = 0;
1341 	note->p_paddr = 0;
1342 	note->p_filesz = ws->notesize;
1343 	note->p_memsz = 0;
1344 	note->p_flags = PF_R;
1345 	note->p_align = ELFROUNDSIZE;
1346 
1347 	return (0);
1348 }
1349 
1350 int
1351 coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot,
1352     int isvnode, int nsegment, void *cookie)
1353 {
1354 	struct writesegs_state *ws = cookie;
1355 	Elf_Phdr phdr;
1356 	vsize_t size, realsize;
1357 
1358 	size = end - start;
1359 	realsize = realend - start;
1360 
1361 	phdr.p_type = PT_LOAD;
1362 	phdr.p_offset = ws->secoff;
1363 	phdr.p_vaddr = start;
1364 	phdr.p_paddr = 0;
1365 	phdr.p_filesz = realsize;
1366 	phdr.p_memsz = size;
1367 	phdr.p_flags = 0;
1368 	if (prot & PROT_READ)
1369 		phdr.p_flags |= PF_R;
1370 	if (prot & PROT_WRITE)
1371 		phdr.p_flags |= PF_W;
1372 	if (prot & PROT_EXEC)
1373 		phdr.p_flags |= PF_X;
1374 	if (isvnode)
1375 		phdr.p_flags |= PF_ISVNODE;
1376 	phdr.p_align = PAGE_SIZE;
1377 
1378 	ws->secoff += phdr.p_filesz;
1379 	ws->psections[nsegment] = phdr;
1380 
1381 	return (0);
1382 }
1383 
1384 int
1385 coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
1386 {
1387 	struct elfcore_procinfo cpi;
1388 	Elf_Note nhdr;
1389 	struct process *pr = p->p_p;
1390 	struct proc *q;
1391 	size_t size, notesize;
1392 	int error;
1393 
1394 	KASSERT(!P_HASSIBLING(p) || pr->ps_single != NULL);
1395 	size = 0;
1396 
1397 	/* First, write an elfcore_procinfo. */
1398 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1399 	    elfround(sizeof(cpi));
1400 	if (iocookie) {
1401 		memset(&cpi, 0, sizeof(cpi));
1402 
1403 		cpi.cpi_version = ELFCORE_PROCINFO_VERSION;
1404 		cpi.cpi_cpisize = sizeof(cpi);
1405 		cpi.cpi_signo = p->p_sisig;
1406 		cpi.cpi_sigcode = p->p_sicode;
1407 
1408 		cpi.cpi_sigpend = p->p_siglist | pr->ps_siglist;
1409 		cpi.cpi_sigmask = p->p_sigmask;
1410 		cpi.cpi_sigignore = pr->ps_sigacts->ps_sigignore;
1411 		cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
1412 
1413 		cpi.cpi_pid = pr->ps_pid;
1414 		cpi.cpi_ppid = pr->ps_ppid;
1415 		cpi.cpi_pgrp = pr->ps_pgid;
1416 		if (pr->ps_session->s_leader)
1417 			cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
1418 		else
1419 			cpi.cpi_sid = 0;
1420 
1421 		cpi.cpi_ruid = p->p_ucred->cr_ruid;
1422 		cpi.cpi_euid = p->p_ucred->cr_uid;
1423 		cpi.cpi_svuid = p->p_ucred->cr_svuid;
1424 
1425 		cpi.cpi_rgid = p->p_ucred->cr_rgid;
1426 		cpi.cpi_egid = p->p_ucred->cr_gid;
1427 		cpi.cpi_svgid = p->p_ucred->cr_svgid;
1428 
1429 		(void)strlcpy(cpi.cpi_name, pr->ps_comm, sizeof(cpi.cpi_name));
1430 
1431 		nhdr.namesz = sizeof("OpenBSD");
1432 		nhdr.descsz = sizeof(cpi);
1433 		nhdr.type = NT_OPENBSD_PROCINFO;
1434 
1435 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1436 		    "OpenBSD", &cpi);
1437 		if (error)
1438 			return (error);
1439 	}
1440 	size += notesize;
1441 
1442 	/* Second, write an NT_OPENBSD_AUXV note. */
1443 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1444 	    elfround(ELF_AUX_WORDS * sizeof(char *));
1445 	if (iocookie && pr->ps_auxinfo) {
1446 
1447 		nhdr.namesz = sizeof("OpenBSD");
1448 		nhdr.descsz = ELF_AUX_WORDS * sizeof(char *);
1449 		nhdr.type = NT_OPENBSD_AUXV;
1450 
1451 		error = coredump_write(iocookie, UIO_SYSSPACE,
1452 		    &nhdr, sizeof(nhdr), 0);
1453 		if (error)
1454 			return (error);
1455 
1456 		error = coredump_write(iocookie, UIO_SYSSPACE,
1457 		    "OpenBSD", elfround(nhdr.namesz), 0);
1458 		if (error)
1459 			return (error);
1460 
1461 		error = coredump_write(iocookie, UIO_USERSPACE,
1462 		    (caddr_t)pr->ps_auxinfo, nhdr.descsz, 0);
1463 		if (error)
1464 			return (error);
1465 	}
1466 	size += notesize;
1467 
1468 #ifdef PT_WCOOKIE
1469 	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1470 	    elfround(sizeof(register_t));
1471 	if (iocookie) {
1472 		register_t wcookie;
1473 
1474 		nhdr.namesz = sizeof("OpenBSD");
1475 		nhdr.descsz = sizeof(register_t);
1476 		nhdr.type = NT_OPENBSD_WCOOKIE;
1477 
1478 		wcookie = process_get_wcookie(p);
1479 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1480 		    "OpenBSD", &wcookie);
1481 		if (error)
1482 			return (error);
1483 	}
1484 	size += notesize;
1485 #endif
1486 
1487 	/*
1488 	 * Now write the register info for the thread that caused the
1489 	 * coredump.
1490 	 */
1491 	error = coredump_note_elf(p, iocookie, &notesize);
1492 	if (error)
1493 		return (error);
1494 	size += notesize;
1495 
1496 	/*
1497 	 * Now, for each thread, write the register info and any other
1498 	 * per-thread notes.  Since we're dumping core, all the other
1499 	 * threads in the process have been stopped and the list can't
1500 	 * change.
1501 	 */
1502 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
1503 		if (q == p)		/* we've taken care of this thread */
1504 			continue;
1505 		error = coredump_note_elf(q, iocookie, &notesize);
1506 		if (error)
1507 			return (error);
1508 		size += notesize;
1509 	}
1510 
1511 	*sizep = size;
1512 	return (0);
1513 }
1514 
1515 int
1516 coredump_note_elf(struct proc *p, void *iocookie, size_t *sizep)
1517 {
1518 	Elf_Note nhdr;
1519 	int size, notesize, error;
1520 	int namesize;
1521 	char name[64+ELFROUNDSIZE];
1522 	struct reg intreg;
1523 #ifdef PT_GETFPREGS
1524 	struct fpreg freg;
1525 #endif
1526 #ifdef PT_PACMASK
1527 	register_t pacmask[2];
1528 #endif
1529 
1530 	size = 0;
1531 
1532 	snprintf(name, sizeof(name)-ELFROUNDSIZE, "%s@%d",
1533 	    "OpenBSD", p->p_tid + THREAD_PID_OFFSET);
1534 	namesize = strlen(name) + 1;
1535 	memset(name + namesize, 0, elfround(namesize) - namesize);
1536 
1537 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(intreg));
1538 	if (iocookie) {
1539 		error = process_read_regs(p, &intreg);
1540 		if (error)
1541 			return (error);
1542 
1543 		nhdr.namesz = namesize;
1544 		nhdr.descsz = sizeof(intreg);
1545 		nhdr.type = NT_OPENBSD_REGS;
1546 
1547 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1548 		    name, &intreg);
1549 		if (error)
1550 			return (error);
1551 
1552 	}
1553 	size += notesize;
1554 
1555 #ifdef PT_GETFPREGS
1556 	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(freg));
1557 	if (iocookie) {
1558 		error = process_read_fpregs(p, &freg);
1559 		if (error)
1560 			return (error);
1561 
1562 		nhdr.namesz = namesize;
1563 		nhdr.descsz = sizeof(freg);
1564 		nhdr.type = NT_OPENBSD_FPREGS;
1565 
1566 		error = coredump_writenote_elf(p, iocookie, &nhdr, name, &freg);
1567 		if (error)
1568 			return (error);
1569 	}
1570 	size += notesize;
1571 #endif
1572 
1573 #ifdef PT_PACMASK
1574 	notesize = sizeof(nhdr) + elfround(namesize) +
1575 	    elfround(sizeof(pacmask));
1576 	if (iocookie) {
1577 		pacmask[0] = pacmask[1] = process_get_pacmask(p);
1578 
1579 		nhdr.namesz = namesize;
1580 		nhdr.descsz = sizeof(pacmask);
1581 		nhdr.type = NT_OPENBSD_PACMASK;
1582 
1583 		error = coredump_writenote_elf(p, iocookie, &nhdr,
1584 		    name, &pacmask);
1585 		if (error)
1586 			return (error);
1587 	}
1588 	size += notesize;
1589 #endif
1590 
1591 	*sizep = size;
1592 	/* XXX Add hook for machdep per-LWP notes. */
1593 	return (0);
1594 }
1595 
1596 int
1597 coredump_writenote_elf(struct proc *p, void *cookie, Elf_Note *nhdr,
1598     const char *name, void *data)
1599 {
1600 	int error;
1601 
1602 	error = coredump_write(cookie, UIO_SYSSPACE, nhdr, sizeof(*nhdr), 0);
1603 	if (error)
1604 		return error;
1605 
1606 	error = coredump_write(cookie, UIO_SYSSPACE, name,
1607 	    elfround(nhdr->namesz), 0);
1608 	if (error)
1609 		return error;
1610 
1611 	return coredump_write(cookie, UIO_SYSSPACE, data, nhdr->descsz, 0);
1612 }
1613 #endif /* !SMALL_KERNEL */
1614