xref: /netbsd-src/sys/kern/kern_exec.c (revision aaf4ece63a859a04e37cf3a7229b5fab0157cc06)
1 /*	$NetBSD: kern_exec.c,v 1.212 2005/12/11 12:24:29 christos Exp $	*/
2 
3 /*-
4  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
5  * Copyright (C) 1992 Wolfgang Solfrank.
6  * Copyright (C) 1992 TooLs GmbH.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by TooLs GmbH.
20  * 4. The name of TooLs GmbH may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
29  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
30  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.212 2005/12/11 12:24:29 christos Exp $");
37 
38 #include "opt_ktrace.h"
39 #include "opt_syscall_debug.h"
40 #include "opt_compat_netbsd.h"
41 #include "opt_verified_exec.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/malloc.h>
50 #include <sys/namei.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h>
53 #include <sys/acct.h>
54 #include <sys/exec.h>
55 #include <sys/ktrace.h>
56 #include <sys/resourcevar.h>
57 #include <sys/wait.h>
58 #include <sys/mman.h>
59 #include <sys/ras.h>
60 #include <sys/signalvar.h>
61 #include <sys/stat.h>
62 #include <sys/syscall.h>
63 
64 #include <sys/sa.h>
65 #include <sys/savar.h>
66 #include <sys/syscallargs.h>
67 #ifdef VERIFIED_EXEC
68 #include <sys/verified_exec.h>
69 #endif
70 
71 #ifdef SYSTRACE
72 #include <sys/systrace.h>
73 #endif /* SYSTRACE */
74 
75 #include <uvm/uvm_extern.h>
76 
77 #include <machine/cpu.h>
78 #include <machine/reg.h>
79 
80 static int exec_sigcode_map(struct proc *, const struct emul *);
81 
82 #ifdef DEBUG_EXEC
83 #define DPRINTF(a) uprintf a
84 #else
85 #define DPRINTF(a)
86 #endif /* DEBUG_EXEC */
87 
88 MALLOC_DEFINE(M_EXEC, "exec", "argument lists & other mem used by exec");
89 
90 /*
91  * Exec function switch:
92  *
93  * Note that each makecmds function is responsible for loading the
94  * exec package with the necessary functions for any exec-type-specific
95  * handling.
96  *
97  * Functions for specific exec types should be defined in their own
98  * header file.
99  */
100 extern const struct execsw	execsw_builtin[];
101 extern int			nexecs_builtin;
102 static const struct execsw	**execsw = NULL;
103 static int			nexecs;
104 
105 u_int	exec_maxhdrsz;		/* must not be static - netbsd32 needs it */
106 
107 #ifdef LKM
108 /* list of supported emulations */
109 static
110 LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
111 struct emul_entry {
112 	LIST_ENTRY(emul_entry)	el_list;
113 	const struct emul	*el_emul;
114 	int			ro_entry;
115 };
116 
117 /* list of dynamically loaded execsw entries */
118 static
119 LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
120 struct exec_entry {
121 	LIST_ENTRY(exec_entry)	ex_list;
122 	const struct execsw	*es;
123 };
124 
/* structure used for building execsw[] */
126 struct execsw_entry {
127 	struct execsw_entry	*next;
128 	const struct execsw	*es;
129 };
130 #endif /* LKM */
131 
132 #ifdef SYSCALL_DEBUG
133 extern const char * const syscallnames[];
134 #endif
135 
136 #ifdef COMPAT_16
137 extern char	sigcode[], esigcode[];
138 struct uvm_object *emul_netbsd_object;
139 #endif
140 
141 #ifndef __HAVE_SYSCALL_INTERN
142 void	syscall(void);
143 #endif
144 
/*
 * NetBSD emul struct: the emulation descriptor for native binaries.
 *
 * The initializer is positional, so the order of the entries below must
 * match the field order of struct emul, which is declared outside this
 * file.  NOTE(review): the per-field remarks below are inferred from how
 * this file uses the structure (e_name, e_sigcode/e_esigcode, e_setregs,
 * e_proc_exec, e_proc_exit, e_syscall_intern); confirm the remaining
 * slots against the struct emul declaration before editing.
 */
const struct emul emul_netbsd = {
	"netbsd",	/* name; emul_search() matches on e_name */
	NULL,		/* emulation path */
#ifndef __HAVE_MINIMAL_EMUL
	/* these four slots exist only without __HAVE_MINIMAL_EMUL */
	EMUL_HAS_SYS___syscall,
	NULL,
	SYS_syscall,
	SYS_NSYSENT,
#endif
	sysent,		/* native system call table */
#ifdef SYSCALL_DEBUG
	syscallnames,	/* name table, only available with SYSCALL_DEBUG */
#else
	NULL,
#endif
	sendsig,
	trapsignal,
	NULL,
#ifdef COMPAT_16
	/*
	 * Start and end of the signal trampoline; execve1() computes its
	 * size as e_esigcode - e_sigcode.  Only provided with COMPAT_16.
	 */
	sigcode,
	esigcode,
	&emul_netbsd_object,
#else
	NULL,
	NULL,
	NULL,
#endif
	setregs,	/* invoked by execve1() through e_setregs */
	/* presumably the optional per-process/per-lwp hooks — TODO confirm */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
#ifdef __HAVE_SYSCALL_INTERN
	syscall_intern,	/* invoked by execve1() through e_syscall_intern */
#else
	syscall,
#endif
	NULL,
	NULL,

	uvm_default_mapaddr,
};
189 
190 #ifdef LKM
191 /*
192  * Exec lock. Used to control access to execsw[] structures.
193  * This must not be static so that netbsd32 can access it, too.
194  */
195 struct lock exec_lock;
196 
197 static void link_es(struct execsw_entry **, const struct execsw *);
198 #endif /* LKM */
199 
200 /*
201  * check exec:
202  * given an "executable" described in the exec package's namei info,
203  * see what we can do with it.
204  *
205  * ON ENTRY:
206  *	exec package with appropriate namei info
207  *	lwp pointer of exec'ing lwp
208  *      if verified exec enabled then flag indicating a direct exec or
209  *        an indirect exec (i.e. for a shell script interpreter)
210  *	NO SELF-LOCKED VNODES
211  *
212  * ON EXIT:
213  *	error:	nothing held, etc.  exec header still allocated.
214  *	ok:	filled exec package, executable's vnode (unlocked).
215  *
216  * EXEC SWITCH ENTRY:
217  * 	Locked vnode to check, exec package, proc.
218  *
219  * EXEC SWITCH EXIT:
220  *	ok:	return 0, filled exec package, executable's vnode (unlocked).
221  *	error:	destructive:
222  *			everything deallocated execept exec header.
223  *		non-destructive:
224  *			error code, executable's vnode (unlocked),
225  *			exec header unmodified.
226  */
227 int
228 /*ARGSUSED*/
229 check_exec(struct lwp *l, struct exec_package *epp, int flag)
230 {
231 	int		error, i;
232 	struct vnode	*vp;
233 	struct nameidata *ndp;
234 	size_t		resid;
235 	struct proc	*p;
236 
237 	p = l->l_proc;
238 	ndp = epp->ep_ndp;
239 	ndp->ni_cnd.cn_nameiop = LOOKUP;
240 	ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME;
241 	/* first get the vnode */
242 	if ((error = namei(ndp)) != 0)
243 		return error;
244 	epp->ep_vp = vp = ndp->ni_vp;
245 
246 	/* check access and type */
247 	if (vp->v_type != VREG) {
248 		error = EACCES;
249 		goto bad1;
250 	}
251 	if ((error = VOP_ACCESS(vp, VEXEC, p->p_ucred, l)) != 0)
252 		goto bad1;
253 
254 	/* get attributes */
255 	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, l)) != 0)
256 		goto bad1;
257 
258 	/* Check mount point */
259 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
260 		error = EACCES;
261 		goto bad1;
262 	}
263 	if (vp->v_mount->mnt_flag & MNT_NOSUID)
264 		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
265 
266 	/* try to open it */
267 	if ((error = VOP_OPEN(vp, FREAD, p->p_ucred, l)) != 0)
268 		goto bad1;
269 
270 	/* unlock vp, since we need it unlocked from here on out. */
271 	VOP_UNLOCK(vp, 0);
272 
273 
274 #ifdef VERIFIED_EXEC
275         if ((error = veriexec_verify(l, vp, epp->ep_vap, epp->ep_ndp->ni_dirp,
276 				     flag, NULL)) != 0)
277                 goto bad2;
278 #endif
279 
280 	/* now we have the file, get the exec header */
281 	uvn_attach(vp, VM_PROT_READ);
282 	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
283 			UIO_SYSSPACE, 0, p->p_ucred, &resid, NULL);
284 	if (error)
285 		goto bad2;
286 	epp->ep_hdrvalid = epp->ep_hdrlen - resid;
287 
288 	/*
289 	 * Set up default address space limits.  Can be overridden
290 	 * by individual exec packages.
291 	 *
292 	 * XXX probably should be all done in the exec pakages.
293 	 */
294 	epp->ep_vm_minaddr = VM_MIN_ADDRESS;
295 	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
296 	/*
297 	 * set up the vmcmds for creation of the process
298 	 * address space
299 	 */
300 	error = ENOEXEC;
301 	for (i = 0; i < nexecs && error != 0; i++) {
302 		int newerror;
303 
304 		epp->ep_esch = execsw[i];
305 		newerror = (*execsw[i]->es_makecmds)(l, epp);
306 		/* make sure the first "interesting" error code is saved. */
307 		if (!newerror || error == ENOEXEC)
308 			error = newerror;
309 
310 		/* if es_makecmds call was successful, update epp->ep_es */
311 		if (!newerror && (epp->ep_flags & EXEC_HASES) == 0)
312 			epp->ep_es = execsw[i];
313 
314 		if (epp->ep_flags & EXEC_DESTR && error != 0)
315 			return error;
316 	}
317 	if (!error) {
318 		/* check that entry point is sane */
319 		if (epp->ep_entry > VM_MAXUSER_ADDRESS)
320 			error = ENOEXEC;
321 
322 		/* check limits */
323 		if ((epp->ep_tsize > MAXTSIZ) ||
324 		    (epp->ep_dsize >
325 		     (u_quad_t)p->p_rlimit[RLIMIT_DATA].rlim_cur))
326 			error = ENOMEM;
327 
328 		if (!error)
329 			return (0);
330 	}
331 
332 	/*
333 	 * free any vmspace-creation commands,
334 	 * and release their references
335 	 */
336 	kill_vmcmds(&epp->ep_vmcmds);
337 
338 bad2:
339 	/*
340 	 * close and release the vnode, restore the old one, free the
341 	 * pathname buf, and punt.
342 	 */
343 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
344 	VOP_CLOSE(vp, FREAD, p->p_ucred, l);
345 	vput(vp);
346 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
347 	return error;
348 
349 bad1:
350 	/*
351 	 * free the namei pathname buffer, and put the vnode
352 	 * (which we don't yet have open).
353 	 */
354 	vput(vp);				/* was still locked */
355 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
356 	return error;
357 }
358 
/*
 * Extra bytes execve1() adds to the initial stack size and skips over
 * before placing ps_strings: one page (NBPG) on machines whose stack
 * grows up, nothing otherwise.
 * NOTE(review): the name suggests the space is reserved for the pthread
 * library — confirm.
 */
#ifdef __MACHINE_STACK_GROWS_UP
#define STACK_PTHREADSPACE NBPG
#else
#define STACK_PTHREADSPACE 0
#endif
364 
365 static int
366 execve_fetch_element(char * const *array, size_t index, char **value)
367 {
368 	return copyin(array + index, value, sizeof(*value));
369 }
370 
371 /*
372  * exec system call
373  */
374 /* ARGSUSED */
375 int
376 sys_execve(struct lwp *l, void *v, register_t *retval)
377 {
378 	struct sys_execve_args /* {
379 		syscallarg(const char *)	path;
380 		syscallarg(char * const *)	argp;
381 		syscallarg(char * const *)	envp;
382 	} */ *uap = v;
383 
384 	return execve1(l, SCARG(uap, path), SCARG(uap, argp),
385 	    SCARG(uap, envp), execve_fetch_element);
386 }
387 
388 int
389 execve1(struct lwp *l, const char *path, char * const *args,
390     char * const *envs, execve_fetch_element_t fetch_element)
391 {
392 	int			error;
393 	u_int			i;
394 	struct exec_package	pack;
395 	struct nameidata	nid;
396 	struct vattr		attr;
397 	struct proc		*p;
398 	struct ucred		*cred;
399 	char			*argp;
400 	char			*dp, *sp;
401 	long			argc, envc;
402 	size_t			len;
403 	char			*stack;
404 	struct ps_strings	arginfo;
405 	struct vmspace		*vm;
406 	char			**tmpfap;
407 	int			szsigcode;
408 	struct exec_vmcmd	*base_vcp;
409 	int			oldlwpflags;
410 #ifdef SYSTRACE
411 	int			wassugid = ISSET(p->p_flag, P_SUGID);
412 	char			pathbuf[MAXPATHLEN];
413 	size_t			pathbuflen;
414 #endif /* SYSTRACE */
415 
416 	/* Disable scheduler activation upcalls. */
417 	oldlwpflags = l->l_flag & (L_SA | L_SA_UPCALL);
418 	if (l->l_flag & L_SA)
419 		l->l_flag &= ~(L_SA | L_SA_UPCALL);
420 
421 	p = l->l_proc;
422 	/*
423 	 * Lock the process and set the P_INEXEC flag to indicate that
424 	 * it should be left alone until we're done here.  This is
425 	 * necessary to avoid race conditions - e.g. in ptrace() -
426 	 * that might allow a local user to illicitly obtain elevated
427 	 * privileges.
428 	 */
429 	p->p_flag |= P_INEXEC;
430 
431 	cred = p->p_ucred;
432 	base_vcp = NULL;
433 	/*
434 	 * Init the namei data to point the file user's program name.
435 	 * This is done here rather than in check_exec(), so that it's
436 	 * possible to override this settings if any of makecmd/probe
437 	 * functions call check_exec() recursively - for example,
438 	 * see exec_script_makecmds().
439 	 */
440 #ifdef SYSTRACE
441 	if (ISSET(p->p_flag, P_SYSTRACE))
442 		systrace_execve0(p);
443 
444 	error = copyinstr(path, pathbuf, sizeof(pathbuf),
445 			  &pathbuflen);
446 	if (error)
447 		goto clrflg;
448 
449 	NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, l);
450 #else
451 	NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, path, l);
452 #endif /* SYSTRACE */
453 
454 	/*
455 	 * initialize the fields of the exec package.
456 	 */
457 #ifdef SYSTRACE
458 	pack.ep_name = pathbuf;
459 #else
460 	pack.ep_name = path;
461 #endif /* SYSTRACE */
462 	pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
463 	pack.ep_hdrlen = exec_maxhdrsz;
464 	pack.ep_hdrvalid = 0;
465 	pack.ep_ndp = &nid;
466 	pack.ep_emul_arg = NULL;
467 	pack.ep_vmcmds.evs_cnt = 0;
468 	pack.ep_vmcmds.evs_used = 0;
469 	pack.ep_vap = &attr;
470 	pack.ep_flags = 0;
471 
472 #ifdef LKM
473 	lockmgr(&exec_lock, LK_SHARED, NULL);
474 #endif
475 
476 	/* see if we can run it. */
477 #ifdef VERIFIED_EXEC
478         if ((error = check_exec(l, &pack, VERIEXEC_DIRECT)) != 0)
479 #else
480         if ((error = check_exec(l, &pack, 0)) != 0)
481 #endif
482 		goto freehdr;
483 
484 	/* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
485 
486 	/* allocate an argument buffer */
487 	argp = (char *) uvm_km_alloc(exec_map, NCARGS, 0,
488 	    UVM_KMF_PAGEABLE|UVM_KMF_WAITVA);
489 #ifdef DIAGNOSTIC
490 	if (argp == NULL)
491 		panic("execve: argp == NULL");
492 #endif
493 	dp = argp;
494 	argc = 0;
495 
496 	/* copy the fake args list, if there's one, freeing it as we go */
497 	if (pack.ep_flags & EXEC_HASARGL) {
498 		tmpfap = pack.ep_fa;
499 		while (*tmpfap != NULL) {
500 			char *cp;
501 
502 			cp = *tmpfap;
503 			while (*cp)
504 				*dp++ = *cp++;
505 			dp++;
506 
507 			FREE(*tmpfap, M_EXEC);
508 			tmpfap++; argc++;
509 		}
510 		FREE(pack.ep_fa, M_EXEC);
511 		pack.ep_flags &= ~EXEC_HASARGL;
512 	}
513 
514 	/* Now get argv & environment */
515 	if (args == NULL) {
516 		error = EINVAL;
517 		goto bad;
518 	}
519 	/* 'i' will index the argp/envp element to be retrieved */
520 	i = 0;
521 	if (pack.ep_flags & EXEC_SKIPARG)
522 		i++;
523 
524 	while (1) {
525 		len = argp + ARG_MAX - dp;
526 		if ((error = (*fetch_element)(args, i, &sp)) != 0)
527 			goto bad;
528 		if (!sp)
529 			break;
530 		if ((error = copyinstr(sp, dp, len, &len)) != 0) {
531 			if (error == ENAMETOOLONG)
532 				error = E2BIG;
533 			goto bad;
534 		}
535 #ifdef KTRACE
536 		if (KTRPOINT(p, KTR_EXEC_ARG))
537 			ktrkmem(l, KTR_EXEC_ARG, dp, len - 1);
538 #endif
539 		dp += len;
540 		i++;
541 		argc++;
542 	}
543 
544 	envc = 0;
545 	/* environment need not be there */
546 	if (envs != NULL) {
547 		i = 0;
548 		while (1) {
549 			len = argp + ARG_MAX - dp;
550 			if ((error = (*fetch_element)(envs, i, &sp)) != 0)
551 				goto bad;
552 			if (!sp)
553 				break;
554 			if ((error = copyinstr(sp, dp, len, &len)) != 0) {
555 				if (error == ENAMETOOLONG)
556 					error = E2BIG;
557 				goto bad;
558 			}
559 #ifdef KTRACE
560 			if (KTRPOINT(p, KTR_EXEC_ENV))
561 				ktrkmem(l, KTR_EXEC_ENV, dp, len - 1);
562 #endif
563 			dp += len;
564 			i++;
565 			envc++;
566 		}
567 	}
568 
569 	dp = (char *) ALIGN(dp);
570 
571 	szsigcode = pack.ep_es->es_emul->e_esigcode -
572 	    pack.ep_es->es_emul->e_sigcode;
573 
574 	/* Now check if args & environ fit into new stack */
575 	if (pack.ep_flags & EXEC_32)
576 		len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
577 		    sizeof(int) + sizeof(int) + dp + STACKGAPLEN +
578 		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
579 		    - argp;
580 	else
581 		len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
582 		    sizeof(char *) + sizeof(int) + dp + STACKGAPLEN +
583 		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
584 		    - argp;
585 
586 	len = ALIGN(len);	/* make the stack "safely" aligned */
587 
588 	if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
589 		error = ENOMEM;
590 		goto bad;
591 	}
592 
593 	/* Get rid of other LWPs/ */
594 	p->p_flag |= P_WEXIT; /* XXX hack. lwp-exit stuff wants to see it. */
595 	exit_lwps(l);
596 	p->p_flag &= ~P_WEXIT;
597 	KDASSERT(p->p_nlwps == 1);
598 
599 	/* This is now LWP 1 */
600 	l->l_lid = 1;
601 	p->p_nlwpid = 1;
602 
603 	/* Release any SA state. */
604 	if (p->p_sa)
605 		sa_release(p);
606 
607 	/* Remove POSIX timers */
608 	timers_free(p, TIMERS_POSIX);
609 
610 	/* adjust "active stack depth" for process VSZ */
611 	pack.ep_ssize = len;	/* maybe should go elsewhere, but... */
612 
613 	/*
614 	 * Do whatever is necessary to prepare the address space
615 	 * for remapping.  Note that this might replace the current
616 	 * vmspace with another!
617 	 */
618 	uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
619 
620 	/* record proc's vnode, for use by procfs and others */
621         if (p->p_textvp)
622                 vrele(p->p_textvp);
623 	VREF(pack.ep_vp);
624 	p->p_textvp = pack.ep_vp;
625 
626 	/* Now map address space */
627 	vm = p->p_vmspace;
628 	vm->vm_taddr = (caddr_t) pack.ep_taddr;
629 	vm->vm_tsize = btoc(pack.ep_tsize);
630 	vm->vm_daddr = (caddr_t) pack.ep_daddr;
631 	vm->vm_dsize = btoc(pack.ep_dsize);
632 	vm->vm_ssize = btoc(pack.ep_ssize);
633 	vm->vm_maxsaddr = (caddr_t) pack.ep_maxsaddr;
634 	vm->vm_minsaddr = (caddr_t) pack.ep_minsaddr;
635 
636 	/* create the new process's VM space by running the vmcmds */
637 #ifdef DIAGNOSTIC
638 	if (pack.ep_vmcmds.evs_used == 0)
639 		panic("execve: no vmcmds");
640 #endif
641 	for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
642 		struct exec_vmcmd *vcp;
643 
644 		vcp = &pack.ep_vmcmds.evs_cmds[i];
645 		if (vcp->ev_flags & VMCMD_RELATIVE) {
646 #ifdef DIAGNOSTIC
647 			if (base_vcp == NULL)
648 				panic("execve: relative vmcmd with no base");
649 			if (vcp->ev_flags & VMCMD_BASE)
650 				panic("execve: illegal base & relative vmcmd");
651 #endif
652 			vcp->ev_addr += base_vcp->ev_addr;
653 		}
654 		error = (*vcp->ev_proc)(l, vcp);
655 #ifdef DEBUG_EXEC
656 		if (error) {
657 			int j;
658 			struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
659 			for (j = 0; j <= i; j++)
660 				uprintf(
661 			    "vmcmd[%d] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
662 				    j, vp[j].ev_addr, vp[j].ev_len,
663 				    vp[j].ev_offset, vp[j].ev_prot,
664 				    vp[j].ev_flags);
665 		}
666 #endif /* DEBUG_EXEC */
667 		if (vcp->ev_flags & VMCMD_BASE)
668 			base_vcp = vcp;
669 	}
670 
671 	/* free the vmspace-creation commands, and release their references */
672 	kill_vmcmds(&pack.ep_vmcmds);
673 
674 	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
675 	VOP_CLOSE(pack.ep_vp, FREAD, cred, l);
676 	vput(pack.ep_vp);
677 
678 	/* if an error happened, deallocate and punt */
679 	if (error) {
680 		DPRINTF(("execve: vmcmd %i failed: %d\n", i - 1, error));
681 		goto exec_abort;
682 	}
683 
684 	/* remember information about the process */
685 	arginfo.ps_nargvstr = argc;
686 	arginfo.ps_nenvstr = envc;
687 
688 	stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
689 		STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
690 		len - (sizeof(struct ps_strings) + szsigcode));
691 #ifdef __MACHINE_STACK_GROWS_UP
692 	/*
693 	 * The copyargs call always copies into lower addresses
694 	 * first, moving towards higher addresses, starting with
695 	 * the stack pointer that we give.  When the stack grows
696 	 * down, this puts argc/argv/envp very shallow on the
697 	 * stack, right at the first user stack pointer, and puts
698 	 * STACKGAPLEN very deep in the stack.  When the stack
699 	 * grows up, the situation is reversed.
700 	 *
701 	 * Normally, this is no big deal.  But the ld_elf.so _rtld()
702 	 * function expects to be called with a single pointer to
703 	 * a region that has a few words it can stash values into,
704 	 * followed by argc/argv/envp.  When the stack grows down,
705 	 * it's easy to decrement the stack pointer a little bit to
706 	 * allocate the space for these few words and pass the new
707 	 * stack pointer to _rtld.  When the stack grows up, however,
708 	 * a few words before argc is part of the signal trampoline, XXX
709 	 * so we have a problem.
710 	 *
711 	 * Instead of changing how _rtld works, we take the easy way
712 	 * out and steal 32 bytes before we call copyargs.  This
713 	 * space is effectively stolen from STACKGAPLEN.
714 	 */
715 	stack += 32;
716 #endif /* __MACHINE_STACK_GROWS_UP */
717 
718 	/* Now copy argc, args & environ to new stack */
719 	error = (*pack.ep_es->es_copyargs)(l, &pack, &arginfo, &stack, argp);
720 	if (error) {
721 		DPRINTF(("execve: copyargs failed %d\n", error));
722 		goto exec_abort;
723 	}
724 	/* Move the stack back to original point */
725 	stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
726 
727 	/* fill process ps_strings info */
728 	p->p_psstr = (struct ps_strings *)
729 	    STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
730 	    sizeof(struct ps_strings));
731 	p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
732 	p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
733 	p->p_psenv = offsetof(struct ps_strings, ps_envstr);
734 	p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
735 
736 	/* copy out the process's ps_strings structure */
737 	if ((error = copyout(&arginfo, (char *)p->p_psstr,
738 	    sizeof(arginfo))) != 0) {
739 		DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
740 		       &arginfo, (char *)p->p_psstr, (long)sizeof(arginfo)));
741 		goto exec_abort;
742 	}
743 
744 	stopprofclock(p);	/* stop profiling */
745 	fdcloseexec(l);		/* handle close on exec */
746 	execsigs(p);		/* reset catched signals */
747 
748 	l->l_ctxlink = NULL;	/* reset ucontext link */
749 
750 	/* set command name & other accounting info */
751 	len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
752 	memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len);
753 	p->p_comm[len] = 0;
754 	p->p_acflag &= ~AFORK;
755 
756 	p->p_flag |= P_EXEC;
757 	if (p->p_flag & P_PPWAIT) {
758 		p->p_flag &= ~P_PPWAIT;
759 		wakeup((caddr_t) p->p_pptr);
760 	}
761 
762 	/*
763 	 * deal with set[ug]id.
764 	 * MNT_NOSUID has already been used to disable s[ug]id.
765 	 */
766 	if ((p->p_flag & P_TRACED) == 0 &&
767 
768 	    (((attr.va_mode & S_ISUID) != 0 &&
769 	      p->p_ucred->cr_uid != attr.va_uid) ||
770 
771 	     ((attr.va_mode & S_ISGID) != 0 &&
772 	      p->p_ucred->cr_gid != attr.va_gid))) {
773 		/*
774 		 * Mark the process as SUGID before we do
775 		 * anything that might block.
776 		 */
777 		p_sugid(p);
778 
779 		/* Make sure file descriptors 0..2 are in use. */
780 		if ((error = fdcheckstd(l)) != 0) {
781 			DPRINTF(("execve: fdcheckstd failed %d\n", error));
782 			goto exec_abort;
783 		}
784 
785 		p->p_ucred = crcopy(cred);
786 #ifdef KTRACE
787 		/*
788 		 * If process is being ktraced, turn off - unless
789 		 * root set it.
790 		 */
791 		if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT))
792 			ktrderef(p);
793 #endif
794 		if (attr.va_mode & S_ISUID)
795 			p->p_ucred->cr_uid = attr.va_uid;
796 		if (attr.va_mode & S_ISGID)
797 			p->p_ucred->cr_gid = attr.va_gid;
798 	} else {
799 		if (p->p_ucred->cr_uid == p->p_cred->p_ruid &&
800 		    p->p_ucred->cr_gid == p->p_cred->p_rgid)
801 			p->p_flag &= ~P_SUGID;
802 	}
803 	p->p_cred->p_svuid = p->p_ucred->cr_uid;
804 	p->p_cred->p_svgid = p->p_ucred->cr_gid;
805 
806 #if defined(__HAVE_RAS)
807 	/*
808 	 * Remove all RASs from the address space.
809 	 */
810 	ras_purgeall(p);
811 #endif
812 
813 	doexechooks(p);
814 
815 	uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
816 
817 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
818 
819 	/* notify others that we exec'd */
820 	KNOTE(&p->p_klist, NOTE_EXEC);
821 
822 	/* setup new registers and do misc. setup. */
823 	(*pack.ep_es->es_emul->e_setregs)(l, &pack, (u_long) stack);
824 	if (pack.ep_es->es_setregs)
825 		(*pack.ep_es->es_setregs)(l, &pack, (u_long) stack);
826 
827 	/* map the process's signal trampoline code */
828 	if (exec_sigcode_map(p, pack.ep_es->es_emul)) {
829 		DPRINTF(("execve: map sigcode failed %d\n", error));
830 		goto exec_abort;
831 	}
832 
833 	if (p->p_flag & P_TRACED)
834 		psignal(p, SIGTRAP);
835 
836 	free(pack.ep_hdr, M_EXEC);
837 
838 	/*
839 	 * Call emulation specific exec hook. This can setup per-process
840 	 * p->p_emuldata or do any other per-process stuff an emulation needs.
841 	 *
842 	 * If we are executing process of different emulation than the
843 	 * original forked process, call e_proc_exit() of the old emulation
844 	 * first, then e_proc_exec() of new emulation. If the emulation is
845 	 * same, the exec hook code should deallocate any old emulation
846 	 * resources held previously by this process.
847 	 */
848 	if (p->p_emul && p->p_emul->e_proc_exit
849 	    && p->p_emul != pack.ep_es->es_emul)
850 		(*p->p_emul->e_proc_exit)(p);
851 
852 	/*
853 	 * Call exec hook. Emulation code may NOT store reference to anything
854 	 * from &pack.
855 	 */
856         if (pack.ep_es->es_emul->e_proc_exec)
857                 (*pack.ep_es->es_emul->e_proc_exec)(p, &pack);
858 
859 	/* update p_emul, the old value is no longer needed */
860 	p->p_emul = pack.ep_es->es_emul;
861 
862 	/* ...and the same for p_execsw */
863 	p->p_execsw = pack.ep_es;
864 
865 #ifdef __HAVE_SYSCALL_INTERN
866 	(*p->p_emul->e_syscall_intern)(p);
867 #endif
868 #ifdef KTRACE
869 	if (KTRPOINT(p, KTR_EMUL))
870 		ktremul(l);
871 #endif
872 
873 #ifdef LKM
874 	lockmgr(&exec_lock, LK_RELEASE, NULL);
875 #endif
876 	p->p_flag &= ~P_INEXEC;
877 
878 	if (p->p_flag & P_STOPEXEC) {
879 		int s;
880 
881 		sigminusset(&contsigmask, &p->p_sigctx.ps_siglist);
882 		SCHED_LOCK(s);
883 		p->p_pptr->p_nstopchild++;
884 		p->p_stat = SSTOP;
885 		l->l_stat = LSSTOP;
886 		p->p_nrlwps--;
887 		mi_switch(l, NULL);
888 		SCHED_ASSERT_UNLOCKED();
889 		splx(s);
890 	}
891 
892 #ifdef SYSTRACE
893 	if (ISSET(p->p_flag, P_SYSTRACE) &&
894 	    wassugid && !ISSET(p->p_flag, P_SUGID))
895 		systrace_execve1(pathbuf, p);
896 #endif /* SYSTRACE */
897 
898 	return (EJUSTRETURN);
899 
900  bad:
901 	p->p_flag &= ~P_INEXEC;
902 	/* free the vmspace-creation commands, and release their references */
903 	kill_vmcmds(&pack.ep_vmcmds);
904 	/* kill any opened file descriptor, if necessary */
905 	if (pack.ep_flags & EXEC_HASFD) {
906 		pack.ep_flags &= ~EXEC_HASFD;
907 		(void) fdrelease(l, pack.ep_fd);
908 	}
909 	/* close and put the exec'd file */
910 	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
911 	VOP_CLOSE(pack.ep_vp, FREAD, cred, l);
912 	vput(pack.ep_vp);
913 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
914 	uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
915 
916  freehdr:
917 	free(pack.ep_hdr, M_EXEC);
918 
919 #ifdef SYSTRACE
920  clrflg:
921 #endif /* SYSTRACE */
922 	l->l_flag |= oldlwpflags;
923 	p->p_flag &= ~P_INEXEC;
924 #ifdef LKM
925 	lockmgr(&exec_lock, LK_RELEASE, NULL);
926 #endif
927 
928 	return error;
929 
930  exec_abort:
931 	p->p_flag &= ~P_INEXEC;
932 #ifdef LKM
933 	lockmgr(&exec_lock, LK_RELEASE, NULL);
934 #endif
935 
936 	/*
937 	 * the old process doesn't exist anymore.  exit gracefully.
938 	 * get rid of the (new) address space we have created, if any, get rid
939 	 * of our namei data and vnode, and exit noting failure
940 	 */
941 	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
942 		VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
943 	if (pack.ep_emul_arg)
944 		FREE(pack.ep_emul_arg, M_TEMP);
945 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
946 	uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
947 	free(pack.ep_hdr, M_EXEC);
948 	exit1(l, W_EXITCODE(error, SIGABRT));
949 
950 	/* NOTREACHED */
951 	return 0;
952 }
953 
954 
955 int
956 copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
957     char **stackp, void *argp)
958 {
959 	char	**cpp, *dp, *sp;
960 	size_t	len;
961 	void	*nullp;
962 	long	argc, envc;
963 	int	error;
964 
965 	cpp = (char **)*stackp;
966 	nullp = NULL;
967 	argc = arginfo->ps_nargvstr;
968 	envc = arginfo->ps_nenvstr;
969 	if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
970 		return error;
971 
972 	dp = (char *) (cpp + argc + envc + 2 + pack->ep_es->es_arglen);
973 	sp = argp;
974 
975 	/* XXX don't copy them out, remap them! */
976 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
977 
978 	for (; --argc >= 0; sp += len, dp += len)
979 		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
980 		    (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
981 			return error;
982 
983 	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
984 		return error;
985 
986 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
987 
988 	for (; --envc >= 0; sp += len, dp += len)
989 		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
990 		    (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
991 			return error;
992 
993 	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
994 		return error;
995 
996 	*stackp = (char *)cpp;
997 	return 0;
998 }
999 
1000 #ifdef LKM
1001 /*
1002  * Find an emulation of given name in list of emulations.
1003  * Needs to be called with the exec_lock held.
1004  */
1005 const struct emul *
1006 emul_search(const char *name)
1007 {
1008 	struct emul_entry *it;
1009 
1010 	LIST_FOREACH(it, &el_head, el_list) {
1011 		if (strcmp(name, it->el_emul->e_name) == 0)
1012 			return it->el_emul;
1013 	}
1014 
1015 	return NULL;
1016 }
1017 
1018 /*
1019  * Add an emulation to list, if it's not there already.
1020  */
1021 int
1022 emul_register(const struct emul *emul, int ro_entry)
1023 {
1024 	struct emul_entry	*ee;
1025 	int			error;
1026 
1027 	error = 0;
1028 	lockmgr(&exec_lock, LK_SHARED, NULL);
1029 
1030 	if (emul_search(emul->e_name)) {
1031 		error = EEXIST;
1032 		goto out;
1033 	}
1034 
1035 	MALLOC(ee, struct emul_entry *, sizeof(struct emul_entry),
1036 		M_EXEC, M_WAITOK);
1037 	ee->el_emul = emul;
1038 	ee->ro_entry = ro_entry;
1039 	LIST_INSERT_HEAD(&el_head, ee, el_list);
1040 
1041  out:
1042 	lockmgr(&exec_lock, LK_RELEASE, NULL);
1043 	return error;
1044 }
1045 
1046 /*
1047  * Remove emulation with name 'name' from list of supported emulations.
1048  */
1049 int
1050 emul_unregister(const char *name)
1051 {
1052 	const struct proclist_desc *pd;
1053 	struct emul_entry	*it;
1054 	int			i, error;
1055 	struct proc		*ptmp;
1056 
1057 	error = 0;
1058 	lockmgr(&exec_lock, LK_SHARED, NULL);
1059 
1060 	LIST_FOREACH(it, &el_head, el_list) {
1061 		if (strcmp(it->el_emul->e_name, name) == 0)
1062 			break;
1063 	}
1064 
1065 	if (!it) {
1066 		error = ENOENT;
1067 		goto out;
1068 	}
1069 
1070 	if (it->ro_entry) {
1071 		error = EBUSY;
1072 		goto out;
1073 	}
1074 
1075 	/* test if any execw[] entry is still using this */
1076 	for(i=0; i < nexecs; i++) {
1077 		if (execsw[i]->es_emul == it->el_emul) {
1078 			error = EBUSY;
1079 			goto out;
1080 		}
1081 	}
1082 
1083 	/*
1084 	 * Test if any process is running under this emulation - since
1085 	 * emul_unregister() is running quite sendomly, it's better
1086 	 * to do expensive check here than to use any locking.
1087 	 */
1088 	proclist_lock_read();
1089 	for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
1090 		PROCLIST_FOREACH(ptmp, pd->pd_list) {
1091 			if (ptmp->p_emul == it->el_emul) {
1092 				error = EBUSY;
1093 				break;
1094 			}
1095 		}
1096 	}
1097 	proclist_unlock_read();
1098 
1099 	if (error)
1100 		goto out;
1101 
1102 
1103 	/* entry is not used, remove it */
1104 	LIST_REMOVE(it, el_list);
1105 	FREE(it, M_EXEC);
1106 
1107  out:
1108 	lockmgr(&exec_lock, LK_RELEASE, NULL);
1109 	return error;
1110 }
1111 
1112 /*
1113  * Add execsw[] entry.
1114  */
1115 int
1116 exec_add(struct execsw *esp, const char *e_name)
1117 {
1118 	struct exec_entry	*it;
1119 	int			error;
1120 
1121 	error = 0;
1122 	lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
1123 
1124 	if (!esp->es_emul) {
1125 		esp->es_emul = emul_search(e_name);
1126 		if (!esp->es_emul) {
1127 			error = ENOENT;
1128 			goto out;
1129 		}
1130 	}
1131 
1132 	LIST_FOREACH(it, &ex_head, ex_list) {
1133 		/* assume tuple (makecmds, probe_func, emulation) is unique */
1134 		if (it->es->es_makecmds == esp->es_makecmds
1135 		    && it->es->u.elf_probe_func == esp->u.elf_probe_func
1136 		    && it->es->es_emul == esp->es_emul) {
1137 			error = EEXIST;
1138 			goto out;
1139 		}
1140 	}
1141 
1142 	/* if we got here, the entry doesn't exist yet */
1143 	MALLOC(it, struct exec_entry *, sizeof(struct exec_entry),
1144 		M_EXEC, M_WAITOK);
1145 	it->es = esp;
1146 	LIST_INSERT_HEAD(&ex_head, it, ex_list);
1147 
1148 	/* update execsw[] */
1149 	exec_init(0);
1150 
1151  out:
1152 	lockmgr(&exec_lock, LK_RELEASE, NULL);
1153 	return error;
1154 }
1155 
1156 /*
1157  * Remove execsw[] entry.
1158  */
1159 int
1160 exec_remove(const struct execsw *esp)
1161 {
1162 	struct exec_entry	*it;
1163 	int			error;
1164 
1165 	error = 0;
1166 	lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
1167 
1168 	LIST_FOREACH(it, &ex_head, ex_list) {
1169 		/* assume tuple (makecmds, probe_func, emulation) is unique */
1170 		if (it->es->es_makecmds == esp->es_makecmds
1171 		    && it->es->u.elf_probe_func == esp->u.elf_probe_func
1172 		    && it->es->es_emul == esp->es_emul)
1173 			break;
1174 	}
1175 	if (!it) {
1176 		error = ENOENT;
1177 		goto out;
1178 	}
1179 
1180 	/* remove item from list and free resources */
1181 	LIST_REMOVE(it, ex_list);
1182 	FREE(it, M_EXEC);
1183 
1184 	/* update execsw[] */
1185 	exec_init(0);
1186 
1187  out:
1188 	lockmgr(&exec_lock, LK_RELEASE, NULL);
1189 	return error;
1190 }
1191 
1192 static void
1193 link_es(struct execsw_entry **listp, const struct execsw *esp)
1194 {
1195 	struct execsw_entry *et, *e1;
1196 
1197 	MALLOC(et, struct execsw_entry *, sizeof(struct execsw_entry),
1198 			M_TEMP, M_WAITOK);
1199 	et->next = NULL;
1200 	et->es = esp;
1201 	if (*listp == NULL) {
1202 		*listp = et;
1203 		return;
1204 	}
1205 
1206 	switch(et->es->es_prio) {
1207 	case EXECSW_PRIO_FIRST:
1208 		/* put new entry as the first */
1209 		et->next = *listp;
1210 		*listp = et;
1211 		break;
1212 	case EXECSW_PRIO_ANY:
1213 		/* put new entry after all *_FIRST and *_ANY entries */
1214 		for(e1 = *listp; e1->next
1215 			&& e1->next->es->es_prio != EXECSW_PRIO_LAST;
1216 			e1 = e1->next);
1217 		et->next = e1->next;
1218 		e1->next = et;
1219 		break;
1220 	case EXECSW_PRIO_LAST:
1221 		/* put new entry as the last one */
1222 		for(e1 = *listp; e1->next; e1 = e1->next);
1223 		e1->next = et;
1224 		break;
1225 	default:
1226 #ifdef DIAGNOSTIC
1227 		panic("execw[] entry with unknown priority %d found",
1228 			et->es->es_prio);
1229 #endif
1230 		break;
1231 	}
1232 }
1233 
1234 /*
1235  * Initialize exec structures. If init_boot is true, also does necessary
1236  * one-time initialization (it's called from main() that way).
1237  * Once system is multiuser, this should be called with exec_lock held,
1238  * i.e. via exec_{add|remove}().
1239  */
1240 int
1241 exec_init(int init_boot)
1242 {
1243 	const struct execsw	**new_es, * const *old_es;
1244 	struct execsw_entry	*list, *e1;
1245 	struct exec_entry	*e2;
1246 	int			i, es_sz;
1247 
1248 	if (init_boot) {
1249 		/* do one-time initializations */
1250 		lockinit(&exec_lock, PWAIT, "execlck", 0, 0);
1251 
1252 		/* register compiled-in emulations */
1253 		for(i=0; i < nexecs_builtin; i++) {
1254 			if (execsw_builtin[i].es_emul)
1255 				emul_register(execsw_builtin[i].es_emul, 1);
1256 		}
1257 #ifdef DIAGNOSTIC
1258 		if (i == 0)
1259 			panic("no emulations found in execsw_builtin[]");
1260 #endif
1261 	}
1262 
1263 	/*
1264 	 * Build execsw[] array from builtin entries and entries added
1265 	 * at runtime.
1266 	 */
1267 	list = NULL;
1268 	for(i=0; i < nexecs_builtin; i++)
1269 		link_es(&list, &execsw_builtin[i]);
1270 
1271 	/* Add dynamically loaded entries */
1272 	es_sz = nexecs_builtin;
1273 	LIST_FOREACH(e2, &ex_head, ex_list) {
1274 		link_es(&list, e2->es);
1275 		es_sz++;
1276 	}
1277 
1278 	/*
1279 	 * Now that we have sorted all execw entries, create new execsw[]
1280 	 * and free no longer needed memory in the process.
1281 	 */
1282 	new_es = malloc(es_sz * sizeof(struct execsw *), M_EXEC, M_WAITOK);
1283 	for(i=0; list; i++) {
1284 		new_es[i] = list->es;
1285 		e1 = list->next;
1286 		FREE(list, M_TEMP);
1287 		list = e1;
1288 	}
1289 
1290 	/*
1291 	 * New execsw[] array built, now replace old execsw[] and free
1292 	 * used memory.
1293 	 */
1294 	old_es = execsw;
1295 	execsw = new_es;
1296 	nexecs = es_sz;
1297 	if (old_es)
1298 		/*XXXUNCONST*/
1299 		free(__UNCONST(old_es), M_EXEC);
1300 
1301 	/*
1302 	 * Figure out the maximum size of an exec header.
1303 	 */
1304 	exec_maxhdrsz = 0;
1305 	for (i = 0; i < nexecs; i++) {
1306 		if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1307 			exec_maxhdrsz = execsw[i]->es_hdrsz;
1308 	}
1309 
1310 	return 0;
1311 }
1312 #endif
1313 
1314 #ifndef LKM
1315 /*
1316  * Simplified exec_init() for kernels without LKMs. Only initialize
1317  * exec_maxhdrsz and execsw[].
1318  */
1319 int
1320 exec_init(int init_boot)
1321 {
1322 	int i;
1323 
1324 #ifdef DIAGNOSTIC
1325 	if (!init_boot)
1326 		panic("exec_init(): called with init_boot == 0");
1327 #endif
1328 
1329 	/* do one-time initializations */
1330 	nexecs = nexecs_builtin;
1331 	execsw = malloc(nexecs*sizeof(struct execsw *), M_EXEC, M_WAITOK);
1332 
1333 	/*
1334 	 * Fill in execsw[] and figure out the maximum size of an exec header.
1335 	 */
1336 	exec_maxhdrsz = 0;
1337 	for(i=0; i < nexecs; i++) {
1338 		execsw[i] = &execsw_builtin[i];
1339 		if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
1340 			exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
1341 	}
1342 
1343 	return 0;
1344 
1345 }
1346 #endif /* !LKM */
1347 
1348 static int
1349 exec_sigcode_map(struct proc *p, const struct emul *e)
1350 {
1351 	vaddr_t va;
1352 	vsize_t sz;
1353 	int error;
1354 	struct uvm_object *uobj;
1355 
1356 	sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1357 
1358 	if (e->e_sigobject == NULL || sz == 0) {
1359 		return 0;
1360 	}
1361 
1362 	/*
1363 	 * If we don't have a sigobject for this emulation, create one.
1364 	 *
1365 	 * sigobject is an anonymous memory object (just like SYSV shared
1366 	 * memory) that we keep a permanent reference to and that we map
1367 	 * in all processes that need this sigcode. The creation is simple,
1368 	 * we create an object, add a permanent reference to it, map it in
1369 	 * kernel space, copy out the sigcode to it and unmap it.
1370 	 * We map it with PROT_READ|PROT_EXEC into the process just
1371 	 * the way sys_mmap() would map it.
1372 	 */
1373 
1374 	uobj = *e->e_sigobject;
1375 	if (uobj == NULL) {
1376 		uobj = uao_create(sz, 0);
1377 		(*uobj->pgops->pgo_reference)(uobj);
1378 		va = vm_map_min(kernel_map);
1379 		if ((error = uvm_map(kernel_map, &va, round_page(sz),
1380 		    uobj, 0, 0,
1381 		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1382 		    UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1383 			printf("kernel mapping failed %d\n", error);
1384 			(*uobj->pgops->pgo_detach)(uobj);
1385 			return (error);
1386 		}
1387 		memcpy((void *)va, e->e_sigcode, sz);
1388 #ifdef PMAP_NEED_PROCWR
1389 		pmap_procwr(&proc0, va, sz);
1390 #endif
1391 		uvm_unmap(kernel_map, va, va + round_page(sz));
1392 		*e->e_sigobject = uobj;
1393 	}
1394 
1395 	/* Just a hint to uvm_map where to put it. */
1396 	va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1397 	    round_page(sz));
1398 
1399 #ifdef __alpha__
1400 	/*
1401 	 * Tru64 puts /sbin/loader at the end of user virtual memory,
1402 	 * which causes the above calculation to put the sigcode at
1403 	 * an invalid address.  Put it just below the text instead.
1404 	 */
1405 	if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1406 		va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1407 	}
1408 #endif
1409 
1410 	(*uobj->pgops->pgo_reference)(uobj);
1411 	error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1412 			uobj, 0, 0,
1413 			UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1414 				    UVM_ADV_RANDOM, 0));
1415 	if (error) {
1416 		(*uobj->pgops->pgo_detach)(uobj);
1417 		return (error);
1418 	}
1419 	p->p_sigctx.ps_sigcode = (void *)va;
1420 	return (0);
1421 }
1422