xref: /csrg-svn/sys/kern/kern_exec.c (revision 47652)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)kern_exec.c	7.37 (Berkeley) 03/25/91
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "filedesc.h"
12 #include "kernel.h"
13 #include "proc.h"
14 #include "mount.h"
15 #include "malloc.h"
16 #include "vnode.h"
17 #include "seg.h"
18 #include "file.h"
19 #include "acct.h"
20 #include "exec.h"
21 #include "ktrace.h"
22 #include "resourcevar.h"
23 
24 #include "machine/reg.h"
25 
26 #include "mman.h"
27 #include "vm/vm.h"
28 #include "vm/vm_param.h"
29 #include "vm/vm_map.h"
30 #include "vm/vm_kern.h"
31 #include "vm/vm_pager.h"
32 
33 #include "signalvar.h"
34 #include "kinfo_proc.h"
35 #include "user.h"			/* for pcb, sigc */
36 
37 #ifdef HPUXCOMPAT
38 #include "hp300/hpux/hpux_exec.h"
39 #endif
40 
41 #ifdef COMPAT_43
42 execv(p, uap, retval)
43 	struct proc *p;
44 	struct args {
45 		char	*fname;
46 		char	**argp;
47 		char	**envp;
48 	} *uap;
49 	int *retval;
50 {
51 
52 	uap->envp = NULL;
53 	return (execve(p, uap, retval));
54 }
55 #endif
56 
57 /*
58  * exec system call
59  */
60 /* ARGSUSED */
61 execve(p, uap, retval)
62 	register struct proc *p;
63 	register struct args {
64 		char	*fname;
65 		char	**argp;
66 		char	**envp;
67 	} *uap;
68 	int *retval;
69 {
70 	register struct ucred *cred = p->p_ucred;
71 	register struct nameidata *ndp;
72 	register struct filedesc *fdp = p->p_fd;
73 	int na, ne, ucp, ap, cc;
74 	register char *cp;
75 	register int nc;
76 	unsigned len;
77 	int indir, uid, gid;
78 	char *sharg;
79 	struct vnode *vp;
80 	int resid, error, flags = 0;
81 	vm_offset_t execargs;
82 	struct vattr vattr;
83 	char cfname[MAXCOMLEN + 1];
84 	char cfarg[MAXINTERP];
85 	union {
86 		char	ex_shell[MAXINTERP];	/* #! and interpreter name */
87 		struct	exec ex_exec;
88 #ifdef HPUXCOMPAT
89 		struct	hpux_exec ex_hexec;
90 #endif
91 	} exdata;
92 #ifdef HPUXCOMPAT
93 	struct hpux_exec hhead;
94 #endif
95 	struct nameidata nd;
96 
97 	ndp = &nd;
98   start:
99 	ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF;
100 	ndp->ni_segflg = UIO_USERSPACE;
101 	ndp->ni_dirp = uap->fname;
102 	if (error = namei(ndp, p))
103 		return (error);
104 	vp = ndp->ni_vp;
105 	indir = 0;
106 	uid = cred->cr_uid;
107 	gid = cred->cr_gid;
108 	if (error = VOP_GETATTR(vp, &vattr, cred))
109 		goto bad;
110 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
111 		error = EACCES;
112 		goto bad;
113 	}
114 	if ((vp->v_mount->mnt_flag & MNT_NOSUID) == 0) {
115 		if (vattr.va_mode & VSUID)
116 			uid = vattr.va_uid;
117 		if (vattr.va_mode & VSGID)
118 			gid = vattr.va_gid;
119 	}
120 
121   again:
122 	if (error = VOP_ACCESS(vp, VEXEC, cred))
123 		goto bad;
124 	if ((p->p_flag & STRC) && (error = VOP_ACCESS(vp, VREAD, cred)))
125 		goto bad;
126 	if (vp->v_type != VREG ||
127 	    (vattr.va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) {
128 		error = EACCES;
129 		goto bad;
130 	}
131 
132 	/*
133 	 * Read in first few bytes of file for segment sizes, magic number:
134 	 *	OMAGIC = plain executable
135 	 *	NMAGIC = RO text
136 	 *	ZMAGIC = demand paged RO text
137 	 * Also an ASCII line beginning with #! is
138 	 * the file name of a ``shell'' and arguments may be prepended
139 	 * to the argument list if given here.
140 	 *
141 	 * SHELL NAMES ARE LIMITED IN LENGTH.
142 	 *
143 	 * ONLY ONE ARGUMENT MAY BE PASSED TO THE SHELL FROM
144 	 * THE ASCII LINE.
145 	 */
146 	exdata.ex_shell[0] = '\0';	/* for zero length files */
147 	error = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata),
148 	    (off_t)0, UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &resid);
149 	if (error)
150 		goto bad;
151 #ifndef lint
152 	if (resid > sizeof(exdata) - sizeof(exdata.ex_exec) &&
153 	    exdata.ex_shell[0] != '#') {
154 		error = ENOEXEC;
155 		goto bad;
156 	}
157 #endif
158 #if defined(hp300)
159 	switch ((int)exdata.ex_exec.a_mid) {
160 
161 	/*
162 	 * An ancient hp200 or hp300 binary, shouldn't happen anymore.
163 	 * Mark as invalid.
164 	 */
165 	case MID_ZERO:
166 		exdata.ex_exec.a_magic = 0;
167 		break;
168 
169 	/*
170 	 * HP200 series has a smaller page size so we cannot
171 	 * demand-load or even write protect text, so we just
172 	 * treat as OMAGIC.
173 	 */
174 	case MID_HP200:
175 		exdata.ex_exec.a_magic = OMAGIC;
176 		break;
177 
178 	case MID_HP300:
179 		break;
180 
181 #ifdef HPUXCOMPAT
182 	case MID_HPUX:
183 		/*
184 		 * Save a.out header.  This is eventually saved in the pcb,
185 		 * but we cannot do that yet in case the exec fails before
186 		 * the image is overlayed.
187 		 */
188 		bcopy((caddr_t)&exdata.ex_hexec,
189 		      (caddr_t)&hhead, sizeof hhead);
190 		/*
191 		 * If version number is 0x2bad this is a native BSD
192 		 * binary created via the HPUX SGS.  Should not be
193 		 * treated as an HPUX binary.
194 		 */
195 		if (exdata.ex_hexec.ha_version != BSDVNUM)
196 			flags |= SHPUX;
197 		/*
198 		 * Shuffle important fields to their BSD locations.
199 		 * Note that the order in which this is done is important.
200 		 */
201 		exdata.ex_exec.a_text = exdata.ex_hexec.ha_text;
202 		exdata.ex_exec.a_data = exdata.ex_hexec.ha_data;
203 		exdata.ex_exec.a_bss = exdata.ex_hexec.ha_bss;
204 		exdata.ex_exec.a_entry = exdata.ex_hexec.ha_entry;
205 		/*
206 		 * For ZMAGIC files, make sizes consistant with those
207 		 * generated by BSD ld.
208 		 */
209 		if (exdata.ex_exec.a_magic == ZMAGIC) {
210 			exdata.ex_exec.a_text =
211 				ctob(btoc(exdata.ex_exec.a_text));
212 			nc = exdata.ex_exec.a_data + exdata.ex_exec.a_bss;
213 			exdata.ex_exec.a_data =
214 				ctob(btoc(exdata.ex_exec.a_data));
215 			nc -= (int)exdata.ex_exec.a_data;
216 			exdata.ex_exec.a_bss = (nc < 0) ? 0 : nc;
217 		}
218 		break;
219 #endif
220 	}
221 #endif
222 	switch ((int)exdata.ex_exec.a_magic) {
223 
224 	case OMAGIC:
225 		exdata.ex_exec.a_data += exdata.ex_exec.a_text;
226 		exdata.ex_exec.a_text = 0;
227 		break;
228 
229 	case ZMAGIC:
230 		flags |= SPAGV;
231 	case NMAGIC:
232 		if (exdata.ex_exec.a_text == 0) {
233 			error = ENOEXEC;
234 			goto bad;
235 		}
236 		break;
237 
238 	default:
239 		if (exdata.ex_shell[0] != '#' ||
240 		    exdata.ex_shell[1] != '!' ||
241 		    indir) {
242 			error = ENOEXEC;
243 			goto bad;
244 		}
245 		for (cp = &exdata.ex_shell[2];; ++cp) {
246 			if (cp >= &exdata.ex_shell[MAXINTERP]) {
247 				error = ENOEXEC;
248 				goto bad;
249 			}
250 			if (*cp == '\n') {
251 				*cp = '\0';
252 				break;
253 			}
254 			if (*cp == '\t')
255 				*cp = ' ';
256 		}
257 		cp = &exdata.ex_shell[2];
258 		while (*cp == ' ')
259 			cp++;
260 		ndp->ni_dirp = cp;
261 		while (*cp && *cp != ' ')
262 			cp++;
263 		cfarg[0] = '\0';
264 		if (*cp) {
265 			*cp++ = '\0';
266 			while (*cp == ' ')
267 				cp++;
268 			if (*cp)
269 				bcopy((caddr_t)cp, (caddr_t)cfarg, MAXINTERP);
270 		}
271 		indir = 1;
272 		vput(vp);
273 		ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF;
274 		ndp->ni_segflg = UIO_SYSSPACE;
275 		if (error = namei(ndp, p))
276 			return (error);
277 		vp = ndp->ni_vp;
278 		if (error = VOP_GETATTR(vp, &vattr, cred))
279 			goto bad;
280 		bcopy((caddr_t)ndp->ni_dent.d_name, (caddr_t)cfname,
281 		    MAXCOMLEN);
282 		cfname[MAXCOMLEN] = '\0';
283 		uid = cred->cr_uid;	/* shell scripts can't be setuid */
284 		gid = cred->cr_gid;
285 		goto again;
286 	}
287 
288 	/*
289 	 * Collect arguments on "file" in swap space.
290 	 */
291 	na = 0;
292 	ne = 0;
293 	nc = 0;
294 	cc = NCARGS;
295 	execargs = kmem_alloc_wait(exec_map, NCARGS);
296 	cp = (char *) execargs;
297 	/*
298 	 * Copy arguments into file in argdev area.
299 	 */
300 	if (uap->argp) for (;;) {
301 		ap = NULL;
302 		sharg = NULL;
303 		if (indir && na == 0) {
304 			sharg = cfname;
305 			ap = (int)sharg;
306 			uap->argp++;		/* ignore argv[0] */
307 		} else if (indir && (na == 1 && cfarg[0])) {
308 			sharg = cfarg;
309 			ap = (int)sharg;
310 		} else if (indir && (na == 1 || na == 2 && cfarg[0]))
311 			ap = (int)uap->fname;
312 		else if (uap->argp) {
313 			ap = fuword((caddr_t)uap->argp);
314 			uap->argp++;
315 		}
316 		if (ap == NULL && uap->envp) {
317 			uap->argp = NULL;
318 			if ((ap = fuword((caddr_t)uap->envp)) != NULL)
319 				uap->envp++, ne++;
320 		}
321 		if (ap == NULL)
322 			break;
323 		na++;
324 		if (ap == -1) {
325 			error = EFAULT;
326 			goto bad;
327 		}
328 		do {
329 			if (nc >= NCARGS-1) {
330 				error = E2BIG;
331 				break;
332 			}
333 			if (sharg) {
334 				error = copystr(sharg, cp, (unsigned)cc, &len);
335 				sharg += len;
336 			} else {
337 				error = copyinstr((caddr_t)ap, cp, (unsigned)cc,
338 				    &len);
339 				ap += len;
340 			}
341 			cp += len;
342 			nc += len;
343 			cc -= len;
344 		} while (error == ENAMETOOLONG);
345 		if (error)
346 			goto bad;
347 	}
348 	nc = (nc + NBPW-1) & ~(NBPW-1);
349 	error = getxfile(p, vp, &exdata.ex_exec, flags, nc + (na+4)*NBPW,
350 	    uid, gid);
351 	if (error)
352 		goto bad;
353 	vput(vp);
354 	vp = NULL;
355 
356 #ifdef HPUXCOMPAT
357 	/*
358 	 * We are now committed to the exec so we can save the exec
359 	 * header in the pcb where we can dump it if necessary in core()
360 	 */
361 	if (u.u_pcb.pcb_flags & PCB_HPUXBIN)
362 		bcopy((caddr_t)&hhead,
363 		      (caddr_t)u.u_pcb.pcb_exec, sizeof hhead);
364 #endif
365 
366 	/*
367 	 * Copy back arglist.
368 	 */
369 	ucp = USRSTACK - sizeof(u.u_pcb.pcb_sigc) - nc - NBPW;
370 	ap = ucp - na*NBPW - 3*NBPW;
371 	p->p_regs[SP] = ap;
372 	(void) suword((caddr_t)ap, na-ne);
373 	nc = 0;
374 	cp = (char *) execargs;
375 	cc = NCARGS;
376 	for (;;) {
377 		ap += NBPW;
378 		if (na == ne) {
379 			(void) suword((caddr_t)ap, 0);
380 			ap += NBPW;
381 		}
382 		if (--na < 0)
383 			break;
384 		(void) suword((caddr_t)ap, ucp);
385 		do {
386 			error = copyoutstr(cp, (caddr_t)ucp, (unsigned)cc,
387 			    &len);
388 			ucp += len;
389 			cp += len;
390 			nc += len;
391 			cc -= len;
392 		} while (error == ENAMETOOLONG);
393 		if (error == EFAULT)
394 			panic("exec: EFAULT");
395 	}
396 	(void) suword((caddr_t)ap, 0);
397 
398 	execsigs(p);
399 
400 	for (nc = fdp->fd_lastfile; nc >= 0; --nc) {
401 		if (fdp->fd_ofileflags[nc] & UF_EXCLOSE) {
402 			(void) closef(fdp->fd_ofiles[nc], p);
403 			fdp->fd_ofiles[nc] = NULL;
404 			fdp->fd_ofileflags[nc] = 0;
405 			if (nc < fdp->fd_freefile)
406 				fdp->fd_freefile = nc;
407 		}
408 		fdp->fd_ofileflags[nc] &= ~UF_MAPPED;
409 	}
410 	/*
411 	 * Adjust fd_lastfile to account for descriptors closed above.
412 	 * Don't decrement fd_lastfile past 0, as it's unsigned.
413 	 */
414 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
415 		fdp->fd_lastfile--;
416 	setregs(exdata.ex_exec.a_entry, retval);
417 	/*
418 	 * Install sigcode at top of user stack.
419 	 */
420 	copyout((caddr_t)u.u_pcb.pcb_sigc,
421 		(caddr_t)(USRSTACK - sizeof(u.u_pcb.pcb_sigc)),
422 		sizeof(u.u_pcb.pcb_sigc));
423 	/*
424 	 * Remember file name for accounting.
425 	 */
426 	p->p_acflag &= ~AFORK;
427 	if (indir)
428 		bcopy((caddr_t)cfname, (caddr_t)p->p_comm, MAXCOMLEN);
429 	else {
430 		if (ndp->ni_dent.d_namlen > MAXCOMLEN)
431 			ndp->ni_dent.d_namlen = MAXCOMLEN;
432 		bcopy((caddr_t)ndp->ni_dent.d_name, (caddr_t)p->p_comm,
433 		    (unsigned)(ndp->ni_dent.d_namlen + 1));
434 	}
435 bad:
436 	if (execargs)
437 		kmem_free_wakeup(exec_map, execargs, NCARGS);
438 	if (vp)
439 		vput(vp);
440 	return (error);
441 }
442 
443 /*
444  * Read in and set up memory for executed file.
445  */
446 getxfile(p, vp, ep, flags, nargc, uid, gid)
447 	register struct proc *p;
448 	register struct vnode *vp;
449 	register struct exec *ep;
450 	int flags, nargc, uid, gid;
451 {
452 	segsz_t ts, ds, ss;
453 	register struct ucred *cred = p->p_ucred;
454 	off_t toff;
455 	int error = 0;
456 	vm_offset_t addr;
457 	vm_size_t size;
458 	struct vmspace *vm = p->p_vmspace;
459 
460 #ifdef HPUXCOMPAT
461 	if (ep->a_mid == MID_HPUX) {
462 		if (flags & SPAGV)
463 			toff = CLBYTES;
464 		else
465 			toff = sizeof (struct hpux_exec);
466 	} else
467 #endif
468 	if (flags & SPAGV)
469 		toff = CLBYTES;
470 	else
471 		toff = sizeof (struct exec);
472 	if (ep->a_text != 0 && (vp->v_flag & VTEXT) == 0 &&
473 	    vp->v_usecount != 1) {
474 		register struct file *fp;
475 
476 		for (fp = file; fp < fileNFILE; fp++) {
477 			if (fp->f_type == DTYPE_VNODE &&
478 			    fp->f_count > 0 &&
479 			    (struct vnode *)fp->f_data == vp &&
480 			    (fp->f_flag & FWRITE)) {
481 				return (ETXTBSY);
482 			}
483 		}
484 	}
485 
486 	/*
487 	 * Compute text and data sizes and make sure not too large.
488 	 * NB - Check data and bss separately as they may overflow
489 	 * when summed together.
490 	 */
491 	ts = clrnd(btoc(ep->a_text));
492 	ds = clrnd(btoc(ep->a_data + ep->a_bss));
493 	ss = clrnd(SSIZE + btoc(nargc + sizeof(u.u_pcb.pcb_sigc)));
494 
495 	/*
496 	 * If we're sharing the address space, allocate a new space
497 	 * and release our reference to the old one.  Otherwise,
498 	 * empty out the existing vmspace.
499 	 */
500 	if (vm->vm_refcnt > 1) {
501 		p->p_vmspace = vmspace_alloc(vm_map_min(&vm->vm_map),
502 		    vm_map_max(&vm->vm_map), 1);
503 		vmspace_free(vm);
504 		vm = p->p_vmspace;
505 	} else {
506 #ifdef SYSVSHM
507 		if (vm->vm_shm)
508 			shmexit(p);
509 #endif
510 		(void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
511 		    vm_map_max(&vm->vm_map));
512 	}
513 	/*
514 	 * If parent is waiting for us to exec or exit,
515 	 * SPPWAIT will be set; clear it and wakeup parent.
516 	 */
517 	if (p->p_flag & SPPWAIT) {
518 		p->p_flag &= ~SPPWAIT;
519 		wakeup((caddr_t) p->p_pptr);
520 	}
521 #ifdef hp300
522 	u.u_pcb.pcb_flags &= ~(PCB_AST|PCB_HPUXMMAP|PCB_HPUXBIN);
523 #ifdef HPUXCOMPAT
524 	/* remember that we were loaded from an HPUX format file */
525 	if (ep->a_mid == MID_HPUX)
526 		u.u_pcb.pcb_flags |= PCB_HPUXBIN;
527 #endif
528 #endif
529 	p->p_flag &= ~(SPAGV|SSEQL|SUANOM|SHPUX);
530 	p->p_flag |= flags | SEXEC;
531 	addr = VM_MIN_ADDRESS;
532 	if (vm_allocate(&vm->vm_map, &addr, round_page(ctob(ts + ds)), FALSE)) {
533 		uprintf("Cannot allocate text+data space\n");
534 		error = ENOMEM;			/* XXX */
535 		goto badmap;
536 	}
537 	size = round_page(MAXSSIZ);		/* XXX */
538 	addr = trunc_page(VM_MAX_ADDRESS - size);
539 	if (vm_allocate(&vm->vm_map, &addr, size, FALSE)) {
540 		uprintf("Cannot allocate stack space\n");
541 		error = ENOMEM;			/* XXX */
542 		goto badmap;
543 	}
544 	vm->vm_maxsaddr = (caddr_t)addr;
545 	vm->vm_taddr = (caddr_t)VM_MIN_ADDRESS;
546 	vm->vm_daddr = (caddr_t)(VM_MIN_ADDRESS + ctob(ts));
547 
548 	if ((flags & SPAGV) == 0) {
549 		/*
550 		 * Read in data segment.
551 		 */
552 		(void) vn_rdwr(UIO_READ, vp, vm->vm_daddr, (int) ep->a_data,
553 			(off_t)(toff + ep->a_text), UIO_USERSPACE,
554 			(IO_UNIT|IO_NODELOCKED), cred, (int *)0);
555 		/*
556 		 * Read in text segment if necessary (0410),
557 		 * and read-protect it.
558 		 */
559 		if (ep->a_text > 0) {
560 			error = vn_rdwr(UIO_READ, vp, vm->vm_taddr,
561 				(int)ep->a_text, toff, UIO_USERSPACE,
562 				(IO_UNIT|IO_NODELOCKED), cred, (int *)0);
563 			(void) vm_map_protect(&vm->vm_map, VM_MIN_ADDRESS,
564 				VM_MIN_ADDRESS + trunc_page(ep->a_text),
565 				VM_PROT_READ|VM_PROT_EXECUTE, FALSE);
566 		}
567 	} else {
568 		/*
569 		 * Allocate a region backed by the exec'ed vnode.
570 		 */
571 		addr = VM_MIN_ADDRESS;
572 		size = round_page(ep->a_text + ep->a_data);
573 		error = vm_mmap(&vm->vm_map, &addr, size, VM_PROT_ALL,
574 			MAP_FILE|MAP_COPY|MAP_FIXED,
575 			(caddr_t)vp, (vm_offset_t)toff);
576 		(void) vm_map_protect(&vm->vm_map, addr,
577 			addr + trunc_page(ep->a_text),
578 			VM_PROT_READ|VM_PROT_EXECUTE, FALSE);
579 		vp->v_flag |= VTEXT;
580 	}
581 badmap:
582 	if (error) {
583 		printf("pid %d: VM allocation failure\n", p->p_pid);
584 		uprintf("sorry, pid %d was killed in exec: VM allocation\n",
585 			p->p_pid);
586 		psignal(p, SIGKILL);
587 		p->p_flag |= SULOCK;
588 		return(error);
589 	}
590 
591 	/*
592 	 * set SUID/SGID protections, if no tracing
593 	 */
594 	if ((p->p_flag&STRC)==0) {
595 		if (uid != cred->cr_uid || gid != cred->cr_gid) {
596 			p->p_ucred = cred = crcopy(cred);
597 			/*
598 			 * If process is being ktraced, turn off - unless
599 			 * root set it.
600 			 */
601 			if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
602 				vrele(p->p_tracep);
603 				p->p_tracep = NULL;
604 				p->p_traceflag = 0;
605 			}
606 		}
607 		cred->cr_uid = uid;
608 		cred->cr_gid = gid;
609 	} else
610 		psignal(p, SIGTRAP);
611 	p->p_cred->p_svuid = cred->cr_uid;
612 	p->p_cred->p_svgid = cred->cr_gid;
613 	vm->vm_tsize = ts;
614 	vm->vm_dsize = ds;
615 	vm->vm_ssize = ss;
616 	p->p_stats->p_prof.pr_scale = 0;
617 #if defined(tahoe)
618 	u.u_pcb.pcb_savacc.faddr = (float *)NULL;
619 #endif
620 	return (0);
621 }
622