xref: /openbsd-src/sys/kern/kern_fork.c (revision 8b7a4931d44e3590703e221be1483e6ac33acb8f)
/*	$NetBSD: kern_fork.c,v 1.27 1995/12/10 08:26:02 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/map.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acct.h>
#include <sys/ktrace.h>

#include <vm/vm.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

int	nprocs = 1;		/* process 0 */

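/*
 * Fork variants, passed to fork1() as its "forktype" argument.
 */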
#define	ISFORK	0
#define	ISVFORK	1
#define	ISRFORK	2

int
sys_fork(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	return (fork1(p, ISFORK, 0, retval));
}

int
sys_vfork(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	return (fork1(p, ISVFORK, 0, retval));
}

int
sys_rfork(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_rfork_args /* {
		syscallarg(int) flags;
	} */ *uap = v;

	return (fork1(p, ISRFORK, SCARG(uap, flags), retval));
}

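/*
 * fork1() does the real work for all three variants above.  p1 is the
 * parent, forktype is one of ISFORK/ISVFORK/ISRFORK, rforkflags carries
 * the rfork(2) flags (zero for plain fork/vfork), and retval receives
 * the two-register system call return values.
 *
 * Illustrative only (userland, not part of this file): rfork(RFPROC|RFFDG)
 * behaves much like fork(), while rfork(RFPROC|RFMEM) additionally shares
 * most of the parent's address space (see the vm_map_inherit() call below).
 */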
int
fork1(p1, forktype, rforkflags, retval)
	register struct proc *p1;
	int forktype;
	int rforkflags;
	register_t *retval;
{
	register struct proc *p2;
	register uid_t uid;
	struct proc *newproc;
	struct proc **hash;
	int count;
	static int nextpid, pidchecked = 0;
	int dupfd = 1, cleanfd = 0;

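	/*
	 * Sanity-check the rfork(2) flags: RFPROC must be set, RFFDG
	 * (copy the descriptor table) and RFCFDG (start with a clean
	 * table) are mutually exclusive, and RFNOWAIT is not implemented
	 * yet.  With neither RFFDG nor RFCFDG the table is shared.
	 */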
	if (forktype == ISRFORK) {
		dupfd = 0;
		if ((rforkflags & RFPROC) == 0)
			return (EINVAL);
		if ((rforkflags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
			return (EINVAL);
		if (rforkflags & RFFDG)
			dupfd = 1;
		if (rforkflags & RFNOWAIT)
			return (EINVAL);	/* XXX unimplemented */
		if (rforkflags & RFCFDG)
			cleanfd = 1;
	}

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		tablefull("proc");
		return (EAGAIN);
	}

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		return (EAGAIN);
	}

	/* Allocate new proc. */
	MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from nextpid+1 through pidchecked-1).
	 */
	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 */
		p2 = allproc.lh_first;
again:
		for (; p2 != 0; p2 = p2->p_list.le_next) {
			while (p2->p_pid == nextpid ||
			    p2->p_pgrp->pg_id == nextpid) {
				nextpid++;
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > nextpid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
		}
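		/*
		 * After the active list, make one more pass over the
		 * zombie list so a pid still held by a zombie is not
		 * reused.
		 */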
		if (!doingzomb) {
			doingzomb = 1;
			p2 = zombproc.lh_first;
			goto again;
		}
	}

	nprocs++;
	p2 = newproc;
	p2->p_stat = SIDL;			/* protect against others */
	p2->p_pid = nextpid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	p2->p_forw = p2->p_back = NULL;		/* shouldn't be necessary */
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = P_INMEM;
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
	    M_SUBPROC, M_WAITOK);
	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
	p2->p_cred->p_refcnt = 1;
	crhold(p1->p_ucred);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

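	/*
	 * Set up the child's file descriptor table: a fresh table for
	 * RFCFDG, a copy for fork/vfork and RFFDG, or one shared with
	 * the parent for rfork without RFFDG.
	 */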
	if (cleanfd)
		p2->p_fd = fdinit(p1);
	else if (dupfd)
		p2->p_fd = fdcopy(p1);
	else
		p2->p_fd = fdshare(p1);

	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	if (forktype == ISVFORK)
		p2->p_flag |= P_PPWAIT;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	p2->p_pptr = p1;
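	/*
	 * Note: RFNOWAIT is rejected with EINVAL above, so this branch
	 * is never taken at present; the child is always recorded on the
	 * parent's list of children.
	 */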
	if (rforkflags & RFNOWAIT) {
		/* XXX should we do anything? */
	} else {
		LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	}
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag&KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	p1->p_holdcnt++;

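	/*
	 * For rfork(RFPROC|RFMEM), mark most of the user address space
	 * as shared inheritance so the child shares it with the parent;
	 * the top MAXSSIZ of user space (the stack area) keeps its
	 * existing (normally copy) inheritance.
	 */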
	if (forktype == ISRFORK && (rforkflags & RFMEM)) {
		/* share as much address space as possible */
		(void) vm_map_inherit(&p1->p_vmspace->vm_map,
		    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - MAXSSIZ,
		    VM_INHERIT_SHARE);
	}

#ifdef __FORK_BRAINDAMAGE
	/*
	 * Set return values for child before vm_fork,
	 * so they can be copied to child stack.
	 * We return 0, rather than the traditional behaviour of modifying the
	 * return value in the system call stub.
	 * NOTE: the kernel stack may be at a different location in the child
	 * process, and thus addresses of automatic variables (including retval)
	 * may be invalid after vm_fork returns in the child process.
	 */
	retval[0] = 0;
	retval[1] = 1;
	if (vm_fork(p1, p2))
		return (0);
#else
	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	vm_fork(p1, p2);
#endif

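	/*
	 * Account for the new process in the per-type fork statistics.
	 */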
	switch (forktype) {
		case ISFORK:
			forkstat.cntfork++;
			break;
		case ISVFORK:
			forkstat.cntvfork++;
			break;
		case ISRFORK:
			forkstat.cntrfork++;
			break;
	}

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	(void) splstatclock();
	p2->p_stats->p_start = time;
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	(void) spl0();

	/*
	 * Now can be swapped.
	 */
	p1->p_holdcnt--;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (forktype == ISVFORK)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	retval[0] = p2->p_pid;
	retval[1] = 0;
	return (0);
}