xref: /netbsd-src/sys/kern/init_main.c (revision e4d7c2e329d54c97e0c0bd3016bbe74f550c3d5e)
1 /*	$NetBSD: init_main.c,v 1.163 2000/01/24 18:03:19 thorpej Exp $	*/
2 
3 /*
4  * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
42  */
43 
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 
50 #include "rnd.h"
51 
52 #include <sys/param.h>
53 #include <sys/filedesc.h>
54 #include <sys/file.h>
55 #include <sys/errno.h>
56 #include <sys/exec.h>
57 #include <sys/callout.h>
58 #include <sys/kernel.h>
59 #include <sys/mount.h>
60 #include <sys/map.h>
61 #include <sys/proc.h>
62 #include <sys/kthread.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
65 #include <sys/systm.h>
66 #include <sys/vnode.h>
67 #include <sys/tty.h>
68 #include <sys/conf.h>
69 #include <sys/disklabel.h>
70 #include <sys/buf.h>
71 #include <sys/device.h>
72 #include <sys/socketvar.h>
73 #include <sys/protosw.h>
74 #include <sys/reboot.h>
75 #include <sys/user.h>
76 #ifdef SYSVSHM
77 #include <sys/shm.h>
78 #endif
79 #ifdef SYSVSEM
80 #include <sys/sem.h>
81 #endif
82 #ifdef SYSVMSG
83 #include <sys/msg.h>
84 #endif
85 #include <sys/domain.h>
86 #include <sys/mbuf.h>
87 #include <sys/namei.h>
88 #if NRND > 0
89 #include <sys/rnd.h>
90 #endif
91 
92 #include <sys/syscall.h>
93 #include <sys/syscallargs.h>
94 
95 #include <ufs/ufs/quota.h>
96 
97 #include <miscfs/genfs/genfs.h>
98 #include <miscfs/syncfs/syncfs.h>
99 
100 #include <machine/cpu.h>
101 
102 #include <vm/vm.h>
103 #include <vm/vm_pageout.h>
104 
105 #include <uvm/uvm.h>
106 
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109 
/*
 * Copyright banner printed on the console by main() during early boot.
 *
 * Written as adjacent string literals with explicit "\n" escapes rather
 * than one literal spanning several source lines: raw newlines inside a
 * string literal are a long-deprecated compiler extension, not valid C.
 * The resulting bytes are unchanged, including the trailing blank line.
 */
char	copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "    The NetBSD Foundation, Inc.  All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "    The Regents of the University of California.  All rights reserved.\n"
    "\n";
117 
118 /* Components of the first process -- never freed. */
119 struct	session session0;
120 struct	pgrp pgrp0;
121 struct	proc proc0;
122 struct	pcred cred0;
123 struct	filedesc0 filedesc0;
124 struct	cwdinfo cwdi0;
125 struct	plimit limit0;
126 struct	vmspace vmspace0;
127 struct	sigacts sigacts0;
128 #ifndef curproc
129 struct	proc *curproc = &proc0;
130 #endif
131 struct	proc *initproc;
132 
133 int	cmask = CMASK;
134 extern	struct user *proc0paddr;
135 
136 struct	vnode *rootvp, *swapdev_vp;
137 int	boothowto;
138 int	cold = 1;			/* still working on startup */
139 struct	timeval boottime;
140 struct	timeval runtime;
141 
142 __volatile int start_init_exec;		/* semaphore for start_init() */
143 
144 static void check_console __P((struct proc *p));
145 static void start_init __P((void *));
146 static void start_pagedaemon __P((void *));
147 static void start_reaper __P((void *));
148 void main __P((void));
149 
150 extern char sigcode[], esigcode[];
151 #ifdef SYSCALL_DEBUG
152 extern char *syscallnames[];
153 #endif
154 
155 struct emul emul_netbsd = {
156 	"netbsd",
157 	NULL,
158 	sendsig,
159 	SYS_syscall,
160 	SYS_MAXSYSCALL,
161 	sysent,
162 #ifdef SYSCALL_DEBUG
163 	syscallnames,
164 #else
165 	NULL,
166 #endif
167 	0,
168 	copyargs,
169 	setregs,
170 	sigcode,
171 	esigcode,
172 };
173 
174 /*
175  * System startup; initialize the world, create process 0, mount root
176  * filesystem, and fork to create init and pagedaemon.  Most of the
177  * hard work is done in the lower-level initialization routines including
178  * startup(), which does memory initialization and autoconfiguration.
179  */
180 void
181 main()
182 {
183 	struct proc *p;
184 	struct pdevinit *pdev;
185 	int i, s, error;
186 	extern struct pdevinit pdevinit[];
187 	extern void roundrobin __P((void *));
188 	extern void schedcpu __P((void *));
189 	extern void disk_init __P((void));
190 #if defined(NFSSERVER) || defined(NFS)
191 	extern void nfs_init __P((void));
192 #endif
193 
194 	/*
195 	 * Initialize the current process pointer (curproc) before
196 	 * any possible traps/probes to simplify trap processing.
197 	 */
198 	p = &proc0;
199 	curproc = p;
200 	/*
201 	 * Attempt to find console and initialize
202 	 * in case of early panic or other messages.
203 	 */
204 	consinit();
205 	printf("%s", copyright);
206 
207 	uvm_init();
208 
209 	/* Do machine-dependent initialization. */
210 	cpu_startup();
211 
212 	/* Initialize callouts. */
213 	callout_startup();
214 
215 	/*
216 	 * Initialize mbuf's.  Do this now because we might attempt to
217 	 * allocate mbufs or mbuf clusters during autoconfiguration.
218 	 */
219 	mbinit();
220 
221 	/* Initialize sockets. */
222 	soinit();
223 
224 	/*
225 	 * The following 3 things must be done before autoconfiguration.
226 	 */
227 	disk_init();		/* initialize disk list */
228 	tty_init();		/* initialize tty list */
229 #if NRND > 0
230 	rnd_init();		/* initialize RNG */
231 #endif
232 
233 	/*
234 	 * Initialize process and pgrp structures.
235 	 */
236 	procinit();
237 
238 	/*
239 	 * Create process 0 (the swapper).
240 	 */
241 	s = proclist_lock_write();
242 	LIST_INSERT_HEAD(&allproc, p, p_list);
243 	proclist_unlock_write(s);
244 
245 	p->p_pgrp = &pgrp0;
246 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
247 	LIST_INIT(&pgrp0.pg_members);
248 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
249 
250 	pgrp0.pg_session = &session0;
251 	session0.s_count = 1;
252 	session0.s_sid = p->p_pid;
253 	session0.s_leader = p;
254 
255 	/*
256 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
257 	 * init(8) when they exit.  init(8) can easily wait them out
258 	 * for us.
259 	 */
260 	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
261 	p->p_stat = SRUN;
262 	p->p_nice = NZERO;
263 	p->p_emul = &emul_netbsd;
264 	strncpy(p->p_comm, "swapper", MAXCOMLEN);
265 
266 	/* Create credentials. */
267 	cred0.p_refcnt = 1;
268 	p->p_cred = &cred0;
269 	p->p_ucred = crget();
270 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
271 
272 	/* Create the file descriptor table. */
273 	finit();
274 	p->p_fd = &filedesc0.fd_fd;
275 	fdinit1(&filedesc0);
276 
277 	/* Create the CWD info. */
278 	p->p_cwdi = &cwdi0;
279 	cwdi0.cwdi_cmask = cmask;
280 	cwdi0.cwdi_refcnt = 1;
281 
282 	/* Create the limits structures. */
283 	p->p_limit = &limit0;
284 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
285 		limit0.pl_rlimit[i].rlim_cur =
286 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
287 
288 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
289 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
290 	    maxfiles < NOFILE ? maxfiles : NOFILE;
291 
292 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
293 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
294 	    maxproc < MAXUPRC ? maxproc : MAXUPRC;
295 
296 	i = ptoa(uvmexp.free);
297 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
298 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
299 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
300 	limit0.pl_corename = defcorename;
301 	limit0.p_refcnt = 1;
302 
303 	/*
304 	 * Initialize proc0's vmspace, which uses the kernel pmap.
305 	 * All kernel processes (which never have user space mappings)
306 	 * share proc0's vmspace, and thus, the kernel pmap.
307 	 */
308 	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
309 	    trunc_page(VM_MAX_ADDRESS), TRUE);
310 	p->p_vmspace = &vmspace0;
311 
312 	p->p_addr = proc0paddr;				/* XXX */
313 
314 	/*
315 	 * We continue to place resource usage info in the
316 	 * user struct so they're pageable.
317 	 */
318 	p->p_stats = &p->p_addr->u_stats;
319 
320 	/*
321 	 * Charge root for one process.
322 	 */
323 	(void)chgproccnt(0, 1);
324 
325 	rqinit();
326 
327 	/* Configure virtual memory system, set vm rlimits. */
328 	uvm_init_limits(p);
329 
330 	/* Initialize the file systems. */
331 #if defined(NFSSERVER) || defined(NFS)
332 	nfs_init();			/* initialize server/shared data */
333 #endif
334 	vfsinit();
335 
336 	/* Configure the system hardware.  This will enable interrupts. */
337 	configure();
338 
339 #ifdef SYSVSHM
340 	/* Initialize System V style shared memory. */
341 	shminit();
342 #endif
343 
344 #ifdef SYSVSEM
345 	/* Initialize System V style semaphores. */
346 	seminit();
347 #endif
348 
349 #ifdef SYSVMSG
350 	/* Initialize System V style message queues. */
351 	msginit();
352 #endif
353 
354 	/* Attach pseudo-devices. */
355 	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
356 		(*pdev->pdev_attach)(pdev->pdev_count);
357 
358 	/*
359 	 * Initialize protocols.  Block reception of incoming packets
360 	 * until everything is ready.
361 	 */
362 	s = splimp();
363 	ifinit();
364 	domaininit();
365 	splx(s);
366 
367 #ifdef GPROF
368 	/* Initialize kernel profiling. */
369 	kmstartup();
370 #endif
371 
372 	/*
373 	 * Initialize signal-related data structures, and signal state
374 	 * for proc0.
375 	 */
376 	signal_init();
377 	p->p_sigacts = &sigacts0;
378 	siginit(p);
379 
380 	/* Kick off timeout driven events by calling first time. */
381 	roundrobin(NULL);
382 	schedcpu(NULL);
383 
384 	/*
385 	 * Create process 1 (init(8)).  We do this now, as Unix has
386 	 * historically had init be process 1, and changing this would
387 	 * probably upset a lot of people.
388 	 *
389 	 * Note that process 1 won't immediately exec init(8), but will
390 	 * wait for us to inform it that the root file system has been
391 	 * mounted.
392 	 */
393 	if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
394 		panic("fork init");
395 	cpu_set_kpc(initproc, start_init, initproc);
396 
397 	/*
398 	 * Create any kernel threads who's creation was deferred because
399 	 * initproc had not yet been created.
400 	 */
401 	kthread_run_deferred_queue();
402 
403 	/*
404 	 * Now that device driver threads have been created, wait for
405 	 * them to finish any deferred autoconfiguration.  Note we don't
406 	 * need to lock this semaphore, since we haven't booted any
407 	 * secondary processors, yet.
408 	 */
409 	while (config_pending)
410 		(void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
411 
412 	/*
413 	 * Now that autoconfiguration has completed, we can determine
414 	 * the root and dump devices.
415 	 */
416 	cpu_rootconf();
417 	cpu_dumpconf();
418 
419 	/* Mount the root file system. */
420 	do {
421 		domountroothook();
422 		if ((error = vfs_mountroot())) {
423 			printf("cannot mount root, error = %d\n", error);
424 			boothowto |= RB_ASKNAME;
425 			setroot(root_device,
426 			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
427 		}
428 	} while (error != 0);
429 	mountroothook_destroy();
430 
431 	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
432 	mountlist.cqh_first->mnt_op->vfs_refcount++;
433 
434 	/*
435 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
436 	 * reference it.
437 	 */
438 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
439 		panic("cannot find root vnode");
440 	cwdi0.cwdi_cdir = rootvnode;
441 	VREF(cwdi0.cwdi_cdir);
442 	VOP_UNLOCK(rootvnode, 0);
443 	cwdi0.cwdi_rdir = NULL;
444 
445 	/*
446 	 * Now that root is mounted, we can fixup initproc's CWD
447 	 * info.  All other processes are kthreads, which merely
448 	 * share proc0's CWD info.
449 	 */
450 	initproc->p_cwdi->cwdi_cdir = rootvnode;
451 	VREF(initproc->p_cwdi->cwdi_cdir);
452 	initproc->p_cwdi->cwdi_rdir = NULL;
453 
454 	/*
455 	 * Now can look at time, having had a chance to verify the time
456 	 * from the file system.  Reset p->p_rtime as it may have been
457 	 * munched in mi_switch() after the time got set.
458 	 */
459 	proclist_lock_read();
460 	s = splclock();		/* so we can read time */
461 	for (p = LIST_FIRST(&allproc); p != NULL;
462 	     p = LIST_NEXT(p, p_list)) {
463 		p->p_stats->p_start = runtime = mono_time = boottime = time;
464 		p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
465 	}
466 	splx(s);
467 	proclist_unlock_read();
468 
469 	/* Create the pageout daemon kernel thread. */
470 	uvm_swap_init();
471 	if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
472 		panic("fork pagedaemon");
473 
474 	/* Create the process reaper kernel thread. */
475 	if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
476 		panic("fork reaper");
477 
478 	/* Create the filesystem syncer kernel thread. */
479 	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
480 		panic("fork syncer");
481 
482 #if defined(MULTIPROCESSOR)
483 	/* Boot the secondary processors. */
484 	cpu_boot_secondary_processors();
485 #endif
486 
487 	/*
488 	 * Okay, now we can let init(8) exec!  It's off to userland!
489 	 */
490 	start_init_exec = 1;
491 	wakeup((void *)&start_init_exec);
492 
493 	/* The scheduler is an infinite loop. */
494 	uvm_scheduler();
495 	/* NOTREACHED */
496 }
497 
498 static void
499 check_console(p)
500 	struct proc *p;
501 {
502 	struct nameidata nd;
503 	int error;
504 
505 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
506 	error = namei(&nd);
507 	if (error == 0)
508 		vrele(nd.ni_vp);
509 	else if (error == ENOENT)
510 		printf("warning: no /dev/console\n");
511 	else
512 		printf("warning: lookup /dev/console: error %d\n", error);
513 }
514 
/*
 * Candidate pathnames for the "init" program, in the order start_init()
 * tries them.  The list is terminated by a NULL entry.
 */
static char *initpaths[] = {
	"/sbin/init",		/* first choice */
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,			/* end of list */
};
524 
525 /*
526  * Start the initial user process; try exec'ing each pathname in "initpaths".
527  * The program is invoked with one argument containing the boot flags.
528  */
529 static void
530 start_init(arg)
531 	void *arg;
532 {
533 	struct proc *p = arg;
534 	vaddr_t addr;
535 	struct sys_execve_args /* {
536 		syscallarg(const char *) path;
537 		syscallarg(char * const *) argp;
538 		syscallarg(char * const *) envp;
539 	} */ args;
540 	int options, i, error;
541 	register_t retval[2];
542 	char flags[4], *flagsp;
543 	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
544 
545 	/*
546 	 * Now in process 1.
547 	 */
548 	strncpy(p->p_comm, "init", MAXCOMLEN);
549 
550 	/*
551 	 * Wait for main() to tell us that it's safe to exec.
552 	 */
553 	while (start_init_exec == 0)
554 		(void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
555 
556 	/*
557 	 * This is not the right way to do this.  We really should
558 	 * hand-craft a descriptor onto /dev/console to hand to init,
559 	 * but that's a _lot_ more work, and the benefit from this easy
560 	 * hack makes up for the "good is the enemy of the best" effect.
561 	 */
562 	check_console(p);
563 
564 	/*
565 	 * Need just enough stack to hold the faked-up "execve()" arguments.
566 	 */
567 	addr = USRSTACK - PAGE_SIZE;
568 	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
569                     NULL, UVM_UNKNOWN_OFFSET,
570                     UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
571 		    UVM_ADV_NORMAL,
572                     UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
573 		!= KERN_SUCCESS)
574 		panic("init: couldn't allocate argument space");
575 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
576 
577 	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
578 		ucp = (char *)(addr + PAGE_SIZE);
579 
580 		/*
581 		 * Construct the boot flag argument.
582 		 */
583 		flagsp = flags;
584 		*flagsp++ = '-';
585 		options = 0;
586 
587 		if (boothowto & RB_SINGLE) {
588 			*flagsp++ = 's';
589 			options = 1;
590 		}
591 #ifdef notyet
592 		if (boothowto & RB_FASTBOOT) {
593 			*flagsp++ = 'f';
594 			options = 1;
595 		}
596 #endif
597 
598 		/*
599 		 * Move out the flags (arg 1), if necessary.
600 		 */
601 		if (options != 0) {
602 			*flagsp++ = '\0';
603 			i = flagsp - flags;
604 #ifdef DEBUG
605 			printf("init: copying out flags `%s' %d\n", flags, i);
606 #endif
607 			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
608 			arg1 = ucp;
609 		}
610 
611 		/*
612 		 * Move out the file name (also arg 0).
613 		 */
614 		i = strlen(path) + 1;
615 #ifdef DEBUG
616 		printf("init: copying out path `%s' %d\n", path, i);
617 #endif
618 		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
619 		arg0 = ucp;
620 
621 		/*
622 		 * Move out the arg pointers.
623 		 */
624 		uap = (char **)((long)ucp & ~ALIGNBYTES);
625 		(void)suword((caddr_t)--uap, 0);	/* terminator */
626 		if (options != 0)
627 			(void)suword((caddr_t)--uap, (long)arg1);
628 		slash = strrchr(path, '/');
629 		if (slash)
630 			(void)suword((caddr_t)--uap,
631 			    (long)arg0 + (slash + 1 - path));
632 		else
633 			(void)suword((caddr_t)--uap, (long)arg0);
634 
635 		/*
636 		 * Point at the arguments.
637 		 */
638 		SCARG(&args, path) = arg0;
639 		SCARG(&args, argp) = uap;
640 		SCARG(&args, envp) = NULL;
641 
642 		/*
643 		 * Now try to exec the program.  If can't for any reason
644 		 * other than it doesn't exist, complain.
645 		 */
646 		error = sys_execve(p, &args, retval);
647 		if (error == 0 || error == EJUSTRETURN)
648 			return;
649 		if (error != ENOENT)
650 			printf("exec %s: error %d\n", path, error);
651 	}
652 	printf("init: not found\n");
653 	panic("no init");
654 }
655 
/*
 * Entry point of the "pagedaemon" kernel thread created by main().
 * The argument is unused; all the work happens in uvm_pageout().
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}
665 
/*
 * Entry point of the "reaper" kernel thread created by main().
 * The argument is unused; all the work happens in reaper().
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}
675