1 /* $NetBSD: init_main.c,v 1.163 2000/01/24 18:03:19 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved. 5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95 42 */ 43 44 #include "fs_nfs.h" 45 #include "opt_nfsserver.h" 46 #include "opt_sysv.h" 47 #include "opt_maxuprc.h" 48 #include "opt_multiprocessor.h" 49 50 #include "rnd.h" 51 52 #include <sys/param.h> 53 #include <sys/filedesc.h> 54 #include <sys/file.h> 55 #include <sys/errno.h> 56 #include <sys/exec.h> 57 #include <sys/callout.h> 58 #include <sys/kernel.h> 59 #include <sys/mount.h> 60 #include <sys/map.h> 61 #include <sys/proc.h> 62 #include <sys/kthread.h> 63 #include <sys/resourcevar.h> 64 #include <sys/signalvar.h> 65 #include <sys/systm.h> 66 #include <sys/vnode.h> 67 #include <sys/tty.h> 68 #include <sys/conf.h> 69 #include <sys/disklabel.h> 70 #include <sys/buf.h> 71 #include <sys/device.h> 72 #include <sys/socketvar.h> 73 #include <sys/protosw.h> 74 #include <sys/reboot.h> 75 #include <sys/user.h> 76 #ifdef SYSVSHM 77 #include <sys/shm.h> 78 #endif 79 #ifdef SYSVSEM 80 #include <sys/sem.h> 81 #endif 82 #ifdef SYSVMSG 83 #include <sys/msg.h> 84 #endif 85 #include <sys/domain.h> 86 #include <sys/mbuf.h> 87 #include <sys/namei.h> 88 #if NRND > 0 89 #include <sys/rnd.h> 90 #endif 91 92 #include <sys/syscall.h> 93 #include <sys/syscallargs.h> 94 95 #include <ufs/ufs/quota.h> 96 97 #include <miscfs/genfs/genfs.h> 98 #include <miscfs/syncfs/syncfs.h> 99 100 #include <machine/cpu.h> 101 102 #include <vm/vm.h> 103 #include <vm/vm_pageout.h> 104 105 #include <uvm/uvm.h> 106 107 
#include <net/if.h> 108 #include <net/raw_cb.h> 109 110 char copyright[] = "\ 111 Copyright (c) 1996, 1997, 1998, 1999, 2000 112 The NetBSD Foundation, Inc. All rights reserved. 113 Copyright (c) 1982, 1986, 1989, 1991, 1993 114 The Regents of the University of California. All rights reserved. 115 116 "; 117 118 /* Components of the first process -- never freed. */ 119 struct session session0; 120 struct pgrp pgrp0; 121 struct proc proc0; 122 struct pcred cred0; 123 struct filedesc0 filedesc0; 124 struct cwdinfo cwdi0; 125 struct plimit limit0; 126 struct vmspace vmspace0; 127 struct sigacts sigacts0; 128 #ifndef curproc 129 struct proc *curproc = &proc0; 130 #endif 131 struct proc *initproc; 132 133 int cmask = CMASK; 134 extern struct user *proc0paddr; 135 136 struct vnode *rootvp, *swapdev_vp; 137 int boothowto; 138 int cold = 1; /* still working on startup */ 139 struct timeval boottime; 140 struct timeval runtime; 141 142 __volatile int start_init_exec; /* semaphore for start_init() */ 143 144 static void check_console __P((struct proc *p)); 145 static void start_init __P((void *)); 146 static void start_pagedaemon __P((void *)); 147 static void start_reaper __P((void *)); 148 void main __P((void)); 149 150 extern char sigcode[], esigcode[]; 151 #ifdef SYSCALL_DEBUG 152 extern char *syscallnames[]; 153 #endif 154 155 struct emul emul_netbsd = { 156 "netbsd", 157 NULL, 158 sendsig, 159 SYS_syscall, 160 SYS_MAXSYSCALL, 161 sysent, 162 #ifdef SYSCALL_DEBUG 163 syscallnames, 164 #else 165 NULL, 166 #endif 167 0, 168 copyargs, 169 setregs, 170 sigcode, 171 esigcode, 172 }; 173 174 /* 175 * System startup; initialize the world, create process 0, mount root 176 * filesystem, and fork to create init and pagedaemon. Most of the 177 * hard work is done in the lower-level initialization routines including 178 * startup(), which does memory initialization and autoconfiguration. 
179 */ 180 void 181 main() 182 { 183 struct proc *p; 184 struct pdevinit *pdev; 185 int i, s, error; 186 extern struct pdevinit pdevinit[]; 187 extern void roundrobin __P((void *)); 188 extern void schedcpu __P((void *)); 189 extern void disk_init __P((void)); 190 #if defined(NFSSERVER) || defined(NFS) 191 extern void nfs_init __P((void)); 192 #endif 193 194 /* 195 * Initialize the current process pointer (curproc) before 196 * any possible traps/probes to simplify trap processing. 197 */ 198 p = &proc0; 199 curproc = p; 200 /* 201 * Attempt to find console and initialize 202 * in case of early panic or other messages. 203 */ 204 consinit(); 205 printf("%s", copyright); 206 207 uvm_init(); 208 209 /* Do machine-dependent initialization. */ 210 cpu_startup(); 211 212 /* Initialize callouts. */ 213 callout_startup(); 214 215 /* 216 * Initialize mbuf's. Do this now because we might attempt to 217 * allocate mbufs or mbuf clusters during autoconfiguration. 218 */ 219 mbinit(); 220 221 /* Initialize sockets. */ 222 soinit(); 223 224 /* 225 * The following 3 things must be done before autoconfiguration. 226 */ 227 disk_init(); /* initialize disk list */ 228 tty_init(); /* initialize tty list */ 229 #if NRND > 0 230 rnd_init(); /* initialize RNG */ 231 #endif 232 233 /* 234 * Initialize process and pgrp structures. 235 */ 236 procinit(); 237 238 /* 239 * Create process 0 (the swapper). 240 */ 241 s = proclist_lock_write(); 242 LIST_INSERT_HEAD(&allproc, p, p_list); 243 proclist_unlock_write(s); 244 245 p->p_pgrp = &pgrp0; 246 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); 247 LIST_INIT(&pgrp0.pg_members); 248 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); 249 250 pgrp0.pg_session = &session0; 251 session0.s_count = 1; 252 session0.s_sid = p->p_pid; 253 session0.s_leader = p; 254 255 /* 256 * Set P_NOCLDWAIT so that kernel threads are reparented to 257 * init(8) when they exit. init(8) can easily wait them out 258 * for us. 
259 */ 260 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT; 261 p->p_stat = SRUN; 262 p->p_nice = NZERO; 263 p->p_emul = &emul_netbsd; 264 strncpy(p->p_comm, "swapper", MAXCOMLEN); 265 266 /* Create credentials. */ 267 cred0.p_refcnt = 1; 268 p->p_cred = &cred0; 269 p->p_ucred = crget(); 270 p->p_ucred->cr_ngroups = 1; /* group 0 */ 271 272 /* Create the file descriptor table. */ 273 finit(); 274 p->p_fd = &filedesc0.fd_fd; 275 fdinit1(&filedesc0); 276 277 /* Create the CWD info. */ 278 p->p_cwdi = &cwdi0; 279 cwdi0.cwdi_cmask = cmask; 280 cwdi0.cwdi_refcnt = 1; 281 282 /* Create the limits structures. */ 283 p->p_limit = &limit0; 284 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) 285 limit0.pl_rlimit[i].rlim_cur = 286 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; 287 288 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 289 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = 290 maxfiles < NOFILE ? maxfiles : NOFILE; 291 292 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 293 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = 294 maxproc < MAXUPRC ? maxproc : MAXUPRC; 295 296 i = ptoa(uvmexp.free); 297 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i; 298 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i; 299 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3; 300 limit0.pl_corename = defcorename; 301 limit0.p_refcnt = 1; 302 303 /* 304 * Initialize proc0's vmspace, which uses the kernel pmap. 305 * All kernel processes (which never have user space mappings) 306 * share proc0's vmspace, and thus, the kernel pmap. 307 */ 308 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 309 trunc_page(VM_MAX_ADDRESS), TRUE); 310 p->p_vmspace = &vmspace0; 311 312 p->p_addr = proc0paddr; /* XXX */ 313 314 /* 315 * We continue to place resource usage info in the 316 * user struct so they're pageable. 317 */ 318 p->p_stats = &p->p_addr->u_stats; 319 320 /* 321 * Charge root for one process. 
322 */ 323 (void)chgproccnt(0, 1); 324 325 rqinit(); 326 327 /* Configure virtual memory system, set vm rlimits. */ 328 uvm_init_limits(p); 329 330 /* Initialize the file systems. */ 331 #if defined(NFSSERVER) || defined(NFS) 332 nfs_init(); /* initialize server/shared data */ 333 #endif 334 vfsinit(); 335 336 /* Configure the system hardware. This will enable interrupts. */ 337 configure(); 338 339 #ifdef SYSVSHM 340 /* Initialize System V style shared memory. */ 341 shminit(); 342 #endif 343 344 #ifdef SYSVSEM 345 /* Initialize System V style semaphores. */ 346 seminit(); 347 #endif 348 349 #ifdef SYSVMSG 350 /* Initialize System V style message queues. */ 351 msginit(); 352 #endif 353 354 /* Attach pseudo-devices. */ 355 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++) 356 (*pdev->pdev_attach)(pdev->pdev_count); 357 358 /* 359 * Initialize protocols. Block reception of incoming packets 360 * until everything is ready. 361 */ 362 s = splimp(); 363 ifinit(); 364 domaininit(); 365 splx(s); 366 367 #ifdef GPROF 368 /* Initialize kernel profiling. */ 369 kmstartup(); 370 #endif 371 372 /* 373 * Initialize signal-related data structures, and signal state 374 * for proc0. 375 */ 376 signal_init(); 377 p->p_sigacts = &sigacts0; 378 siginit(p); 379 380 /* Kick off timeout driven events by calling first time. */ 381 roundrobin(NULL); 382 schedcpu(NULL); 383 384 /* 385 * Create process 1 (init(8)). We do this now, as Unix has 386 * historically had init be process 1, and changing this would 387 * probably upset a lot of people. 388 * 389 * Note that process 1 won't immediately exec init(8), but will 390 * wait for us to inform it that the root file system has been 391 * mounted. 392 */ 393 if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc)) 394 panic("fork init"); 395 cpu_set_kpc(initproc, start_init, initproc); 396 397 /* 398 * Create any kernel threads who's creation was deferred because 399 * initproc had not yet been created. 
400 */ 401 kthread_run_deferred_queue(); 402 403 /* 404 * Now that device driver threads have been created, wait for 405 * them to finish any deferred autoconfiguration. Note we don't 406 * need to lock this semaphore, since we haven't booted any 407 * secondary processors, yet. 408 */ 409 while (config_pending) 410 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0); 411 412 /* 413 * Now that autoconfiguration has completed, we can determine 414 * the root and dump devices. 415 */ 416 cpu_rootconf(); 417 cpu_dumpconf(); 418 419 /* Mount the root file system. */ 420 do { 421 domountroothook(); 422 if ((error = vfs_mountroot())) { 423 printf("cannot mount root, error = %d\n", error); 424 boothowto |= RB_ASKNAME; 425 setroot(root_device, 426 (rootdev != NODEV) ? DISKPART(rootdev) : 0); 427 } 428 } while (error != 0); 429 mountroothook_destroy(); 430 431 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; 432 mountlist.cqh_first->mnt_op->vfs_refcount++; 433 434 /* 435 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to 436 * reference it. 437 */ 438 if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) 439 panic("cannot find root vnode"); 440 cwdi0.cwdi_cdir = rootvnode; 441 VREF(cwdi0.cwdi_cdir); 442 VOP_UNLOCK(rootvnode, 0); 443 cwdi0.cwdi_rdir = NULL; 444 445 /* 446 * Now that root is mounted, we can fixup initproc's CWD 447 * info. All other processes are kthreads, which merely 448 * share proc0's CWD info. 449 */ 450 initproc->p_cwdi->cwdi_cdir = rootvnode; 451 VREF(initproc->p_cwdi->cwdi_cdir); 452 initproc->p_cwdi->cwdi_rdir = NULL; 453 454 /* 455 * Now can look at time, having had a chance to verify the time 456 * from the file system. Reset p->p_rtime as it may have been 457 * munched in mi_switch() after the time got set. 
458 */ 459 proclist_lock_read(); 460 s = splclock(); /* so we can read time */ 461 for (p = LIST_FIRST(&allproc); p != NULL; 462 p = LIST_NEXT(p, p_list)) { 463 p->p_stats->p_start = runtime = mono_time = boottime = time; 464 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; 465 } 466 splx(s); 467 proclist_unlock_read(); 468 469 /* Create the pageout daemon kernel thread. */ 470 uvm_swap_init(); 471 if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon")) 472 panic("fork pagedaemon"); 473 474 /* Create the process reaper kernel thread. */ 475 if (kthread_create1(start_reaper, NULL, NULL, "reaper")) 476 panic("fork reaper"); 477 478 /* Create the filesystem syncer kernel thread. */ 479 if (kthread_create1(sched_sync, NULL, NULL, "ioflush")) 480 panic("fork syncer"); 481 482 #if defined(MULTIPROCESSOR) 483 /* Boot the secondary processors. */ 484 cpu_boot_secondary_processors(); 485 #endif 486 487 /* 488 * Okay, now we can let init(8) exec! It's off to userland! 489 */ 490 start_init_exec = 1; 491 wakeup((void *)&start_init_exec); 492 493 /* The scheduler is an infinite loop. */ 494 uvm_scheduler(); 495 /* NOTREACHED */ 496 } 497 498 static void 499 check_console(p) 500 struct proc *p; 501 { 502 struct nameidata nd; 503 int error; 504 505 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p); 506 error = namei(&nd); 507 if (error == 0) 508 vrele(nd.ni_vp); 509 else if (error == ENOENT) 510 printf("warning: no /dev/console\n"); 511 else 512 printf("warning: lookup /dev/console: error %d\n", error); 513 } 514 515 /* 516 * List of paths to try when searching for "init". 517 */ 518 static char *initpaths[] = { 519 "/sbin/init", 520 "/sbin/oinit", 521 "/sbin/init.bak", 522 NULL, 523 }; 524 525 /* 526 * Start the initial user process; try exec'ing each pathname in "initpaths". 527 * The program is invoked with one argument containing the boot flags. 
*/
/*
 * `arg' is used as the struct proc pointer of the process doing the
 * exec; main() registers this function through cpu_set_kpc() with
 * initproc as that argument.
 *
 * Returns as soon as sys_execve() reports 0 or EJUSTRETURN for one of
 * the candidate paths.  If every path fails, prints "init: not found"
 * and panics.
 */
static void
start_init(arg)
	void *arg;
{
	struct proc *p = arg;
	vaddr_t addr;
	struct sys_execve_args /* {
		syscallarg(const char *) path;
		syscallarg(char * const *) argp;
		syscallarg(char * const *) envp;
	} */ args;
	int options, i, error;
	register_t retval[2];
	/* flags[] holds '-', at most two flag letters, and the NUL. */
	char flags[4], *flagsp;
	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;

	/*
	 * Now in process 1.
	 */
	strncpy(p->p_comm, "init", MAXCOMLEN);

	/*
	 * Wait for main() to tell us that it's safe to exec.
	 */
	while (start_init_exec == 0)
		(void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);

	/*
	 * This is not the right way to do this.  We really should
	 * hand-craft a descriptor onto /dev/console to hand to init,
	 * but that's a _lot_ more work, and the benefit from this easy
	 * hack makes up for the "good is the enemy of the best" effect.
	 */
	check_console(p);

	/*
	 * Need just enough stack to hold the faked-up "execve()" arguments.
	 * One page is mapped at a fixed address directly below USRSTACK.
	 */
	addr = USRSTACK - PAGE_SIZE;
	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
	    NULL, UVM_UNKNOWN_OFFSET,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
	    != KERN_SUCCESS)
		panic("init: couldn't allocate argument space");
	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;

	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
		/*
		 * Start over at the top of the page for every candidate;
		 * ucp moves downwards as strings are copied out.
		 */
		ucp = (char *)(addr + PAGE_SIZE);

		/*
		 * Construct the boot flag argument.
		 */
		flagsp = flags;
		*flagsp++ = '-';
		options = 0;

		if (boothowto & RB_SINGLE) {
			*flagsp++ = 's';
			options = 1;
		}
#ifdef notyet
		if (boothowto & RB_FASTBOOT) {
			*flagsp++ = 'f';
			options = 1;
		}
#endif

		/*
		 * Move out the flags (arg 1), if necessary.
		 */
		if (options != 0) {
			*flagsp++ = '\0';
			i = flagsp - flags;	/* length including the NUL */
#ifdef DEBUG
			printf("init: copying out flags `%s' %d\n", flags, i);
#endif
			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
			arg1 = ucp;
		}

		/*
		 * Move out the file name (also arg 0).
		 */
		i = strlen(path) + 1;
#ifdef DEBUG
		printf("init: copying out path `%s' %d\n", path, i);
#endif
		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
		arg0 = ucp;

		/*
		 * Move out the arg pointers.  They are stored last-to-first
		 * below the strings: the terminator, then arg 1 (only when
		 * flags were built), then arg 0.
		 * NOTE(review): the mask presumably rounds ucp down to an
		 * aligned address; ALIGNBYTES is defined outside this file.
		 */
		uap = (char **)((long)ucp & ~ALIGNBYTES);
		(void)suword((caddr_t)--uap, 0);	/* terminator */
		if (options != 0)
			(void)suword((caddr_t)--uap, (long)arg1);
		/*
		 * The arg 0 pointer refers to the part of the copied-out
		 * path that follows its last '/', or to the whole path
		 * when it contains none.
		 */
		slash = strrchr(path, '/');
		if (slash)
			(void)suword((caddr_t)--uap,
			    (long)arg0 + (slash + 1 - path));
		else
			(void)suword((caddr_t)--uap, (long)arg0);

		/*
		 * Point at the arguments.
		 */
		SCARG(&args, path) = arg0;
		SCARG(&args, argp) = uap;
		SCARG(&args, envp) = NULL;

		/*
		 * Now try to exec the program.  If can't for any reason
		 * other than it doesn't exist, complain.
		 */
		error = sys_execve(p, &args, retval);
		if (error == 0 || error == EJUSTRETURN)
			return;
		if (error != ENOENT)
			printf("exec %s: error %d\n", path, error);
	}
	printf("init: not found\n");
	panic("no init");
}

/*
 * Function main() hands to kthread_create1() under the name
 * "pagedaemon".  `arg' is ignored; the function only calls
 * uvm_pageout(), which is not expected to return.
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}

/*
 * Function main() hands to kthread_create1() under the name "reaper".
 * `arg' is ignored; the function only calls reaper(), which is not
 * expected to return.
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}