/*	$NetBSD: init_main.c,v 1.189 2001/03/15 06:10:55 chs Exp $	*/

/*
 * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
 */

#include "fs_nfs.h"
#include "opt_nfsserver.h"
#include "opt_sysv.h"
#include "opt_maxuprc.h"
#include "opt_multiprocessor.h"
#include "opt_syscall_debug.h"

#include "rnd.h"

#include <sys/param.h>
#include <sys/acct.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/map.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/tty.h>
#include <sys/conf.h>
#include <sys/disklabel.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/exec.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/reboot.h>
#include <sys/user.h>
#include <sys/sysctl.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
#ifdef SYSVSEM
#include <sys/sem.h>
#endif
#ifdef SYSVMSG
#include <sys/msg.h>
#endif
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#if NRND > 0
#include <sys/rnd.h>
#endif

#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <ufs/ufs/quota.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <machine/cpu.h>

#include <uvm/uvm.h>

#include <net/if.h>
#include <net/raw_cb.h>

/*
 * Copyright banner.  main() prints it with printf("%s", copyright)
 * immediately after consinit().
 *
 * NOTE(review): the literal below spans several source lines (raw
 * newlines inside a string literal), which standard C does not permit.
 * The whitespace inside it was also collapsed by the listing this text
 * was recovered from -- verify the exact bytes against the repository
 * copy before relying on them.
 */
const char copyright[] = "\
Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001
The NetBSD Foundation, Inc. All rights reserved.
Copyright (c) 1982, 1986, 1989, 1991, 1993
The Regents of the University of California. All rights reserved.

";

/*
 * Components of the first process -- never freed.
 * main() wires each of these statically allocated objects into proc0.
 */
struct	session session0;
struct	pgrp pgrp0;
struct	proc proc0;
struct	pcred cred0;
struct	filedesc0 filedesc0;
struct	cwdinfo cwdi0;
struct	plimit limit0;
struct	vmspace vmspace0;
struct	sigacts sigacts0;
/* Only defined here when curproc is not already provided as a macro. */
#ifndef curproc
struct	proc *curproc = &proc0;
#endif
/* Filled in by the fork1() call in main() that creates process 1. */
struct	proc *initproc;

/* Copied into cwdi0.cwdi_cmask by main(). */
int	cmask = CMASK;
extern	struct user *proc0paddr;

struct	vnode *rootvp, *swapdev_vp;
/*
 * Boot flags: start_init() tests RB_SINGLE, and main() sets RB_ASKNAME
 * when mounting the root file system fails.
 */
int	boothowto;
int	cold = 1;			/* still working on startup */
/* Set from `time' by main() once the root file system is mounted. */
struct	timeval boottime;

/*
 * start_init() sleeps while this is 0; main() sets it to 1 and calls
 * wakeup() on its address once init may exec.
 */
__volatile int start_init_exec;		/* semaphore for start_init() */

static void check_console(struct proc *p);
static void start_init(void *);
void main(void);

extern const struct emul emul_netbsd;	/* defined in kern_exec.c */

/*
 * System startup; initialize the world, create process 0, mount root
 * filesystem, and fork to create init and pagedaemon.  Most of the
 * hard work is done in the lower-level initialization routines including
 * startup(), which does memory initialization and autoconfiguration.
 *
 * Runs with curproc set to proc0 and does not return: its last action
 * is to call uvm_scheduler().  The order of the calls below is
 * significant; several of the comments state which step must precede
 * which.
 */
void
main(void)
{
	struct proc *p;
	struct pdevinit *pdev;
	int i, s, error;
	rlim_t lim;
	extern struct pdevinit pdevinit[];
	extern void schedcpu(void *);
	extern void disk_init(void);
#if defined(NFSSERVER) || defined(NFS)
	extern void nfs_init(void);
#endif
#ifdef NVNODE_IMPLICIT
	int usevnodes;
#endif

	/*
	 * Initialize the current process pointer (curproc) before
	 * any possible traps/probes to simplify trap processing.
	 */
	p = &proc0;
	curproc = p;
	p->p_cpu = curcpu();
	/*
	 * Attempt to find console and initialize
	 * in case of early panic or other messages.
	 */
	consinit();
	printf("%s", copyright);

	KERNEL_LOCK_INIT();

	uvm_init();

	/* Do machine-dependent initialization. */
	cpu_startup();

	/* Initialize callouts. */
	callout_startup();

	/*
	 * Initialize mbuf's.  Do this now because we might attempt to
	 * allocate mbufs or mbuf clusters during autoconfiguration.
	 */
	mbinit();

	/* Initialize sockets. */
	soinit();

	/*
	 * The following 3 things must be done before autoconfiguration.
	 */
	disk_init();		/* initialize disk list */
	tty_init();		/* initialize tty list */
#if NRND > 0
	rnd_init();		/* initialize RNG */
#endif

	/* Initialize the sysctl subsystem. */
	sysctl_init();

	/*
	 * Initialize process and pgrp structures.
	 */
	procinit();

	/*
	 * Create process 0 (the swapper).
	 */
	s = proclist_lock_write();
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
	proclist_unlock_write(s);

	/* proc0 is the sole member of pgrp0, which belongs to session0. */
	p->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);

	pgrp0.pg_session = &session0;
	session0.s_count = 1;
	session0.s_sid = p->p_pid;
	session0.s_leader = p;

	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to
	 * init(8) when they exit.  init(8) can easily wait them out
	 * for us.
	 */
	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
	p->p_stat = SONPROC;
	p->p_nice = NZERO;
	p->p_emul = &emul_netbsd;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
	strncpy(p->p_comm, "swapper", MAXCOMLEN);

	callout_init(&p->p_realit_ch);
	callout_init(&p->p_tsleep_ch);

	/* Create credentials. */
	cred0.p_refcnt = 1;
	p->p_cred = &cred0;
	p->p_ucred = crget();
	p->p_ucred->cr_ngroups = 1;	/* group 0 */

	/* Create the file descriptor table. */
	finit();
	p->p_fd = &filedesc0.fd_fd;
	fdinit1(&filedesc0);

	/* Create the CWD info. */
	p->p_cwdi = &cwdi0;
	cwdi0.cwdi_cmask = cmask;
	cwdi0.cwdi_refcnt = 1;

	/*
	 * Create the limits structures.  Every limit starts out as
	 * RLIM_INFINITY; selected ones are tightened below.
	 */
	p->p_limit = &limit0;
	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur =
		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

	/* Open files: hard limit maxfiles, soft limit min(maxfiles, NOFILE). */
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
	    maxfiles < NOFILE ? maxfiles : NOFILE;

	/* Processes: hard limit maxproc, soft limit min(maxproc, MAXUPRC). */
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
	    maxproc < MAXUPRC ? maxproc : MAXUPRC;

	/*
	 * The RSS and MEMLOCK hard limits are derived from uvmexp.free;
	 * the MEMLOCK soft limit is one third of that value.
	 */
	lim = ptoa(uvmexp.free);
	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0.pl_corename = defcorename;
	limit0.p_refcnt = 1;

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS), TRUE);
	p->p_vmspace = &vmspace0;

	p->p_addr = proc0paddr;				/* XXX */

	/*
	 * We continue to place resource usage info in the
	 * user struct so they're pageable.
	 */
	p->p_stats = &p->p_addr->u_stats;

	/*
	 * Charge root for one process.
	 */
	(void)chgproccnt(0, 1);

	rqinit();

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize the file systems. */
#if defined(NFSSERVER) || defined(NFS)
	nfs_init();			/* initialize server/shared data */
#endif
	vfsinit();

	/* Configure the system hardware.  This will enable interrupts. */
	configure();

	ubc_init();		/* must be after autoconfig */

	/* Lock the kernel on behalf of proc0. */
	KERNEL_PROC_LOCK(p);

#ifdef SYSVSHM
	/* Initialize System V style shared memory. */
	shminit();
#endif

#ifdef SYSVSEM
	/* Initialize System V style semaphores. */
	seminit();
#endif

#ifdef SYSVMSG
	/* Initialize System V style message queues. */
	msginit();
#endif

	/*
	 * Attach pseudo-devices.  pdevinit[] is terminated by an entry
	 * whose pdev_attach is NULL.
	 */
	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
		(*pdev->pdev_attach)(pdev->pdev_count);

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	s = splimp();
	ifinit();
	domaininit();
	splx(s);

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* Initialize system accounting. */
	acct_init();

	/*
	 * Initialize signal-related data structures, and signal state
	 * for proc0.
	 */
	signal_init();
	p->p_sigacts = &sigacts0;
	siginit(p);

	/* Kick off timeout driven events by calling first time. */
	schedcpu(NULL);

	/*
	 * Create process 1 (init(8)).  We do this now, as Unix has
	 * historically had init be process 1, and changing this would
	 * probably upset a lot of people.
	 *
	 * Note that process 1 won't immediately exec init(8), but will
	 * wait for us to inform it that the root file system has been
	 * mounted.
	 */
	if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
		panic("fork init");

	/*
	 * Create any kernel threads whose creation was deferred because
	 * initproc had not yet been created.
	 */
	kthread_run_deferred_queue();

	/*
	 * Now that device driver threads have been created, wait for
	 * them to finish any deferred autoconfiguration.  Note we don't
	 * need to lock this semaphore, since we haven't booted any
	 * secondary processors, yet.
	 */
	while (config_pending)
		(void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);

	/*
	 * Now that autoconfiguration has completed, we can determine
	 * the root and dump devices.
	 */
	cpu_rootconf();
	cpu_dumpconf();

	/*
	 * Mount the root file system.  The loop repeats until
	 * vfs_mountroot() returns 0; every failure is reported, sets
	 * RB_ASKNAME in boothowto and calls setroot() before retrying.
	 */
	do {
		domountroothook();
		if ((error = vfs_mountroot())) {
			printf("cannot mount root, error = %d\n", error);
			boothowto |= RB_ASKNAME;
			setroot(root_device,
			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
		}
	} while (error != 0);
	mountroothook_destroy();

	/* The first entry on mountlist is the root file system. */
	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
	mountlist.cqh_first->mnt_op->vfs_refcount++;

	/*
	 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
	 * reference it.
	 */
	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
		panic("cannot find root vnode");
	cwdi0.cwdi_cdir = rootvnode;
	VREF(cwdi0.cwdi_cdir);
	VOP_UNLOCK(rootvnode, 0);
	cwdi0.cwdi_rdir = NULL;

	/*
	 * Now that root is mounted, we can fixup initproc's CWD
	 * info.  All other processes are kthreads, which merely
	 * share proc0's CWD info.
	 */
	initproc->p_cwdi->cwdi_cdir = rootvnode;
	VREF(initproc->p_cwdi->cwdi_cdir);
	initproc->p_cwdi->cwdi_rdir = NULL;

	/*
	 * Now can look at time, having had a chance to verify the time
	 * from the file system.  Reset p->p_rtime as it may have been
	 * munched in mi_switch() after the time got set.
	 *
	 * Note that `p' is reused as the list cursor here and is NULL
	 * once the loop finishes; nothing below dereferences it.
	 */
	proclist_lock_read();
	s = splsched();
	for (p = LIST_FIRST(&allproc); p != NULL;
	     p = LIST_NEXT(p, p_list)) {
		p->p_stats->p_start = mono_time = boottime = time;
		if (p->p_cpu != NULL)
			p->p_cpu->ci_schedstate.spc_runtime = time;
		p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
	}
	splx(s);
	proclist_unlock_read();

	/* Create the pageout daemon kernel thread. */
	uvm_swap_init();
	if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
		panic("fork pagedaemon");

	/* Create the process reaper kernel thread. */
	if (kthread_create1(reaper, NULL, NULL, "reaper"))
		panic("fork reaper");

	/* Create the filesystem syncer kernel thread. */
	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
		panic("fork syncer");

	/* Create the aiodone daemon kernel thread. */
	if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
		panic("fork aiodoned");

#if defined(MULTIPROCESSOR)
	/* Boot the secondary processors. */
	cpu_boot_secondary_processors();
#endif

	/* Initialize exec structures */
	exec_init(1);

	/*
	 * Okay, now we can let init(8) exec!  It's off to userland!
	 * start_init() sleeps on this flag until it becomes non-zero.
	 */
	start_init_exec = 1;
	wakeup((void *)&start_init_exec);

#ifdef NVNODE_IMPLICIT
	/*
	 * If maximum number of vnodes in namei vnode cache is not explicitly
	 * defined in kernel config, adjust the number such as we use roughly
	 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
	 */
	usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
	if (usevnodes > desiredvnodes)
		desiredvnodes = usevnodes;
#endif

	/* The scheduler is an infinite loop. */
	uvm_scheduler();
	/* NOTREACHED */
}

/*
 * Look up "/dev/console" on behalf of process `p' and print a warning
 * if the lookup fails: a dedicated message for ENOENT, and the numeric
 * error code otherwise.  On success the vnode returned by namei() is
 * simply released again with vrele(); nothing is returned to the caller.
 */
static void
check_console(struct proc *p)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
	error = namei(&nd);
	if (error == 0)
		vrele(nd.ni_vp);
	else if (error == ENOENT)
		printf("warning: no /dev/console\n");
	else
		printf("warning: lookup /dev/console: error %d\n", error);
}

/*
 * List of paths to try when searching for "init".
 * The list is NULL-terminated and start_init() tries the entries in
 * the order given here.
 */
static const char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};

/*
 * Start the initial user process; try exec'ing each pathname in "initpaths".
 * The program is invoked with one argument containing the boot flags.
544 */ 545 static void 546 start_init(void *arg) 547 { 548 struct proc *p = arg; 549 vaddr_t addr; 550 struct sys_execve_args /* { 551 syscallarg(const char *) path; 552 syscallarg(char * const *) argp; 553 syscallarg(char * const *) envp; 554 } */ args; 555 int options, i, error; 556 register_t retval[2]; 557 char flags[4], *flagsp; 558 const char **pathp, *path, *slash; 559 char *ucp, **uap, *arg0, *arg1 = NULL; 560 561 /* 562 * Now in process 1. 563 */ 564 strncpy(p->p_comm, "init", MAXCOMLEN); 565 566 /* 567 * Wait for main() to tell us that it's safe to exec. 568 */ 569 while (start_init_exec == 0) 570 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0); 571 572 /* 573 * This is not the right way to do this. We really should 574 * hand-craft a descriptor onto /dev/console to hand to init, 575 * but that's a _lot_ more work, and the benefit from this easy 576 * hack makes up for the "good is the enemy of the best" effect. 577 */ 578 check_console(p); 579 580 /* 581 * Need just enough stack to hold the faked-up "execve()" arguments. 582 */ 583 addr = USRSTACK - PAGE_SIZE; 584 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, 585 NULL, UVM_UNKNOWN_OFFSET, 0, 586 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, 587 UVM_ADV_NORMAL, 588 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0) 589 panic("init: couldn't allocate argument space"); 590 p->p_vmspace->vm_maxsaddr = (caddr_t)addr; 591 592 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) { 593 ucp = (char *)(addr + PAGE_SIZE); 594 595 /* 596 * Construct the boot flag argument. 597 */ 598 flagsp = flags; 599 *flagsp++ = '-'; 600 options = 0; 601 602 if (boothowto & RB_SINGLE) { 603 *flagsp++ = 's'; 604 options = 1; 605 } 606 #ifdef notyet 607 if (boothowto & RB_FASTBOOT) { 608 *flagsp++ = 'f'; 609 options = 1; 610 } 611 #endif 612 613 /* 614 * Move out the flags (arg 1), if necessary. 
615 */ 616 if (options != 0) { 617 *flagsp++ = '\0'; 618 i = flagsp - flags; 619 #ifdef DEBUG 620 printf("init: copying out flags `%s' %d\n", flags, i); 621 #endif 622 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i); 623 arg1 = ucp; 624 } 625 626 /* 627 * Move out the file name (also arg 0). 628 */ 629 i = strlen(path) + 1; 630 #ifdef DEBUG 631 printf("init: copying out path `%s' %d\n", path, i); 632 #endif 633 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i); 634 arg0 = ucp; 635 636 /* 637 * Move out the arg pointers. 638 */ 639 uap = (char **)((long)ucp & ~ALIGNBYTES); 640 (void)suword((caddr_t)--uap, 0); /* terminator */ 641 if (options != 0) 642 (void)suword((caddr_t)--uap, (long)arg1); 643 slash = strrchr(path, '/'); 644 if (slash) 645 (void)suword((caddr_t)--uap, 646 (long)arg0 + (slash + 1 - path)); 647 else 648 (void)suword((caddr_t)--uap, (long)arg0); 649 650 /* 651 * Point at the arguments. 652 */ 653 SCARG(&args, path) = arg0; 654 SCARG(&args, argp) = uap; 655 SCARG(&args, envp) = NULL; 656 657 /* 658 * Now try to exec the program. If can't for any reason 659 * other than it doesn't exist, complain. 660 */ 661 error = sys_execve(p, &args, retval); 662 if (error == 0 || error == EJUSTRETURN) { 663 KERNEL_PROC_UNLOCK(p); 664 return; 665 } 666 if (error != ENOENT) 667 printf("exec %s: error %d\n", path, error); 668 } 669 printf("init: not found\n"); 670 panic("no init"); 671 } 672