1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * zoneadmd manages zones; one zoneadmd process is launched for each
28 * non-global zone on the system. This daemon juggles four jobs:
29 *
30 * - Implement setup and teardown of the zone "virtual platform": mount and
31 * unmount filesystems; create and destroy network interfaces; communicate
32 * with devfsadmd to lay out devices for the zone; instantiate the zone
33 * console device; configure process runtime attributes such as resource
34 * controls, pool bindings, fine-grained privileges.
35 *
36 * - Launch the zone's init(1M) process.
37 *
38 * - Implement a door server; clients (like zoneadm) connect to the door
39 * server and request zone state changes. The kernel is also a client of
40 * this door server. A request to halt or reboot the zone which originates
41 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
42 *
43 * One minor problem is that messages emitted by zoneadmd need to be passed
44 * back to the zoneadm process making the request. These messages need to
45 * be rendered in the client's locale; so, this is passed in as part of the
46 * request. The exception is the kernel upcall to zoneadmd, in which case
47 * messages are syslog'd.
48 *
49 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
50 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
51 * strings which do not need to be translated.
52 *
53 * - Act as a console server for zlogin -C processes; see comments in zcons.c
54 * for more information about the zone console architecture.
55 *
56 * DESIGN NOTES
57 *
58 * Restart:
59 * A chief design constraint of zoneadmd is that it should be restartable in
60 * the case that the administrator kills it off, or it suffers a fatal error,
61 * without the running zone being impacted; this is akin to being able to
62 * reboot the service processor of a server without affecting the OS instance.
63 */
64
65 #include <sys/param.h>
66 #include <sys/mman.h>
67 #include <sys/types.h>
68 #include <sys/stat.h>
69 #include <sys/sysmacros.h>
70
71 #include <bsm/adt.h>
72 #include <bsm/adt_event.h>
73
74 #include <alloca.h>
75 #include <assert.h>
76 #include <errno.h>
77 #include <door.h>
78 #include <fcntl.h>
79 #include <locale.h>
80 #include <signal.h>
81 #include <stdarg.h>
82 #include <stdio.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <strings.h>
86 #include <synch.h>
87 #include <syslog.h>
88 #include <thread.h>
89 #include <unistd.h>
90 #include <wait.h>
91 #include <limits.h>
92 #include <zone.h>
93 #include <libbrand.h>
94 #include <sys/brand.h>
95 #include <libcontract.h>
96 #include <libcontract_priv.h>
97 #include <sys/brand.h>
98 #include <sys/contract/process.h>
99 #include <sys/ctfs.h>
100 #include <libdladm.h>
101 #include <sys/dls_mgmt.h>
102
103 #include <libzonecfg.h>
104 #include <zonestat_impl.h>
105 #include "zoneadmd.h"
106
static char *progname;		/* program name printed by usage() */
char *zone_name; /* zone which we are managing */
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];	/* passed to brand_open() in zone_bootup() */
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
static zoneid_t zone_id;	/* assigned from vplat_create() in zone_ready() */
dladm_handle_t dld_handle = NULL;	/* handed to dladm_zone_boot()/halt() */

/*
 * Brand state-change hook command prefixes.  brand_prestatechg() and
 * brand_poststatechg() treat an empty string as "no hook".
 */
static char pre_statechg_hook[2 * MAXPATHLEN];
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

/* zerror() sends messages logged against &logsys to syslog. */
zlog_t logsys;

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores after each message lookup. */
#define DEFAULT_LOCALE "C"
140
141 static const char *
z_cmd_name(zone_cmd_t zcmd)142 z_cmd_name(zone_cmd_t zcmd)
143 {
144 /* This list needs to match the enum in sys/zone.h */
145 static const char *zcmdstr[] = {
146 "ready", "boot", "forceboot", "reboot", "halt",
147 "note_uninstalling", "mount", "forcemount", "unmount"
148 };
149
150 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
151 return ("unknown");
152 else
153 return (zcmdstr[(int)zcmd]);
154 }
155
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are stripped by overwriting them with '\0', so
 * the caller's buffer is modified in place.  The returned pointer refers
 * into that same buffer; it is the empty string if nothing but slashes
 * was supplied.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* a non-empty component follows the last slash: done */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* no slash left: the whole string is the base name */
	return (execfullname);
}
178
179 static void
usage(void)180 usage(void)
181 {
182 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
183 (void) fprintf(stderr,
184 gettext("\tNote: %s should not be run directly.\n"), progname);
185 exit(2);
186 }
187
/*
 * Signal handler with an intentionally empty body; the signal number is
 * ignored.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	/* nothing to do */
}
193
194 char *
localize_msg(char * locale,const char * msg)195 localize_msg(char *locale, const char *msg)
196 {
197 char *out;
198
199 (void) mutex_lock(&msglock);
200 (void) setlocale(LC_MESSAGES, locale);
201 out = gettext(msg);
202 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
203 (void) mutex_unlock(&msglock);
204 return (out);
205 }
206
207 /* PRINTFLIKE3 */
208 void
zerror(zlog_t * zlogp,boolean_t use_strerror,const char * fmt,...)209 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
210 {
211 va_list alist;
212 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
213 char *bp;
214 int saved_errno = errno;
215
216 if (zlogp == NULL)
217 return;
218 if (zlogp == &logsys)
219 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
220 zone_name);
221 else
222 buf[0] = '\0';
223 bp = &(buf[strlen(buf)]);
224
225 /*
226 * In theory, the locale pointer should be set to either "C" or a
227 * char array, so it should never be NULL
228 */
229 assert(zlogp->locale != NULL);
230 /* Locale is per process, but we are multi-threaded... */
231 fmt = localize_msg(zlogp->locale, fmt);
232
233 va_start(alist, fmt);
234 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
235 va_end(alist);
236 bp = &(buf[strlen(buf)]);
237 if (use_strerror)
238 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
239 strerror(saved_errno));
240 if (zlogp == &logsys) {
241 (void) syslog(LOG_ERR, "%s", buf);
242 } else if (zlogp->logfile != NULL) {
243 (void) fprintf(zlogp->logfile, "%s\n", buf);
244 } else {
245 size_t buflen;
246 size_t copylen;
247
248 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
249 copylen = MIN(buflen, zlogp->loglen);
250 zlogp->log += copylen;
251 zlogp->loglen -= copylen;
252 }
253 }
254
255 /*
256 * Emit a warning for any boot arguments which are unrecognized. Since
257 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
258 * put the arguments into an argv style array, use getopt to process them,
259 * and put the resultant argument string back into outargs.
260 *
261 * During the filtering, we pull out any arguments which are truly "boot"
262 * arguments, leaving only those which are to be passed intact to the
263 * progenitor process. The one we support at the moment is -i, which
264 * indicates to the kernel which program should be launched as 'init'.
265 *
266 * A return of Z_INVAL indicates specifically that the arguments are
267 * not valid; this is a non-fatal error. Except for Z_OK, all other return
268 * values are treated as fatal.
269 */
270 static int
filter_bootargs(zlog_t * zlogp,const char * inargs,char * outargs,char * init_file,char * badarg)271 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
272 char *init_file, char *badarg)
273 {
274 int argc = 0, argc_save;
275 int i;
276 int err;
277 char *arg, *lasts, **argv = NULL, **argv_save;
278 char zonecfg_args[BOOTARGS_MAX];
279 char scratchargs[BOOTARGS_MAX], *sargs;
280 char c;
281
282 bzero(outargs, BOOTARGS_MAX);
283 bzero(badarg, BOOTARGS_MAX);
284
285 /*
286 * If the user didn't specify transient boot arguments, check
287 * to see if there were any specified in the zone configuration,
288 * and use them if applicable.
289 */
290 if (inargs == NULL || inargs[0] == '\0') {
291 zone_dochandle_t handle;
292 if ((handle = zonecfg_init_handle()) == NULL) {
293 zerror(zlogp, B_TRUE,
294 "getting zone configuration handle");
295 return (Z_BAD_HANDLE);
296 }
297 err = zonecfg_get_snapshot_handle(zone_name, handle);
298 if (err != Z_OK) {
299 zerror(zlogp, B_FALSE,
300 "invalid configuration snapshot");
301 zonecfg_fini_handle(handle);
302 return (Z_BAD_HANDLE);
303 }
304
305 bzero(zonecfg_args, sizeof (zonecfg_args));
306 (void) zonecfg_get_bootargs(handle, zonecfg_args,
307 sizeof (zonecfg_args));
308 inargs = zonecfg_args;
309 zonecfg_fini_handle(handle);
310 }
311
312 if (strlen(inargs) >= BOOTARGS_MAX) {
313 zerror(zlogp, B_FALSE, "boot argument string too long");
314 return (Z_INVAL);
315 }
316
317 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
318 sargs = scratchargs;
319 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
320 sargs = NULL;
321 argc++;
322 }
323
324 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
325 zerror(zlogp, B_FALSE, "memory allocation failed");
326 return (Z_NOMEM);
327 }
328
329 argv_save = argv;
330 argc_save = argc;
331
332 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
333 sargs = scratchargs;
334 i = 0;
335 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
336 sargs = NULL;
337 if ((argv[i] = strdup(arg)) == NULL) {
338 err = Z_NOMEM;
339 zerror(zlogp, B_FALSE, "memory allocation failed");
340 goto done;
341 }
342 i++;
343 }
344
345 /*
346 * We preserve compatibility with the Solaris system boot behavior,
347 * which allows:
348 *
349 * # reboot kernel/unix -s -m verbose
350 *
351 * In this example, kernel/unix tells the booter what file to
352 * boot. We don't want reboot in a zone to be gratuitously different,
353 * so we silently ignore the boot file, if necessary.
354 */
355 if (argv[0] == NULL)
356 goto done;
357
358 assert(argv[0][0] != ' ');
359 assert(argv[0][0] != '\t');
360
361 if (argv[0][0] != '-' && argv[0][0] != '\0') {
362 argv = &argv[1];
363 argc--;
364 }
365
366 optind = 0;
367 opterr = 0;
368 err = Z_OK;
369 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
370 switch (c) {
371 case 'i':
372 /*
373 * -i is handled by the runtime and is not passed
374 * along to userland
375 */
376 (void) strlcpy(init_file, optarg, MAXPATHLEN);
377 break;
378 case 'f':
379 /* This has already been processed by zoneadm */
380 break;
381 case 'm':
382 case 's':
383 /* These pass through unmolested */
384 (void) snprintf(outargs, BOOTARGS_MAX,
385 "%s -%c %s ", outargs, c, optarg ? optarg : "");
386 break;
387 case '?':
388 /*
389 * We warn about unknown arguments but pass them
390 * along anyway-- if someone wants to develop their
391 * own init replacement, they can pass it whatever
392 * args they want.
393 */
394 err = Z_INVAL;
395 (void) snprintf(outargs, BOOTARGS_MAX,
396 "%s -%c", outargs, optopt);
397 (void) snprintf(badarg, BOOTARGS_MAX,
398 "%s -%c", badarg, optopt);
399 break;
400 }
401 }
402
403 /*
404 * For Solaris Zones we warn about and discard non-option arguments.
405 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
406 * to the kernel, we concat up all the other remaining boot args.
407 * and warn on them as a group.
408 */
409 if (optind < argc) {
410 err = Z_INVAL;
411 while (optind < argc) {
412 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
413 badarg, strlen(badarg) > 0 ? " " : "",
414 argv[optind]);
415 optind++;
416 }
417 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
418 "arguments `%s'.", badarg);
419 }
420
421 done:
422 for (i = 0; i < argc_save; i++) {
423 if (argv_save[i] != NULL)
424 free(argv_save[i]);
425 }
426 free(argv_save);
427 return (err);
428 }
429
430
431 static int
mkzonedir(zlog_t * zlogp)432 mkzonedir(zlog_t *zlogp)
433 {
434 struct stat st;
435 /*
436 * We must create and lock everyone but root out of ZONES_TMPDIR
437 * since anyone can open any UNIX domain socket, regardless of
438 * its file system permissions. Sigh...
439 */
440 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
441 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
442 return (-1);
443 }
444 /* paranoia */
445 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
446 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
447 return (-1);
448 }
449 (void) chmod(ZONES_TMPDIR, S_IRWXU);
450 return (0);
451 }
452
453 /*
454 * Run the brand's pre-state change callback, if it exists.
455 */
456 static int
brand_prestatechg(zlog_t * zlogp,int state,int cmd)457 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
458 {
459 char cmdbuf[2 * MAXPATHLEN];
460 const char *altroot;
461
462 if (pre_statechg_hook[0] == '\0')
463 return (0);
464
465 altroot = zonecfg_get_root();
466 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
467 state, cmd, altroot) > sizeof (cmdbuf))
468 return (-1);
469
470 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
471 return (-1);
472
473 return (0);
474 }
475
476 /*
477 * Run the brand's post-state change callback, if it exists.
478 */
479 static int
brand_poststatechg(zlog_t * zlogp,int state,int cmd)480 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
481 {
482 char cmdbuf[2 * MAXPATHLEN];
483 const char *altroot;
484
485 if (post_statechg_hook[0] == '\0')
486 return (0);
487
488 altroot = zonecfg_get_root();
489 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
490 state, cmd, altroot) > sizeof (cmdbuf))
491 return (-1);
492
493 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
494 return (-1);
495
496 return (0);
497 }
498
499 /*
500 * Notify zonestatd of the new zone. If zonestatd is not running, this
501 * will do nothing.
502 */
503 static void
notify_zonestatd(zoneid_t zoneid)504 notify_zonestatd(zoneid_t zoneid)
505 {
506 int cmd[2];
507 int fd;
508 door_arg_t params;
509
510 fd = open(ZS_DOOR_PATH, O_RDONLY);
511 if (fd < 0)
512 return;
513
514 cmd[0] = ZSD_CMD_NEW_ZONE;
515 cmd[1] = zoneid;
516 params.data_ptr = (char *)&cmd;
517 params.data_size = sizeof (cmd);
518 params.desc_ptr = NULL;
519 params.desc_num = 0;
520 params.rbuf = NULL;
521 params.rsize = NULL;
522 (void) door_call(fd, ¶ms);
523 (void) close(fd);
524 }
525
526 /*
527 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
528 * 'true' if this is being invoked as part of the processing for the "mount"
529 * subcommand.
530 */
531 static int
zone_ready(zlog_t * zlogp,zone_mnt_t mount_cmd,int zstate)532 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
533 {
534 int err;
535
536 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
537 return (-1);
538
539 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
540 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
541 zonecfg_strerror(err));
542 goto bad;
543 }
544
545 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
546 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
547 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
548 zonecfg_strerror(err));
549 goto bad;
550 }
551 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
552 bringup_failure_recovery = B_TRUE;
553 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
554 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
555 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
556 zonecfg_strerror(err));
557 goto bad;
558 }
559
560 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
561 goto bad;
562
563 return (0);
564
565 bad:
566 /*
567 * If something goes wrong, we up the zones's state to the target
568 * state, READY, and then invoke the hook as if we're halting.
569 */
570 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
571 return (-1);
572 }
573
574 int
init_template(void)575 init_template(void)
576 {
577 int fd;
578 int err = 0;
579
580 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
581 if (fd == -1)
582 return (-1);
583
584 /*
585 * For now, zoneadmd doesn't do anything with the contract.
586 * Deliver no events, don't inherit, and allow it to be orphaned.
587 */
588 err |= ct_tmpl_set_critical(fd, 0);
589 err |= ct_tmpl_set_informative(fd, 0);
590 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
591 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
592 if (err || ct_tmpl_activate(fd)) {
593 (void) close(fd);
594 return (-1);
595 }
596
597 return (fd);
598 }
599
/*
 * Context handed to mount_early_fs() through its opaque data pointer.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() reports errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* if set, root is <zonepath>/lu, not zone root */
} fs_callback_t;
605
606 static int
mount_early_fs(void * data,const char * spec,const char * dir,const char * fstype,const char * opt)607 mount_early_fs(void *data, const char *spec, const char *dir,
608 const char *fstype, const char *opt)
609 {
610 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
611 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
612 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
613 char rootpath[MAXPATHLEN];
614 pid_t child;
615 int child_status;
616 int tmpl_fd;
617 int rv;
618 ctid_t ct;
619
620 /* determine the zone rootpath */
621 if (mount_cmd) {
622 char zonepath[MAXPATHLEN];
623 char luroot[MAXPATHLEN];
624
625 if (zone_get_zonepath(zone_name,
626 zonepath, sizeof (zonepath)) != Z_OK) {
627 zerror(zlogp, B_FALSE, "unable to determine zone path");
628 return (-1);
629 }
630
631 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
632 resolve_lofs(zlogp, luroot, sizeof (luroot));
633 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
634 } else {
635 if (zone_get_rootpath(zone_name,
636 rootpath, sizeof (rootpath)) != Z_OK) {
637 zerror(zlogp, B_FALSE, "unable to determine zone root");
638 return (-1);
639 }
640 }
641
642 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
643 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
644 rootpath, dir);
645 return (-1);
646 } else if (rv > 0) {
647 /* The mount point path doesn't exist, create it now. */
648 if (make_one_dir(zlogp, rootpath, dir,
649 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
650 DEFAULT_DIR_GROUP) != 0) {
651 zerror(zlogp, B_FALSE, "failed to create mount point");
652 return (-1);
653 }
654
655 /*
656 * Now this might seem weird, but we need to invoke
657 * valid_mount_path() again. Why? Because it checks
658 * to make sure that the mount point path is canonical,
659 * which it can only do if the path exists, so now that
660 * we've created the path we have to verify it again.
661 */
662 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
663 fstype)) < 0) {
664 zerror(zlogp, B_FALSE,
665 "%s%s is not a valid mount point", rootpath, dir);
666 return (-1);
667 }
668 }
669
670 if ((tmpl_fd = init_template()) == -1) {
671 zerror(zlogp, B_TRUE, "failed to create contract");
672 return (-1);
673 }
674
675 if ((child = fork()) == -1) {
676 (void) ct_tmpl_clear(tmpl_fd);
677 (void) close(tmpl_fd);
678 zerror(zlogp, B_TRUE, "failed to fork");
679 return (-1);
680
681 } else if (child == 0) { /* child */
682 char opt_buf[MAX_MNTOPT_STR];
683 int optlen = 0;
684 int mflag = MS_DATA;
685
686 (void) ct_tmpl_clear(tmpl_fd);
687 /*
688 * Even though there are no procs running in the zone, we
689 * do this for paranoia's sake.
690 */
691 (void) closefrom(0);
692
693 if (zone_enter(zoneid) == -1) {
694 _exit(errno);
695 }
696 if (opt != NULL) {
697 /*
698 * The mount() system call is incredibly annoying.
699 * If options are specified, we need to copy them
700 * into a temporary buffer since the mount() system
701 * call will overwrite the options string. It will
702 * also fail if the new option string it wants to
703 * write is bigger than the one we passed in, so
704 * you must pass in a buffer of the maximum possible
705 * option string length. sigh.
706 */
707 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
708 opt = opt_buf;
709 optlen = MAX_MNTOPT_STR;
710 mflag = MS_OPTIONSTR;
711 }
712 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
713 _exit(errno);
714 _exit(0);
715 }
716
717 /* parent */
718 if (contract_latest(&ct) == -1)
719 ct = -1;
720 (void) ct_tmpl_clear(tmpl_fd);
721 (void) close(tmpl_fd);
722 if (waitpid(child, &child_status, 0) != child) {
723 /* unexpected: we must have been signalled */
724 (void) contract_abandon_id(ct);
725 return (-1);
726 }
727 (void) contract_abandon_id(ct);
728 if (WEXITSTATUS(child_status) != 0) {
729 errno = WEXITSTATUS(child_status);
730 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
731 return (-1);
732 }
733
734 return (0);
735 }
736
737 /*
738 * If retstr is not NULL, the output of the subproc is returned in the str,
739 * otherwise it is output using zerror(). Any memory allocated for retstr
740 * should be freed by the caller.
741 */
742 int
do_subproc(zlog_t * zlogp,char * cmdbuf,char ** retstr)743 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
744 {
745 char buf[1024]; /* arbitrary large amount */
746 char *inbuf;
747 FILE *file;
748 int status;
749 int rd_cnt;
750
751 if (retstr != NULL) {
752 if ((*retstr = malloc(1024)) == NULL) {
753 zerror(zlogp, B_FALSE, "out of memory");
754 return (-1);
755 }
756 inbuf = *retstr;
757 rd_cnt = 0;
758 } else {
759 inbuf = buf;
760 }
761
762 file = popen(cmdbuf, "r");
763 if (file == NULL) {
764 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
765 return (-1);
766 }
767
768 while (fgets(inbuf, 1024, file) != NULL) {
769 if (retstr == NULL) {
770 if (zlogp != &logsys)
771 zerror(zlogp, B_FALSE, "%s", inbuf);
772 } else {
773 char *p;
774
775 rd_cnt += 1024 - 1;
776 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
777 zerror(zlogp, B_FALSE, "out of memory");
778 (void) pclose(file);
779 return (-1);
780 }
781
782 *retstr = p;
783 inbuf = *retstr + rd_cnt;
784 }
785 }
786 status = pclose(file);
787
788 if (WIFSIGNALED(status)) {
789 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
790 "signal %d", cmdbuf, WTERMSIG(status));
791 return (-1);
792 }
793 assert(WIFEXITED(status));
794 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
795 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
796 return (-1);
797 }
798 return (WEXITSTATUS(status));
799 }
800
801 static int
zone_bootup(zlog_t * zlogp,const char * bootargs,int zstate)802 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
803 {
804 zoneid_t zoneid;
805 struct stat st;
806 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
807 char nbootargs[BOOTARGS_MAX];
808 char cmdbuf[MAXPATHLEN];
809 fs_callback_t cb;
810 brand_handle_t bh;
811 zone_iptype_t iptype;
812 boolean_t links_loaded = B_FALSE;
813 dladm_status_t status;
814 char errmsg[DLADM_STRSIZE];
815 int err;
816
817 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
818 return (-1);
819
820 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
821 zerror(zlogp, B_TRUE, "unable to get zoneid");
822 goto bad;
823 }
824
825 cb.zlogp = zlogp;
826 cb.zoneid = zoneid;
827 cb.mount_cmd = B_FALSE;
828
829 /* Get a handle to the brand info for this zone */
830 if ((bh = brand_open(brand_name)) == NULL) {
831 zerror(zlogp, B_FALSE, "unable to determine zone brand");
832 goto bad;
833 }
834
835 /*
836 * Get the list of filesystems to mount from the brand
837 * configuration. These mounts are done via a thread that will
838 * enter the zone, so they are done from within the context of the
839 * zone.
840 */
841 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
842 zerror(zlogp, B_FALSE, "unable to mount filesystems");
843 brand_close(bh);
844 goto bad;
845 }
846
847 /*
848 * Get the brand's boot callback if it exists.
849 */
850 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
851 zerror(zlogp, B_FALSE, "unable to determine zone path");
852 brand_close(bh);
853 goto bad;
854 }
855 (void) strcpy(cmdbuf, EXEC_PREFIX);
856 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
857 sizeof (cmdbuf) - EXEC_LEN) != 0) {
858 zerror(zlogp, B_FALSE,
859 "unable to determine branded zone's boot callback");
860 brand_close(bh);
861 goto bad;
862 }
863
864 /* Get the path for this zone's init(1M) (or equivalent) process. */
865 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
866 zerror(zlogp, B_FALSE,
867 "unable to determine zone's init(1M) location");
868 brand_close(bh);
869 goto bad;
870 }
871
872 brand_close(bh);
873
874 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
875 bad_boot_arg);
876 if (err == Z_INVAL)
877 eventstream_write(Z_EVT_ZONE_BADARGS);
878 else if (err != Z_OK)
879 goto bad;
880
881 assert(init_file[0] != '\0');
882
883 /* Try to anticipate possible problems: Make sure init is executable. */
884 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
885 zerror(zlogp, B_FALSE, "unable to determine zone root");
886 goto bad;
887 }
888
889 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
890
891 if (stat(initpath, &st) == -1) {
892 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
893 goto bad;
894 }
895
896 if ((st.st_mode & S_IXUSR) == 0) {
897 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
898 goto bad;
899 }
900
901 /*
902 * Exclusive stack zones interact with the dlmgmtd running in the
903 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
904 * booting, and loads its datalinks from the zone's datalink
905 * configuration file.
906 */
907 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
908 status = dladm_zone_boot(dld_handle, zoneid);
909 if (status != DLADM_STATUS_OK) {
910 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
911 " %s", dladm_status2str(status, errmsg));
912 goto bad;
913 }
914 links_loaded = B_TRUE;
915 }
916
917 /*
918 * If there is a brand 'boot' callback, execute it now to give the
919 * brand one last chance to do any additional setup before the zone
920 * is booted.
921 */
922 if ((strlen(cmdbuf) > EXEC_LEN) &&
923 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
924 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
925 goto bad;
926 }
927
928 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
929 zerror(zlogp, B_TRUE, "could not set zone boot file");
930 goto bad;
931 }
932
933 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
934 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
935 goto bad;
936 }
937
938 /*
939 * Inform zonestatd of a new zone so that it can install a door for
940 * the zone to contact it.
941 */
942 notify_zonestatd(zone_id);
943
944 if (zone_boot(zoneid) == -1) {
945 zerror(zlogp, B_TRUE, "unable to boot zone");
946 goto bad;
947 }
948
949 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
950 goto bad;
951
952 return (0);
953
954 bad:
955 /*
956 * If something goes wrong, we up the zones's state to the target
957 * state, RUNNING, and then invoke the hook as if we're halting.
958 */
959 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
960 if (links_loaded)
961 (void) dladm_zone_halt(dld_handle, zoneid);
962 return (-1);
963 }
964
965 static int
zone_halt(zlog_t * zlogp,boolean_t unmount_cmd,boolean_t rebooting,int zstate)966 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
967 {
968 int err;
969
970 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
971 return (-1);
972
973 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
974 if (!bringup_failure_recovery)
975 zerror(zlogp, B_FALSE, "unable to destroy zone");
976 return (-1);
977 }
978
979 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
980 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
981 zonecfg_strerror(err));
982
983 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
984 return (-1);
985
986 return (0);
987 }
988
989 /*
990 * Generate AUE_zone_state for a command that boots a zone.
991 */
992 static void
audit_put_record(zlog_t * zlogp,ucred_t * uc,int return_val,char * new_state)993 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
994 char *new_state)
995 {
996 adt_session_data_t *ah;
997 adt_event_data_t *event;
998 int pass_fail, fail_reason;
999
1000 if (!adt_audit_enabled())
1001 return;
1002
1003 if (return_val == 0) {
1004 pass_fail = ADT_SUCCESS;
1005 fail_reason = ADT_SUCCESS;
1006 } else {
1007 pass_fail = ADT_FAILURE;
1008 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1009 }
1010
1011 if (adt_start_session(&ah, NULL, 0)) {
1012 zerror(zlogp, B_TRUE, gettext("audit failure."));
1013 return;
1014 }
1015 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1016 zerror(zlogp, B_TRUE, gettext("audit failure."));
1017 (void) adt_end_session(ah);
1018 return;
1019 }
1020
1021 event = adt_alloc_event(ah, ADT_zone_state);
1022 if (event == NULL) {
1023 zerror(zlogp, B_TRUE, gettext("audit failure."));
1024 (void) adt_end_session(ah);
1025 return;
1026 }
1027 event->adt_zone_state.zonename = zone_name;
1028 event->adt_zone_state.new_state = new_state;
1029
1030 if (adt_put_event(event, pass_fail, fail_reason))
1031 zerror(zlogp, B_TRUE, gettext("audit failure."));
1032
1033 adt_free_event(event);
1034
1035 (void) adt_end_session(ah);
1036 }
1037
1038 /*
1039 * The main routine for the door server that deals with zone state transitions.
1040 */
1041 /* ARGSUSED */
1042 static void
server(void * cookie,char * args,size_t alen,door_desc_t * dp,uint_t n_desc)1043 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1044 uint_t n_desc)
1045 {
1046 ucred_t *uc = NULL;
1047 const priv_set_t *eset;
1048
1049 zone_state_t zstate;
1050 zone_cmd_t cmd;
1051 zone_cmd_arg_t *zargp;
1052
1053 boolean_t kernelcall;
1054
1055 int rval = -1;
1056 uint64_t uniqid;
1057 zoneid_t zoneid = -1;
1058 zlog_t zlog;
1059 zlog_t *zlogp;
1060 zone_cmd_rval_t *rvalp;
1061 size_t rlen = getpagesize(); /* conservative */
1062 fs_callback_t cb;
1063 brand_handle_t bh;
1064
1065 /* LINTED E_BAD_PTR_CAST_ALIGN */
1066 zargp = (zone_cmd_arg_t *)args;
1067
1068 /*
1069 * When we get the door unref message, we've fdetach'd the door, and
1070 * it is time for us to shut down zoneadmd.
1071 */
1072 if (zargp == DOOR_UNREF_DATA) {
1073 /*
1074 * See comment at end of main() for info on the last rites.
1075 */
1076 exit(0);
1077 }
1078
1079 if (zargp == NULL) {
1080 (void) door_return(NULL, 0, 0, 0);
1081 }
1082
1083 rvalp = alloca(rlen);
1084 bzero(rvalp, rlen);
1085 zlog.logfile = NULL;
1086 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1087 zlog.buf = rvalp->errbuf;
1088 zlog.log = zlog.buf;
1089 /* defer initialization of zlog.locale until after credential check */
1090 zlogp = &zlog;
1091
1092 if (alen != sizeof (zone_cmd_arg_t)) {
1093 /*
1094 * This really shouldn't be happening.
1095 */
1096 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1097 "unexpected (expected %d bytes)", alen,
1098 sizeof (zone_cmd_arg_t));
1099 goto out;
1100 }
1101 cmd = zargp->cmd;
1102
1103 if (door_ucred(&uc) != 0) {
1104 zerror(&logsys, B_TRUE, "door_ucred");
1105 goto out;
1106 }
1107 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1108 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1109 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1110 ucred_geteuid(uc) != 0)) {
1111 zerror(&logsys, B_FALSE, "insufficient privileges");
1112 goto out;
1113 }
1114
1115 kernelcall = ucred_getpid(uc) == 0;
1116
1117 /*
1118 * This is safe because we only use a zlog_t throughout the
1119 * duration of a door call; i.e., by the time the pointer
1120 * might become invalid, the door call would be over.
1121 */
1122 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1123
1124 (void) mutex_lock(&lock);
1125
1126 /*
1127 * Once we start to really die off, we don't want more connections.
1128 */
1129 if (in_death_throes) {
1130 (void) mutex_unlock(&lock);
1131 ucred_free(uc);
1132 (void) door_return(NULL, 0, 0, 0);
1133 thr_exit(NULL);
1134 }
1135
1136 /*
1137 * Check for validity of command.
1138 */
1139 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1140 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING &&
1141 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1142 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1143 goto out;
1144 }
1145
1146 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1147 /*
1148 * Can't happen
1149 */
1150 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1151 cmd);
1152 goto out;
1153 }
1154 /*
1155 * We ignore the possibility of someone calling zone_create(2)
1156 * explicitly; all requests must come through zoneadmd.
1157 */
1158 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1159 /*
1160 * Something terribly wrong happened
1161 */
1162 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1163 goto out;
1164 }
1165
1166 if (kernelcall) {
1167 /*
1168 * Kernel-initiated requests may lose their validity if the
1169 * zone_t the kernel was referring to has gone away.
1170 */
1171 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1172 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1173 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1174 /*
1175 * We're not talking about the same zone. The request
1176 * must have arrived too late. Return error.
1177 */
1178 rval = -1;
1179 goto out;
1180 }
1181 zlogp = &logsys; /* Log errors to syslog */
1182 }
1183
1184 /*
1185 * If we are being asked to forcibly mount or boot a zone, we
1186 * pretend that an INCOMPLETE zone is actually INSTALLED.
1187 */
1188 if (zstate == ZONE_STATE_INCOMPLETE &&
1189 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1190 zstate = ZONE_STATE_INSTALLED;
1191
1192 switch (zstate) {
1193 case ZONE_STATE_CONFIGURED:
1194 case ZONE_STATE_INCOMPLETE:
1195 /*
1196 * Not our area of expertise; we just print a nice message
1197 * and die off.
1198 */
1199 zerror(zlogp, B_FALSE,
1200 "%s operation is invalid for zones in state '%s'",
1201 z_cmd_name(cmd), zone_state_str(zstate));
1202 break;
1203
1204 case ZONE_STATE_INSTALLED:
1205 switch (cmd) {
1206 case Z_READY:
1207 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1208 if (rval == 0)
1209 eventstream_write(Z_EVT_ZONE_READIED);
1210 break;
1211 case Z_BOOT:
1212 case Z_FORCEBOOT:
1213 eventstream_write(Z_EVT_ZONE_BOOTING);
1214 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1215 == 0) {
1216 rval = zone_bootup(zlogp, zargp->bootbuf,
1217 zstate);
1218 }
1219 audit_put_record(zlogp, uc, rval, "boot");
1220 if (rval != 0) {
1221 bringup_failure_recovery = B_TRUE;
1222 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1223 zstate);
1224 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1225 }
1226 break;
1227 case Z_HALT:
1228 if (kernelcall) /* Invalid; can't happen */
1229 abort();
1230 /*
1231 * We could have two clients racing to halt this
1232 * zone; the second client loses, but his request
1233 * doesn't fail, since the zone is now in the desired
1234 * state.
1235 */
1236 zerror(zlogp, B_FALSE, "zone is already halted");
1237 rval = 0;
1238 break;
1239 case Z_REBOOT:
1240 if (kernelcall) /* Invalid; can't happen */
1241 abort();
1242 zerror(zlogp, B_FALSE, "%s operation is invalid "
1243 "for zones in state '%s'", z_cmd_name(cmd),
1244 zone_state_str(zstate));
1245 rval = -1;
1246 break;
1247 case Z_NOTE_UNINSTALLING:
1248 if (kernelcall) /* Invalid; can't happen */
1249 abort();
1250 /*
1251 * Tell the console to print out a message about this.
1252 * Once it does, we will be in_death_throes.
1253 */
1254 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1255 break;
1256 case Z_MOUNT:
1257 case Z_FORCEMOUNT:
1258 if (kernelcall) /* Invalid; can't happen */
1259 abort();
1260 if (!zone_isnative && !zone_iscluster &&
1261 !zone_islabeled) {
1262 /*
1263 * -U mounts the zone without lofs mounting
1264 * zone file systems back into the scratch
1265 * zone. This is required when mounting
1266 * non-native branded zones.
1267 */
1268 (void) strlcpy(zargp->bootbuf, "-U",
1269 BOOTARGS_MAX);
1270 }
1271
1272 rval = zone_ready(zlogp,
1273 strcmp(zargp->bootbuf, "-U") == 0 ?
1274 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1275 if (rval != 0)
1276 break;
1277
1278 eventstream_write(Z_EVT_ZONE_READIED);
1279
1280 /*
1281 * Get a handle to the default brand info.
1282 * We must always use the default brand file system
1283 * list when mounting the zone.
1284 */
1285 if ((bh = brand_open(default_brand)) == NULL) {
1286 rval = -1;
1287 break;
1288 }
1289
1290 /*
1291 * Get the list of filesystems to mount from
1292 * the brand configuration. These mounts are done
1293 * via a thread that will enter the zone, so they
1294 * are done from within the context of the zone.
1295 */
1296 cb.zlogp = zlogp;
1297 cb.zoneid = zone_id;
1298 cb.mount_cmd = B_TRUE;
1299 rval = brand_platform_iter_mounts(bh,
1300 mount_early_fs, &cb);
1301
1302 brand_close(bh);
1303
1304 /*
1305 * Ordinarily, /dev/fd would be mounted inside the zone
1306 * by svc:/system/filesystem/usr:default, but since
1307 * we're not booting the zone, we need to do this
1308 * manually.
1309 */
1310 if (rval == 0)
1311 rval = mount_early_fs(&cb,
1312 "fd", "/dev/fd", "fd", NULL);
1313 break;
1314 case Z_UNMOUNT:
1315 if (kernelcall) /* Invalid; can't happen */
1316 abort();
1317 zerror(zlogp, B_FALSE, "zone is already unmounted");
1318 rval = 0;
1319 break;
1320 }
1321 break;
1322
1323 case ZONE_STATE_READY:
1324 switch (cmd) {
1325 case Z_READY:
1326 /*
1327 * We could have two clients racing to ready this
1328 * zone; the second client loses, but his request
1329 * doesn't fail, since the zone is now in the desired
1330 * state.
1331 */
1332 zerror(zlogp, B_FALSE, "zone is already ready");
1333 rval = 0;
1334 break;
1335 case Z_BOOT:
1336 (void) strlcpy(boot_args, zargp->bootbuf,
1337 sizeof (boot_args));
1338 eventstream_write(Z_EVT_ZONE_BOOTING);
1339 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1340 audit_put_record(zlogp, uc, rval, "boot");
1341 if (rval != 0) {
1342 bringup_failure_recovery = B_TRUE;
1343 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1344 zstate);
1345 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1346 }
1347 boot_args[0] = '\0';
1348 break;
1349 case Z_HALT:
1350 if (kernelcall) /* Invalid; can't happen */
1351 abort();
1352 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1353 != 0)
1354 break;
1355 eventstream_write(Z_EVT_ZONE_HALTED);
1356 break;
1357 case Z_REBOOT:
1358 case Z_NOTE_UNINSTALLING:
1359 case Z_MOUNT:
1360 case Z_UNMOUNT:
1361 if (kernelcall) /* Invalid; can't happen */
1362 abort();
1363 zerror(zlogp, B_FALSE, "%s operation is invalid "
1364 "for zones in state '%s'", z_cmd_name(cmd),
1365 zone_state_str(zstate));
1366 rval = -1;
1367 break;
1368 }
1369 break;
1370
1371 case ZONE_STATE_MOUNTED:
1372 switch (cmd) {
1373 case Z_UNMOUNT:
1374 if (kernelcall) /* Invalid; can't happen */
1375 abort();
1376 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1377 if (rval == 0) {
1378 eventstream_write(Z_EVT_ZONE_HALTED);
1379 (void) sema_post(&scratch_sem);
1380 }
1381 break;
1382 default:
1383 if (kernelcall) /* Invalid; can't happen */
1384 abort();
1385 zerror(zlogp, B_FALSE, "%s operation is invalid "
1386 "for zones in state '%s'", z_cmd_name(cmd),
1387 zone_state_str(zstate));
1388 rval = -1;
1389 break;
1390 }
1391 break;
1392
1393 case ZONE_STATE_RUNNING:
1394 case ZONE_STATE_SHUTTING_DOWN:
1395 case ZONE_STATE_DOWN:
1396 switch (cmd) {
1397 case Z_READY:
1398 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1399 != 0)
1400 break;
1401 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1402 eventstream_write(Z_EVT_ZONE_READIED);
1403 else
1404 eventstream_write(Z_EVT_ZONE_HALTED);
1405 break;
1406 case Z_BOOT:
1407 /*
1408 * We could have two clients racing to boot this
1409 * zone; the second client loses, but his request
1410 * doesn't fail, since the zone is now in the desired
1411 * state.
1412 */
1413 zerror(zlogp, B_FALSE, "zone is already booted");
1414 rval = 0;
1415 break;
1416 case Z_HALT:
1417 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1418 != 0)
1419 break;
1420 eventstream_write(Z_EVT_ZONE_HALTED);
1421 break;
1422 case Z_REBOOT:
1423 (void) strlcpy(boot_args, zargp->bootbuf,
1424 sizeof (boot_args));
1425 eventstream_write(Z_EVT_ZONE_REBOOTING);
1426 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1427 != 0) {
1428 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1429 boot_args[0] = '\0';
1430 break;
1431 }
1432 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1433 != 0) {
1434 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1435 boot_args[0] = '\0';
1436 break;
1437 }
1438 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1439 audit_put_record(zlogp, uc, rval, "reboot");
1440 if (rval != 0) {
1441 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1442 zstate);
1443 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1444 }
1445 boot_args[0] = '\0';
1446 break;
1447 case Z_NOTE_UNINSTALLING:
1448 case Z_MOUNT:
1449 case Z_UNMOUNT:
1450 zerror(zlogp, B_FALSE, "%s operation is invalid "
1451 "for zones in state '%s'", z_cmd_name(cmd),
1452 zone_state_str(zstate));
1453 rval = -1;
1454 break;
1455 }
1456 break;
1457 default:
1458 abort();
1459 }
1460
1461 /*
1462 * Because the state of the zone may have changed, we make sure
1463 * to wake the console poller, which is in charge of initiating
1464 * the shutdown procedure as necessary.
1465 */
1466 eventstream_write(Z_EVT_NULL);
1467
1468 out:
1469 (void) mutex_unlock(&lock);
1470 if (kernelcall) {
1471 rvalp = NULL;
1472 rlen = 0;
1473 } else {
1474 rvalp->rval = rval;
1475 }
1476 if (uc != NULL)
1477 ucred_free(uc);
1478 (void) door_return((char *)rvalp, rlen, NULL, 0);
1479 thr_exit(NULL);
1480 }
1481
1482 static int
setup_door(zlog_t * zlogp)1483 setup_door(zlog_t *zlogp)
1484 {
1485 if ((zone_door = door_create(server, NULL,
1486 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1487 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1488 return (-1);
1489 }
1490 (void) fdetach(zone_door_path);
1491
1492 if (fattach(zone_door, zone_door_path) != 0) {
1493 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1494 (void) door_revoke(zone_door);
1495 (void) fdetach(zone_door_path);
1496 zone_door = -1;
1497 return (-1);
1498 }
1499 return (0);
1500 }
1501
1502 /*
1503 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1504 * is where zoneadmd itself will check to see that another instance of
1505 * zoneadmd isn't already controlling this zone.
1506 *
1507 * The idea here is that we want to open the path to which we will
1508 * attach our door, lock it, and then make sure that no-one has beat us
1509 * to fattach(3c)ing onto it.
1510 *
1511 * fattach(3c) is really a mount, so there are actually two possible
1512 * vnodes we could be dealing with. Our strategy is as follows:
1513 *
1514 * - If the file we opened is a regular file (common case):
1515 * There is no fattach(3c)ed door, so we have a chance of becoming
1516 * the managing zoneadmd. We attempt to lock the file: if it is
1517 * already locked, that means someone else raced us here, so we
1518 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1519 * that beat us to it.
1520 *
1521 * - If the file we opened is a namefs file:
1522 * This means there is already an established door fattach(3c)'ed
1523 * to the rendezvous path. We've lost the race, so we give up.
1524 * Note that in this case we also try to grab the file lock, and
1525 * will succeed in acquiring it since the vnode locked by the
1526 * "winning" zoneadmd was a regular one, and the one we locked was
1527 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1528 * we just return to zoneadm(1m) which knows to retry.
1529 */
1530 static int
make_daemon_exclusive(zlog_t * zlogp)1531 make_daemon_exclusive(zlog_t *zlogp)
1532 {
1533 int doorfd = -1;
1534 int err, ret = -1;
1535 struct stat st;
1536 struct flock flock;
1537 zone_state_t zstate;
1538
1539 top:
1540 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1541 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1542 zonecfg_strerror(err));
1543 goto out;
1544 }
1545 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1546 S_IREAD|S_IWRITE)) < 0) {
1547 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1548 goto out;
1549 }
1550 if (fstat(doorfd, &st) < 0) {
1551 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1552 goto out;
1553 }
1554 /*
1555 * Lock the file to synchronize with other zoneadmd
1556 */
1557 flock.l_type = F_WRLCK;
1558 flock.l_whence = SEEK_SET;
1559 flock.l_start = (off_t)0;
1560 flock.l_len = (off_t)0;
1561 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1562 /*
1563 * Someone else raced us here and grabbed the lock file
1564 * first. A warning here is inappropriate since nothing
1565 * went wrong.
1566 */
1567 goto out;
1568 }
1569
1570 if (strcmp(st.st_fstype, "namefs") == 0) {
1571 struct door_info info;
1572
1573 /*
1574 * There is already something fattach()'ed to this file.
1575 * Lets see what the door is up to.
1576 */
1577 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1578 /*
1579 * Another zoneadmd process seems to be in
1580 * control of the situation and we don't need to
1581 * be here. A warning here is inappropriate
1582 * since nothing went wrong.
1583 *
1584 * If the door has been revoked, the zoneadmd
1585 * process currently managing the zone is going
1586 * away. We'll return control to zoneadm(1m)
1587 * which will try again (by which time zoneadmd
1588 * will hopefully have exited).
1589 */
1590 goto out;
1591 }
1592
1593 /*
1594 * If we got this far, there's a fattach(3c)'ed door
1595 * that belongs to a process that has exited, which can
1596 * happen if the previous zoneadmd died unexpectedly.
1597 *
1598 * Let user know that something is amiss, but that we can
1599 * recover; if the zone is in the installed state, then don't
1600 * message, since having a running zoneadmd isn't really
1601 * expected/needed. We want to keep occurences of this message
1602 * limited to times when zoneadmd is picking back up from a
1603 * zoneadmd that died while the zone was in some non-trivial
1604 * state.
1605 */
1606 if (zstate > ZONE_STATE_INSTALLED) {
1607 zerror(zlogp, B_FALSE,
1608 "zone '%s': WARNING: zone is in state '%s', but "
1609 "zoneadmd does not appear to be available; "
1610 "restarted zoneadmd to recover.",
1611 zone_name, zone_state_str(zstate));
1612 }
1613
1614 (void) fdetach(zone_door_path);
1615 (void) close(doorfd);
1616 goto top;
1617 }
1618 ret = 0;
1619 out:
1620 (void) close(doorfd);
1621 return (ret);
1622 }
1623
1624 /*
1625 * Setup the brand's pre and post state change callbacks, as well as the
1626 * query callback, if any of these exist.
1627 */
1628 static int
brand_callback_init(brand_handle_t bh,char * zone_name)1629 brand_callback_init(brand_handle_t bh, char *zone_name)
1630 {
1631 char zpath[MAXPATHLEN];
1632
1633 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1634 return (-1);
1635
1636 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1637 sizeof (pre_statechg_hook));
1638
1639 if (brand_get_prestatechange(bh, zone_name, zpath,
1640 pre_statechg_hook + EXEC_LEN,
1641 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1642 return (-1);
1643
1644 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1645 pre_statechg_hook[0] = '\0';
1646
1647 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1648 sizeof (post_statechg_hook));
1649
1650 if (brand_get_poststatechange(bh, zone_name, zpath,
1651 post_statechg_hook + EXEC_LEN,
1652 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1653 return (-1);
1654
1655 if (strlen(post_statechg_hook) <= EXEC_LEN)
1656 post_statechg_hook[0] = '\0';
1657
1658 (void) strlcpy(query_hook, EXEC_PREFIX,
1659 sizeof (query_hook));
1660
1661 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1662 sizeof (query_hook) - EXEC_LEN) != 0)
1663 return (-1);
1664
1665 if (strlen(query_hook) <= EXEC_LEN)
1666 query_hook[0] = '\0';
1667
1668 return (0);
1669 }
1670
1671 int
main(int argc,char * argv[])1672 main(int argc, char *argv[])
1673 {
1674 int opt;
1675 zoneid_t zid;
1676 priv_set_t *privset;
1677 zone_state_t zstate;
1678 char parents_locale[MAXPATHLEN];
1679 brand_handle_t bh;
1680 int err;
1681
1682 pid_t pid;
1683 sigset_t blockset;
1684 sigset_t block_cld;
1685
1686 struct {
1687 sema_t sem;
1688 int status;
1689 zlog_t log;
1690 } *shstate;
1691 size_t shstatelen = getpagesize();
1692
1693 zlog_t errlog;
1694 zlog_t *zlogp;
1695
1696 int ctfd;
1697
1698 progname = get_execbasename(argv[0]);
1699
1700 /*
1701 * Make sure stderr is unbuffered
1702 */
1703 (void) setbuffer(stderr, NULL, 0);
1704
1705 /*
1706 * Get out of the way of mounted filesystems, since we will daemonize
1707 * soon.
1708 */
1709 (void) chdir("/");
1710
1711 /*
1712 * Use the default system umask per PSARC 1998/110 rather than
1713 * anything that may have been set by the caller.
1714 */
1715 (void) umask(CMASK);
1716
1717 /*
1718 * Initially we want to use our parent's locale.
1719 */
1720 (void) setlocale(LC_ALL, "");
1721 (void) textdomain(TEXT_DOMAIN);
1722 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1723 sizeof (parents_locale));
1724
1725 /*
1726 * This zlog_t is used for writing to stderr
1727 */
1728 errlog.logfile = stderr;
1729 errlog.buflen = errlog.loglen = 0;
1730 errlog.buf = errlog.log = NULL;
1731 errlog.locale = parents_locale;
1732
1733 /*
1734 * We start off writing to stderr until we're ready to daemonize.
1735 */
1736 zlogp = &errlog;
1737
1738 /*
1739 * Process options.
1740 */
1741 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1742 switch (opt) {
1743 case 'R':
1744 zonecfg_set_root(optarg);
1745 break;
1746 case 'z':
1747 zone_name = optarg;
1748 break;
1749 default:
1750 usage();
1751 }
1752 }
1753
1754 if (zone_name == NULL)
1755 usage();
1756
1757 /*
1758 * Because usage() prints directly to stderr, it has gettext()
1759 * wrapping, which depends on the locale. But since zerror() calls
1760 * localize() which tweaks the locale, it is not safe to call zerror()
1761 * until after the last call to usage(). Fortunately, the last call
1762 * to usage() is just above and the first call to zerror() is just
1763 * below. Don't mess this up.
1764 */
1765 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1766 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1767 GLOBAL_ZONENAME);
1768 return (1);
1769 }
1770
1771 if (zone_get_id(zone_name, &zid) != 0) {
1772 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1773 zonecfg_strerror(Z_NO_ZONE));
1774 return (1);
1775 }
1776
1777 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1778 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1779 zonecfg_strerror(err));
1780 return (1);
1781 }
1782 if (zstate < ZONE_STATE_INCOMPLETE) {
1783 zerror(zlogp, B_FALSE,
1784 "cannot manage a zone which is in state '%s'",
1785 zone_state_str(zstate));
1786 return (1);
1787 }
1788
1789 if (zonecfg_default_brand(default_brand,
1790 sizeof (default_brand)) != Z_OK) {
1791 zerror(zlogp, B_FALSE, "unable to determine default brand");
1792 return (1);
1793 }
1794
1795 /* Get a handle to the brand info for this zone */
1796 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1797 != Z_OK) {
1798 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1799 return (1);
1800 }
1801 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1802 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1803
1804 /*
1805 * In the alternate root environment, the only supported
1806 * operations are mount and unmount. In this case, just treat
1807 * the zone as native if it is cluster. Cluster zones can be
1808 * native for the purpose of LU or upgrade, and the cluster
1809 * brand may not exist in the miniroot (such as in net install
1810 * upgrade).
1811 */
1812 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1813 zone_iscluster = B_TRUE;
1814 if (zonecfg_in_alt_root()) {
1815 (void) strlcpy(brand_name, default_brand,
1816 sizeof (brand_name));
1817 }
1818 } else {
1819 zone_iscluster = B_FALSE;
1820 }
1821
1822 if ((bh = brand_open(brand_name)) == NULL) {
1823 zerror(zlogp, B_FALSE, "unable to open zone brand");
1824 return (1);
1825 }
1826
1827 /* Get state change brand hooks. */
1828 if (brand_callback_init(bh, zone_name) == -1) {
1829 zerror(zlogp, B_TRUE,
1830 "failed to initialize brand state change hooks");
1831 brand_close(bh);
1832 return (1);
1833 }
1834
1835 brand_close(bh);
1836
1837 /*
1838 * Check that we have all privileges. It would be nice to pare
1839 * this down, but this is at least a first cut.
1840 */
1841 if ((privset = priv_allocset()) == NULL) {
1842 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1843 return (1);
1844 }
1845
1846 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1847 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
1848 priv_freeset(privset);
1849 return (1);
1850 }
1851
1852 if (priv_isfullset(privset) == B_FALSE) {
1853 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
1854 "run this command (all privs required)");
1855 priv_freeset(privset);
1856 return (1);
1857 }
1858 priv_freeset(privset);
1859
1860 if (mkzonedir(zlogp) != 0)
1861 return (1);
1862
1863 /*
1864 * Pre-fork: setup shared state
1865 */
1866 if ((shstate = (void *)mmap(NULL, shstatelen,
1867 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
1868 MAP_FAILED) {
1869 zerror(zlogp, B_TRUE, "%s failed", "mmap");
1870 return (1);
1871 }
1872 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
1873 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
1874 (void) munmap((char *)shstate, shstatelen);
1875 return (1);
1876 }
1877 shstate->log.logfile = NULL;
1878 shstate->log.buflen = shstatelen - sizeof (*shstate);
1879 shstate->log.loglen = shstate->log.buflen;
1880 shstate->log.buf = (char *)shstate + sizeof (*shstate);
1881 shstate->log.log = shstate->log.buf;
1882 shstate->log.locale = parents_locale;
1883 shstate->status = -1;
1884
1885 /*
1886 * We need a SIGCHLD handler so the sema_wait() below will wake
1887 * up if the child dies without doing a sema_post().
1888 */
1889 (void) sigset(SIGCHLD, sigchld);
1890 /*
1891 * We must mask SIGCHLD until after we've coped with the fork
1892 * sufficiently to deal with it; otherwise we can race and
1893 * receive the signal before pid has been initialized
1894 * (yes, this really happens).
1895 */
1896 (void) sigemptyset(&block_cld);
1897 (void) sigaddset(&block_cld, SIGCHLD);
1898 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1899
1900 if ((ctfd = init_template()) == -1) {
1901 zerror(zlogp, B_TRUE, "failed to create contract");
1902 return (1);
1903 }
1904
1905 /*
1906 * Do not let another thread localize a message while we are forking.
1907 */
1908 (void) mutex_lock(&msglock);
1909 pid = fork();
1910 (void) mutex_unlock(&msglock);
1911
1912 /*
1913 * In all cases (parent, child, and in the event of an error) we
1914 * don't want to cause creation of contracts on subsequent fork()s.
1915 */
1916 (void) ct_tmpl_clear(ctfd);
1917 (void) close(ctfd);
1918
1919 if (pid == -1) {
1920 zerror(zlogp, B_TRUE, "could not fork");
1921 return (1);
1922
1923 } else if (pid > 0) { /* parent */
1924 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1925 /*
1926 * This marks a window of vulnerability in which we receive
1927 * the SIGCLD before falling into sema_wait (normally we would
1928 * get woken up from sema_wait with EINTR upon receipt of
1929 * SIGCLD). So we may need to use some other scheme like
1930 * sema_posting in the sigcld handler.
1931 * blech
1932 */
1933 (void) sema_wait(&shstate->sem);
1934 (void) sema_destroy(&shstate->sem);
1935 if (shstate->status != 0)
1936 (void) waitpid(pid, NULL, WNOHANG);
1937 /*
1938 * It's ok if we die with SIGPIPE. It's not like we could have
1939 * done anything about it.
1940 */
1941 (void) fprintf(stderr, "%s", shstate->log.buf);
1942 _exit(shstate->status == 0 ? 0 : 1);
1943 }
1944
1945 /*
1946 * The child charges on.
1947 */
1948 (void) sigset(SIGCHLD, SIG_DFL);
1949 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1950
1951 /*
1952 * SIGPIPE can be delivered if we write to a socket for which the
1953 * peer endpoint is gone. That can lead to too-early termination
1954 * of zoneadmd, and that's not good eats.
1955 */
1956 (void) sigset(SIGPIPE, SIG_IGN);
1957 /*
1958 * Stop using stderr
1959 */
1960 zlogp = &shstate->log;
1961
1962 /*
1963 * We don't need stdout/stderr from now on.
1964 */
1965 closefrom(0);
1966
1967 /*
1968 * Initialize the syslog zlog_t. This needs to be done after
1969 * the call to closefrom().
1970 */
1971 logsys.buf = logsys.log = NULL;
1972 logsys.buflen = logsys.loglen = 0;
1973 logsys.logfile = NULL;
1974 logsys.locale = DEFAULT_LOCALE;
1975
1976 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
1977
1978 /*
1979 * The eventstream is used to publish state changes in the zone
1980 * from the door threads to the console I/O poller.
1981 */
1982 if (eventstream_init() == -1) {
1983 zerror(zlogp, B_TRUE, "unable to create eventstream");
1984 goto child_out;
1985 }
1986
1987 (void) snprintf(zone_door_path, sizeof (zone_door_path),
1988 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
1989
1990 /*
1991 * See if another zoneadmd is running for this zone. If not, then we
1992 * can now modify system state.
1993 */
1994 if (make_daemon_exclusive(zlogp) == -1)
1995 goto child_out;
1996
1997
1998 /*
1999 * Create/join a new session; we need to be careful of what we do with
2000 * the console from now on so we don't end up being the session leader
2001 * for the terminal we're going to be handing out.
2002 */
2003 (void) setsid();
2004
2005 /*
2006 * This thread shouldn't be receiving any signals; in particular,
2007 * SIGCHLD should be received by the thread doing the fork().
2008 */
2009 (void) sigfillset(&blockset);
2010 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2011
2012 /*
2013 * Setup the console device and get ready to serve the console;
2014 * once this has completed, we're ready to let console clients
2015 * make an attempt to connect (they will block until
2016 * serve_console_sock() below gets called, and any pending
2017 * connection is accept()ed).
2018 */
2019 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2020 goto child_out;
2021
2022 /*
2023 * Take the lock now, so that when the door server gets going, we
2024 * are guaranteed that it won't take a request until we are sure
2025 * that everything is completely set up. See the child_out: label
2026 * below to see why this matters.
2027 */
2028 (void) mutex_lock(&lock);
2029
2030 /* Init semaphore for scratch zones. */
2031 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2032 zerror(zlogp, B_TRUE,
2033 "failed to initialize semaphore for scratch zone");
2034 goto child_out;
2035 }
2036
2037 /* open the dladm handle */
2038 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2039 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2040 goto child_out;
2041 }
2042
2043 /*
2044 * Note: door setup must occur *after* the console is setup.
2045 * This is so that as zlogin tests the door to see if zoneadmd
2046 * is ready yet, we know that the console will get serviced
2047 * once door_info() indicates that the door is "up".
2048 */
2049 if (setup_door(zlogp) == -1)
2050 goto child_out;
2051
2052 /*
2053 * Things seem OK so far; tell the parent process that we're done
2054 * with setup tasks. This will cause the parent to exit, signalling
2055 * to zoneadm, zlogin, or whatever forked it that we are ready to
2056 * service requests.
2057 */
2058 shstate->status = 0;
2059 (void) sema_post(&shstate->sem);
2060 (void) munmap((char *)shstate, shstatelen);
2061 shstate = NULL;
2062
2063 (void) mutex_unlock(&lock);
2064
2065 /*
2066 * zlogp is now invalid, so reset it to the syslog logger.
2067 */
2068 zlogp = &logsys;
2069
2070 /*
2071 * Now that we are free of any parents, switch to the default locale.
2072 */
2073 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2074
2075 /*
2076 * At this point the setup portion of main() is basically done, so
2077 * we reuse this thread to manage the zone console. When
2078 * serve_console() has returned, we are past the point of no return
2079 * in the life of this zoneadmd.
2080 */
2081 if (zonecfg_in_alt_root()) {
2082 /*
2083 * This is just awful, but mounted scratch zones don't (and
2084 * can't) have consoles. We just wait for unmount instead.
2085 */
2086 while (sema_wait(&scratch_sem) == EINTR)
2087 ;
2088 } else {
2089 serve_console(zlogp);
2090 assert(in_death_throes);
2091 }
2092
2093 /*
2094 * This is the next-to-last part of the exit interlock. Upon calling
2095 * fdetach(), the door will go unreferenced; once any
2096 * outstanding requests (like the door thread doing Z_HALT) are
2097 * done, the door will get an UNREF notification; when it handles
2098 * the UNREF, the door server will cause the exit. It's possible
2099 * that fdetach() can fail because the file is in use, in which
2100 * case we'll retry the operation.
2101 */
2102 assert(!MUTEX_HELD(&lock));
2103 for (;;) {
2104 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2105 break;
2106 yield();
2107 }
2108
2109 for (;;)
2110 (void) pause();
2111
2112 child_out:
2113 assert(pid == 0);
2114 if (shstate != NULL) {
2115 shstate->status = -1;
2116 (void) sema_post(&shstate->sem);
2117 (void) munmap((char *)shstate, shstatelen);
2118 }
2119
2120 /*
2121 * This might trigger an unref notification, but if so,
2122 * we are still holding the lock, so our call to exit will
2123 * ultimately win the race and will publish the right exit
2124 * code.
2125 */
2126 if (zone_door != -1) {
2127 assert(MUTEX_HELD(&lock));
2128 (void) door_revoke(zone_door);
2129 (void) fdetach(zone_door_path);
2130 }
2131
2132 if (dld_handle != NULL)
2133 dladm_close(dld_handle);
2134
2135 return (1); /* return from main() forcibly exits an MT process */
2136 }
2137