xref: /onnv-gate/usr/src/cmd/zoneadmd/zoneadmd.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * zoneadmd manages zones; one zoneadmd process is launched for each
31*0Sstevel@tonic-gate  * non-global zone on the system.  This daemon juggles four jobs:
32*0Sstevel@tonic-gate  *
33*0Sstevel@tonic-gate  * - Implement setup and teardown of the zone "virtual platform": mount and
34*0Sstevel@tonic-gate  *   unmount filesystems; create and destroy network interfaces; communicate
35*0Sstevel@tonic-gate  *   with devfsadmd to lay out devices for the zone; instantiate the zone
36*0Sstevel@tonic-gate  *   console device; configure process runtime attributes such as resource
37*0Sstevel@tonic-gate  *   controls, pool bindings, fine-grained privileges.
38*0Sstevel@tonic-gate  *
39*0Sstevel@tonic-gate  * - Launch the zone's init(1M) process.
40*0Sstevel@tonic-gate  *
41*0Sstevel@tonic-gate  * - Implement a door server; clients (like zoneadm) connect to the door
42*0Sstevel@tonic-gate  *   server and request zone state changes.  The kernel is also a client of
43*0Sstevel@tonic-gate  *   this door server.  A request to halt or reboot the zone which originates
44*0Sstevel@tonic-gate  *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
45*0Sstevel@tonic-gate  *
46*0Sstevel@tonic-gate  *   One minor problem is that messages emitted by zoneadmd need to be passed
47*0Sstevel@tonic-gate  *   back to the zoneadm process making the request.  These messages need to
48*0Sstevel@tonic-gate  *   be rendered in the client's locale; so, this is passed in as part of the
49*0Sstevel@tonic-gate  *   request.  The exception is the kernel upcall to zoneadmd, in which case
50*0Sstevel@tonic-gate  *   messages are syslog'd.
51*0Sstevel@tonic-gate  *
52*0Sstevel@tonic-gate  *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
53*0Sstevel@tonic-gate  *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
54*0Sstevel@tonic-gate  *   strings which do not need to be translated.
55*0Sstevel@tonic-gate  *
56*0Sstevel@tonic-gate  * - Act as a console server for zlogin -C processes; see comments in zcons.c
57*0Sstevel@tonic-gate  *   for more information about the zone console architecture.
58*0Sstevel@tonic-gate  *
59*0Sstevel@tonic-gate  * DESIGN NOTES
60*0Sstevel@tonic-gate  *
61*0Sstevel@tonic-gate  * Restart:
62*0Sstevel@tonic-gate  *   A chief design constraint of zoneadmd is that it should be restartable in
63*0Sstevel@tonic-gate  *   the case that the administrator kills it off, or it suffers a fatal error,
64*0Sstevel@tonic-gate  *   without the running zone being impacted; this is akin to being able to
65*0Sstevel@tonic-gate  *   reboot the service processor of a server without affecting the OS instance.
66*0Sstevel@tonic-gate  */
67*0Sstevel@tonic-gate 
68*0Sstevel@tonic-gate #include <sys/param.h>
69*0Sstevel@tonic-gate #include <sys/mman.h>
70*0Sstevel@tonic-gate #include <sys/types.h>
71*0Sstevel@tonic-gate #include <sys/stat.h>
72*0Sstevel@tonic-gate #include <sys/sysmacros.h>
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate #include <bsm/adt.h>
75*0Sstevel@tonic-gate #include <bsm/adt_event.h>
76*0Sstevel@tonic-gate 
77*0Sstevel@tonic-gate #include <alloca.h>
78*0Sstevel@tonic-gate #include <assert.h>
79*0Sstevel@tonic-gate #include <errno.h>
80*0Sstevel@tonic-gate #include <door.h>
81*0Sstevel@tonic-gate #include <fcntl.h>
82*0Sstevel@tonic-gate #include <locale.h>
83*0Sstevel@tonic-gate #include <signal.h>
84*0Sstevel@tonic-gate #include <stdarg.h>
85*0Sstevel@tonic-gate #include <stdio.h>
86*0Sstevel@tonic-gate #include <stdlib.h>
87*0Sstevel@tonic-gate #include <string.h>
88*0Sstevel@tonic-gate #include <strings.h>
89*0Sstevel@tonic-gate #include <synch.h>
90*0Sstevel@tonic-gate #include <syslog.h>
91*0Sstevel@tonic-gate #include <thread.h>
92*0Sstevel@tonic-gate #include <unistd.h>
93*0Sstevel@tonic-gate #include <wait.h>
94*0Sstevel@tonic-gate #include <limits.h>
95*0Sstevel@tonic-gate #include <zone.h>
96*0Sstevel@tonic-gate #include <libcontract.h>
97*0Sstevel@tonic-gate #include <libcontract_priv.h>
98*0Sstevel@tonic-gate #include <sys/contract/process.h>
99*0Sstevel@tonic-gate #include <sys/ctfs.h>
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate #include <libzonecfg.h>
102*0Sstevel@tonic-gate #include "zoneadmd.h"
103*0Sstevel@tonic-gate 
/* Program name as printed by usage(). */
static char *progname;
char *zone_name;	/* zone which we are managing */

/*
 * Sentinel log handle: zerror() compares its zlogp argument against
 * &logsys and, on a match, prefixes the message with the zone name and
 * sends it to syslog.
 */
static zlog_t logsys;

mutex_t	lock = DEFAULTMUTEX;	/* to serialize stuff */
mutex_t	msglock = DEFAULTMUTEX;	/* for calling setlocale() */

/* NOTE(review): presumably the door's filesystem path and fd -- confirm. */
static char	zone_door_path[MAXPATHLEN];
static int	zone_door = -1;

boolean_t in_death_throes = B_FALSE;	/* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

/* Path of init relative to the zone root; checked before booting. */
#define	PATH_TO_INIT	"/sbin/init"

/* Locale restored by localize_msg() and used for kernel upcalls. */
#define	DEFAULT_LOCALE	"C"
125*0Sstevel@tonic-gate 
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with '\0', so
 * the caller's buffer is modified in place.  If no '/' remains, the
 * original pointer is returned.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* A non-empty component follows the slash: that's our answer. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	return (execfullname);
}
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate static void
150*0Sstevel@tonic-gate usage(void)
151*0Sstevel@tonic-gate {
152*0Sstevel@tonic-gate 	(void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
153*0Sstevel@tonic-gate 	(void) fprintf(stderr,
154*0Sstevel@tonic-gate 	    gettext("\tNote: %s should not be run directly.\n"), progname);
155*0Sstevel@tonic-gate 	exit(2);
156*0Sstevel@tonic-gate }
157*0Sstevel@tonic-gate 
/*
 * SIGCHLD handler.  Intentionally empty: the signal number is ignored and
 * no work is done here.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	/* nothing to do */
}
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate char *
165*0Sstevel@tonic-gate localize_msg(char *locale, const char *msg)
166*0Sstevel@tonic-gate {
167*0Sstevel@tonic-gate 	char *out;
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
170*0Sstevel@tonic-gate 	(void) setlocale(LC_MESSAGES, locale);
171*0Sstevel@tonic-gate 	out = gettext(msg);
172*0Sstevel@tonic-gate 	(void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
173*0Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
174*0Sstevel@tonic-gate 	return (out);
175*0Sstevel@tonic-gate }
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate /* PRINTFLIKE3 */
178*0Sstevel@tonic-gate void
179*0Sstevel@tonic-gate zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
180*0Sstevel@tonic-gate {
181*0Sstevel@tonic-gate 	va_list alist;
182*0Sstevel@tonic-gate 	char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
183*0Sstevel@tonic-gate 	char *bp;
184*0Sstevel@tonic-gate 	int saved_errno = errno;
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate 	if (zlogp == NULL)
187*0Sstevel@tonic-gate 		return;
188*0Sstevel@tonic-gate 	if (zlogp == &logsys)
189*0Sstevel@tonic-gate 		(void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
190*0Sstevel@tonic-gate 		    zone_name);
191*0Sstevel@tonic-gate 	else
192*0Sstevel@tonic-gate 		buf[0] = '\0';
193*0Sstevel@tonic-gate 	bp = &(buf[strlen(buf)]);
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate 	/*
196*0Sstevel@tonic-gate 	 * In theory, the locale pointer should be set to either "C" or a
197*0Sstevel@tonic-gate 	 * char array, so it should never be NULL
198*0Sstevel@tonic-gate 	 */
199*0Sstevel@tonic-gate 	assert(zlogp->locale != NULL);
200*0Sstevel@tonic-gate 	/* Locale is per process, but we are multi-threaded... */
201*0Sstevel@tonic-gate 	fmt = localize_msg(zlogp->locale, fmt);
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate 	va_start(alist, fmt);
204*0Sstevel@tonic-gate 	(void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
205*0Sstevel@tonic-gate 	va_end(alist);
206*0Sstevel@tonic-gate 	bp = &(buf[strlen(buf)]);
207*0Sstevel@tonic-gate 	if (use_strerror)
208*0Sstevel@tonic-gate 		(void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
209*0Sstevel@tonic-gate 		    strerror(saved_errno));
210*0Sstevel@tonic-gate 	if (zlogp == &logsys) {
211*0Sstevel@tonic-gate 		(void) syslog(LOG_ERR, "%s", buf);
212*0Sstevel@tonic-gate 	} else if (zlogp->logfile != NULL) {
213*0Sstevel@tonic-gate 		(void) fprintf(zlogp->logfile, "%s\n", buf);
214*0Sstevel@tonic-gate 	} else {
215*0Sstevel@tonic-gate 		size_t buflen;
216*0Sstevel@tonic-gate 		size_t copylen;
217*0Sstevel@tonic-gate 
218*0Sstevel@tonic-gate 		buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
219*0Sstevel@tonic-gate 		copylen = MIN(buflen, zlogp->loglen);
220*0Sstevel@tonic-gate 		zlogp->log += copylen;
221*0Sstevel@tonic-gate 		zlogp->loglen -= copylen;
222*0Sstevel@tonic-gate 	}
223*0Sstevel@tonic-gate }
224*0Sstevel@tonic-gate 
225*0Sstevel@tonic-gate static int
226*0Sstevel@tonic-gate mkzonedir(zlog_t *zlogp)
227*0Sstevel@tonic-gate {
228*0Sstevel@tonic-gate 	struct stat st;
229*0Sstevel@tonic-gate 	/*
230*0Sstevel@tonic-gate 	 * We must create and lock everyone but root out of ZONES_TMPDIR
231*0Sstevel@tonic-gate 	 * since anyone can open any UNIX domain socket, regardless of
232*0Sstevel@tonic-gate 	 * its file system permissions.  Sigh...
233*0Sstevel@tonic-gate 	 */
234*0Sstevel@tonic-gate 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
235*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
236*0Sstevel@tonic-gate 		return (-1);
237*0Sstevel@tonic-gate 	}
238*0Sstevel@tonic-gate 	/* paranoia */
239*0Sstevel@tonic-gate 	if ((stat(ZONES_TMPDIR, &st) < 0) || ((st.st_mode & S_IFDIR) == 0)) {
240*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
241*0Sstevel@tonic-gate 		return (-1);
242*0Sstevel@tonic-gate 	}
243*0Sstevel@tonic-gate 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
244*0Sstevel@tonic-gate 	return (0);
245*0Sstevel@tonic-gate }
246*0Sstevel@tonic-gate 
247*0Sstevel@tonic-gate static zoneid_t
248*0Sstevel@tonic-gate zone_ready(zlog_t *zlogp)
249*0Sstevel@tonic-gate {
250*0Sstevel@tonic-gate 	int err;
251*0Sstevel@tonic-gate 
252*0Sstevel@tonic-gate 	if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
253*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
254*0Sstevel@tonic-gate 		    zonecfg_strerror(err));
255*0Sstevel@tonic-gate 		return (-1);
256*0Sstevel@tonic-gate 	}
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate 	if (vplat_create(zlogp) != 0) {
259*0Sstevel@tonic-gate 		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
260*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
261*0Sstevel@tonic-gate 			    zonecfg_strerror(err));
262*0Sstevel@tonic-gate 		return (-1);
263*0Sstevel@tonic-gate 	}
264*0Sstevel@tonic-gate 	if (vplat_bringup(zlogp) != 0) {
265*0Sstevel@tonic-gate 		bringup_failure_recovery = B_TRUE;
266*0Sstevel@tonic-gate 		(void) vplat_teardown(NULL);
267*0Sstevel@tonic-gate 		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
268*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
269*0Sstevel@tonic-gate 			    zonecfg_strerror(err));
270*0Sstevel@tonic-gate 		return (-1);
271*0Sstevel@tonic-gate 	}
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate 	return (0);
274*0Sstevel@tonic-gate }
275*0Sstevel@tonic-gate 
276*0Sstevel@tonic-gate static int
277*0Sstevel@tonic-gate init_template()
278*0Sstevel@tonic-gate {
279*0Sstevel@tonic-gate 	int fd;
280*0Sstevel@tonic-gate 	int err = 0;
281*0Sstevel@tonic-gate 
282*0Sstevel@tonic-gate 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
283*0Sstevel@tonic-gate 	if (fd == -1)
284*0Sstevel@tonic-gate 		return (-1);
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate 	/*
287*0Sstevel@tonic-gate 	 * For now, zoneadmd doesn't do anything with the contract.
288*0Sstevel@tonic-gate 	 * Deliver no events, don't inherit, and allow it to be orphaned.
289*0Sstevel@tonic-gate 	 */
290*0Sstevel@tonic-gate 	err |= ct_tmpl_set_critical(fd, 0);
291*0Sstevel@tonic-gate 	err |= ct_tmpl_set_informative(fd, 0);
292*0Sstevel@tonic-gate 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
293*0Sstevel@tonic-gate 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
294*0Sstevel@tonic-gate 	if (err || ct_tmpl_activate(fd)) {
295*0Sstevel@tonic-gate 		(void) close(fd);
296*0Sstevel@tonic-gate 		return (-1);
297*0Sstevel@tonic-gate 	}
298*0Sstevel@tonic-gate 
299*0Sstevel@tonic-gate 	return (fd);
300*0Sstevel@tonic-gate }
301*0Sstevel@tonic-gate 
302*0Sstevel@tonic-gate static int
303*0Sstevel@tonic-gate mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec,
304*0Sstevel@tonic-gate     const char *dir, char *fstype)
305*0Sstevel@tonic-gate {
306*0Sstevel@tonic-gate 	pid_t child;
307*0Sstevel@tonic-gate 	int child_status;
308*0Sstevel@tonic-gate 	int tmpl_fd;
309*0Sstevel@tonic-gate 	ctid_t ct;
310*0Sstevel@tonic-gate 
311*0Sstevel@tonic-gate 	if ((tmpl_fd = init_template()) == -1) {
312*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to create contract");
313*0Sstevel@tonic-gate 		return (-1);
314*0Sstevel@tonic-gate 	}
315*0Sstevel@tonic-gate 
316*0Sstevel@tonic-gate 	if ((child = fork()) == -1) {
317*0Sstevel@tonic-gate 		(void) ct_tmpl_clear(tmpl_fd);
318*0Sstevel@tonic-gate 		(void) close(tmpl_fd);
319*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to fork");
320*0Sstevel@tonic-gate 		return (-1);
321*0Sstevel@tonic-gate 
322*0Sstevel@tonic-gate 	} else if (child == 0) {	/* child */
323*0Sstevel@tonic-gate 		(void) ct_tmpl_clear(tmpl_fd);
324*0Sstevel@tonic-gate 		/*
325*0Sstevel@tonic-gate 		 * Even though there are no procs running in the zone, we
326*0Sstevel@tonic-gate 		 * do this for paranoia's sake.
327*0Sstevel@tonic-gate 		 */
328*0Sstevel@tonic-gate 		(void) closefrom(0);
329*0Sstevel@tonic-gate 
330*0Sstevel@tonic-gate 		if (zone_enter(zoneid) == -1) {
331*0Sstevel@tonic-gate 			_exit(errno);
332*0Sstevel@tonic-gate 		}
333*0Sstevel@tonic-gate 		if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0)
334*0Sstevel@tonic-gate 			_exit(errno);
335*0Sstevel@tonic-gate 		_exit(0);
336*0Sstevel@tonic-gate 	}
337*0Sstevel@tonic-gate 
338*0Sstevel@tonic-gate 	/* parent */
339*0Sstevel@tonic-gate 	if (contract_latest(&ct) == -1)
340*0Sstevel@tonic-gate 		ct = -1;
341*0Sstevel@tonic-gate 	(void) ct_tmpl_clear(tmpl_fd);
342*0Sstevel@tonic-gate 	(void) close(tmpl_fd);
343*0Sstevel@tonic-gate 	if (waitpid(child, &child_status, 0) != child) {
344*0Sstevel@tonic-gate 		/* unexpected: we must have been signalled */
345*0Sstevel@tonic-gate 		(void) contract_abandon_id(ct);
346*0Sstevel@tonic-gate 		return (-1);
347*0Sstevel@tonic-gate 	}
348*0Sstevel@tonic-gate 	(void) contract_abandon_id(ct);
349*0Sstevel@tonic-gate 	if (WEXITSTATUS(child_status) != 0) {
350*0Sstevel@tonic-gate 		errno = WEXITSTATUS(child_status);
351*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "mount of %s failed", dir);
352*0Sstevel@tonic-gate 		return (-1);
353*0Sstevel@tonic-gate 	}
354*0Sstevel@tonic-gate 
355*0Sstevel@tonic-gate 	return (0);
356*0Sstevel@tonic-gate }
357*0Sstevel@tonic-gate 
358*0Sstevel@tonic-gate static int
359*0Sstevel@tonic-gate zone_bootup(zlog_t *zlogp, const char *bootargs)
360*0Sstevel@tonic-gate {
361*0Sstevel@tonic-gate 	zoneid_t zoneid;
362*0Sstevel@tonic-gate 	struct stat st;
363*0Sstevel@tonic-gate 	char zroot[MAXPATHLEN], initpath[MAXPATHLEN];
364*0Sstevel@tonic-gate 
365*0Sstevel@tonic-gate 	if (init_console_slave(zlogp) != 0)
366*0Sstevel@tonic-gate 		return (-1);
367*0Sstevel@tonic-gate 	reset_slave_terminal(zlogp);
368*0Sstevel@tonic-gate 
369*0Sstevel@tonic-gate 	if ((zoneid = getzoneidbyname(zone_name)) == -1) {
370*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to get zoneid");
371*0Sstevel@tonic-gate 		return (-1);
372*0Sstevel@tonic-gate 	}
373*0Sstevel@tonic-gate 
374*0Sstevel@tonic-gate 	if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0)
375*0Sstevel@tonic-gate 		return (-1);
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 	if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0)
378*0Sstevel@tonic-gate 		return (-1);
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate 	if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile",
381*0Sstevel@tonic-gate 	    "tmpfs") != 0)
382*0Sstevel@tonic-gate 		return (-1);
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate 	if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab",
385*0Sstevel@tonic-gate 	    "mntfs") != 0)
386*0Sstevel@tonic-gate 		return (-1);
387*0Sstevel@tonic-gate 
388*0Sstevel@tonic-gate 	/*
389*0Sstevel@tonic-gate 	 * Try to anticipate possible problems: Make sure init is executable.
390*0Sstevel@tonic-gate 	 */
391*0Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
392*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to determine zone root");
393*0Sstevel@tonic-gate 		return (-1);
394*0Sstevel@tonic-gate 	}
395*0Sstevel@tonic-gate 	(void) snprintf(initpath, sizeof (initpath), "%s%s", zroot,
396*0Sstevel@tonic-gate 	    PATH_TO_INIT);
397*0Sstevel@tonic-gate 
398*0Sstevel@tonic-gate 	if (stat(initpath, &st) == -1) {
399*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not stat %s", initpath);
400*0Sstevel@tonic-gate 		return (-1);
401*0Sstevel@tonic-gate 	}
402*0Sstevel@tonic-gate 
403*0Sstevel@tonic-gate 	if ((st.st_mode & S_IXUSR) == 0) {
404*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not executable", initpath);
405*0Sstevel@tonic-gate 		return (-1);
406*0Sstevel@tonic-gate 	}
407*0Sstevel@tonic-gate 
408*0Sstevel@tonic-gate 	if (zone_boot(zoneid, bootargs) == -1) {
409*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to boot zone");
410*0Sstevel@tonic-gate 		return (-1);
411*0Sstevel@tonic-gate 	}
412*0Sstevel@tonic-gate 
413*0Sstevel@tonic-gate 	return (0);
414*0Sstevel@tonic-gate }
415*0Sstevel@tonic-gate 
416*0Sstevel@tonic-gate static int
417*0Sstevel@tonic-gate zone_halt(zlog_t *zlogp)
418*0Sstevel@tonic-gate {
419*0Sstevel@tonic-gate 	int err;
420*0Sstevel@tonic-gate 
421*0Sstevel@tonic-gate 	if (vplat_teardown(zlogp) != 0) {
422*0Sstevel@tonic-gate 		if (!bringup_failure_recovery)
423*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "unable to destroy zone");
424*0Sstevel@tonic-gate 		return (-1);
425*0Sstevel@tonic-gate 	}
426*0Sstevel@tonic-gate 
427*0Sstevel@tonic-gate 	if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
428*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "destroying snapshot: %s",
429*0Sstevel@tonic-gate 		    zonecfg_strerror(err));
430*0Sstevel@tonic-gate 
431*0Sstevel@tonic-gate 	return (0);
432*0Sstevel@tonic-gate }
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate /*
435*0Sstevel@tonic-gate  * Generate AUE_zone_state for a command that boots a zone.
436*0Sstevel@tonic-gate  */
437*0Sstevel@tonic-gate static void
438*0Sstevel@tonic-gate audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
439*0Sstevel@tonic-gate     char *new_state)
440*0Sstevel@tonic-gate {
441*0Sstevel@tonic-gate 	adt_session_data_t	*ah;
442*0Sstevel@tonic-gate 	adt_event_data_t	*event;
443*0Sstevel@tonic-gate 	int			pass_fail, fail_reason;
444*0Sstevel@tonic-gate 
445*0Sstevel@tonic-gate 	if (!adt_audit_enabled())
446*0Sstevel@tonic-gate 		return;
447*0Sstevel@tonic-gate 
448*0Sstevel@tonic-gate 	if (return_val == 0) {
449*0Sstevel@tonic-gate 		pass_fail = ADT_SUCCESS;
450*0Sstevel@tonic-gate 		fail_reason = ADT_SUCCESS;
451*0Sstevel@tonic-gate 	} else {
452*0Sstevel@tonic-gate 		pass_fail = ADT_FAILURE;
453*0Sstevel@tonic-gate 		fail_reason = ADT_FAIL_VALUE_PROGRAM;
454*0Sstevel@tonic-gate 	}
455*0Sstevel@tonic-gate 
456*0Sstevel@tonic-gate 	if (adt_start_session(&ah, NULL, 0)) {
457*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, gettext("audit failure."));
458*0Sstevel@tonic-gate 		return;
459*0Sstevel@tonic-gate 	}
460*0Sstevel@tonic-gate 	if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
461*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, gettext("audit failure."));
462*0Sstevel@tonic-gate 		(void) adt_end_session(ah);
463*0Sstevel@tonic-gate 		return;
464*0Sstevel@tonic-gate 	}
465*0Sstevel@tonic-gate 
466*0Sstevel@tonic-gate 	event = adt_alloc_event(ah, ADT_zone_state);
467*0Sstevel@tonic-gate 	if (event == NULL) {
468*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, gettext("audit failure."));
469*0Sstevel@tonic-gate 		(void) adt_end_session(ah);
470*0Sstevel@tonic-gate 		return;
471*0Sstevel@tonic-gate 	}
472*0Sstevel@tonic-gate 	event->adt_zone_state.zonename = zone_name;
473*0Sstevel@tonic-gate 	event->adt_zone_state.new_state = new_state;
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate 	if (adt_put_event(event, pass_fail, fail_reason))
476*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, gettext("audit failure."));
477*0Sstevel@tonic-gate 
478*0Sstevel@tonic-gate 	adt_free_event(event);
479*0Sstevel@tonic-gate 
480*0Sstevel@tonic-gate 	(void) adt_end_session(ah);
481*0Sstevel@tonic-gate }
482*0Sstevel@tonic-gate 
483*0Sstevel@tonic-gate /*
484*0Sstevel@tonic-gate  * The main routine for the door server that deals with zone state transitions.
485*0Sstevel@tonic-gate  */
486*0Sstevel@tonic-gate /* ARGSUSED */
487*0Sstevel@tonic-gate static void
488*0Sstevel@tonic-gate server(void *cookie, char *args, size_t alen, door_desc_t *dp,
489*0Sstevel@tonic-gate     uint_t n_desc)
490*0Sstevel@tonic-gate {
491*0Sstevel@tonic-gate 	ucred_t *uc = NULL;
492*0Sstevel@tonic-gate 	const priv_set_t *eset;
493*0Sstevel@tonic-gate 
494*0Sstevel@tonic-gate 	zone_state_t zstate;
495*0Sstevel@tonic-gate 	zone_cmd_t cmd;
496*0Sstevel@tonic-gate 	zone_cmd_arg_t *zargp;
497*0Sstevel@tonic-gate 
498*0Sstevel@tonic-gate 	boolean_t kernelcall;
499*0Sstevel@tonic-gate 
500*0Sstevel@tonic-gate 	int rval = -1;
501*0Sstevel@tonic-gate 	uint64_t uniqid;
502*0Sstevel@tonic-gate 	zoneid_t zoneid = -1;
503*0Sstevel@tonic-gate 	zlog_t zlog;
504*0Sstevel@tonic-gate 	zlog_t *zlogp;
505*0Sstevel@tonic-gate 	zone_cmd_rval_t *rvalp;
506*0Sstevel@tonic-gate 	size_t rlen = getpagesize(); /* conservative */
507*0Sstevel@tonic-gate 	char *cmd_str = NULL;
508*0Sstevel@tonic-gate 
509*0Sstevel@tonic-gate 	/* LINTED E_BAD_PTR_CAST_ALIGN */
510*0Sstevel@tonic-gate 	zargp = (zone_cmd_arg_t *)args;
511*0Sstevel@tonic-gate 
512*0Sstevel@tonic-gate 	/*
513*0Sstevel@tonic-gate 	 * When we get the door unref message, we've fdetach'd the door, and
514*0Sstevel@tonic-gate 	 * it is time for us to shut down zoneadmd.
515*0Sstevel@tonic-gate 	 */
516*0Sstevel@tonic-gate 	if (zargp == DOOR_UNREF_DATA) {
517*0Sstevel@tonic-gate 		/*
518*0Sstevel@tonic-gate 		 * See comment at end of main() for info on the last rites.
519*0Sstevel@tonic-gate 		 */
520*0Sstevel@tonic-gate 		exit(0);
521*0Sstevel@tonic-gate 	}
522*0Sstevel@tonic-gate 
523*0Sstevel@tonic-gate 	if (zargp == NULL) {
524*0Sstevel@tonic-gate 		(void) door_return(NULL, 0, 0, 0);
525*0Sstevel@tonic-gate 	}
526*0Sstevel@tonic-gate 
527*0Sstevel@tonic-gate 	rvalp = alloca(rlen);
528*0Sstevel@tonic-gate 	bzero(rvalp, rlen);
529*0Sstevel@tonic-gate 	zlog.logfile = NULL;
530*0Sstevel@tonic-gate 	zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
531*0Sstevel@tonic-gate 	zlog.buf = rvalp->errbuf;
532*0Sstevel@tonic-gate 	zlog.log = zlog.buf;
533*0Sstevel@tonic-gate 	/* defer initialization of zlog.locale until after credential check */
534*0Sstevel@tonic-gate 	zlogp = &zlog;
535*0Sstevel@tonic-gate 
536*0Sstevel@tonic-gate 	if (alen != sizeof (zone_cmd_arg_t)) {
537*0Sstevel@tonic-gate 		/*
538*0Sstevel@tonic-gate 		 * This really shouldn't be happening.
539*0Sstevel@tonic-gate 		 */
540*0Sstevel@tonic-gate 		zerror(&logsys, B_FALSE, "invalid argument");
541*0Sstevel@tonic-gate 		goto out;
542*0Sstevel@tonic-gate 	}
543*0Sstevel@tonic-gate 	cmd = zargp->cmd;
544*0Sstevel@tonic-gate 
545*0Sstevel@tonic-gate 	if (door_ucred(&uc) != 0) {
546*0Sstevel@tonic-gate 		zerror(&logsys, B_TRUE, "door_ucred");
547*0Sstevel@tonic-gate 		goto out;
548*0Sstevel@tonic-gate 	}
549*0Sstevel@tonic-gate 	eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
550*0Sstevel@tonic-gate 	if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
551*0Sstevel@tonic-gate 	    (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
552*0Sstevel@tonic-gate 	    ucred_geteuid(uc) != 0)) {
553*0Sstevel@tonic-gate 		zerror(&logsys, B_FALSE, "insufficient privileges");
554*0Sstevel@tonic-gate 		goto out;
555*0Sstevel@tonic-gate 	}
556*0Sstevel@tonic-gate 
557*0Sstevel@tonic-gate 	kernelcall = ucred_getpid(uc) == 0;
558*0Sstevel@tonic-gate 
559*0Sstevel@tonic-gate 	/*
560*0Sstevel@tonic-gate 	 * This is safe because we only use a zlog_t throughout the
561*0Sstevel@tonic-gate 	 * duration of a door call; i.e., by the time the pointer
562*0Sstevel@tonic-gate 	 * might become invalid, the door call would be over.
563*0Sstevel@tonic-gate 	 */
564*0Sstevel@tonic-gate 	zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
565*0Sstevel@tonic-gate 
566*0Sstevel@tonic-gate 	(void) mutex_lock(&lock);
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	/*
569*0Sstevel@tonic-gate 	 * Once we start to really die off, we don't want more connections.
570*0Sstevel@tonic-gate 	 */
571*0Sstevel@tonic-gate 	if (in_death_throes) {
572*0Sstevel@tonic-gate 		(void) mutex_unlock(&lock);
573*0Sstevel@tonic-gate 		ucred_free(uc);
574*0Sstevel@tonic-gate 		(void) door_return(NULL, 0, 0, 0);
575*0Sstevel@tonic-gate 		thr_exit(NULL);
576*0Sstevel@tonic-gate 	}
577*0Sstevel@tonic-gate 
578*0Sstevel@tonic-gate 	/*
579*0Sstevel@tonic-gate 	 * Check for validity of command.
580*0Sstevel@tonic-gate 	 */
581*0Sstevel@tonic-gate 	if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT &&
582*0Sstevel@tonic-gate 	    cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING) {
583*0Sstevel@tonic-gate 		zerror(&logsys, B_FALSE, "invalid command");
584*0Sstevel@tonic-gate 		goto out;
585*0Sstevel@tonic-gate 	}
586*0Sstevel@tonic-gate 
587*0Sstevel@tonic-gate 	if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
588*0Sstevel@tonic-gate 		/*
589*0Sstevel@tonic-gate 		 * Can't happen
590*0Sstevel@tonic-gate 		 */
591*0Sstevel@tonic-gate 		zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
592*0Sstevel@tonic-gate 		    cmd);
593*0Sstevel@tonic-gate 		goto out;
594*0Sstevel@tonic-gate 	}
595*0Sstevel@tonic-gate 	/*
596*0Sstevel@tonic-gate 	 * We ignore the possibility of someone calling zone_create(2)
597*0Sstevel@tonic-gate 	 * explicitly; all requests must come through zoneadmd.
598*0Sstevel@tonic-gate 	 */
599*0Sstevel@tonic-gate 	if (zone_get_state(zone_name, &zstate) != Z_OK) {
600*0Sstevel@tonic-gate 		/*
601*0Sstevel@tonic-gate 		 * Something terribly wrong happened
602*0Sstevel@tonic-gate 		 */
603*0Sstevel@tonic-gate 		zerror(&logsys, B_FALSE, "unable to determine state of zone");
604*0Sstevel@tonic-gate 		goto out;
605*0Sstevel@tonic-gate 	}
606*0Sstevel@tonic-gate 
607*0Sstevel@tonic-gate 	if (kernelcall) {
608*0Sstevel@tonic-gate 		/*
609*0Sstevel@tonic-gate 		 * Kernel-initiated requests may lose their validity if the
610*0Sstevel@tonic-gate 		 * zone_t the kernel was referring to has gone away.
611*0Sstevel@tonic-gate 		 */
612*0Sstevel@tonic-gate 		if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
613*0Sstevel@tonic-gate 		    zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
614*0Sstevel@tonic-gate 		    sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
615*0Sstevel@tonic-gate 			/*
616*0Sstevel@tonic-gate 			 * We're not talking about the same zone. The request
617*0Sstevel@tonic-gate 			 * must have arrived too late.  Return error.
618*0Sstevel@tonic-gate 			 */
619*0Sstevel@tonic-gate 			rval = -1;
620*0Sstevel@tonic-gate 			goto out;
621*0Sstevel@tonic-gate 		}
622*0Sstevel@tonic-gate 		zlogp = &logsys;	/* Log errors to syslog */
623*0Sstevel@tonic-gate 	}
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate 	switch (zstate) {
626*0Sstevel@tonic-gate 	case ZONE_STATE_CONFIGURED:
627*0Sstevel@tonic-gate 	case ZONE_STATE_INCOMPLETE:
628*0Sstevel@tonic-gate 		/*
629*0Sstevel@tonic-gate 		 * Not our area of expertise; we just print a nice message
630*0Sstevel@tonic-gate 		 * and die off.
631*0Sstevel@tonic-gate 		 */
632*0Sstevel@tonic-gate 		switch (cmd) {
633*0Sstevel@tonic-gate 		case Z_READY:
634*0Sstevel@tonic-gate 			cmd_str = "ready";
635*0Sstevel@tonic-gate 			break;
636*0Sstevel@tonic-gate 		case Z_BOOT:
637*0Sstevel@tonic-gate 			cmd_str = "boot";
638*0Sstevel@tonic-gate 			break;
639*0Sstevel@tonic-gate 		case Z_HALT:
640*0Sstevel@tonic-gate 			cmd_str = "halt";
641*0Sstevel@tonic-gate 			break;
642*0Sstevel@tonic-gate 		case Z_REBOOT:
643*0Sstevel@tonic-gate 			cmd_str = "reboot";
644*0Sstevel@tonic-gate 			break;
645*0Sstevel@tonic-gate 		}
646*0Sstevel@tonic-gate 		assert(cmd_str != NULL);
647*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
648*0Sstevel@tonic-gate 		    "%s operation is invalid for zones in state '%s'",
649*0Sstevel@tonic-gate 		    cmd_str, zone_state_str(zstate));
650*0Sstevel@tonic-gate 		break;
651*0Sstevel@tonic-gate 
652*0Sstevel@tonic-gate 	case ZONE_STATE_INSTALLED:
653*0Sstevel@tonic-gate 		switch (cmd) {
654*0Sstevel@tonic-gate 		case Z_READY:
655*0Sstevel@tonic-gate 			rval = zone_ready(zlogp);
656*0Sstevel@tonic-gate 			if (rval == 0)
657*0Sstevel@tonic-gate 				eventstream_write(Z_EVT_ZONE_READIED);
658*0Sstevel@tonic-gate 			break;
659*0Sstevel@tonic-gate 		case Z_BOOT:
660*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_BOOTING);
661*0Sstevel@tonic-gate 			if ((rval = zone_ready(zlogp)) == 0)
662*0Sstevel@tonic-gate 				rval = zone_bootup(zlogp, zargp->bootbuf);
663*0Sstevel@tonic-gate 			audit_put_record(zlogp, uc, rval, "boot");
664*0Sstevel@tonic-gate 			if (rval != 0) {
665*0Sstevel@tonic-gate 				bringup_failure_recovery = B_TRUE;
666*0Sstevel@tonic-gate 				(void) zone_halt(zlogp);
667*0Sstevel@tonic-gate 			}
668*0Sstevel@tonic-gate 			break;
669*0Sstevel@tonic-gate 		case Z_HALT:
670*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
671*0Sstevel@tonic-gate 				abort();
672*0Sstevel@tonic-gate 			/*
673*0Sstevel@tonic-gate 			 * We could have two clients racing to halt this
674*0Sstevel@tonic-gate 			 * zone; the second client loses, but his request
675*0Sstevel@tonic-gate 			 * doesn't fail, since the zone is now in the desired
676*0Sstevel@tonic-gate 			 * state.
677*0Sstevel@tonic-gate 			 */
678*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "zone is already halted");
679*0Sstevel@tonic-gate 			rval = 0;
680*0Sstevel@tonic-gate 			break;
681*0Sstevel@tonic-gate 		case Z_REBOOT:
682*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
683*0Sstevel@tonic-gate 				abort();
684*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s operation is invalid "
685*0Sstevel@tonic-gate 			    "for zones in state '%s'", "reboot",
686*0Sstevel@tonic-gate 			    zone_state_str(zstate));
687*0Sstevel@tonic-gate 			rval = -1;
688*0Sstevel@tonic-gate 			break;
689*0Sstevel@tonic-gate 		case Z_NOTE_UNINSTALLING:
690*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
691*0Sstevel@tonic-gate 				abort();
692*0Sstevel@tonic-gate 			/*
693*0Sstevel@tonic-gate 			 * Tell the console to print out a message about this.
694*0Sstevel@tonic-gate 			 * Once it does, we will be in_death_throes.
695*0Sstevel@tonic-gate 			 */
696*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_UNINSTALLING);
697*0Sstevel@tonic-gate 			break;
698*0Sstevel@tonic-gate 		}
699*0Sstevel@tonic-gate 		break;
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate 	case ZONE_STATE_READY:
702*0Sstevel@tonic-gate 		switch (cmd) {
703*0Sstevel@tonic-gate 		case Z_READY:
704*0Sstevel@tonic-gate 			/*
705*0Sstevel@tonic-gate 			 * We could have two clients racing to ready this
706*0Sstevel@tonic-gate 			 * zone; the second client loses, but his request
707*0Sstevel@tonic-gate 			 * doesn't fail, since the zone is now in the desired
708*0Sstevel@tonic-gate 			 * state.
709*0Sstevel@tonic-gate 			 */
710*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "zone is already ready");
711*0Sstevel@tonic-gate 			rval = 0;
712*0Sstevel@tonic-gate 			break;
713*0Sstevel@tonic-gate 		case Z_BOOT:
714*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_BOOTING);
715*0Sstevel@tonic-gate 			rval = zone_bootup(zlogp, zargp->bootbuf);
716*0Sstevel@tonic-gate 			audit_put_record(zlogp, uc, rval, "boot");
717*0Sstevel@tonic-gate 			if (rval != 0) {
718*0Sstevel@tonic-gate 				bringup_failure_recovery = B_TRUE;
719*0Sstevel@tonic-gate 				(void) zone_halt(zlogp);
720*0Sstevel@tonic-gate 			}
721*0Sstevel@tonic-gate 			break;
722*0Sstevel@tonic-gate 		case Z_HALT:
723*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
724*0Sstevel@tonic-gate 				abort();
725*0Sstevel@tonic-gate 			if ((rval = zone_halt(zlogp)) != 0)
726*0Sstevel@tonic-gate 				break;
727*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_HALTED);
728*0Sstevel@tonic-gate 			break;
729*0Sstevel@tonic-gate 		case Z_REBOOT:
730*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
731*0Sstevel@tonic-gate 				abort();
732*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s operation is invalid "
733*0Sstevel@tonic-gate 			    "for zones in state '%s'", "reboot",
734*0Sstevel@tonic-gate 			    zone_state_str(zstate));
735*0Sstevel@tonic-gate 			rval = -1;
736*0Sstevel@tonic-gate 			break;
737*0Sstevel@tonic-gate 		case Z_NOTE_UNINSTALLING:
738*0Sstevel@tonic-gate 			if (kernelcall)	/* Invalid; can't happen */
739*0Sstevel@tonic-gate 				abort();
740*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s operation is "
741*0Sstevel@tonic-gate 			    "invalid for zones in state '%s'",
742*0Sstevel@tonic-gate 			    "note_uninstall", zone_state_str(zstate));
743*0Sstevel@tonic-gate 			rval = -1;
744*0Sstevel@tonic-gate 			break;
745*0Sstevel@tonic-gate 		}
746*0Sstevel@tonic-gate 		break;
747*0Sstevel@tonic-gate 
748*0Sstevel@tonic-gate 	case ZONE_STATE_RUNNING:
749*0Sstevel@tonic-gate 	case ZONE_STATE_SHUTTING_DOWN:
750*0Sstevel@tonic-gate 	case ZONE_STATE_DOWN:
751*0Sstevel@tonic-gate 		switch (cmd) {
752*0Sstevel@tonic-gate 		case Z_READY:
753*0Sstevel@tonic-gate 			if ((rval = zone_halt(zlogp)) != 0)
754*0Sstevel@tonic-gate 				break;
755*0Sstevel@tonic-gate 			if ((rval = zone_ready(zlogp)) == 0)
756*0Sstevel@tonic-gate 				eventstream_write(Z_EVT_ZONE_READIED);
757*0Sstevel@tonic-gate 			break;
758*0Sstevel@tonic-gate 		case Z_BOOT:
759*0Sstevel@tonic-gate 			/*
760*0Sstevel@tonic-gate 			 * We could have two clients racing to boot this
761*0Sstevel@tonic-gate 			 * zone; the second client loses, but his request
762*0Sstevel@tonic-gate 			 * doesn't fail, since the zone is now in the desired
763*0Sstevel@tonic-gate 			 * state.
764*0Sstevel@tonic-gate 			 */
765*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "zone is already booted");
766*0Sstevel@tonic-gate 			rval = 0;
767*0Sstevel@tonic-gate 			break;
768*0Sstevel@tonic-gate 		case Z_HALT:
769*0Sstevel@tonic-gate 			if ((rval = zone_halt(zlogp)) != 0)
770*0Sstevel@tonic-gate 				break;
771*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_HALTED);
772*0Sstevel@tonic-gate 			break;
773*0Sstevel@tonic-gate 		case Z_REBOOT:
774*0Sstevel@tonic-gate 			eventstream_write(Z_EVT_ZONE_REBOOTING);
775*0Sstevel@tonic-gate 			if ((rval = zone_halt(zlogp)) != 0)
776*0Sstevel@tonic-gate 				break;
777*0Sstevel@tonic-gate 			if ((rval = zone_ready(zlogp)) == 0) {
778*0Sstevel@tonic-gate 				rval = zone_bootup(zlogp, "");
779*0Sstevel@tonic-gate 				audit_put_record(zlogp, uc, rval, "reboot");
780*0Sstevel@tonic-gate 				if (rval != 0)
781*0Sstevel@tonic-gate 					(void) zone_halt(zlogp);
782*0Sstevel@tonic-gate 			}
783*0Sstevel@tonic-gate 			break;
784*0Sstevel@tonic-gate 		case Z_NOTE_UNINSTALLING:
785*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s operation is "
786*0Sstevel@tonic-gate 			    "invalid for zones in state '%s'",
787*0Sstevel@tonic-gate 			    "note_uninstall", zone_state_str(zstate));
788*0Sstevel@tonic-gate 			rval = -1;
789*0Sstevel@tonic-gate 			break;
790*0Sstevel@tonic-gate 		}
791*0Sstevel@tonic-gate 		break;
792*0Sstevel@tonic-gate 	default:
793*0Sstevel@tonic-gate 		abort();
794*0Sstevel@tonic-gate 	}
795*0Sstevel@tonic-gate 
796*0Sstevel@tonic-gate 	/*
797*0Sstevel@tonic-gate 	 * Because the state of the zone may have changed, we make sure
798*0Sstevel@tonic-gate 	 * to wake the console poller, which is in charge of initiating
799*0Sstevel@tonic-gate 	 * the shutdown procedure as necessary.
800*0Sstevel@tonic-gate 	 */
801*0Sstevel@tonic-gate 	eventstream_write(Z_EVT_NULL);
802*0Sstevel@tonic-gate 
803*0Sstevel@tonic-gate out:
804*0Sstevel@tonic-gate 	(void) mutex_unlock(&lock);
805*0Sstevel@tonic-gate 	if (kernelcall) {
806*0Sstevel@tonic-gate 		rvalp = NULL;
807*0Sstevel@tonic-gate 		rlen = 0;
808*0Sstevel@tonic-gate 	} else {
809*0Sstevel@tonic-gate 		rvalp->rval = rval;
810*0Sstevel@tonic-gate 	}
811*0Sstevel@tonic-gate 	if (uc != NULL)
812*0Sstevel@tonic-gate 		ucred_free(uc);
813*0Sstevel@tonic-gate 	(void) door_return((char *)rvalp, rlen, NULL, 0);
814*0Sstevel@tonic-gate 	thr_exit(NULL);
815*0Sstevel@tonic-gate }
816*0Sstevel@tonic-gate 
817*0Sstevel@tonic-gate static int
818*0Sstevel@tonic-gate setup_door(zlog_t *zlogp)
819*0Sstevel@tonic-gate {
820*0Sstevel@tonic-gate 	if ((zone_door = door_create(server, NULL,
821*0Sstevel@tonic-gate 	    DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
822*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "door_create");
823*0Sstevel@tonic-gate 		return (-1);
824*0Sstevel@tonic-gate 	}
825*0Sstevel@tonic-gate 	(void) fdetach(zone_door_path);
826*0Sstevel@tonic-gate 
827*0Sstevel@tonic-gate 	if (fattach(zone_door, zone_door_path) != 0) {
828*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
829*0Sstevel@tonic-gate 		(void) door_revoke(zone_door);
830*0Sstevel@tonic-gate 		(void) fdetach(zone_door_path);
831*0Sstevel@tonic-gate 		zone_door = -1;
832*0Sstevel@tonic-gate 		return (-1);
833*0Sstevel@tonic-gate 	}
834*0Sstevel@tonic-gate 	return (0);
835*0Sstevel@tonic-gate }
836*0Sstevel@tonic-gate 
837*0Sstevel@tonic-gate /*
838*0Sstevel@tonic-gate  * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
839*0Sstevel@tonic-gate  * is where zoneadmd itself will check to see that another instance of
840*0Sstevel@tonic-gate  * zoneadmd isn't already controlling this zone.
841*0Sstevel@tonic-gate  *
842*0Sstevel@tonic-gate  * The idea here is that we want to open the path to which we will
843*0Sstevel@tonic-gate  * attach our door, lock it, and then make sure that no-one has beat us
844*0Sstevel@tonic-gate  * to fattach(3c)ing onto it.
845*0Sstevel@tonic-gate  *
846*0Sstevel@tonic-gate  * fattach(3c) is really a mount, so there are actually two possible
847*0Sstevel@tonic-gate  * vnodes we could be dealing with.  Our strategy is as follows:
848*0Sstevel@tonic-gate  *
849*0Sstevel@tonic-gate  * - If the file we opened is a regular file (common case):
850*0Sstevel@tonic-gate  * 	There is no fattach(3c)ed door, so we have a chance of becoming
851*0Sstevel@tonic-gate  * 	the managing zoneadmd. We attempt to lock the file: if it is
852*0Sstevel@tonic-gate  * 	already locked, that means someone else raced us here, so we
853*0Sstevel@tonic-gate  * 	lose and give up.  zoneadm(1m) will try to contact the zoneadmd
854*0Sstevel@tonic-gate  * 	that beat us to it.
855*0Sstevel@tonic-gate  *
856*0Sstevel@tonic-gate  * - If the file we opened is a namefs file:
857*0Sstevel@tonic-gate  * 	This means there is already an established door fattach(3c)'ed
858*0Sstevel@tonic-gate  * 	to the rendezvous path.  We've lost the race, so we give up.
859*0Sstevel@tonic-gate  * 	Note that in this case we also try to grab the file lock, and
860*0Sstevel@tonic-gate  * 	will succeed in acquiring it since the vnode locked by the
861*0Sstevel@tonic-gate  * 	"winning" zoneadmd was a regular one, and the one we locked was
862*0Sstevel@tonic-gate  * 	the fattach(3c)'ed door node.  At any rate, no harm is done, and
863*0Sstevel@tonic-gate  * 	we just return to zoneadm(1m) which knows to retry.
864*0Sstevel@tonic-gate  */
865*0Sstevel@tonic-gate static int
866*0Sstevel@tonic-gate make_daemon_exclusive(zlog_t *zlogp)
867*0Sstevel@tonic-gate {
868*0Sstevel@tonic-gate 	int doorfd = -1;
869*0Sstevel@tonic-gate 	int err, ret = -1;
870*0Sstevel@tonic-gate 	struct stat st;
871*0Sstevel@tonic-gate 	struct flock flock;
872*0Sstevel@tonic-gate 	zone_state_t zstate;
873*0Sstevel@tonic-gate 
874*0Sstevel@tonic-gate top:
875*0Sstevel@tonic-gate 	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
876*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to get zone state: %s\n",
877*0Sstevel@tonic-gate 		    zonecfg_strerror(err));
878*0Sstevel@tonic-gate 		goto out;
879*0Sstevel@tonic-gate 	}
880*0Sstevel@tonic-gate 	if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
881*0Sstevel@tonic-gate 	    S_IREAD|S_IWRITE)) < 0) {
882*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
883*0Sstevel@tonic-gate 		goto out;
884*0Sstevel@tonic-gate 	}
885*0Sstevel@tonic-gate 	if (fstat(doorfd, &st) < 0) {
886*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
887*0Sstevel@tonic-gate 		goto out;
888*0Sstevel@tonic-gate 	}
889*0Sstevel@tonic-gate 	/*
890*0Sstevel@tonic-gate 	 * Lock the file to synchronize with other zoneadmd
891*0Sstevel@tonic-gate 	 */
892*0Sstevel@tonic-gate 	flock.l_type = F_WRLCK;
893*0Sstevel@tonic-gate 	flock.l_whence = SEEK_SET;
894*0Sstevel@tonic-gate 	flock.l_start = (off_t)0;
895*0Sstevel@tonic-gate 	flock.l_len = (off_t)0;
896*0Sstevel@tonic-gate 	if (fcntl(doorfd, F_SETLK, &flock) < 0) {
897*0Sstevel@tonic-gate 		/*
898*0Sstevel@tonic-gate 		 * Someone else raced us here and grabbed the lock file
899*0Sstevel@tonic-gate 		 * first.  A warning here is inappropriate since nothing
900*0Sstevel@tonic-gate 		 * went wrong.
901*0Sstevel@tonic-gate 		 */
902*0Sstevel@tonic-gate 		goto out;
903*0Sstevel@tonic-gate 	}
904*0Sstevel@tonic-gate 
905*0Sstevel@tonic-gate 	if (strcmp(st.st_fstype, "namefs") == 0) {
906*0Sstevel@tonic-gate 		struct door_info info;
907*0Sstevel@tonic-gate 
908*0Sstevel@tonic-gate 		/*
909*0Sstevel@tonic-gate 		 * There is already something fattach()'ed to this file.
910*0Sstevel@tonic-gate 		 * Lets see what the door is up to.
911*0Sstevel@tonic-gate 		 */
912*0Sstevel@tonic-gate 		if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
913*0Sstevel@tonic-gate 			/*
914*0Sstevel@tonic-gate 			 * Another zoneadmd process seems to be in
915*0Sstevel@tonic-gate 			 * control of the situation and we don't need to
916*0Sstevel@tonic-gate 			 * be here.  A warning here is inappropriate
917*0Sstevel@tonic-gate 			 * since nothing went wrong.
918*0Sstevel@tonic-gate 			 *
919*0Sstevel@tonic-gate 			 * If the door has been revoked, the zoneadmd
920*0Sstevel@tonic-gate 			 * process currently managing the zone is going
921*0Sstevel@tonic-gate 			 * away.  We'll return control to zoneadm(1m)
922*0Sstevel@tonic-gate 			 * which will try again (by which time zoneadmd
923*0Sstevel@tonic-gate 			 * will hopefully have exited).
924*0Sstevel@tonic-gate 			 */
925*0Sstevel@tonic-gate 			goto out;
926*0Sstevel@tonic-gate 		}
927*0Sstevel@tonic-gate 
928*0Sstevel@tonic-gate 		/*
929*0Sstevel@tonic-gate 		 * If we got this far, there's a fattach(3c)'ed door
930*0Sstevel@tonic-gate 		 * that belongs to a process that has exited, which can
931*0Sstevel@tonic-gate 		 * happen if the previous zoneadmd died unexpectedly.
932*0Sstevel@tonic-gate 		 *
933*0Sstevel@tonic-gate 		 * Let user know that something is amiss, but that we can
934*0Sstevel@tonic-gate 		 * recover; if the zone is in the installed state, then don't
935*0Sstevel@tonic-gate 		 * message, since having a running zoneadmd isn't really
936*0Sstevel@tonic-gate 		 * expected/needed.  We want to keep occurences of this message
937*0Sstevel@tonic-gate 		 * limited to times when zoneadmd is picking back up from a
938*0Sstevel@tonic-gate 		 * zoneadmd that died while the zone was in some non-trivial
939*0Sstevel@tonic-gate 		 * state.
940*0Sstevel@tonic-gate 		 */
941*0Sstevel@tonic-gate 		if (zstate > ZONE_STATE_INSTALLED) {
942*0Sstevel@tonic-gate 			zerror(zlogp, B_FALSE,
943*0Sstevel@tonic-gate 			    "zone '%s': WARNING: zone is in state '%s', but "
944*0Sstevel@tonic-gate 			    "zoneadmd does not appear to be available; "
945*0Sstevel@tonic-gate 			    "restarted zoneadmd to recover.",
946*0Sstevel@tonic-gate 			    zone_name, zone_state_str(zstate));
947*0Sstevel@tonic-gate 		}
948*0Sstevel@tonic-gate 
949*0Sstevel@tonic-gate 		(void) fdetach(zone_door_path);
950*0Sstevel@tonic-gate 		(void) close(doorfd);
951*0Sstevel@tonic-gate 		goto top;
952*0Sstevel@tonic-gate 	}
953*0Sstevel@tonic-gate 	ret = 0;
954*0Sstevel@tonic-gate out:
955*0Sstevel@tonic-gate 	(void) close(doorfd);
956*0Sstevel@tonic-gate 	return (ret);
957*0Sstevel@tonic-gate }
958*0Sstevel@tonic-gate 
959*0Sstevel@tonic-gate int
960*0Sstevel@tonic-gate main(int argc, char *argv[])
961*0Sstevel@tonic-gate {
962*0Sstevel@tonic-gate 	int opt;
963*0Sstevel@tonic-gate 	zoneid_t zid;
964*0Sstevel@tonic-gate 	priv_set_t *privset;
965*0Sstevel@tonic-gate 	zone_state_t zstate;
966*0Sstevel@tonic-gate 	char parents_locale[MAXPATHLEN];
967*0Sstevel@tonic-gate 	int err;
968*0Sstevel@tonic-gate 
969*0Sstevel@tonic-gate 	pid_t pid;
970*0Sstevel@tonic-gate 	sigset_t blockset;
971*0Sstevel@tonic-gate 	sigset_t block_cld;
972*0Sstevel@tonic-gate 
973*0Sstevel@tonic-gate 	struct {
974*0Sstevel@tonic-gate 		sema_t sem;
975*0Sstevel@tonic-gate 		int status;
976*0Sstevel@tonic-gate 		zlog_t log;
977*0Sstevel@tonic-gate 	} *shstate;
978*0Sstevel@tonic-gate 	size_t shstatelen = getpagesize();
979*0Sstevel@tonic-gate 
980*0Sstevel@tonic-gate 	zlog_t errlog;
981*0Sstevel@tonic-gate 	zlog_t *zlogp;
982*0Sstevel@tonic-gate 
983*0Sstevel@tonic-gate 	progname = get_execbasename(argv[0]);
984*0Sstevel@tonic-gate 
985*0Sstevel@tonic-gate 	/*
986*0Sstevel@tonic-gate 	 * Make sure stderr is unbuffered
987*0Sstevel@tonic-gate 	 */
988*0Sstevel@tonic-gate 	(void) setbuffer(stderr, NULL, 0);
989*0Sstevel@tonic-gate 
990*0Sstevel@tonic-gate 	/*
991*0Sstevel@tonic-gate 	 * Get out of the way of mounted filesystems, since we will daemonize
992*0Sstevel@tonic-gate 	 * soon.
993*0Sstevel@tonic-gate 	 */
994*0Sstevel@tonic-gate 	(void) chdir("/");
995*0Sstevel@tonic-gate 
996*0Sstevel@tonic-gate 	/*
997*0Sstevel@tonic-gate 	 * Use the default system umask per PSARC 1998/110 rather than
998*0Sstevel@tonic-gate 	 * anything that may have been set by the caller.
999*0Sstevel@tonic-gate 	 */
1000*0Sstevel@tonic-gate 	(void) umask(CMASK);
1001*0Sstevel@tonic-gate 
1002*0Sstevel@tonic-gate 	/*
1003*0Sstevel@tonic-gate 	 * Initially we want to use our parent's locale.
1004*0Sstevel@tonic-gate 	 */
1005*0Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
1006*0Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
1007*0Sstevel@tonic-gate 	(void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1008*0Sstevel@tonic-gate 	    sizeof (parents_locale));
1009*0Sstevel@tonic-gate 
1010*0Sstevel@tonic-gate 	/*
1011*0Sstevel@tonic-gate 	 * This zlog_t is used for writing to stderr
1012*0Sstevel@tonic-gate 	 */
1013*0Sstevel@tonic-gate 	errlog.logfile = stderr;
1014*0Sstevel@tonic-gate 	errlog.buflen = errlog.loglen = 0;
1015*0Sstevel@tonic-gate 	errlog.buf = errlog.log = NULL;
1016*0Sstevel@tonic-gate 	errlog.locale = parents_locale;
1017*0Sstevel@tonic-gate 
1018*0Sstevel@tonic-gate 	/*
1019*0Sstevel@tonic-gate 	 * We start off writing to stderr until we're ready to daemonize.
1020*0Sstevel@tonic-gate 	 */
1021*0Sstevel@tonic-gate 	zlogp = &errlog;
1022*0Sstevel@tonic-gate 
1023*0Sstevel@tonic-gate 	/*
1024*0Sstevel@tonic-gate 	 * Process options.
1025*0Sstevel@tonic-gate 	 */
1026*0Sstevel@tonic-gate 	while ((opt = getopt(argc, argv, "z:")) != EOF) {
1027*0Sstevel@tonic-gate 		switch (opt) {
1028*0Sstevel@tonic-gate 		case 'z':
1029*0Sstevel@tonic-gate 			zone_name = optarg;
1030*0Sstevel@tonic-gate 			break;
1031*0Sstevel@tonic-gate 		default:
1032*0Sstevel@tonic-gate 			usage();
1033*0Sstevel@tonic-gate 		}
1034*0Sstevel@tonic-gate 	}
1035*0Sstevel@tonic-gate 
1036*0Sstevel@tonic-gate 	if (zone_name == NULL)
1037*0Sstevel@tonic-gate 		usage();
1038*0Sstevel@tonic-gate 
1039*0Sstevel@tonic-gate 	/*
1040*0Sstevel@tonic-gate 	 * Because usage() prints directly to stderr, it has gettext()
1041*0Sstevel@tonic-gate 	 * wrapping, which depends on the locale.  But since zerror() calls
1042*0Sstevel@tonic-gate 	 * localize() which tweaks the locale, it is not safe to call zerror()
1043*0Sstevel@tonic-gate 	 * until after the last call to usage().  Fortunately, the last call
1044*0Sstevel@tonic-gate 	 * to usage() is just above and the first call to zerror() is just
1045*0Sstevel@tonic-gate 	 * below.  Don't mess this up.
1046*0Sstevel@tonic-gate 	 */
1047*0Sstevel@tonic-gate 	if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1048*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1049*0Sstevel@tonic-gate 		    GLOBAL_ZONENAME);
1050*0Sstevel@tonic-gate 		return (1);
1051*0Sstevel@tonic-gate 	}
1052*0Sstevel@tonic-gate 
1053*0Sstevel@tonic-gate 	if (zone_get_id(zone_name, &zid) != 0) {
1054*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "could not manage %s: %s\n", zone_name,
1055*0Sstevel@tonic-gate 		    zonecfg_strerror(Z_NO_ZONE));
1056*0Sstevel@tonic-gate 		return (1);
1057*0Sstevel@tonic-gate 	}
1058*0Sstevel@tonic-gate 
1059*0Sstevel@tonic-gate 	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1060*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to get zone state: %s\n",
1061*0Sstevel@tonic-gate 		    zonecfg_strerror(err));
1062*0Sstevel@tonic-gate 		return (1);
1063*0Sstevel@tonic-gate 	}
1064*0Sstevel@tonic-gate 	if (zstate < ZONE_STATE_INSTALLED) {
1065*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
1066*0Sstevel@tonic-gate 		    "cannot manage a zone which is in state '%s'",
1067*0Sstevel@tonic-gate 		    zone_state_str(zstate));
1068*0Sstevel@tonic-gate 		return (1);
1069*0Sstevel@tonic-gate 	}
1070*0Sstevel@tonic-gate 
1071*0Sstevel@tonic-gate 	/*
1072*0Sstevel@tonic-gate 	 * Check that we have all privileges.  It would be nice to pare
1073*0Sstevel@tonic-gate 	 * this down, but this is at least a first cut.
1074*0Sstevel@tonic-gate 	 */
1075*0Sstevel@tonic-gate 	if ((privset = priv_allocset()) == NULL) {
1076*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1077*0Sstevel@tonic-gate 		return (1);
1078*0Sstevel@tonic-gate 	}
1079*0Sstevel@tonic-gate 
1080*0Sstevel@tonic-gate 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1081*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "getppriv");
1082*0Sstevel@tonic-gate 		priv_freeset(privset);
1083*0Sstevel@tonic-gate 		return (1);
1084*0Sstevel@tonic-gate 	}
1085*0Sstevel@tonic-gate 
1086*0Sstevel@tonic-gate 	if (priv_isfullset(privset) == B_FALSE) {
1087*0Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
1088*0Sstevel@tonic-gate 		    "run this command (all privs required)\n");
1089*0Sstevel@tonic-gate 		priv_freeset(privset);
1090*0Sstevel@tonic-gate 		return (1);
1091*0Sstevel@tonic-gate 	}
1092*0Sstevel@tonic-gate 	priv_freeset(privset);
1093*0Sstevel@tonic-gate 
1094*0Sstevel@tonic-gate 	if (mkzonedir(zlogp) != 0)
1095*0Sstevel@tonic-gate 		return (1);
1096*0Sstevel@tonic-gate 
1097*0Sstevel@tonic-gate 	/*
1098*0Sstevel@tonic-gate 	 * Pre-fork: setup shared state
1099*0Sstevel@tonic-gate 	 */
1100*0Sstevel@tonic-gate 	if ((shstate = (void *)mmap(NULL, shstatelen,
1101*0Sstevel@tonic-gate 	    PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
1102*0Sstevel@tonic-gate 	    MAP_FAILED) {
1103*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "mmap");
1104*0Sstevel@tonic-gate 		return (1);
1105*0Sstevel@tonic-gate 	}
1106*0Sstevel@tonic-gate 	if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
1107*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
1108*0Sstevel@tonic-gate 		(void) munmap((char *)shstate, shstatelen);
1109*0Sstevel@tonic-gate 		return (1);
1110*0Sstevel@tonic-gate 	}
1111*0Sstevel@tonic-gate 	shstate->log.logfile = NULL;
1112*0Sstevel@tonic-gate 	shstate->log.buflen = shstatelen - sizeof (*shstate);
1113*0Sstevel@tonic-gate 	shstate->log.loglen = shstate->log.buflen;
1114*0Sstevel@tonic-gate 	shstate->log.buf = (char *)shstate + sizeof (*shstate);
1115*0Sstevel@tonic-gate 	shstate->log.log = shstate->log.buf;
1116*0Sstevel@tonic-gate 	shstate->log.locale = parents_locale;
1117*0Sstevel@tonic-gate 	shstate->status = -1;
1118*0Sstevel@tonic-gate 
1119*0Sstevel@tonic-gate 	/*
1120*0Sstevel@tonic-gate 	 * We need a SIGCHLD handler so the sema_wait() below will wake
1121*0Sstevel@tonic-gate 	 * up if the child dies without doing a sema_post().
1122*0Sstevel@tonic-gate 	 */
1123*0Sstevel@tonic-gate 	(void) sigset(SIGCHLD, sigchld);
1124*0Sstevel@tonic-gate 	/*
1125*0Sstevel@tonic-gate 	 * We must mask SIGCHLD until after we've coped with the fork
1126*0Sstevel@tonic-gate 	 * sufficiently to deal with it; otherwise we can race and
1127*0Sstevel@tonic-gate 	 * receive the signal before pid has been initialized
1128*0Sstevel@tonic-gate 	 * (yes, this really happens).
1129*0Sstevel@tonic-gate 	 */
1130*0Sstevel@tonic-gate 	(void) sigemptyset(&block_cld);
1131*0Sstevel@tonic-gate 	(void) sigaddset(&block_cld, SIGCHLD);
1132*0Sstevel@tonic-gate 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1133*0Sstevel@tonic-gate 
1134*0Sstevel@tonic-gate 	/*
1135*0Sstevel@tonic-gate 	 * Do not let another thread localize a message while we are forking.
1136*0Sstevel@tonic-gate 	 */
1137*0Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
1138*0Sstevel@tonic-gate 	pid = fork();
1139*0Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
1140*0Sstevel@tonic-gate 	if (pid == -1) {
1141*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not fork");
1142*0Sstevel@tonic-gate 		return (1);
1143*0Sstevel@tonic-gate 
1144*0Sstevel@tonic-gate 	} else if (pid > 0) { /* parent */
1145*0Sstevel@tonic-gate 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1146*0Sstevel@tonic-gate 		/*
1147*0Sstevel@tonic-gate 		 * This marks a window of vulnerability in which we receive
1148*0Sstevel@tonic-gate 		 * the SIGCLD before falling into sema_wait (normally we would
1149*0Sstevel@tonic-gate 		 * get woken up from sema_wait with EINTR upon receipt of
1150*0Sstevel@tonic-gate 		 * SIGCLD).  So we may need to use some other scheme like
1151*0Sstevel@tonic-gate 		 * sema_posting in the sigcld handler.
1152*0Sstevel@tonic-gate 		 * blech
1153*0Sstevel@tonic-gate 		 */
1154*0Sstevel@tonic-gate 		(void) sema_wait(&shstate->sem);
1155*0Sstevel@tonic-gate 		(void) sema_destroy(&shstate->sem);
1156*0Sstevel@tonic-gate 		if (shstate->status != 0)
1157*0Sstevel@tonic-gate 			(void) waitpid(pid, NULL, WNOHANG);
1158*0Sstevel@tonic-gate 		/*
1159*0Sstevel@tonic-gate 		 * It's ok if we die with SIGPIPE.  It's not like we could have
1160*0Sstevel@tonic-gate 		 * done anything about it.
1161*0Sstevel@tonic-gate 		 */
1162*0Sstevel@tonic-gate 		(void) fprintf(stderr, "%s", shstate->log.buf);
1163*0Sstevel@tonic-gate 		_exit(shstate->status == 0 ? 0 : 1);
1164*0Sstevel@tonic-gate 	}
1165*0Sstevel@tonic-gate 
1166*0Sstevel@tonic-gate 	/*
1167*0Sstevel@tonic-gate 	 * The child charges on.
1168*0Sstevel@tonic-gate 	 */
1169*0Sstevel@tonic-gate 	(void) sigset(SIGCHLD, SIG_DFL);
1170*0Sstevel@tonic-gate 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1171*0Sstevel@tonic-gate 
1172*0Sstevel@tonic-gate 	/*
1173*0Sstevel@tonic-gate 	 * SIGPIPE can be delivered if we write to a socket for which the
1174*0Sstevel@tonic-gate 	 * peer endpoint is gone.  That can lead to too-early termination
1175*0Sstevel@tonic-gate 	 * of zoneadmd, and that's not good eats.
1176*0Sstevel@tonic-gate 	 */
1177*0Sstevel@tonic-gate 	(void) sigset(SIGPIPE, SIG_IGN);
1178*0Sstevel@tonic-gate 	/*
1179*0Sstevel@tonic-gate 	 * Stop using stderr
1180*0Sstevel@tonic-gate 	 */
1181*0Sstevel@tonic-gate 	zlogp = &shstate->log;
1182*0Sstevel@tonic-gate 
1183*0Sstevel@tonic-gate 	/*
1184*0Sstevel@tonic-gate 	 * We don't need stdout/stderr from now on.
1185*0Sstevel@tonic-gate 	 */
1186*0Sstevel@tonic-gate 	closefrom(0);
1187*0Sstevel@tonic-gate 
1188*0Sstevel@tonic-gate 	/*
1189*0Sstevel@tonic-gate 	 * Initialize the syslog zlog_t.  This needs to be done after
1190*0Sstevel@tonic-gate 	 * the call to closefrom().
1191*0Sstevel@tonic-gate 	 */
1192*0Sstevel@tonic-gate 	logsys.buf = logsys.log = NULL;
1193*0Sstevel@tonic-gate 	logsys.buflen = logsys.loglen = 0;
1194*0Sstevel@tonic-gate 	logsys.logfile = NULL;
1195*0Sstevel@tonic-gate 	logsys.locale = DEFAULT_LOCALE;
1196*0Sstevel@tonic-gate 
1197*0Sstevel@tonic-gate 	openlog("zoneadmd", LOG_PID, LOG_DAEMON);
1198*0Sstevel@tonic-gate 
1199*0Sstevel@tonic-gate 	/*
1200*0Sstevel@tonic-gate 	 * The eventstream is used to publish state changes in the zone
1201*0Sstevel@tonic-gate 	 * from the door threads to the console I/O poller.
1202*0Sstevel@tonic-gate 	 */
1203*0Sstevel@tonic-gate 	if (eventstream_init() == -1) {
1204*0Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to create eventstream");
1205*0Sstevel@tonic-gate 		goto child_out;
1206*0Sstevel@tonic-gate 	}
1207*0Sstevel@tonic-gate 
1208*0Sstevel@tonic-gate 	(void) snprintf(zone_door_path, sizeof (zone_door_path),
1209*0Sstevel@tonic-gate 	    ZONE_DOOR_PATH, zone_name);
1210*0Sstevel@tonic-gate 
1211*0Sstevel@tonic-gate 	/*
1212*0Sstevel@tonic-gate 	 * See if another zoneadmd is running for this zone.  If not, then we
1213*0Sstevel@tonic-gate 	 * can now modify system state.
1214*0Sstevel@tonic-gate 	 */
1215*0Sstevel@tonic-gate 	if (make_daemon_exclusive(zlogp) == -1)
1216*0Sstevel@tonic-gate 		goto child_out;
1217*0Sstevel@tonic-gate 
1218*0Sstevel@tonic-gate 
1219*0Sstevel@tonic-gate 	/*
1220*0Sstevel@tonic-gate 	 * Create/join a new session; we need to be careful of what we do with
1221*0Sstevel@tonic-gate 	 * the console from now on so we don't end up being the session leader
1222*0Sstevel@tonic-gate 	 * for the terminal we're going to be handing out.
1223*0Sstevel@tonic-gate 	 */
1224*0Sstevel@tonic-gate 	(void) setsid();
1225*0Sstevel@tonic-gate 
1226*0Sstevel@tonic-gate 	/*
1227*0Sstevel@tonic-gate 	 * This thread shouldn't be receiving any signals; in particular,
1228*0Sstevel@tonic-gate 	 * SIGCHLD should be received by the thread doing the fork().
1229*0Sstevel@tonic-gate 	 */
1230*0Sstevel@tonic-gate 	(void) sigfillset(&blockset);
1231*0Sstevel@tonic-gate 	(void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
1232*0Sstevel@tonic-gate 
1233*0Sstevel@tonic-gate 	/*
1234*0Sstevel@tonic-gate 	 * Setup the console device and get ready to serve the console;
1235*0Sstevel@tonic-gate 	 * once this has completed, we're ready to let console clients
1236*0Sstevel@tonic-gate 	 * make an attempt to connect (they will block until
1237*0Sstevel@tonic-gate 	 * serve_console_sock() below gets called, and any pending
1238*0Sstevel@tonic-gate 	 * connection is accept()ed).
1239*0Sstevel@tonic-gate 	 */
1240*0Sstevel@tonic-gate 	if (init_console(zlogp) == -1)
1241*0Sstevel@tonic-gate 		goto child_out;
1242*0Sstevel@tonic-gate 
1243*0Sstevel@tonic-gate 	/*
1244*0Sstevel@tonic-gate 	 * Take the lock now, so that when the door server gets going, we
1245*0Sstevel@tonic-gate 	 * are guaranteed that it won't take a request until we are sure
1246*0Sstevel@tonic-gate 	 * that everything is completely set up.  See the child_out: label
1247*0Sstevel@tonic-gate 	 * below to see why this matters.
1248*0Sstevel@tonic-gate 	 */
1249*0Sstevel@tonic-gate 	(void) mutex_lock(&lock);
1250*0Sstevel@tonic-gate 
1251*0Sstevel@tonic-gate 	/*
1252*0Sstevel@tonic-gate 	 * Note: door setup must occur *after* the console is setup.
1253*0Sstevel@tonic-gate 	 * This is so that as zlogin tests the door to see if zoneadmd
1254*0Sstevel@tonic-gate 	 * is ready yet, we know that the console will get serviced
1255*0Sstevel@tonic-gate 	 * once door_info() indicates that the door is "up".
1256*0Sstevel@tonic-gate 	 */
1257*0Sstevel@tonic-gate 	if (setup_door(zlogp) == -1)
1258*0Sstevel@tonic-gate 		goto child_out;
1259*0Sstevel@tonic-gate 
1260*0Sstevel@tonic-gate 	/*
1261*0Sstevel@tonic-gate 	 * Things seem OK so far; tell the parent process that we're done
1262*0Sstevel@tonic-gate 	 * with setup tasks.  This will cause the parent to exit, signalling
1263*0Sstevel@tonic-gate 	 * to zoneadm, zlogin, or whatever forked it that we are ready to
1264*0Sstevel@tonic-gate 	 * service requests.
1265*0Sstevel@tonic-gate 	 */
1266*0Sstevel@tonic-gate 	shstate->status = 0;
1267*0Sstevel@tonic-gate 	(void) sema_post(&shstate->sem);
1268*0Sstevel@tonic-gate 	(void) munmap((char *)shstate, shstatelen);
1269*0Sstevel@tonic-gate 	shstate = NULL;
1270*0Sstevel@tonic-gate 
1271*0Sstevel@tonic-gate 	(void) mutex_unlock(&lock);
1272*0Sstevel@tonic-gate 
1273*0Sstevel@tonic-gate 	/*
1274*0Sstevel@tonic-gate 	 * zlogp is now invalid, so reset it to the syslog logger.
1275*0Sstevel@tonic-gate 	 */
1276*0Sstevel@tonic-gate 	zlogp = &logsys;
1277*0Sstevel@tonic-gate 
1278*0Sstevel@tonic-gate 	/*
1279*0Sstevel@tonic-gate 	 * Now that we are free of any parents, switch to the default locale.
1280*0Sstevel@tonic-gate 	 */
1281*0Sstevel@tonic-gate 	(void) setlocale(LC_ALL, DEFAULT_LOCALE);
1282*0Sstevel@tonic-gate 
1283*0Sstevel@tonic-gate 	/*
1284*0Sstevel@tonic-gate 	 * At this point the setup portion of main() is basically done, so
1285*0Sstevel@tonic-gate 	 * we reuse this thread to manage the zone console.  When
1286*0Sstevel@tonic-gate 	 * serve_console() has returned, we are past the point of no return
1287*0Sstevel@tonic-gate 	 * in the life of this zoneadmd.
1288*0Sstevel@tonic-gate 	 */
1289*0Sstevel@tonic-gate 	serve_console(zlogp);
1290*0Sstevel@tonic-gate 	assert(in_death_throes);
1291*0Sstevel@tonic-gate 
1292*0Sstevel@tonic-gate 	/*
1293*0Sstevel@tonic-gate 	 * This is the next-to-last part of the exit interlock.  Upon calling
1294*0Sstevel@tonic-gate 	 * fdetach(), the door will go unreferenced; once any
1295*0Sstevel@tonic-gate 	 * outstanding requests (like the door thread doing Z_HALT) are
1296*0Sstevel@tonic-gate 	 * done, the door will get an UNREF notification; when it handles
1297*0Sstevel@tonic-gate 	 * the UNREF, the door server will cause the exit.
1298*0Sstevel@tonic-gate 	 */
1299*0Sstevel@tonic-gate 	assert(!MUTEX_HELD(&lock));
1300*0Sstevel@tonic-gate 	(void) fdetach(zone_door_path);
1301*0Sstevel@tonic-gate 	for (;;)
1302*0Sstevel@tonic-gate 		(void) pause();
1303*0Sstevel@tonic-gate 
1304*0Sstevel@tonic-gate child_out:
1305*0Sstevel@tonic-gate 	assert(pid == 0);
1306*0Sstevel@tonic-gate 	if (shstate != NULL) {
1307*0Sstevel@tonic-gate 		shstate->status = -1;
1308*0Sstevel@tonic-gate 		(void) sema_post(&shstate->sem);
1309*0Sstevel@tonic-gate 		(void) munmap((char *)shstate, shstatelen);
1310*0Sstevel@tonic-gate 	}
1311*0Sstevel@tonic-gate 
1312*0Sstevel@tonic-gate 	/*
1313*0Sstevel@tonic-gate 	 * This might trigger an unref notification, but if so,
1314*0Sstevel@tonic-gate 	 * we are still holding the lock, so our call to exit will
1315*0Sstevel@tonic-gate 	 * ultimately win the race and will publish the right exit
1316*0Sstevel@tonic-gate 	 * code.
1317*0Sstevel@tonic-gate 	 */
1318*0Sstevel@tonic-gate 	if (zone_door != -1) {
1319*0Sstevel@tonic-gate 		assert(MUTEX_HELD(&lock));
1320*0Sstevel@tonic-gate 		(void) door_revoke(zone_door);
1321*0Sstevel@tonic-gate 		(void) fdetach(zone_door_path);
1322*0Sstevel@tonic-gate 	}
1323*0Sstevel@tonic-gate 	return (1); /* return from main() forcibly exits an MT process */
1324*0Sstevel@tonic-gate }
1325