xref: /onnv-gate/usr/src/cmd/zoneadmd/vplat.c (revision 766:c521de78a32f)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * This module contains functions used to bring up and tear down the
310Sstevel@tonic-gate  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
320Sstevel@tonic-gate  * interfaces, [un]configuring devices, establishing resource controls,
330Sstevel@tonic-gate  * and creating/destroying the zone in the kernel.  These actions, on
340Sstevel@tonic-gate  * the way up, ready the zone; on the way down, they halt the zone.
350Sstevel@tonic-gate  * See the much longer block comment at the beginning of zoneadmd.c
360Sstevel@tonic-gate  * for a bigger picture of how the whole program functions.
37*766Scarlsonj  *
38*766Scarlsonj  * This module also has primary responsibility for the layout of "scratch
39*766Scarlsonj  * zones."  These are mounted, but inactive, zones that are used during
40*766Scarlsonj  * operating system upgrade and potentially other administrative action.  The
41*766Scarlsonj  * scratch zone environment is similar to the miniroot environment.  The zone's
42*766Scarlsonj  * actual root is mounted read-write on /a, and the standard paths (/usr,
43*766Scarlsonj  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
44*766Scarlsonj  * This allows the administrative tools to manipulate the zone using "-R /a"
45*766Scarlsonj  * without relying on any binaries in the zone itself.
46*766Scarlsonj  *
47*766Scarlsonj  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
48*766Scarlsonj  * environment), then we must resolve the lofs mounts used there to uncover
49*766Scarlsonj  * writable (unshared) resources.  Shared resources, though, are always
50*766Scarlsonj  * read-only.  In addition, if the "same" zone with a different root path is
51*766Scarlsonj  * currently running, then "/b" inside the zone points to the running zone's
52*766Scarlsonj  * root.  This allows LU to synchronize configuration files during the upgrade
53*766Scarlsonj  * process.
54*766Scarlsonj  *
55*766Scarlsonj  * To construct this environment, this module creates a tmpfs mount on
56*766Scarlsonj  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
57*766Scarlsonj  * described above is constructed on the fly.  The zone is then created using
58*766Scarlsonj  * $ZONEPATH/lu as the root.
59*766Scarlsonj  *
60*766Scarlsonj  * Note that scratch zones are inactive.  The zone's bits are not running and
61*766Scarlsonj  * likely cannot be run correctly until upgrade is done.  Init is not running
62*766Scarlsonj  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
63*766Scarlsonj  * is not a part of the usual halt/ready/boot state machine.
640Sstevel@tonic-gate  */
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #include <sys/param.h>
670Sstevel@tonic-gate #include <sys/mount.h>
680Sstevel@tonic-gate #include <sys/mntent.h>
690Sstevel@tonic-gate #include <sys/socket.h>
700Sstevel@tonic-gate #include <sys/utsname.h>
710Sstevel@tonic-gate #include <sys/types.h>
720Sstevel@tonic-gate #include <sys/stat.h>
730Sstevel@tonic-gate #include <sys/sockio.h>
740Sstevel@tonic-gate #include <sys/stropts.h>
750Sstevel@tonic-gate #include <sys/conf.h>
760Sstevel@tonic-gate 
770Sstevel@tonic-gate #include <inet/tcp.h>
780Sstevel@tonic-gate #include <arpa/inet.h>
790Sstevel@tonic-gate #include <netinet/in.h>
800Sstevel@tonic-gate #include <net/route.h>
810Sstevel@tonic-gate #include <netdb.h>
820Sstevel@tonic-gate 
830Sstevel@tonic-gate #include <stdio.h>
840Sstevel@tonic-gate #include <errno.h>
850Sstevel@tonic-gate #include <fcntl.h>
860Sstevel@tonic-gate #include <unistd.h>
870Sstevel@tonic-gate #include <rctl.h>
880Sstevel@tonic-gate #include <stdlib.h>
890Sstevel@tonic-gate #include <string.h>
900Sstevel@tonic-gate #include <strings.h>
910Sstevel@tonic-gate #include <wait.h>
920Sstevel@tonic-gate #include <limits.h>
930Sstevel@tonic-gate #include <libgen.h>
940Sstevel@tonic-gate #include <zone.h>
950Sstevel@tonic-gate #include <assert.h>
960Sstevel@tonic-gate 
970Sstevel@tonic-gate #include <sys/mntio.h>
980Sstevel@tonic-gate #include <sys/mnttab.h>
990Sstevel@tonic-gate #include <sys/fs/autofs.h>	/* for _autofssys() */
1000Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate #include <pool.h>
1030Sstevel@tonic-gate #include <sys/pool.h>
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate #include <libzonecfg.h>
1060Sstevel@tonic-gate #include "zoneadmd.h"
1070Sstevel@tonic-gate 
1080Sstevel@tonic-gate #define	V4_ADDR_LEN	32
1090Sstevel@tonic-gate #define	V6_ADDR_LEN	128
1100Sstevel@tonic-gate 
1110Sstevel@tonic-gate /* 0755 is the default directory mode. */
1120Sstevel@tonic-gate #define	DEFAULT_DIR_MODE \
1130Sstevel@tonic-gate 	(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate #define	IPD_DEFAULT_OPTS \
1160Sstevel@tonic-gate 	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate #define	DFSTYPES	"/etc/dfs/fstypes"
1190Sstevel@tonic-gate 
/*
 * Describes one directory that should be created: the pathname and the mode
 * to create it with.
 */
struct dir_info {
	char *dir_name;		/* pathname of the directory */
	mode_t dir_mode;	/* permission bits used when creating it */
};

/*
 * The directories to create for the zone's /dev tree.  All of the pathnames
 * in this table are relative to the zonepath.
 */
static struct dir_info dev_dirs[] = {
	{ "/dev",	0755 },
	{ "/dev/dsk",	0755 },
	{ "/dev/fd",	0555 },
	{ "/dev/pts",	0755 },
	{ "/dev/rdsk",	0755 },
	{ "/dev/rmt",	0755 },
	{ "/dev/sad",	0755 },
	{ "/dev/swap",	0755 },
	{ "/dev/term",	0755 },
};
1430Sstevel@tonic-gate 
/*
 * Describes one symbolic link that should be created: the pathname of the
 * link itself and the text the link should contain.
 */
struct symlink_info {
	char *sl_source;	/* pathname of the link to create */
	char *sl_target;	/* what the link points at */
};

/*
 * The symbolic links to create under the zone's /dev.  The console-related
 * names point at zconsole; the rest are the standard I/O and dtremote links.
 * The "source" paths are relative to the zonepath.
 */
static struct symlink_info dev_symlinks[] = {
	{ "/dev/stderr",	"./fd/2" },
	{ "/dev/stdin",		"./fd/0" },
	{ "/dev/stdout",	"./fd/1" },
	{ "/dev/dtremote",	"/dev/null" },
	{ "/dev/console",	"zconsole" },
	{ "/dev/syscon",	"zconsole" },
	{ "/dev/sysmsg",	"zconsole" },
	{ "/dev/systty",	"zconsole" },
	{ "/dev/msglog",	"zconsole" },
};
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate /* for routing socket */
1690Sstevel@tonic-gate static int rts_seqno = 0;
1700Sstevel@tonic-gate 
171*766Scarlsonj /* mangled zone name when mounting in an alternate root environment */
172*766Scarlsonj static char kernzone[ZONENAME_MAX];
173*766Scarlsonj 
174*766Scarlsonj /* array of cached mount entries for resolve_lofs */
175*766Scarlsonj static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
176*766Scarlsonj 
1770Sstevel@tonic-gate /* from libsocket, not in any header file */
1780Sstevel@tonic-gate extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate /*
181*766Scarlsonj  * An optimization for build_mnttable: reallocate (and potentially copy the
182*766Scarlsonj  * data) only once every N times through the loop.
183*766Scarlsonj  */
184*766Scarlsonj #define	MNTTAB_HUNK	32
185*766Scarlsonj 
186*766Scarlsonj /*
1870Sstevel@tonic-gate  * Private autofs system call
1880Sstevel@tonic-gate  */
1890Sstevel@tonic-gate extern int _autofssys(int, void *);
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate static int
1920Sstevel@tonic-gate autofs_cleanup(zoneid_t zoneid)
1930Sstevel@tonic-gate {
1940Sstevel@tonic-gate 	/*
1950Sstevel@tonic-gate 	 * Ask autofs to unmount all trigger nodes in the given zone.
1960Sstevel@tonic-gate 	 */
1970Sstevel@tonic-gate 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate 
200*766Scarlsonj static void
201*766Scarlsonj free_mnttable(struct mnttab *mnt_array, uint_t nelem)
202*766Scarlsonj {
203*766Scarlsonj 	uint_t i;
204*766Scarlsonj 
205*766Scarlsonj 	if (mnt_array == NULL)
206*766Scarlsonj 		return;
207*766Scarlsonj 	for (i = 0; i < nelem; i++) {
208*766Scarlsonj 		free(mnt_array[i].mnt_mountp);
209*766Scarlsonj 		free(mnt_array[i].mnt_fstype);
210*766Scarlsonj 		free(mnt_array[i].mnt_special);
211*766Scarlsonj 		free(mnt_array[i].mnt_mntopts);
212*766Scarlsonj 		assert(mnt_array[i].mnt_time == NULL);
213*766Scarlsonj 	}
214*766Scarlsonj 	free(mnt_array);
215*766Scarlsonj }
216*766Scarlsonj 
217*766Scarlsonj /*
218*766Scarlsonj  * Build the mount table for the zone rooted at "zroot", storing the resulting
219*766Scarlsonj  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
220*766Scarlsonj  * array in "nelemp".
221*766Scarlsonj  */
222*766Scarlsonj static int
223*766Scarlsonj build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
224*766Scarlsonj     struct mnttab **mnt_arrayp, uint_t *nelemp)
225*766Scarlsonj {
226*766Scarlsonj 	struct mnttab mnt;
227*766Scarlsonj 	struct mnttab *mnts;
228*766Scarlsonj 	struct mnttab *mnp;
229*766Scarlsonj 	uint_t nmnt;
230*766Scarlsonj 
231*766Scarlsonj 	rewind(mnttab);
232*766Scarlsonj 	resetmnttab(mnttab);
233*766Scarlsonj 	nmnt = 0;
234*766Scarlsonj 	mnts = NULL;
235*766Scarlsonj 	while (getmntent(mnttab, &mnt) == 0) {
236*766Scarlsonj 		struct mnttab *tmp_array;
237*766Scarlsonj 
238*766Scarlsonj 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
239*766Scarlsonj 			continue;
240*766Scarlsonj 		if (nmnt % MNTTAB_HUNK == 0) {
241*766Scarlsonj 			tmp_array = realloc(mnts,
242*766Scarlsonj 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
243*766Scarlsonj 			if (tmp_array == NULL) {
244*766Scarlsonj 				free_mnttable(mnts, nmnt);
245*766Scarlsonj 				return (-1);
246*766Scarlsonj 			}
247*766Scarlsonj 			mnts = tmp_array;
248*766Scarlsonj 		}
249*766Scarlsonj 		mnp = &mnts[nmnt++];
250*766Scarlsonj 
251*766Scarlsonj 		/*
252*766Scarlsonj 		 * Zero out any fields we're not using.
253*766Scarlsonj 		 */
254*766Scarlsonj 		(void) memset(mnp, 0, sizeof (*mnp));
255*766Scarlsonj 
256*766Scarlsonj 		if (mnt.mnt_special != NULL)
257*766Scarlsonj 			mnp->mnt_special = strdup(mnt.mnt_special);
258*766Scarlsonj 		if (mnt.mnt_mntopts != NULL)
259*766Scarlsonj 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
260*766Scarlsonj 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
261*766Scarlsonj 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
262*766Scarlsonj 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
263*766Scarlsonj 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
264*766Scarlsonj 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
265*766Scarlsonj 			zerror(zlogp, B_TRUE, "memory allocation failed");
266*766Scarlsonj 			free_mnttable(mnts, nmnt);
267*766Scarlsonj 			return (-1);
268*766Scarlsonj 		}
269*766Scarlsonj 	}
270*766Scarlsonj 	*mnt_arrayp = mnts;
271*766Scarlsonj 	*nelemp = nmnt;
272*766Scarlsonj 	return (0);
273*766Scarlsonj }
274*766Scarlsonj 
275*766Scarlsonj /*
276*766Scarlsonj  * This is an optimization.  The resolve_lofs function is used quite frequently
277*766Scarlsonj  * to manipulate file paths, and on a machine with a large number of zones,
278*766Scarlsonj  * there will be a huge number of mounted file systems.  Thus, we trigger a
279*766Scarlsonj  * reread of the list of mount points
280*766Scarlsonj  */
281*766Scarlsonj static void
282*766Scarlsonj lofs_discard_mnttab(void)
283*766Scarlsonj {
284*766Scarlsonj 	free_mnttable(resolve_lofs_mnts,
285*766Scarlsonj 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
286*766Scarlsonj 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
287*766Scarlsonj }
288*766Scarlsonj 
289*766Scarlsonj static int
290*766Scarlsonj lofs_read_mnttab(zlog_t *zlogp)
291*766Scarlsonj {
292*766Scarlsonj 	FILE *mnttab;
293*766Scarlsonj 	uint_t nmnts;
294*766Scarlsonj 
295*766Scarlsonj 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
296*766Scarlsonj 		return (-1);
297*766Scarlsonj 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
298*766Scarlsonj 	    &nmnts) == -1) {
299*766Scarlsonj 		(void) fclose(mnttab);
300*766Scarlsonj 		return (-1);
301*766Scarlsonj 	}
302*766Scarlsonj 	(void) fclose(mnttab);
303*766Scarlsonj 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
304*766Scarlsonj 	return (0);
305*766Scarlsonj }
306*766Scarlsonj 
307*766Scarlsonj /*
308*766Scarlsonj  * This function loops over potential loopback mounts and symlinks in a given
309*766Scarlsonj  * path and resolves them all down to an absolute path.
310*766Scarlsonj  */
311*766Scarlsonj static void
312*766Scarlsonj resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
313*766Scarlsonj {
314*766Scarlsonj 	int len, arlen;
315*766Scarlsonj 	const char *altroot;
316*766Scarlsonj 	char tmppath[MAXPATHLEN];
317*766Scarlsonj 	boolean_t outside_altroot;
318*766Scarlsonj 
319*766Scarlsonj 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
320*766Scarlsonj 		return;
321*766Scarlsonj 	tmppath[len] = '\0';
322*766Scarlsonj 	(void) strlcpy(path, tmppath, sizeof (tmppath));
323*766Scarlsonj 
324*766Scarlsonj 	/* This happens once per zoneadmd operation. */
325*766Scarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
326*766Scarlsonj 		return;
327*766Scarlsonj 
328*766Scarlsonj 	altroot = zonecfg_get_root();
329*766Scarlsonj 	arlen = strlen(altroot);
330*766Scarlsonj 	outside_altroot = B_FALSE;
331*766Scarlsonj 	for (;;) {
332*766Scarlsonj 		struct mnttab *mnp;
333*766Scarlsonj 
334*766Scarlsonj 		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
335*766Scarlsonj 		    mnp++) {
336*766Scarlsonj 			if (mnp->mnt_fstype == NULL ||
337*766Scarlsonj 			    mnp->mnt_mountp == NULL ||
338*766Scarlsonj 			    mnp->mnt_special == NULL ||
339*766Scarlsonj 			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
340*766Scarlsonj 				continue;
341*766Scarlsonj 			len = strlen(mnp->mnt_mountp);
342*766Scarlsonj 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
343*766Scarlsonj 			    (path[len] == '/' || path[len] == '\0'))
344*766Scarlsonj 				break;
345*766Scarlsonj 		}
346*766Scarlsonj 		if (mnp >= resolve_lofs_mnt_max)
347*766Scarlsonj 			break;
348*766Scarlsonj 		if (outside_altroot) {
349*766Scarlsonj 			char *cp;
350*766Scarlsonj 			int olen = sizeof (MNTOPT_RO) - 1;
351*766Scarlsonj 
352*766Scarlsonj 			/*
353*766Scarlsonj 			 * If we run into a read-only mount outside of the
354*766Scarlsonj 			 * alternate root environment, then the user doesn't
355*766Scarlsonj 			 * want this path to be made read-write.
356*766Scarlsonj 			 */
357*766Scarlsonj 			if (mnp->mnt_mntopts != NULL &&
358*766Scarlsonj 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
359*766Scarlsonj 			    NULL &&
360*766Scarlsonj 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
361*766Scarlsonj 			    (cp[olen] == '\0' || cp[olen] == ',')) {
362*766Scarlsonj 				break;
363*766Scarlsonj 			}
364*766Scarlsonj 		} else if (arlen > 0 &&
365*766Scarlsonj 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
366*766Scarlsonj 		    (mnp->mnt_special[arlen] != '\0' &&
367*766Scarlsonj 		    mnp->mnt_special[arlen] != '/'))) {
368*766Scarlsonj 			outside_altroot = B_TRUE;
369*766Scarlsonj 		}
370*766Scarlsonj 		/* use temporary buffer because new path might be longer */
371*766Scarlsonj 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
372*766Scarlsonj 		    mnp->mnt_special, path + len);
373*766Scarlsonj 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
374*766Scarlsonj 			break;
375*766Scarlsonj 		path[len] = '\0';
376*766Scarlsonj 	}
377*766Scarlsonj }
378*766Scarlsonj 
379*766Scarlsonj /*
380*766Scarlsonj  * For a regular mount, check if a replacement lofs mount is needed because the
381*766Scarlsonj  * referenced device is already mounted somewhere.
382*766Scarlsonj  */
383*766Scarlsonj static int
384*766Scarlsonj check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
385*766Scarlsonj {
386*766Scarlsonj 	struct mnttab *mnp;
387*766Scarlsonj 	zone_fsopt_t *optptr, *onext;
388*766Scarlsonj 
389*766Scarlsonj 	/* This happens once per zoneadmd operation. */
390*766Scarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
391*766Scarlsonj 		return (-1);
392*766Scarlsonj 
393*766Scarlsonj 	/*
394*766Scarlsonj 	 * If this special node isn't already in use, then it's ours alone;
395*766Scarlsonj 	 * no need to worry about conflicting mounts.
396*766Scarlsonj 	 */
397*766Scarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
398*766Scarlsonj 	    mnp++) {
399*766Scarlsonj 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
400*766Scarlsonj 			break;
401*766Scarlsonj 	}
402*766Scarlsonj 	if (mnp >= resolve_lofs_mnt_max)
403*766Scarlsonj 		return (0);
404*766Scarlsonj 
405*766Scarlsonj 	/*
406*766Scarlsonj 	 * Convert this duplicate mount into a lofs mount.
407*766Scarlsonj 	 */
408*766Scarlsonj 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
409*766Scarlsonj 	    sizeof (fsptr->zone_fs_special));
410*766Scarlsonj 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
411*766Scarlsonj 	    sizeof (fsptr->zone_fs_type));
412*766Scarlsonj 	fsptr->zone_fs_raw[0] = '\0';
413*766Scarlsonj 
414*766Scarlsonj 	/*
415*766Scarlsonj 	 * Discard all but one of the original options and set that to be the
416*766Scarlsonj 	 * same set of options used for inherit package directory resources.
417*766Scarlsonj 	 */
418*766Scarlsonj 	optptr = fsptr->zone_fs_options;
419*766Scarlsonj 	if (optptr == NULL) {
420*766Scarlsonj 		optptr = malloc(sizeof (*optptr));
421*766Scarlsonj 		if (optptr == NULL) {
422*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s",
423*766Scarlsonj 			    fsptr->zone_fs_dir);
424*766Scarlsonj 			return (-1);
425*766Scarlsonj 		}
426*766Scarlsonj 	} else {
427*766Scarlsonj 		while ((onext = optptr->zone_fsopt_next) != NULL) {
428*766Scarlsonj 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
429*766Scarlsonj 			free(onext);
430*766Scarlsonj 		}
431*766Scarlsonj 	}
432*766Scarlsonj 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
433*766Scarlsonj 	optptr->zone_fsopt_next = NULL;
434*766Scarlsonj 	fsptr->zone_fs_options = optptr;
435*766Scarlsonj 	return (0);
436*766Scarlsonj }
437*766Scarlsonj 
4380Sstevel@tonic-gate static int
4390Sstevel@tonic-gate make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
4400Sstevel@tonic-gate {
4410Sstevel@tonic-gate 	char path[MAXPATHLEN];
4420Sstevel@tonic-gate 	struct stat st;
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
4450Sstevel@tonic-gate 	    sizeof (path)) {
4460Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
4470Sstevel@tonic-gate 		    subdir);
4480Sstevel@tonic-gate 		return (-1);
4490Sstevel@tonic-gate 	}
4500Sstevel@tonic-gate 
4510Sstevel@tonic-gate 	if (lstat(path, &st) == 0) {
4520Sstevel@tonic-gate 		/*
4530Sstevel@tonic-gate 		 * We don't check the file mode since presumably the zone
4540Sstevel@tonic-gate 		 * administrator may have had good reason to change the mode,
4550Sstevel@tonic-gate 		 * and we don't need to second guess him.
4560Sstevel@tonic-gate 		 */
4570Sstevel@tonic-gate 		if (!S_ISDIR(st.st_mode)) {
4580Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s is not a directory", path);
4590Sstevel@tonic-gate 			return (-1);
4600Sstevel@tonic-gate 		}
4610Sstevel@tonic-gate 	} else if (mkdirp(path, mode) != 0) {
4620Sstevel@tonic-gate 		if (errno == EROFS)
4630Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
4640Sstevel@tonic-gate 			    "a read-only file system in this local zone.\nMake "
4650Sstevel@tonic-gate 			    "sure %s exists in the global zone.", path, subdir);
4660Sstevel@tonic-gate 		else
4670Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
4680Sstevel@tonic-gate 		return (-1);
4690Sstevel@tonic-gate 	}
4700Sstevel@tonic-gate 	return (0);
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate /*
4740Sstevel@tonic-gate  * Make /dev and various directories underneath it.
4750Sstevel@tonic-gate  */
4760Sstevel@tonic-gate static int
4770Sstevel@tonic-gate make_dev_dirs(zlog_t *zlogp, const char *zonepath)
4780Sstevel@tonic-gate {
4790Sstevel@tonic-gate 	int i;
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	for (i = 0; i < sizeof (dev_dirs) / sizeof (struct dir_info); i++) {
4820Sstevel@tonic-gate 		if (make_one_dir(zlogp, zonepath, dev_dirs[i].dir_name,
4830Sstevel@tonic-gate 		    dev_dirs[i].dir_mode) != 0)
4840Sstevel@tonic-gate 			return (-1);
4850Sstevel@tonic-gate 	}
4860Sstevel@tonic-gate 	return (0);
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate /*
4900Sstevel@tonic-gate  * Make various sym-links underneath /dev.
4910Sstevel@tonic-gate  */
4920Sstevel@tonic-gate static int
4930Sstevel@tonic-gate make_dev_links(zlog_t *zlogp, char *zonepath)
4940Sstevel@tonic-gate {
4950Sstevel@tonic-gate 	int i;
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 	for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info);
4980Sstevel@tonic-gate 	    i++) {
4990Sstevel@tonic-gate 		char dev[MAXPATHLEN];
5000Sstevel@tonic-gate 		struct stat st;
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate 		(void) snprintf(dev, sizeof (dev), "%s%s", zonepath,
5030Sstevel@tonic-gate 		    dev_symlinks[i].sl_source);
5040Sstevel@tonic-gate 		if (lstat(dev, &st) == 0) {
5050Sstevel@tonic-gate 			/*
5060Sstevel@tonic-gate 			 * Try not to call unlink(2) on directories, since that
5070Sstevel@tonic-gate 			 * makes UFS unhappy.
5080Sstevel@tonic-gate 			 */
5090Sstevel@tonic-gate 			if (S_ISDIR(st.st_mode)) {
5100Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "symlink path %s is a "
5110Sstevel@tonic-gate 				    "directory", dev_symlinks[i].sl_source);
5120Sstevel@tonic-gate 				return (-1);
5130Sstevel@tonic-gate 			}
5140Sstevel@tonic-gate 			(void) unlink(dev);
5150Sstevel@tonic-gate 		}
5160Sstevel@tonic-gate 		if (symlink(dev_symlinks[i].sl_target, dev) != 0) {
517*766Scarlsonj 			zerror(zlogp, B_TRUE, "could not setup %s->%s symlink",
518*766Scarlsonj 			    dev_symlinks[i].sl_source,
519*766Scarlsonj 			    dev_symlinks[i].sl_target);
5200Sstevel@tonic-gate 			return (-1);
5210Sstevel@tonic-gate 		}
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 	return (0);
5240Sstevel@tonic-gate }
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate /*
5270Sstevel@tonic-gate  * Create various directories and sym-links under /dev.
5280Sstevel@tonic-gate  */
5290Sstevel@tonic-gate static int
5300Sstevel@tonic-gate create_dev_files(zlog_t *zlogp)
5310Sstevel@tonic-gate {
5320Sstevel@tonic-gate 	char zonepath[MAXPATHLEN];
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
5350Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
5360Sstevel@tonic-gate 		return (-1);
5370Sstevel@tonic-gate 	}
538*766Scarlsonj 	if (zonecfg_in_alt_root())
539*766Scarlsonj 		resolve_lofs(zlogp, zonepath, sizeof (zonepath));
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	if (make_dev_dirs(zlogp, zonepath) != 0)
5420Sstevel@tonic-gate 		return (-1);
5430Sstevel@tonic-gate 	if (make_dev_links(zlogp, zonepath) != 0)
5440Sstevel@tonic-gate 		return (-1);
5450Sstevel@tonic-gate 	return (0);
5460Sstevel@tonic-gate }
5470Sstevel@tonic-gate 
/*
 * Free a NULL-terminated array of strings: every string up to the
 * terminating NULL entry, then the array itself.  A NULL array is ignored.
 */
static void
free_remote_fstypes(char **types)
{
	char **tp;

	if (types == NULL)
		return;

	for (tp = types; *tp != NULL; tp++)
		free(*tp);
	free(types);
}
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate static char **
5610Sstevel@tonic-gate get_remote_fstypes(zlog_t *zlogp)
5620Sstevel@tonic-gate {
5630Sstevel@tonic-gate 	char **types = NULL;
5640Sstevel@tonic-gate 	FILE *fp;
5650Sstevel@tonic-gate 	char buf[MAXPATHLEN];
5660Sstevel@tonic-gate 	char fstype[MAXPATHLEN];
5670Sstevel@tonic-gate 	uint_t lines = 0;
5680Sstevel@tonic-gate 	uint_t i;
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
5710Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
5720Sstevel@tonic-gate 		return (NULL);
5730Sstevel@tonic-gate 	}
5740Sstevel@tonic-gate 	/*
5750Sstevel@tonic-gate 	 * Count the number of lines
5760Sstevel@tonic-gate 	 */
5770Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL)
5780Sstevel@tonic-gate 		lines++;
5790Sstevel@tonic-gate 	if (lines == 0)	/* didn't read anything; empty file */
5800Sstevel@tonic-gate 		goto out;
5810Sstevel@tonic-gate 	rewind(fp);
5820Sstevel@tonic-gate 	/*
5830Sstevel@tonic-gate 	 * Allocate enough space for a NULL-terminated array.
5840Sstevel@tonic-gate 	 */
5850Sstevel@tonic-gate 	types = calloc(lines + 1, sizeof (char *));
5860Sstevel@tonic-gate 	if (types == NULL) {
5870Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
5880Sstevel@tonic-gate 		goto out;
5890Sstevel@tonic-gate 	}
5900Sstevel@tonic-gate 	i = 0;
5910Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL) {
5920Sstevel@tonic-gate 		/* LINTED - fstype is big enough to hold buf */
5930Sstevel@tonic-gate 		if (sscanf(buf, "%s", fstype) == 0) {
5940Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
5950Sstevel@tonic-gate 			free_remote_fstypes(types);
5960Sstevel@tonic-gate 			types = NULL;
5970Sstevel@tonic-gate 			goto out;
5980Sstevel@tonic-gate 		}
5990Sstevel@tonic-gate 		types[i] = strdup(fstype);
6000Sstevel@tonic-gate 		if (types[i] == NULL) {
6010Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
6020Sstevel@tonic-gate 			free_remote_fstypes(types);
6030Sstevel@tonic-gate 			types = NULL;
6040Sstevel@tonic-gate 			goto out;
6050Sstevel@tonic-gate 		}
6060Sstevel@tonic-gate 		i++;
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate out:
6090Sstevel@tonic-gate 	(void) fclose(fp);
6100Sstevel@tonic-gate 	return (types);
6110Sstevel@tonic-gate }
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate static boolean_t
6140Sstevel@tonic-gate is_remote_fstype(const char *fstype, char *const *remote_fstypes)
6150Sstevel@tonic-gate {
6160Sstevel@tonic-gate 	uint_t i;
6170Sstevel@tonic-gate 
6180Sstevel@tonic-gate 	if (remote_fstypes == NULL)
6190Sstevel@tonic-gate 		return (B_FALSE);
6200Sstevel@tonic-gate 	for (i = 0; remote_fstypes[i] != NULL; i++) {
6210Sstevel@tonic-gate 		if (strcmp(remote_fstypes[i], fstype) == 0)
6220Sstevel@tonic-gate 			return (B_TRUE);
6230Sstevel@tonic-gate 	}
6240Sstevel@tonic-gate 	return (B_FALSE);
6250Sstevel@tonic-gate }
6260Sstevel@tonic-gate 
627*766Scarlsonj /*
628*766Scarlsonj  * This converts a zone root path (normally of the form .../root) to a Live
629*766Scarlsonj  * Upgrade scratch zone root (of the form .../lu).
630*766Scarlsonj  */
6310Sstevel@tonic-gate static void
632*766Scarlsonj root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
6330Sstevel@tonic-gate {
634*766Scarlsonj 	if (!isresolved && zonecfg_in_alt_root())
635*766Scarlsonj 		resolve_lofs(zlogp, zroot, zrootlen);
636*766Scarlsonj 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate  * The general strategy for unmounting filesystems is as follows:
6410Sstevel@tonic-gate  *
6420Sstevel@tonic-gate  * - Remote filesystems may be dead, and attempting to contact them as
6430Sstevel@tonic-gate  * part of a regular unmount may hang forever; we want to always try to
6440Sstevel@tonic-gate  * forcibly unmount such filesystems and only fall back to regular
6450Sstevel@tonic-gate  * unmounts if the filesystem doesn't support forced unmounts.
6460Sstevel@tonic-gate  *
6470Sstevel@tonic-gate  * - We don't want to unnecessarily corrupt metadata on local
6480Sstevel@tonic-gate  * filesystems (ie UFS), so we want to start off with graceful unmounts,
6490Sstevel@tonic-gate  * and only escalate to doing forced unmounts if we get stuck.
6500Sstevel@tonic-gate  *
6510Sstevel@tonic-gate  * We start off walking backwards through the mount table.  This doesn't
6520Sstevel@tonic-gate  * give us strict ordering but ensures that we try to unmount submounts
6530Sstevel@tonic-gate  * first.  We thus limit the number of failed umount2(2) calls.
6540Sstevel@tonic-gate  *
6550Sstevel@tonic-gate  * The mechanism for determining if we're stuck is to count the number
6560Sstevel@tonic-gate  * of failed unmounts each iteration through the mount table.  This
6570Sstevel@tonic-gate  * gives us an upper bound on the number of filesystems which remain
6580Sstevel@tonic-gate  * mounted (autofs trigger nodes are dealt with separately).  If at the
6590Sstevel@tonic-gate  * end of one unmount+autofs_cleanup cycle we still have the same number
6600Sstevel@tonic-gate  * of mounts that we started out with, we're stuck and try a forced
6610Sstevel@tonic-gate  * unmount.  If that fails (filesystem doesn't support forced unmounts)
6620Sstevel@tonic-gate  * then we bail and are unable to teardown the zone.  If it succeeds,
6630Sstevel@tonic-gate  * we're no longer stuck so we continue with our policy of trying
 * graceful unmounts first.
6650Sstevel@tonic-gate  *
6660Sstevel@tonic-gate  * Zone must be down (ie, no processes or threads active).
6670Sstevel@tonic-gate  */
6680Sstevel@tonic-gate static int
669*766Scarlsonj unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
6700Sstevel@tonic-gate {
6710Sstevel@tonic-gate 	int error = 0;
6720Sstevel@tonic-gate 	FILE *mnttab;
6730Sstevel@tonic-gate 	struct mnttab *mnts;
6740Sstevel@tonic-gate 	uint_t nmnt;
6750Sstevel@tonic-gate 	char zroot[MAXPATHLEN + 1];
6760Sstevel@tonic-gate 	size_t zrootlen;
6770Sstevel@tonic-gate 	uint_t oldcount = UINT_MAX;
6780Sstevel@tonic-gate 	boolean_t stuck = B_FALSE;
6790Sstevel@tonic-gate 	char **remote_fstypes = NULL;
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
6820Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to determine zone root");
6830Sstevel@tonic-gate 		return (-1);
6840Sstevel@tonic-gate 	}
685*766Scarlsonj 	if (unmount_cmd)
686*766Scarlsonj 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	(void) strcat(zroot, "/");
6890Sstevel@tonic-gate 	zrootlen = strlen(zroot);
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
6920Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
6930Sstevel@tonic-gate 		return (-1);
6940Sstevel@tonic-gate 	}
6950Sstevel@tonic-gate 	/*
6960Sstevel@tonic-gate 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
6970Sstevel@tonic-gate 	 * MS_NOMNTTAB.
6980Sstevel@tonic-gate 	 */
6990Sstevel@tonic-gate 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
7000Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
7010Sstevel@tonic-gate 		error++;
7020Sstevel@tonic-gate 		goto out;
7030Sstevel@tonic-gate 	}
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 	/*
7060Sstevel@tonic-gate 	 * Build the list of remote fstypes so we know which ones we
7070Sstevel@tonic-gate 	 * should forcibly unmount.
7080Sstevel@tonic-gate 	 */
7090Sstevel@tonic-gate 	remote_fstypes = get_remote_fstypes(zlogp);
7100Sstevel@tonic-gate 	for (; /* ever */; ) {
7110Sstevel@tonic-gate 		uint_t newcount = 0;
7120Sstevel@tonic-gate 		boolean_t unmounted;
7130Sstevel@tonic-gate 		struct mnttab *mnp;
7140Sstevel@tonic-gate 		char *path;
7150Sstevel@tonic-gate 		uint_t i;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 		mnts = NULL;
7180Sstevel@tonic-gate 		nmnt = 0;
7190Sstevel@tonic-gate 		/*
7200Sstevel@tonic-gate 		 * MNTTAB gives us a way to walk through mounted
7210Sstevel@tonic-gate 		 * filesystems; we need to be able to walk them in
7220Sstevel@tonic-gate 		 * reverse order, so we build a list of all mounted
7230Sstevel@tonic-gate 		 * filesystems.
7240Sstevel@tonic-gate 		 */
7250Sstevel@tonic-gate 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
7260Sstevel@tonic-gate 		    &nmnt) != 0) {
7270Sstevel@tonic-gate 			error++;
7280Sstevel@tonic-gate 			goto out;
7290Sstevel@tonic-gate 		}
7300Sstevel@tonic-gate 		for (i = 0; i < nmnt; i++) {
7310Sstevel@tonic-gate 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
7320Sstevel@tonic-gate 			path = mnp->mnt_mountp;
7330Sstevel@tonic-gate 			unmounted = B_FALSE;
7340Sstevel@tonic-gate 			/*
7350Sstevel@tonic-gate 			 * Try forced unmount first for remote filesystems.
7360Sstevel@tonic-gate 			 *
7370Sstevel@tonic-gate 			 * Not all remote filesystems support forced unmounts,
7380Sstevel@tonic-gate 			 * so if this fails (ENOTSUP) we'll continue on
7390Sstevel@tonic-gate 			 * and try a regular unmount.
7400Sstevel@tonic-gate 			 */
7410Sstevel@tonic-gate 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
7420Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0)
7430Sstevel@tonic-gate 					unmounted = B_TRUE;
7440Sstevel@tonic-gate 			}
7450Sstevel@tonic-gate 			/*
7460Sstevel@tonic-gate 			 * Try forced unmount if we're stuck.
7470Sstevel@tonic-gate 			 */
7480Sstevel@tonic-gate 			if (stuck) {
7490Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0) {
7500Sstevel@tonic-gate 					unmounted = B_TRUE;
7510Sstevel@tonic-gate 					stuck = B_FALSE;
7520Sstevel@tonic-gate 				} else {
7530Sstevel@tonic-gate 					/*
7540Sstevel@tonic-gate 					 * The first failure indicates a
7550Sstevel@tonic-gate 					 * mount we won't be able to get
7560Sstevel@tonic-gate 					 * rid of automatically, so we
7570Sstevel@tonic-gate 					 * bail.
7580Sstevel@tonic-gate 					 */
7590Sstevel@tonic-gate 					error++;
7600Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
7610Sstevel@tonic-gate 					    "unable to unmount '%s'", path);
7620Sstevel@tonic-gate 					free_mnttable(mnts, nmnt);
7630Sstevel@tonic-gate 					goto out;
7640Sstevel@tonic-gate 				}
7650Sstevel@tonic-gate 			}
7660Sstevel@tonic-gate 			/*
7670Sstevel@tonic-gate 			 * Try regular unmounts for everything else.
7680Sstevel@tonic-gate 			 */
7690Sstevel@tonic-gate 			if (!unmounted && umount2(path, 0) != 0)
7700Sstevel@tonic-gate 				newcount++;
7710Sstevel@tonic-gate 		}
7720Sstevel@tonic-gate 		free_mnttable(mnts, nmnt);
7730Sstevel@tonic-gate 
7740Sstevel@tonic-gate 		if (newcount == 0)
7750Sstevel@tonic-gate 			break;
7760Sstevel@tonic-gate 		if (newcount >= oldcount) {
7770Sstevel@tonic-gate 			/*
7780Sstevel@tonic-gate 			 * Last round didn't unmount anything; we're stuck and
7790Sstevel@tonic-gate 			 * should start trying forced unmounts.
7800Sstevel@tonic-gate 			 */
7810Sstevel@tonic-gate 			stuck = B_TRUE;
7820Sstevel@tonic-gate 		}
7830Sstevel@tonic-gate 		oldcount = newcount;
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 		/*
7860Sstevel@tonic-gate 		 * Autofs doesn't let you unmount its trigger nodes from
7870Sstevel@tonic-gate 		 * userland so we have to tell the kernel to cleanup for us.
7880Sstevel@tonic-gate 		 */
7890Sstevel@tonic-gate 		if (autofs_cleanup(zoneid) != 0) {
7900Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
7910Sstevel@tonic-gate 			error++;
7920Sstevel@tonic-gate 			goto out;
7930Sstevel@tonic-gate 		}
7940Sstevel@tonic-gate 	}
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate out:
7970Sstevel@tonic-gate 	free_remote_fstypes(remote_fstypes);
7980Sstevel@tonic-gate 	(void) fclose(mnttab);
7990Sstevel@tonic-gate 	return (error ? -1 : 0);
8000Sstevel@tonic-gate }
8010Sstevel@tonic-gate 
8020Sstevel@tonic-gate static int
8030Sstevel@tonic-gate fs_compare(const void *m1, const void *m2)
8040Sstevel@tonic-gate {
8050Sstevel@tonic-gate 	struct zone_fstab *i = (struct zone_fstab *)m1;
8060Sstevel@tonic-gate 	struct zone_fstab *j = (struct zone_fstab *)m2;
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
8090Sstevel@tonic-gate }
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate /*
8120Sstevel@tonic-gate  * Fork and exec (and wait for) the mentioned binary with the provided
8130Sstevel@tonic-gate  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
8140Sstevel@tonic-gate  * returns the exit status otherwise.
8150Sstevel@tonic-gate  *
8160Sstevel@tonic-gate  * If we were unable to exec the provided pathname (for whatever
8170Sstevel@tonic-gate  * reason), we return the special token ZEXIT_EXEC.  The current value
8180Sstevel@tonic-gate  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
8190Sstevel@tonic-gate  * consumers of this function; any future consumers must make sure this
8200Sstevel@tonic-gate  * remains the case.
8210Sstevel@tonic-gate  */
8220Sstevel@tonic-gate static int
8230Sstevel@tonic-gate forkexec(zlog_t *zlogp, const char *path, char *const argv[])
8240Sstevel@tonic-gate {
8250Sstevel@tonic-gate 	pid_t child_pid;
8260Sstevel@tonic-gate 	int child_status = 0;
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 	/*
8290Sstevel@tonic-gate 	 * Do not let another thread localize a message while we are forking.
8300Sstevel@tonic-gate 	 */
8310Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
8320Sstevel@tonic-gate 	child_pid = fork();
8330Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
8340Sstevel@tonic-gate 	if (child_pid == -1) {
8350Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
8360Sstevel@tonic-gate 		return (-1);
8370Sstevel@tonic-gate 	} else if (child_pid == 0) {
8380Sstevel@tonic-gate 		closefrom(0);
8390Sstevel@tonic-gate 		(void) execv(path, argv);
8400Sstevel@tonic-gate 		/*
8410Sstevel@tonic-gate 		 * Since we are in the child, there is no point calling zerror()
8420Sstevel@tonic-gate 		 * since there is nobody waiting to consume it.  So exit with a
8430Sstevel@tonic-gate 		 * special code that the parent will recognize and call zerror()
8440Sstevel@tonic-gate 		 * accordingly.
8450Sstevel@tonic-gate 		 */
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate 		_exit(ZEXIT_EXEC);
8480Sstevel@tonic-gate 	} else {
8490Sstevel@tonic-gate 		(void) waitpid(child_pid, &child_status, 0);
8500Sstevel@tonic-gate 	}
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate 	if (WIFSIGNALED(child_status)) {
8530Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
8540Sstevel@tonic-gate 		    "signal %d", path, WTERMSIG(child_status));
8550Sstevel@tonic-gate 		return (-1);
8560Sstevel@tonic-gate 	}
8570Sstevel@tonic-gate 	assert(WIFEXITED(child_status));
8580Sstevel@tonic-gate 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
8590Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
8600Sstevel@tonic-gate 		return (-1);
8610Sstevel@tonic-gate 	}
8620Sstevel@tonic-gate 	return (WEXITSTATUS(child_status));
8630Sstevel@tonic-gate }
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate static int
8660Sstevel@tonic-gate dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
8670Sstevel@tonic-gate {
8680Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8690Sstevel@tonic-gate 	char *argv[4];
8700Sstevel@tonic-gate 	int status;
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate 	/*
8730Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
8740Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
8750Sstevel@tonic-gate 	 */
8760Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
8770Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
8780Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
8790Sstevel@tonic-gate 		return (-1);
8800Sstevel@tonic-gate 	}
8810Sstevel@tonic-gate 
8820Sstevel@tonic-gate 	argv[0] = "fsck";
8830Sstevel@tonic-gate 	argv[1] = "-m";
8840Sstevel@tonic-gate 	argv[2] = (char *)rawdev;
8850Sstevel@tonic-gate 	argv[3] = NULL;
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
8880Sstevel@tonic-gate 	if (status == 0 || status == -1)
8890Sstevel@tonic-gate 		return (status);
8900Sstevel@tonic-gate 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
8910Sstevel@tonic-gate 	    "run fsck manually", rawdev, status);
8920Sstevel@tonic-gate 	return (-1);
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate 
8950Sstevel@tonic-gate static int
8960Sstevel@tonic-gate domount(zlog_t *zlogp, const char *fstype, const char *opts,
8970Sstevel@tonic-gate     const char *special, const char *directory)
8980Sstevel@tonic-gate {
8990Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
9000Sstevel@tonic-gate 	char *argv[6];
9010Sstevel@tonic-gate 	int status;
9020Sstevel@tonic-gate 
9030Sstevel@tonic-gate 	/*
9040Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
9050Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
9060Sstevel@tonic-gate 	 */
9070Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
9080Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
9090Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
9100Sstevel@tonic-gate 		return (-1);
9110Sstevel@tonic-gate 	}
9120Sstevel@tonic-gate 	argv[0] = "mount";
9130Sstevel@tonic-gate 	if (opts[0] == '\0') {
9140Sstevel@tonic-gate 		argv[1] = (char *)special;
9150Sstevel@tonic-gate 		argv[2] = (char *)directory;
9160Sstevel@tonic-gate 		argv[3] = NULL;
9170Sstevel@tonic-gate 	} else {
9180Sstevel@tonic-gate 		argv[1] = "-o";
9190Sstevel@tonic-gate 		argv[2] = (char *)opts;
9200Sstevel@tonic-gate 		argv[3] = (char *)special;
9210Sstevel@tonic-gate 		argv[4] = (char *)directory;
9220Sstevel@tonic-gate 		argv[5] = NULL;
9230Sstevel@tonic-gate 	}
9240Sstevel@tonic-gate 
9250Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
9260Sstevel@tonic-gate 	if (status == 0 || status == -1)
9270Sstevel@tonic-gate 		return (status);
9280Sstevel@tonic-gate 	if (opts[0] == '\0')
9290Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
9300Sstevel@tonic-gate 		    "failed with exit code %d",
9310Sstevel@tonic-gate 		    cmdbuf, special, directory, status);
9320Sstevel@tonic-gate 	else
9330Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
9340Sstevel@tonic-gate 		    "failed with exit code %d",
9350Sstevel@tonic-gate 		    cmdbuf, opts, special, directory, status);
9360Sstevel@tonic-gate 	return (-1);
9370Sstevel@tonic-gate }
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate /*
9400Sstevel@tonic-gate  * Make sure if a given path exists, it is not a sym-link, and is a directory.
9410Sstevel@tonic-gate  */
9420Sstevel@tonic-gate static int
9430Sstevel@tonic-gate check_path(zlog_t *zlogp, const char *path)
9440Sstevel@tonic-gate {
9450Sstevel@tonic-gate 	struct stat statbuf;
9460Sstevel@tonic-gate 	char respath[MAXPATHLEN];
9470Sstevel@tonic-gate 	int res;
9480Sstevel@tonic-gate 
9490Sstevel@tonic-gate 	if (lstat(path, &statbuf) != 0) {
9500Sstevel@tonic-gate 		if (errno == ENOENT)
9510Sstevel@tonic-gate 			return (0);
9520Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "can't stat %s", path);
9530Sstevel@tonic-gate 		return (-1);
9540Sstevel@tonic-gate 	}
9550Sstevel@tonic-gate 	if (S_ISLNK(statbuf.st_mode)) {
9560Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
9570Sstevel@tonic-gate 		return (-1);
9580Sstevel@tonic-gate 	}
9590Sstevel@tonic-gate 	if (!S_ISDIR(statbuf.st_mode)) {
9600Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not a directory", path);
9610Sstevel@tonic-gate 		return (-1);
9620Sstevel@tonic-gate 	}
9630Sstevel@tonic-gate 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
9640Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
9650Sstevel@tonic-gate 		return (-1);
9660Sstevel@tonic-gate 	}
9670Sstevel@tonic-gate 	respath[res] = '\0';
9680Sstevel@tonic-gate 	if (strcmp(path, respath) != 0) {
9690Sstevel@tonic-gate 		/*
9700Sstevel@tonic-gate 		 * We don't like ".."s and "."s throwing us off
9710Sstevel@tonic-gate 		 */
9720Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
9730Sstevel@tonic-gate 		return (-1);
9740Sstevel@tonic-gate 	}
9750Sstevel@tonic-gate 	return (0);
9760Sstevel@tonic-gate }
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate /*
9790Sstevel@tonic-gate  * Check every component of rootpath/relpath.  If any component fails (ie,
9800Sstevel@tonic-gate  * exists but isn't the canonical path to a directory), it is returned in
9810Sstevel@tonic-gate  * badpath, which is assumed to be at least of size MAXPATHLEN.
9820Sstevel@tonic-gate  *
9830Sstevel@tonic-gate  * Relpath must begin with '/'.
9840Sstevel@tonic-gate  */
9850Sstevel@tonic-gate static boolean_t
9860Sstevel@tonic-gate valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath)
9870Sstevel@tonic-gate {
9880Sstevel@tonic-gate 	char abspath[MAXPATHLEN], *slashp;
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 	/*
9910Sstevel@tonic-gate 	 * Make sure abspath has at least one '/' after its rootpath
9920Sstevel@tonic-gate 	 * component, and ends with '/'.
9930Sstevel@tonic-gate 	 */
9940Sstevel@tonic-gate 	if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >
9950Sstevel@tonic-gate 	    sizeof (abspath)) {
9960Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath,
9970Sstevel@tonic-gate 		    relpath);
9980Sstevel@tonic-gate 		return (B_FALSE);
9990Sstevel@tonic-gate 	}
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 	slashp = &abspath[strlen(rootpath)];
10020Sstevel@tonic-gate 	assert(*slashp == '/');
10030Sstevel@tonic-gate 	do {
10040Sstevel@tonic-gate 		*slashp = '\0';
10050Sstevel@tonic-gate 		if (check_path(zlogp, abspath) != 0)
10060Sstevel@tonic-gate 			return (B_FALSE);
10070Sstevel@tonic-gate 		*slashp = '/';
10080Sstevel@tonic-gate 		slashp++;
10090Sstevel@tonic-gate 	} while ((slashp = strchr(slashp, '/')) != NULL);
10100Sstevel@tonic-gate 	return (B_TRUE);
10110Sstevel@tonic-gate }
10120Sstevel@tonic-gate 
10130Sstevel@tonic-gate static int
10140Sstevel@tonic-gate mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
10150Sstevel@tonic-gate {
10160Sstevel@tonic-gate 	char    path[MAXPATHLEN];
1017*766Scarlsonj 	char	specpath[MAXPATHLEN];
10180Sstevel@tonic-gate 	char    optstr[MAX_MNTOPT_STR];
10190Sstevel@tonic-gate 	zone_fsopt_t *optptr;
10200Sstevel@tonic-gate 
10210Sstevel@tonic-gate 	if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) {
10220Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
10230Sstevel@tonic-gate 		    rootpath, fsptr->zone_fs_dir);
10240Sstevel@tonic-gate 		return (-1);
10250Sstevel@tonic-gate 	}
10260Sstevel@tonic-gate 
10270Sstevel@tonic-gate 	if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
10280Sstevel@tonic-gate 	    DEFAULT_DIR_MODE) != 0)
10290Sstevel@tonic-gate 		return (-1);
10300Sstevel@tonic-gate 
10310Sstevel@tonic-gate 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
10320Sstevel@tonic-gate 	    fsptr->zone_fs_dir);
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 	if (strlen(fsptr->zone_fs_special) == 0) {
10350Sstevel@tonic-gate 		/*
10360Sstevel@tonic-gate 		 * A zero-length special is how we distinguish IPDs from
1037*766Scarlsonj 		 * general-purpose FSs.  Make sure it mounts from a place that
1038*766Scarlsonj 		 * can be seen via the alternate zone's root.
10390Sstevel@tonic-gate 		 */
1040*766Scarlsonj 		if (snprintf(specpath, sizeof (specpath), "%s%s",
1041*766Scarlsonj 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
1042*766Scarlsonj 		    sizeof (specpath)) {
1043*766Scarlsonj 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
1044*766Scarlsonj 			    "long in alternate root", fsptr->zone_fs_dir);
1045*766Scarlsonj 			return (-1);
1046*766Scarlsonj 		}
1047*766Scarlsonj 		if (zonecfg_in_alt_root())
1048*766Scarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
10490Sstevel@tonic-gate 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
1050*766Scarlsonj 		    specpath, path) != 0) {
10510Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
1052*766Scarlsonj 			    specpath);
10530Sstevel@tonic-gate 			return (-1);
10540Sstevel@tonic-gate 		}
10550Sstevel@tonic-gate 		return (0);
10560Sstevel@tonic-gate 	}
10570Sstevel@tonic-gate 
10580Sstevel@tonic-gate 	/*
10590Sstevel@tonic-gate 	 * In general the strategy here is to do just as much verification as
10600Sstevel@tonic-gate 	 * necessary to avoid crashing or otherwise doing something bad; if the
10610Sstevel@tonic-gate 	 * administrator initiated the operation via zoneadm(1m), he'll get
10620Sstevel@tonic-gate 	 * auto-verification which will let him know what's wrong.  If he
10630Sstevel@tonic-gate 	 * modifies the zone configuration of a running zone and doesn't attempt
10640Sstevel@tonic-gate 	 * to verify that it's OK we won't crash but won't bother trying to be
10650Sstevel@tonic-gate 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
10660Sstevel@tonic-gate 	 */
10670Sstevel@tonic-gate 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
10680Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
10690Sstevel@tonic-gate 		    "invalid file-system type %s", fsptr->zone_fs_special,
10700Sstevel@tonic-gate 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
10710Sstevel@tonic-gate 		return (-1);
10720Sstevel@tonic-gate 	}
10730Sstevel@tonic-gate 
10740Sstevel@tonic-gate 	/*
1075*766Scarlsonj 	 * If we're looking at an alternate root environment, then construct
1076*766Scarlsonj 	 * read-only loopback mounts as necessary.  For all lofs mounts, make
1077*766Scarlsonj 	 * sure that the 'special' entry points inside the alternate root.  (We
1078*766Scarlsonj 	 * don't do this with other mounts, as devfs isn't in the alternate
1079*766Scarlsonj 	 * root, and we need to assume the device environment is roughly the
1080*766Scarlsonj 	 * same.)
1081*766Scarlsonj 	 */
1082*766Scarlsonj 	if (zonecfg_in_alt_root()) {
1083*766Scarlsonj 		struct stat64 st;
1084*766Scarlsonj 
1085*766Scarlsonj 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
1086*766Scarlsonj 		    S_ISBLK(st.st_mode) &&
1087*766Scarlsonj 		    check_lofs_needed(zlogp, fsptr) == -1)
1088*766Scarlsonj 			return (-1);
1089*766Scarlsonj 		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
1090*766Scarlsonj 			if (snprintf(specpath, sizeof (specpath), "%s%s",
1091*766Scarlsonj 			    zonecfg_get_root(), fsptr->zone_fs_special) >=
1092*766Scarlsonj 			    sizeof (specpath)) {
1093*766Scarlsonj 				zerror(zlogp, B_FALSE, "cannot mount %s: path "
1094*766Scarlsonj 				    "too long in alternate root",
1095*766Scarlsonj 				    fsptr->zone_fs_special);
1096*766Scarlsonj 				return (-1);
1097*766Scarlsonj 			}
1098*766Scarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1099*766Scarlsonj 			(void) strlcpy(fsptr->zone_fs_special, specpath,
1100*766Scarlsonj 			    sizeof (fsptr->zone_fs_special));
1101*766Scarlsonj 		}
1102*766Scarlsonj 	}
1103*766Scarlsonj 
1104*766Scarlsonj 	/*
11050Sstevel@tonic-gate 	 * Run 'fsck -m' if there's a device to fsck.
11060Sstevel@tonic-gate 	 */
11070Sstevel@tonic-gate 	if (fsptr->zone_fs_raw[0] != '\0' &&
11080Sstevel@tonic-gate 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0)
11090Sstevel@tonic-gate 		return (-1);
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate 	/*
11120Sstevel@tonic-gate 	 * Build up mount option string.
11130Sstevel@tonic-gate 	 */
11140Sstevel@tonic-gate 	optstr[0] = '\0';
11150Sstevel@tonic-gate 	if (fsptr->zone_fs_options != NULL) {
11160Sstevel@tonic-gate 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
11170Sstevel@tonic-gate 		    sizeof (optstr));
11180Sstevel@tonic-gate 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
11190Sstevel@tonic-gate 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
11200Sstevel@tonic-gate 			(void) strlcat(optstr, ",", sizeof (optstr));
11210Sstevel@tonic-gate 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
11220Sstevel@tonic-gate 			    sizeof (optstr));
11230Sstevel@tonic-gate 		}
11240Sstevel@tonic-gate 	}
11250Sstevel@tonic-gate 	return (domount(zlogp, fsptr->zone_fs_type, optstr,
11260Sstevel@tonic-gate 	    fsptr->zone_fs_special, path));
11270Sstevel@tonic-gate }
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate static void
11300Sstevel@tonic-gate free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
11310Sstevel@tonic-gate {
11320Sstevel@tonic-gate 	uint_t i;
11330Sstevel@tonic-gate 
11340Sstevel@tonic-gate 	if (fsarray == NULL)
11350Sstevel@tonic-gate 		return;
11360Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11370Sstevel@tonic-gate 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
11380Sstevel@tonic-gate 	free(fsarray);
11390Sstevel@tonic-gate }
11400Sstevel@tonic-gate 
1141*766Scarlsonj /*
1142*766Scarlsonj  * This function constructs the miniroot-like "scratch zone" environment.  If
1143*766Scarlsonj  * it returns B_FALSE, then the error has already been logged.
1144*766Scarlsonj  */
1145*766Scarlsonj static boolean_t
1146*766Scarlsonj build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen,
1147*766Scarlsonj     const char *zonepath)
1148*766Scarlsonj {
1149*766Scarlsonj 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1150*766Scarlsonj 	char luroot[MAXPATHLEN];
1151*766Scarlsonj 	const char **cpp;
1152*766Scarlsonj 	static const char *mkdirs[] = {
1153*766Scarlsonj 		"/system", "/system/contract", "/proc", "/dev", "/tmp",
1154*766Scarlsonj 		"/a", NULL
1155*766Scarlsonj 	};
1156*766Scarlsonj 	static const char *localdirs[] = {
1157*766Scarlsonj 		"/etc", "/var", NULL
1158*766Scarlsonj 	};
1159*766Scarlsonj 	static const char *loopdirs[] = {
1160*766Scarlsonj 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1161*766Scarlsonj 		"/usr", NULL
1162*766Scarlsonj 	};
1163*766Scarlsonj 	static const char *tmpdirs[] = {
1164*766Scarlsonj 		"/tmp", "/var/run", NULL
1165*766Scarlsonj 	};
1166*766Scarlsonj 	FILE *fp;
1167*766Scarlsonj 	struct stat st;
1168*766Scarlsonj 	char *altstr;
1169*766Scarlsonj 	uuid_t uuid;
1170*766Scarlsonj 
1171*766Scarlsonj 	/*
1172*766Scarlsonj 	 * Construct a small Solaris environment, including the zone root
1173*766Scarlsonj 	 * mounted on '/a' inside that environment.
1174*766Scarlsonj 	 */
1175*766Scarlsonj 	resolve_lofs(zlogp, rootpath, rootlen);
1176*766Scarlsonj 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1177*766Scarlsonj 	resolve_lofs(zlogp, luroot, sizeof (luroot));
1178*766Scarlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
1179*766Scarlsonj 	(void) symlink("./usr/bin", tmp);
1180*766Scarlsonj 
1181*766Scarlsonj 	/*
1182*766Scarlsonj 	 * These are mostly special mount points; not handled here.  (See
1183*766Scarlsonj 	 * zone_mount_early.)
1184*766Scarlsonj 	 */
1185*766Scarlsonj 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
1186*766Scarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1187*766Scarlsonj 		if (mkdir(tmp, 0755) != 0) {
1188*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1189*766Scarlsonj 			return (B_FALSE);
1190*766Scarlsonj 		}
1191*766Scarlsonj 	}
1192*766Scarlsonj 
1193*766Scarlsonj 	/*
1194*766Scarlsonj 	 * These are mounted read-write from the zone undergoing upgrade.  We
1195*766Scarlsonj 	 * must be careful not to 'leak' things from the main system into the
1196*766Scarlsonj 	 * zone, and this accomplishes that goal.
1197*766Scarlsonj 	 */
1198*766Scarlsonj 	for (cpp = localdirs; *cpp != NULL; cpp++) {
1199*766Scarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1200*766Scarlsonj 		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
1201*766Scarlsonj 		    *cpp);
1202*766Scarlsonj 		if (mkdir(tmp, 0755) != 0) {
1203*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1204*766Scarlsonj 			return (B_FALSE);
1205*766Scarlsonj 		}
1206*766Scarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
1207*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1208*766Scarlsonj 			    *cpp);
1209*766Scarlsonj 			return (B_FALSE);
1210*766Scarlsonj 		}
1211*766Scarlsonj 	}
1212*766Scarlsonj 
1213*766Scarlsonj 	/*
1214*766Scarlsonj 	 * These are things mounted read-only from the running system because
1215*766Scarlsonj 	 * they contain binaries that must match system.
1216*766Scarlsonj 	 */
1217*766Scarlsonj 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
1218*766Scarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1219*766Scarlsonj 		if (mkdir(tmp, 0755) != 0) {
1220*766Scarlsonj 			if (errno != EEXIST) {
1221*766Scarlsonj 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1222*766Scarlsonj 				return (B_FALSE);
1223*766Scarlsonj 			}
1224*766Scarlsonj 			if (lstat(tmp, &st) != 0) {
1225*766Scarlsonj 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
1226*766Scarlsonj 				return (B_FALSE);
1227*766Scarlsonj 			}
1228*766Scarlsonj 			/*
1229*766Scarlsonj 			 * Ignore any non-directories encountered.  These are
1230*766Scarlsonj 			 * things that have been converted into symlinks
1231*766Scarlsonj 			 * (/etc/fs and /etc/lib) and no longer need a lofs
1232*766Scarlsonj 			 * fixup.
1233*766Scarlsonj 			 */
1234*766Scarlsonj 			if (!S_ISDIR(st.st_mode))
1235*766Scarlsonj 				continue;
1236*766Scarlsonj 		}
1237*766Scarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
1238*766Scarlsonj 		    tmp) != 0) {
1239*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1240*766Scarlsonj 			    *cpp);
1241*766Scarlsonj 			return (B_FALSE);
1242*766Scarlsonj 		}
1243*766Scarlsonj 	}
1244*766Scarlsonj 
1245*766Scarlsonj 	/*
1246*766Scarlsonj 	 * These are things with tmpfs mounted inside.
1247*766Scarlsonj 	 */
1248*766Scarlsonj 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
1249*766Scarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1250*766Scarlsonj 		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
1251*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1252*766Scarlsonj 			return (B_FALSE);
1253*766Scarlsonj 		}
1254*766Scarlsonj 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
1255*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
1256*766Scarlsonj 			return (B_FALSE);
1257*766Scarlsonj 		}
1258*766Scarlsonj 	}
1259*766Scarlsonj 
1260*766Scarlsonj 	/*
1261*766Scarlsonj 	 * This is here to support lucopy.  If there's an instance of this same
1262*766Scarlsonj 	 * zone on the current running system, then we mount its root up as
1263*766Scarlsonj 	 * read-only inside the scratch zone.
1264*766Scarlsonj 	 */
1265*766Scarlsonj 	(void) zonecfg_get_uuid(zone_name, uuid);
1266*766Scarlsonj 	altstr = strdup(zonecfg_get_root());
1267*766Scarlsonj 	if (altstr == NULL) {
1268*766Scarlsonj 		zerror(zlogp, B_TRUE, "out of memory");
1269*766Scarlsonj 		return (B_FALSE);
1270*766Scarlsonj 	}
1271*766Scarlsonj 	zonecfg_set_root("");
1272*766Scarlsonj 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
1273*766Scarlsonj 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
1274*766Scarlsonj 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
1275*766Scarlsonj 	    strcmp(fromdir, rootpath) != 0) {
1276*766Scarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
1277*766Scarlsonj 		if (mkdir(tmp, 0755) != 0) {
1278*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1279*766Scarlsonj 			return (B_FALSE);
1280*766Scarlsonj 		}
1281*766Scarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
1282*766Scarlsonj 		    tmp) != 0) {
1283*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1284*766Scarlsonj 			    fromdir);
1285*766Scarlsonj 			return (B_FALSE);
1286*766Scarlsonj 		}
1287*766Scarlsonj 	}
1288*766Scarlsonj 	zonecfg_set_root(altstr);
1289*766Scarlsonj 	free(altstr);
1290*766Scarlsonj 
1291*766Scarlsonj 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
1292*766Scarlsonj 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
1293*766Scarlsonj 		return (B_FALSE);
1294*766Scarlsonj 	}
1295*766Scarlsonj 	(void) ftruncate(fileno(fp), 0);
1296*766Scarlsonj 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
1297*766Scarlsonj 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
1298*766Scarlsonj 	}
1299*766Scarlsonj 	zonecfg_close_scratch(fp);
1300*766Scarlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
1301*766Scarlsonj 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
1302*766Scarlsonj 		return (B_FALSE);
1303*766Scarlsonj 	(void) strlcpy(rootpath, tmp, rootlen);
1304*766Scarlsonj 	return (B_TRUE);
1305*766Scarlsonj }
1306*766Scarlsonj 
13070Sstevel@tonic-gate static int
1308*766Scarlsonj mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
13090Sstevel@tonic-gate {
13100Sstevel@tonic-gate 	char	rootpath[MAXPATHLEN];
13110Sstevel@tonic-gate 	char	zonepath[MAXPATHLEN];
13120Sstevel@tonic-gate 	int	num_fs = 0, i;
13130Sstevel@tonic-gate 	struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr;
13140Sstevel@tonic-gate 	struct zone_fstab *fsp;
13150Sstevel@tonic-gate 	zone_dochandle_t handle = NULL;
13160Sstevel@tonic-gate 	zone_state_t zstate;
13170Sstevel@tonic-gate 
13180Sstevel@tonic-gate 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
1319*766Scarlsonj 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
13200Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
1321*766Scarlsonj 		    "zone must be in '%s' or '%s' state to mount file-systems",
1322*766Scarlsonj 		    zone_state_str(ZONE_STATE_READY),
1323*766Scarlsonj 		    zone_state_str(ZONE_STATE_MOUNTED));
13240Sstevel@tonic-gate 		goto bad;
13250Sstevel@tonic-gate 	}
13260Sstevel@tonic-gate 
13270Sstevel@tonic-gate 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
13280Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone path");
13290Sstevel@tonic-gate 		goto bad;
13300Sstevel@tonic-gate 	}
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
13330Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
13340Sstevel@tonic-gate 		goto bad;
13350Sstevel@tonic-gate 	}
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
13380Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
13390Sstevel@tonic-gate 		    "could not get zone configuration handle");
13400Sstevel@tonic-gate 		goto bad;
13410Sstevel@tonic-gate 	}
13420Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
13430Sstevel@tonic-gate 	    zonecfg_setfsent(handle) != Z_OK) {
13440Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13450Sstevel@tonic-gate 		goto bad;
13460Sstevel@tonic-gate 	}
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 	/*
13490Sstevel@tonic-gate 	 * /dev in the zone is loopback'd from the external /dev repository,
13500Sstevel@tonic-gate 	 * in order to provide a largely read-only semantic.  But because
13510Sstevel@tonic-gate 	 * processes in the zone need to be able to chown, chmod, etc. zone
13520Sstevel@tonic-gate 	 * /dev files, we can't use a 'ro' lofs mount.  Instead we use a
13530Sstevel@tonic-gate 	 * special mode just for zones, "zonedevfs".
13540Sstevel@tonic-gate 	 *
13550Sstevel@tonic-gate 	 * In the future we should front /dev with a full-fledged filesystem.
13560Sstevel@tonic-gate 	 */
13570Sstevel@tonic-gate 	num_fs++;
13580Sstevel@tonic-gate 	if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) {
13590Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
13600Sstevel@tonic-gate 		num_fs--;
13610Sstevel@tonic-gate 		goto bad;
13620Sstevel@tonic-gate 	}
13630Sstevel@tonic-gate 	fs_ptr = tmp_ptr;
13640Sstevel@tonic-gate 	fsp = &fs_ptr[num_fs - 1];
1365*766Scarlsonj 	/*
1366*766Scarlsonj 	 * Note that mount_one will prepend the alternate root to
1367*766Scarlsonj 	 * zone_fs_special and do the necessary resolution, so all that is
1368*766Scarlsonj 	 * needed here is to strip the root added by zone_get_zonepath.
1369*766Scarlsonj 	 */
13700Sstevel@tonic-gate 	(void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir));
13710Sstevel@tonic-gate 	(void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special),
1372*766Scarlsonj 	    "%s/dev", zonepath + strlen(zonecfg_get_root()));
13730Sstevel@tonic-gate 	fsp->zone_fs_raw[0] = '\0';
13740Sstevel@tonic-gate 	(void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS,
13750Sstevel@tonic-gate 	    sizeof (fsp->zone_fs_type));
13760Sstevel@tonic-gate 	fsp->zone_fs_options = NULL;
13770Sstevel@tonic-gate 	if (zonecfg_add_fs_option(fsp, MNTOPT_LOFS_ZONEDEVFS) != Z_OK) {
13780Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "error adding property");
13790Sstevel@tonic-gate 		goto bad;
13800Sstevel@tonic-gate 	}
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 	/*
13830Sstevel@tonic-gate 	 * Iterate through the rest of the filesystems, first the IPDs, then
13840Sstevel@tonic-gate 	 * the general FSs.  Sort them all, then mount them in sorted order.
13850Sstevel@tonic-gate 	 * This is to make sure the higher level directories (e.g., /usr)
13860Sstevel@tonic-gate 	 * get mounted before any beneath them (e.g., /usr/local).
13870Sstevel@tonic-gate 	 */
13880Sstevel@tonic-gate 	if (zonecfg_setipdent(handle) != Z_OK) {
13890Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13900Sstevel@tonic-gate 		goto bad;
13910Sstevel@tonic-gate 	}
13920Sstevel@tonic-gate 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
13930Sstevel@tonic-gate 		num_fs++;
13940Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
13950Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
13960Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
13970Sstevel@tonic-gate 			num_fs--;
13980Sstevel@tonic-gate 			(void) zonecfg_endipdent(handle);
13990Sstevel@tonic-gate 			goto bad;
14000Sstevel@tonic-gate 		}
14010Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
14020Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
14030Sstevel@tonic-gate 		/*
14040Sstevel@tonic-gate 		 * IPDs logically only have a mount point; all other properties
14050Sstevel@tonic-gate 		 * are implied.
14060Sstevel@tonic-gate 		 */
14070Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14080Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14090Sstevel@tonic-gate 		fsp->zone_fs_special[0] = '\0';
14100Sstevel@tonic-gate 		fsp->zone_fs_raw[0] = '\0';
14110Sstevel@tonic-gate 		fsp->zone_fs_type[0] = '\0';
14120Sstevel@tonic-gate 		fsp->zone_fs_options = NULL;
14130Sstevel@tonic-gate 	}
14140Sstevel@tonic-gate 	(void) zonecfg_endipdent(handle);
14150Sstevel@tonic-gate 
14160Sstevel@tonic-gate 	if (zonecfg_setfsent(handle) != Z_OK) {
14170Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
14180Sstevel@tonic-gate 		goto bad;
14190Sstevel@tonic-gate 	}
14200Sstevel@tonic-gate 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
14210Sstevel@tonic-gate 		num_fs++;
14220Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
14230Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
14240Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
14250Sstevel@tonic-gate 			num_fs--;
14260Sstevel@tonic-gate 			(void) zonecfg_endfsent(handle);
14270Sstevel@tonic-gate 			goto bad;
14280Sstevel@tonic-gate 		}
14290Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
14300Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
14310Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14320Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14330Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special,
14340Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_special));
14350Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
14360Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_raw));
14370Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
14380Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_type));
14390Sstevel@tonic-gate 		fsp->zone_fs_options = fstab.zone_fs_options;
14400Sstevel@tonic-gate 	}
14410Sstevel@tonic-gate 	(void) zonecfg_endfsent(handle);
14420Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
14430Sstevel@tonic-gate 	handle = NULL;
14440Sstevel@tonic-gate 
1445*766Scarlsonj 	/*
1446*766Scarlsonj 	 * If we're mounting a zone for administration, then we need to set up
1447*766Scarlsonj 	 * the "/a" environment inside the zone so that the commands that run
1448*766Scarlsonj 	 * in there have access to both the running system's utilities and the
1449*766Scarlsonj 	 * to-be-modified zone's files.
1450*766Scarlsonj 	 */
1451*766Scarlsonj 	if (mount_cmd &&
1452*766Scarlsonj 	    !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath))
1453*766Scarlsonj 		goto bad;
1454*766Scarlsonj 
14550Sstevel@tonic-gate 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
14560Sstevel@tonic-gate 	for (i = 0; i < num_fs; i++) {
1457*766Scarlsonj 		if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
1458*766Scarlsonj 			size_t slen = strlen(rootpath) - 2;
1459*766Scarlsonj 
1460*766Scarlsonj 			/* /dev is special and always goes at the top */
1461*766Scarlsonj 			rootpath[slen] = '\0';
1462*766Scarlsonj 			if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
1463*766Scarlsonj 				goto bad;
1464*766Scarlsonj 			rootpath[slen] = '/';
1465*766Scarlsonj 			continue;
1466*766Scarlsonj 		}
14670Sstevel@tonic-gate 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
14680Sstevel@tonic-gate 			goto bad;
14690Sstevel@tonic-gate 	}
14700Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14710Sstevel@tonic-gate 
14720Sstevel@tonic-gate 	/*
14730Sstevel@tonic-gate 	 * Everything looks fine.
14740Sstevel@tonic-gate 	 */
14750Sstevel@tonic-gate 	return (0);
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate bad:
14780Sstevel@tonic-gate 	if (handle != NULL)
14790Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
14800Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14810Sstevel@tonic-gate 	return (-1);
14820Sstevel@tonic-gate }
14830Sstevel@tonic-gate 
/*
 * Convert a decimal prefix-length string into a network mask.
 *
 * prefixstr	NUL-terminated prefix length.  The entire string must be a
 *		valid decimal number; an empty string, non-numeric text,
 *		trailing garbage or a value that overflows a long is rejected
 *		instead of being silently treated as some number.
 * maxprefixlen	largest prefix length that is acceptable.
 * maskstr	mask buffer.  One byte is written for every started group of
 *		eight prefix bits.  Bytes past the prefix are not touched and
 *		the partial byte is OR-ed in, so the caller is expected to
 *		hand in a zero-filled buffer.
 *
 * Returns 0 on success and 1 if the prefix length is malformed or outside
 * the range [0, maxprefixlen].  The caller makes sure neither pointer is
 * NULL.
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	/* strtol reports overflow only through errno, so clear it first */
	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno == ERANGE)
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* every complete group of eight prefix bits is a fully-set byte */
	for (; prefixlen >= 8; prefixlen -= 8)
		*maskstr++ = 0xFF;

	/* the remaining 1..7 bits occupy the high end of the next byte */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - (int)prefixlen));
	return (0);
}
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate /*
15060Sstevel@tonic-gate  * Tear down all interfaces belonging to the given zone.  This should
15070Sstevel@tonic-gate  * be called with the zone in a state other than "running", so that
15080Sstevel@tonic-gate  * interfaces can't be assigned to the zone after this returns.
15090Sstevel@tonic-gate  *
15100Sstevel@tonic-gate  * If anything goes wrong, log an error message and return an error.
15110Sstevel@tonic-gate  */
15120Sstevel@tonic-gate static int
15130Sstevel@tonic-gate unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
15140Sstevel@tonic-gate {
15150Sstevel@tonic-gate 	struct lifnum lifn;
15160Sstevel@tonic-gate 	struct lifconf lifc;
15170Sstevel@tonic-gate 	struct lifreq *lifrp, lifrl;
15180Sstevel@tonic-gate 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
15190Sstevel@tonic-gate 	int num_ifs, s, i, ret_code = 0;
15200Sstevel@tonic-gate 	uint_t bufsize;
15210Sstevel@tonic-gate 	char *buf = NULL;
15220Sstevel@tonic-gate 
15230Sstevel@tonic-gate 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
15240Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
15250Sstevel@tonic-gate 		ret_code = -1;
15260Sstevel@tonic-gate 		goto bad;
15270Sstevel@tonic-gate 	}
15280Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
15290Sstevel@tonic-gate 	lifn.lifn_flags = (int)lifc_flags;
15300Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
15310Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
15320Sstevel@tonic-gate 		    "could not determine number of interfaces");
15330Sstevel@tonic-gate 		ret_code = -1;
15340Sstevel@tonic-gate 		goto bad;
15350Sstevel@tonic-gate 	}
15360Sstevel@tonic-gate 	num_ifs = lifn.lifn_count;
15370Sstevel@tonic-gate 	bufsize = num_ifs * sizeof (struct lifreq);
15380Sstevel@tonic-gate 	if ((buf = malloc(bufsize)) == NULL) {
15390Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
15400Sstevel@tonic-gate 		ret_code = -1;
15410Sstevel@tonic-gate 		goto bad;
15420Sstevel@tonic-gate 	}
15430Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
15440Sstevel@tonic-gate 	lifc.lifc_flags = (int)lifc_flags;
15450Sstevel@tonic-gate 	lifc.lifc_len = bufsize;
15460Sstevel@tonic-gate 	lifc.lifc_buf = buf;
15470Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
15480Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get configured interfaces");
15490Sstevel@tonic-gate 		ret_code = -1;
15500Sstevel@tonic-gate 		goto bad;
15510Sstevel@tonic-gate 	}
15520Sstevel@tonic-gate 	lifrp = lifc.lifc_req;
15530Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
15540Sstevel@tonic-gate 		(void) close(s);
15550Sstevel@tonic-gate 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
15560Sstevel@tonic-gate 		    0) {
15570Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get socket",
15580Sstevel@tonic-gate 			    lifrl.lifr_name);
15590Sstevel@tonic-gate 			ret_code = -1;
15600Sstevel@tonic-gate 			continue;
15610Sstevel@tonic-gate 		}
15620Sstevel@tonic-gate 		(void) memset(&lifrl, 0, sizeof (lifrl));
15630Sstevel@tonic-gate 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
15640Sstevel@tonic-gate 		    sizeof (lifrl.lifr_name));
15650Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
15660Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
15670Sstevel@tonic-gate 			    "%s: could not determine zone interface belongs to",
15680Sstevel@tonic-gate 			    lifrl.lifr_name);
15690Sstevel@tonic-gate 			ret_code = -1;
15700Sstevel@tonic-gate 			continue;
15710Sstevel@tonic-gate 		}
15720Sstevel@tonic-gate 		if (lifrl.lifr_zoneid == zone_id) {
15730Sstevel@tonic-gate 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
15740Sstevel@tonic-gate 				zerror(zlogp, B_TRUE,
15750Sstevel@tonic-gate 				    "%s: could not remove interface",
15760Sstevel@tonic-gate 				    lifrl.lifr_name);
15770Sstevel@tonic-gate 				ret_code = -1;
15780Sstevel@tonic-gate 				continue;
15790Sstevel@tonic-gate 			}
15800Sstevel@tonic-gate 		}
15810Sstevel@tonic-gate 	}
15820Sstevel@tonic-gate bad:
15830Sstevel@tonic-gate 	if (s > 0)
15840Sstevel@tonic-gate 		(void) close(s);
15850Sstevel@tonic-gate 	if (buf)
15860Sstevel@tonic-gate 		free(buf);
15870Sstevel@tonic-gate 	return (ret_code);
15880Sstevel@tonic-gate }
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate static union	sockunion {
15910Sstevel@tonic-gate 	struct	sockaddr sa;
15920Sstevel@tonic-gate 	struct	sockaddr_in sin;
15930Sstevel@tonic-gate 	struct	sockaddr_dl sdl;
15940Sstevel@tonic-gate 	struct	sockaddr_in6 sin6;
15950Sstevel@tonic-gate } so_dst, so_ifp;
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate static struct {
15980Sstevel@tonic-gate 	struct	rt_msghdr hdr;
15990Sstevel@tonic-gate 	char	space[512];
16000Sstevel@tonic-gate } rtmsg;
16010Sstevel@tonic-gate 
16020Sstevel@tonic-gate static int
16030Sstevel@tonic-gate salen(struct sockaddr *sa)
16040Sstevel@tonic-gate {
16050Sstevel@tonic-gate 	switch (sa->sa_family) {
16060Sstevel@tonic-gate 	case AF_INET:
16070Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in));
16080Sstevel@tonic-gate 	case AF_LINK:
16090Sstevel@tonic-gate 		return (sizeof (struct sockaddr_dl));
16100Sstevel@tonic-gate 	case AF_INET6:
16110Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in6));
16120Sstevel@tonic-gate 	default:
16130Sstevel@tonic-gate 		return (sizeof (struct sockaddr));
16140Sstevel@tonic-gate 	}
16150Sstevel@tonic-gate }
16160Sstevel@tonic-gate 
/*
 * Round a length up to the next multiple of sizeof (long).  A length that
 * is zero or negative yields sizeof (long).  Note that the argument is
 * evaluated more than once.
 */
#define	ROUNDUP_LONG(a) \
	((a) <= 0 ? sizeof (long) : ((((a) - 1) | (sizeof (long) - 1)) + 1))
16190Sstevel@tonic-gate 
16200Sstevel@tonic-gate /*
16210Sstevel@tonic-gate  * Look up which zone is using a given IP address.  The address in question
16220Sstevel@tonic-gate  * is expected to have been stuffed into the structure to which lifr points
16230Sstevel@tonic-gate  * via a previous SIOCGLIFADDR ioctl().
16240Sstevel@tonic-gate  *
16250Sstevel@tonic-gate  * This is done using black router socket magic.
16260Sstevel@tonic-gate  *
16270Sstevel@tonic-gate  * Return the name of the zone on success or NULL on failure.
16280Sstevel@tonic-gate  *
16290Sstevel@tonic-gate  * This is a lot of code for a simple task; a new ioctl request to take care
16300Sstevel@tonic-gate  * of this might be a useful RFE.
16310Sstevel@tonic-gate  */
16320Sstevel@tonic-gate 
16330Sstevel@tonic-gate static char *
16340Sstevel@tonic-gate who_is_using(zlog_t *zlogp, struct lifreq *lifr)
16350Sstevel@tonic-gate {
16360Sstevel@tonic-gate 	static char answer[ZONENAME_MAX];
16370Sstevel@tonic-gate 	pid_t pid;
16380Sstevel@tonic-gate 	int s, rlen, l, i;
16390Sstevel@tonic-gate 	char *cp = rtmsg.space;
16400Sstevel@tonic-gate 	struct sockaddr_dl *ifp = NULL;
16410Sstevel@tonic-gate 	struct sockaddr *sa;
16420Sstevel@tonic-gate 	char save_if_name[LIFNAMSIZ];
16430Sstevel@tonic-gate 
16440Sstevel@tonic-gate 	answer[0] = '\0';
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	pid = getpid();
16470Sstevel@tonic-gate 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
16480Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get routing socket");
16490Sstevel@tonic-gate 		return (NULL);
16500Sstevel@tonic-gate 	}
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 	if (lifr->lifr_addr.ss_family == AF_INET) {
16530Sstevel@tonic-gate 		struct sockaddr_in *sin4;
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET;
16560Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
16570Sstevel@tonic-gate 		so_dst.sin.sin_addr = sin4->sin_addr;
16580Sstevel@tonic-gate 	} else {
16590Sstevel@tonic-gate 		struct sockaddr_in6 *sin6;
16600Sstevel@tonic-gate 
16610Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET6;
16620Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
16630Sstevel@tonic-gate 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
16640Sstevel@tonic-gate 	}
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 	so_ifp.sa.sa_family = AF_LINK;
16670Sstevel@tonic-gate 
16680Sstevel@tonic-gate 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
16690Sstevel@tonic-gate 	rtmsg.hdr.rtm_type = RTM_GET;
16700Sstevel@tonic-gate 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
16710Sstevel@tonic-gate 	rtmsg.hdr.rtm_version = RTM_VERSION;
16720Sstevel@tonic-gate 	rtmsg.hdr.rtm_seq = ++rts_seqno;
16730Sstevel@tonic-gate 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
16740Sstevel@tonic-gate 
16750Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_dst.sa));
16760Sstevel@tonic-gate 	(void) memmove(cp, &(so_dst), l);
16770Sstevel@tonic-gate 	cp += l;
16780Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
16790Sstevel@tonic-gate 	(void) memmove(cp, &(so_ifp), l);
16800Sstevel@tonic-gate 	cp += l;
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
16830Sstevel@tonic-gate 
16840Sstevel@tonic-gate 	if ((rlen = write(s, &rtmsg, l)) < 0) {
16850Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "writing to routing socket");
16860Sstevel@tonic-gate 		return (NULL);
16870Sstevel@tonic-gate 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
16880Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
16890Sstevel@tonic-gate 		    "write to routing socket got only %d for len\n", rlen);
16900Sstevel@tonic-gate 		return (NULL);
16910Sstevel@tonic-gate 	}
16920Sstevel@tonic-gate 	do {
16930Sstevel@tonic-gate 		l = read(s, &rtmsg, sizeof (rtmsg));
16940Sstevel@tonic-gate 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
16950Sstevel@tonic-gate 	    rtmsg.hdr.rtm_pid != pid));
16960Sstevel@tonic-gate 	if (l < 0) {
16970Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "reading from routing socket");
16980Sstevel@tonic-gate 		return (NULL);
16990Sstevel@tonic-gate 	}
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
17020Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
17030Sstevel@tonic-gate 		    "routing message version %d not understood",
17040Sstevel@tonic-gate 		    rtmsg.hdr.rtm_version);
17050Sstevel@tonic-gate 		return (NULL);
17060Sstevel@tonic-gate 	}
17070Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
17080Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "message length mismatch, "
17090Sstevel@tonic-gate 		    "expected %d bytes, returned %d bytes",
17100Sstevel@tonic-gate 		    rtmsg.hdr.rtm_msglen, l);
17110Sstevel@tonic-gate 		return (NULL);
17120Sstevel@tonic-gate 	}
17130Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_errno != 0)  {
17140Sstevel@tonic-gate 		errno = rtmsg.hdr.rtm_errno;
17150Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
17160Sstevel@tonic-gate 		return (NULL);
17170Sstevel@tonic-gate 	}
17180Sstevel@tonic-gate 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
17190Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface not found");
17200Sstevel@tonic-gate 		return (NULL);
17210Sstevel@tonic-gate 	}
17220Sstevel@tonic-gate 	cp = ((char *)(&rtmsg.hdr + 1));
17230Sstevel@tonic-gate 	for (i = 1; i != 0; i <<= 1) {
17240Sstevel@tonic-gate 		/* LINTED E_BAD_PTR_CAST_ALIGN */
17250Sstevel@tonic-gate 		sa = (struct sockaddr *)cp;
17260Sstevel@tonic-gate 		if (i != RTA_IFP) {
17270Sstevel@tonic-gate 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
17280Sstevel@tonic-gate 				cp += ROUNDUP_LONG(salen(sa));
17290Sstevel@tonic-gate 			continue;
17300Sstevel@tonic-gate 		}
17310Sstevel@tonic-gate 		if (sa->sa_family == AF_LINK &&
17320Sstevel@tonic-gate 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
17330Sstevel@tonic-gate 			ifp = (struct sockaddr_dl *)sa;
17340Sstevel@tonic-gate 		break;
17350Sstevel@tonic-gate 	}
17360Sstevel@tonic-gate 	if (ifp == NULL) {
17370Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface could not be determined");
17380Sstevel@tonic-gate 		return (NULL);
17390Sstevel@tonic-gate 	}
17400Sstevel@tonic-gate 
17410Sstevel@tonic-gate 	/*
17420Sstevel@tonic-gate 	 * We need to set the I/F name to what we got above, then do the
17430Sstevel@tonic-gate 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
17440Sstevel@tonic-gate 	 * used by the calling function to do a REMOVEIF, so if we leave the
17450Sstevel@tonic-gate 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
17460Sstevel@tonic-gate 	 * of the bad one.  So we save the old (bad) I/F name before over-
17470Sstevel@tonic-gate 	 * writing it and doing the ioctl, then restore it after the ioctl.
17480Sstevel@tonic-gate 	 */
17490Sstevel@tonic-gate 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
17500Sstevel@tonic-gate 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
17510Sstevel@tonic-gate 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
17520Sstevel@tonic-gate 	i = ioctl(s, SIOCGLIFZONE, lifr);
17530Sstevel@tonic-gate 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
17540Sstevel@tonic-gate 	if (i < 0) {
17550Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17560Sstevel@tonic-gate 		    "%s: could not determine the zone interface belongs to",
17570Sstevel@tonic-gate 		    lifr->lifr_name);
17580Sstevel@tonic-gate 		return (NULL);
17590Sstevel@tonic-gate 	}
17600Sstevel@tonic-gate 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
17610Sstevel@tonic-gate 		(void) snprintf(answer, sizeof (answer), "%d",
17620Sstevel@tonic-gate 		    lifr->lifr_zoneid);
17630Sstevel@tonic-gate 
17640Sstevel@tonic-gate 	if (strlen(answer) > 0)
17650Sstevel@tonic-gate 		return (answer);
17660Sstevel@tonic-gate 	return (NULL);
17670Sstevel@tonic-gate }
17680Sstevel@tonic-gate 
17690Sstevel@tonic-gate typedef struct mcast_rtmsg_s {
17700Sstevel@tonic-gate 	struct rt_msghdr	m_rtm;
17710Sstevel@tonic-gate 	union {
17720Sstevel@tonic-gate 		struct {
17730Sstevel@tonic-gate 			struct sockaddr_in	m_dst;
17740Sstevel@tonic-gate 			struct sockaddr_in	m_gw;
17750Sstevel@tonic-gate 			struct sockaddr_in	m_netmask;
17760Sstevel@tonic-gate 		} m_v4;
17770Sstevel@tonic-gate 		struct {
17780Sstevel@tonic-gate 			struct sockaddr_in6	m_dst;
17790Sstevel@tonic-gate 			struct sockaddr_in6	m_gw;
17800Sstevel@tonic-gate 			struct sockaddr_in6	m_netmask;
17810Sstevel@tonic-gate 		} m_v6;
17820Sstevel@tonic-gate 	} m_u;
17830Sstevel@tonic-gate } mcast_rtmsg_t;
17840Sstevel@tonic-gate #define	m_dst4		m_u.m_v4.m_dst
17850Sstevel@tonic-gate #define	m_dst6		m_u.m_v6.m_dst
17860Sstevel@tonic-gate #define	m_gw4		m_u.m_v4.m_gw
17870Sstevel@tonic-gate #define	m_gw6		m_u.m_v6.m_gw
17880Sstevel@tonic-gate #define	m_netmask4	m_u.m_v4.m_netmask
17890Sstevel@tonic-gate #define	m_netmask6	m_u.m_v6.m_netmask
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate /*
17920Sstevel@tonic-gate  * Configures a single interface: a new virtual interface is added, based on
17930Sstevel@tonic-gate  * the physical interface nwiftabptr->zone_nwif_physical, with the address
17940Sstevel@tonic-gate  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
17950Sstevel@tonic-gate  * the "address" can be an IPv6 address (with a /prefixlength required), an
17960Sstevel@tonic-gate  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
17970Sstevel@tonic-gate  * an IPv4 name-to-address resolution will be attempted.
17980Sstevel@tonic-gate  *
17990Sstevel@tonic-gate  * A default interface route for multicast is created on the first IPv4 and
18000Sstevel@tonic-gate  * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
18010Sstevel@tonic-gate  * This should really be done in the init scripts if we ever allow zones to
18020Sstevel@tonic-gate  * modify the routing tables.
18030Sstevel@tonic-gate  *
18040Sstevel@tonic-gate  * If anything goes wrong, we log a detailed error message, attempt to tear
18050Sstevel@tonic-gate  * down whatever we set up and return an error.
18060Sstevel@tonic-gate  */
18070Sstevel@tonic-gate static int
18080Sstevel@tonic-gate configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
18090Sstevel@tonic-gate     struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp,
18100Sstevel@tonic-gate     boolean_t *mcast_rt_v6_setp)
18110Sstevel@tonic-gate {
18120Sstevel@tonic-gate 	struct lifreq lifr;
18130Sstevel@tonic-gate 	struct sockaddr_in netmask4;
18140Sstevel@tonic-gate 	struct sockaddr_in6 netmask6;
18150Sstevel@tonic-gate 	struct in_addr in4;
18160Sstevel@tonic-gate 	struct in6_addr in6;
18170Sstevel@tonic-gate 	sa_family_t af;
18180Sstevel@tonic-gate 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
18190Sstevel@tonic-gate 	mcast_rtmsg_t mcast_rtmsg;
18200Sstevel@tonic-gate 	int s;
18210Sstevel@tonic-gate 	int rs;
18220Sstevel@tonic-gate 	int rlen;
18230Sstevel@tonic-gate 	boolean_t got_netmask = B_FALSE;
18240Sstevel@tonic-gate 	char addrstr4[INET_ADDRSTRLEN];
18250Sstevel@tonic-gate 	int res;
18260Sstevel@tonic-gate 
18270Sstevel@tonic-gate 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
18280Sstevel@tonic-gate 	if (res != Z_OK) {
18290Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
18300Sstevel@tonic-gate 		    nwiftabptr->zone_nwif_address);
18310Sstevel@tonic-gate 		return (-1);
18320Sstevel@tonic-gate 	}
18330Sstevel@tonic-gate 	af = lifr.lifr_addr.ss_family;
18340Sstevel@tonic-gate 	if (af == AF_INET)
18350Sstevel@tonic-gate 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
18360Sstevel@tonic-gate 	else
18370Sstevel@tonic-gate 		in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr;
18380Sstevel@tonic-gate 
18390Sstevel@tonic-gate 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
18400Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
18410Sstevel@tonic-gate 		return (-1);
18420Sstevel@tonic-gate 	}
18430Sstevel@tonic-gate 
18440Sstevel@tonic-gate 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
18450Sstevel@tonic-gate 	    sizeof (lifr.lifr_name));
18460Sstevel@tonic-gate 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
18470Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not add interface",
18480Sstevel@tonic-gate 		    lifr.lifr_name);
18490Sstevel@tonic-gate 		(void) close(s);
18500Sstevel@tonic-gate 		return (-1);
18510Sstevel@tonic-gate 	}
18520Sstevel@tonic-gate 
18530Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
18540Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
18550Sstevel@tonic-gate 		    "%s: could not set IP address to %s",
18560Sstevel@tonic-gate 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
18570Sstevel@tonic-gate 		goto bad;
18580Sstevel@tonic-gate 	}
18590Sstevel@tonic-gate 
18600Sstevel@tonic-gate 	/* Preserve literal IPv4 address for later potential printing. */
18610Sstevel@tonic-gate 	if (af == AF_INET)
18620Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 	lifr.lifr_zoneid = zone_id;
18650Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
18660Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not place interface into zone",
18670Sstevel@tonic-gate 		    lifr.lifr_name);
18680Sstevel@tonic-gate 		goto bad;
18690Sstevel@tonic-gate 	}
18700Sstevel@tonic-gate 
18710Sstevel@tonic-gate 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
18720Sstevel@tonic-gate 		got_netmask = B_TRUE;	/* default setting will be correct */
18730Sstevel@tonic-gate 	} else {
18740Sstevel@tonic-gate 		if (af == AF_INET) {
18750Sstevel@tonic-gate 			/*
18760Sstevel@tonic-gate 			 * The IPv4 netmask can be determined either
18770Sstevel@tonic-gate 			 * directly if a prefix length was supplied with
18780Sstevel@tonic-gate 			 * the address or via the netmasks database.  Not
18790Sstevel@tonic-gate 			 * being able to determine it is a common failure,
18800Sstevel@tonic-gate 			 * but it often is not fatal to operation of the
18810Sstevel@tonic-gate 			 * interface.  In that case, a warning will be
18820Sstevel@tonic-gate 			 * printed after the rest of the interface's
18830Sstevel@tonic-gate 			 * parameters have been configured.
18840Sstevel@tonic-gate 			 */
18850Sstevel@tonic-gate 			(void) memset(&netmask4, 0, sizeof (netmask4));
18860Sstevel@tonic-gate 			if (slashp != NULL) {
18870Sstevel@tonic-gate 				if (addr2netmask(slashp + 1, V4_ADDR_LEN,
18880Sstevel@tonic-gate 				    (uchar_t *)&netmask4.sin_addr) != 0) {
18890Sstevel@tonic-gate 					*slashp = '/';
18900Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
18910Sstevel@tonic-gate 					    "%s: invalid prefix length in %s",
18920Sstevel@tonic-gate 					    lifr.lifr_name,
18930Sstevel@tonic-gate 					    nwiftabptr->zone_nwif_address);
18940Sstevel@tonic-gate 					goto bad;
18950Sstevel@tonic-gate 				}
18960Sstevel@tonic-gate 				got_netmask = B_TRUE;
18970Sstevel@tonic-gate 			} else if (getnetmaskbyaddr(in4,
18980Sstevel@tonic-gate 			    &netmask4.sin_addr) == 0) {
18990Sstevel@tonic-gate 				got_netmask = B_TRUE;
19000Sstevel@tonic-gate 			}
19010Sstevel@tonic-gate 			if (got_netmask) {
19020Sstevel@tonic-gate 				netmask4.sin_family = af;
19030Sstevel@tonic-gate 				(void) memcpy(&lifr.lifr_addr, &netmask4,
19040Sstevel@tonic-gate 				    sizeof (netmask4));
19050Sstevel@tonic-gate 			}
19060Sstevel@tonic-gate 		} else {
19070Sstevel@tonic-gate 			(void) memset(&netmask6, 0, sizeof (netmask6));
19080Sstevel@tonic-gate 			if (addr2netmask(slashp + 1, V6_ADDR_LEN,
19090Sstevel@tonic-gate 			    (uchar_t *)&netmask6.sin6_addr) != 0) {
19100Sstevel@tonic-gate 				*slashp = '/';
19110Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
19120Sstevel@tonic-gate 				    "%s: invalid prefix length in %s",
19130Sstevel@tonic-gate 				    lifr.lifr_name,
19140Sstevel@tonic-gate 				    nwiftabptr->zone_nwif_address);
19150Sstevel@tonic-gate 				goto bad;
19160Sstevel@tonic-gate 			}
19170Sstevel@tonic-gate 			got_netmask = B_TRUE;
19180Sstevel@tonic-gate 			netmask6.sin6_family = af;
19190Sstevel@tonic-gate 			(void) memcpy(&lifr.lifr_addr, &netmask6,
19200Sstevel@tonic-gate 			    sizeof (netmask6));
19210Sstevel@tonic-gate 		}
19220Sstevel@tonic-gate 		if (got_netmask &&
19230Sstevel@tonic-gate 		    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
19240Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not set netmask",
19250Sstevel@tonic-gate 			    lifr.lifr_name);
19260Sstevel@tonic-gate 			goto bad;
19270Sstevel@tonic-gate 		}
19280Sstevel@tonic-gate 
19290Sstevel@tonic-gate 		/*
19300Sstevel@tonic-gate 		 * This doesn't set the broadcast address at all. Rather, it
19310Sstevel@tonic-gate 		 * gets, then sets the interface's address, relying on the fact
19320Sstevel@tonic-gate 		 * that resetting the address will reset the broadcast address.
19330Sstevel@tonic-gate 		 */
19340Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19350Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19360Sstevel@tonic-gate 			    lifr.lifr_name);
19370Sstevel@tonic-gate 			goto bad;
19380Sstevel@tonic-gate 		}
19390Sstevel@tonic-gate 		if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
19400Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19410Sstevel@tonic-gate 			    "%s: could not reset broadcast address",
19420Sstevel@tonic-gate 			    lifr.lifr_name);
19430Sstevel@tonic-gate 			goto bad;
19440Sstevel@tonic-gate 		}
19450Sstevel@tonic-gate 	}
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
19480Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not get flags",
19490Sstevel@tonic-gate 		    lifr.lifr_name);
19500Sstevel@tonic-gate 		goto bad;
19510Sstevel@tonic-gate 	}
19520Sstevel@tonic-gate 	lifr.lifr_flags |= IFF_UP;
19530Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
19540Sstevel@tonic-gate 		int save_errno = errno;
19550Sstevel@tonic-gate 		char *zone_using;
19560Sstevel@tonic-gate 
19570Sstevel@tonic-gate 		/*
19580Sstevel@tonic-gate 		 * If we failed with something other than EADDRNOTAVAIL,
19590Sstevel@tonic-gate 		 * then skip to the end.  Otherwise, look up our address,
19600Sstevel@tonic-gate 		 * then call a function to determine which zone is already
19610Sstevel@tonic-gate 		 * using that address.
19620Sstevel@tonic-gate 		 */
19630Sstevel@tonic-gate 		if (errno != EADDRNOTAVAIL) {
19640Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19650Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19660Sstevel@tonic-gate 			goto bad;
19670Sstevel@tonic-gate 		}
19680Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19690Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19700Sstevel@tonic-gate 			    lifr.lifr_name);
19710Sstevel@tonic-gate 			goto bad;
19720Sstevel@tonic-gate 		}
19730Sstevel@tonic-gate 		zone_using = who_is_using(zlogp, &lifr);
19740Sstevel@tonic-gate 		errno = save_errno;
19750Sstevel@tonic-gate 		if (zone_using == NULL)
19760Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19770Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19780Sstevel@tonic-gate 		else
19790Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not bring interface "
19800Sstevel@tonic-gate 			    "up: address in use by zone '%s'", lifr.lifr_name,
19810Sstevel@tonic-gate 			    zone_using);
19820Sstevel@tonic-gate 		goto bad;
19830Sstevel@tonic-gate 	}
19840Sstevel@tonic-gate 	if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET &&
19850Sstevel@tonic-gate 	    mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) ||
19860Sstevel@tonic-gate 	    (af == AF_INET6 &&
19870Sstevel@tonic-gate 	    mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) {
19880Sstevel@tonic-gate 		rs = socket(PF_ROUTE, SOCK_RAW, 0);
19890Sstevel@tonic-gate 		if (rs < 0) {
19900Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not create "
19910Sstevel@tonic-gate 			    "routing socket", lifr.lifr_name);
19920Sstevel@tonic-gate 			goto bad;
19930Sstevel@tonic-gate 		}
19940Sstevel@tonic-gate 		(void) shutdown(rs, 0);
19950Sstevel@tonic-gate 		(void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t));
19960Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_msglen =  sizeof (struct rt_msghdr) +
19970Sstevel@tonic-gate 		    3 * (af == AF_INET ? sizeof (struct sockaddr_in) :
19980Sstevel@tonic-gate 		    sizeof (struct sockaddr_in6));
19990Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION;
20000Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_type = RTM_ADD;
20010Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_flags = RTF_UP;
20020Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_addrs =
20030Sstevel@tonic-gate 		    RTA_DST | RTA_GATEWAY | RTA_NETMASK;
20040Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno;
20050Sstevel@tonic-gate 		if (af == AF_INET) {
20060Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_family = AF_INET;
20070Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_addr.s_addr =
20080Sstevel@tonic-gate 			    htonl(INADDR_UNSPEC_GROUP);
20090Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_family = AF_INET;
20100Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_addr = in4;
20110Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_family = AF_INET;
20120Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_addr.s_addr =
20130Sstevel@tonic-gate 			    htonl(IN_CLASSD_NET);
20140Sstevel@tonic-gate 		} else {
20150Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_family = AF_INET6;
20160Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU;
20170Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_family = AF_INET6;
20180Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_addr = in6;
20190Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_family = AF_INET6;
20200Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU;
20210Sstevel@tonic-gate 		}
20220Sstevel@tonic-gate 		rlen = write(rs, (char *)&mcast_rtmsg,
20230Sstevel@tonic-gate 		    mcast_rtmsg.m_rtm.rtm_msglen);
20240Sstevel@tonic-gate 		if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) {
20250Sstevel@tonic-gate 			if (rlen < 0) {
20260Sstevel@tonic-gate 				zerror(zlogp, B_TRUE, "%s: could not set "
20270Sstevel@tonic-gate 				    "default interface for multicast",
20280Sstevel@tonic-gate 				    lifr.lifr_name);
20290Sstevel@tonic-gate 			} else {
20300Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s: write to routing "
20310Sstevel@tonic-gate 				    "socket returned %d", lifr.lifr_name, rlen);
20320Sstevel@tonic-gate 			}
20330Sstevel@tonic-gate 			(void) close(rs);
20340Sstevel@tonic-gate 			goto bad;
20350Sstevel@tonic-gate 		}
20360Sstevel@tonic-gate 		if (af == AF_INET) {
20370Sstevel@tonic-gate 			*mcast_rt_v4_setp = B_TRUE;
20380Sstevel@tonic-gate 		} else {
20390Sstevel@tonic-gate 			*mcast_rt_v6_setp = B_TRUE;
20400Sstevel@tonic-gate 		}
20410Sstevel@tonic-gate 		(void) close(rs);
20420Sstevel@tonic-gate 	}
20430Sstevel@tonic-gate 
20440Sstevel@tonic-gate 	if (!got_netmask) {
20450Sstevel@tonic-gate 		/*
20460Sstevel@tonic-gate 		 * A common, but often non-fatal problem, is that the system
20470Sstevel@tonic-gate 		 * cannot find the netmask for an interface address. This is
20480Sstevel@tonic-gate 		 * often caused by it being only in /etc/inet/netmasks, but
20490Sstevel@tonic-gate 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
20500Sstevel@tonic-gate 		 * in that. This doesn't show up at boot because the netmask
20510Sstevel@tonic-gate 		 * is obtained from /etc/inet/netmasks when no network
20520Sstevel@tonic-gate 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
20530Sstevel@tonic-gate 		 * available. We warn the user here that something like this
20540Sstevel@tonic-gate 		 * has happened and we're just running with a default and
20550Sstevel@tonic-gate 		 * possible incorrect netmask.
20560Sstevel@tonic-gate 		 */
20570Sstevel@tonic-gate 		char buffer[INET6_ADDRSTRLEN];
20580Sstevel@tonic-gate 		void  *addr;
20590Sstevel@tonic-gate 
20600Sstevel@tonic-gate 		if (af == AF_INET)
20610Sstevel@tonic-gate 			addr = &((struct sockaddr_in *)
20620Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin_addr;
20630Sstevel@tonic-gate 		else
20640Sstevel@tonic-gate 			addr = &((struct sockaddr_in6 *)
20650Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin6_addr;
20660Sstevel@tonic-gate 
20670Sstevel@tonic-gate 		/* Find out what netmask interface is going to be using */
20680Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
20690Sstevel@tonic-gate 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
20700Sstevel@tonic-gate 			goto bad;
20710Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
20720Sstevel@tonic-gate 		    "WARNING: %s: no matching subnet found in netmasks(4) for "
20730Sstevel@tonic-gate 		    "%s; using default of %s.",
20740Sstevel@tonic-gate 		    lifr.lifr_name, addrstr4, buffer);
20750Sstevel@tonic-gate 	}
20760Sstevel@tonic-gate 
20770Sstevel@tonic-gate 	(void) close(s);
20780Sstevel@tonic-gate 	return (Z_OK);
20790Sstevel@tonic-gate bad:
20800Sstevel@tonic-gate 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
20810Sstevel@tonic-gate 	(void) close(s);
20820Sstevel@tonic-gate 	return (-1);
20830Sstevel@tonic-gate }
20840Sstevel@tonic-gate 
20850Sstevel@tonic-gate /*
20860Sstevel@tonic-gate  * Sets up network interfaces based on information from the zone configuration.
20870Sstevel@tonic-gate  * An IPv4 loopback interface is set up "for free", modeling the global system.
20880Sstevel@tonic-gate  * If any of the configuration interfaces were IPv6, then an IPv6 loopback
20890Sstevel@tonic-gate  * address is set up as well.
20900Sstevel@tonic-gate  *
20910Sstevel@tonic-gate  * If anything goes wrong, we log a general error message, attempt to tear down
20920Sstevel@tonic-gate  * whatever we set up, and return an error.
20930Sstevel@tonic-gate  */
20940Sstevel@tonic-gate static int
20950Sstevel@tonic-gate configure_network_interfaces(zlog_t *zlogp)
20960Sstevel@tonic-gate {
20970Sstevel@tonic-gate 	zone_dochandle_t handle;
20980Sstevel@tonic-gate 	struct zone_nwiftab nwiftab, loopback_iftab;
20990Sstevel@tonic-gate 	boolean_t saw_v6 = B_FALSE;
21000Sstevel@tonic-gate 	boolean_t mcast_rt_v4_set = B_FALSE;
21010Sstevel@tonic-gate 	boolean_t mcast_rt_v6_set = B_FALSE;
21020Sstevel@tonic-gate 	zoneid_t zoneid;
21030Sstevel@tonic-gate 
21040Sstevel@tonic-gate 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
21050Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to get zoneid");
21060Sstevel@tonic-gate 		return (-1);
21070Sstevel@tonic-gate 	}
21080Sstevel@tonic-gate 
21090Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
21100Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
21110Sstevel@tonic-gate 		return (-1);
21120Sstevel@tonic-gate 	}
21130Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
21140Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
21150Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
21160Sstevel@tonic-gate 		return (-1);
21170Sstevel@tonic-gate 	}
21180Sstevel@tonic-gate 	if (zonecfg_setnwifent(handle) == Z_OK) {
21190Sstevel@tonic-gate 		for (;;) {
21200Sstevel@tonic-gate 			struct in6_addr in6;
21210Sstevel@tonic-gate 
21220Sstevel@tonic-gate 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
21230Sstevel@tonic-gate 				break;
21240Sstevel@tonic-gate 			if (configure_one_interface(zlogp, zoneid,
21250Sstevel@tonic-gate 			    &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) !=
21260Sstevel@tonic-gate 			    Z_OK) {
21270Sstevel@tonic-gate 				(void) zonecfg_endnwifent(handle);
21280Sstevel@tonic-gate 				zonecfg_fini_handle(handle);
21290Sstevel@tonic-gate 				return (-1);
21300Sstevel@tonic-gate 			}
21310Sstevel@tonic-gate 			if (inet_pton(AF_INET6, nwiftab.zone_nwif_address,
21320Sstevel@tonic-gate 			    &in6) == 1)
21330Sstevel@tonic-gate 				saw_v6 = B_TRUE;
21340Sstevel@tonic-gate 		}
21350Sstevel@tonic-gate 		(void) zonecfg_endnwifent(handle);
21360Sstevel@tonic-gate 	}
21370Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
21380Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
21390Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_physical));
21400Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
21410Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_address));
21420Sstevel@tonic-gate 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL)
21430Sstevel@tonic-gate 	    != Z_OK) {
21440Sstevel@tonic-gate 		return (-1);
21450Sstevel@tonic-gate 	}
21460Sstevel@tonic-gate 	if (saw_v6) {
21470Sstevel@tonic-gate 		(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
21480Sstevel@tonic-gate 		    sizeof (loopback_iftab.zone_nwif_address));
21490Sstevel@tonic-gate 		if (configure_one_interface(zlogp, zoneid,
21500Sstevel@tonic-gate 		    &loopback_iftab, NULL, NULL) != Z_OK) {
21510Sstevel@tonic-gate 			return (-1);
21520Sstevel@tonic-gate 		}
21530Sstevel@tonic-gate 	}
21540Sstevel@tonic-gate 	return (0);
21550Sstevel@tonic-gate }
21560Sstevel@tonic-gate 
21570Sstevel@tonic-gate static int
21580Sstevel@tonic-gate tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
21590Sstevel@tonic-gate     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
21600Sstevel@tonic-gate {
21610Sstevel@tonic-gate 	int fd;
21620Sstevel@tonic-gate 	struct strioctl ioc;
21630Sstevel@tonic-gate 	tcp_ioc_abort_conn_t conn;
21640Sstevel@tonic-gate 	int error;
21650Sstevel@tonic-gate 
21660Sstevel@tonic-gate 	conn.ac_local = *local;
21670Sstevel@tonic-gate 	conn.ac_remote = *remote;
21680Sstevel@tonic-gate 	conn.ac_start = TCPS_SYN_SENT;
21690Sstevel@tonic-gate 	conn.ac_end = TCPS_TIME_WAIT;
21700Sstevel@tonic-gate 	conn.ac_zoneid = zoneid;
21710Sstevel@tonic-gate 
21720Sstevel@tonic-gate 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
21730Sstevel@tonic-gate 	ioc.ic_timout = -1; /* infinite timeout */
21740Sstevel@tonic-gate 	ioc.ic_len = sizeof (conn);
21750Sstevel@tonic-gate 	ioc.ic_dp = (char *)&conn;
21760Sstevel@tonic-gate 
21770Sstevel@tonic-gate 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
21780Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
21790Sstevel@tonic-gate 		return (-1);
21800Sstevel@tonic-gate 	}
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 	error = ioctl(fd, I_STR, &ioc);
21830Sstevel@tonic-gate 	(void) close(fd);
21840Sstevel@tonic-gate 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
21850Sstevel@tonic-gate 		return (0);
21860Sstevel@tonic-gate 	return (-1);
21870Sstevel@tonic-gate }
21880Sstevel@tonic-gate 
21890Sstevel@tonic-gate static int
21900Sstevel@tonic-gate tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
21910Sstevel@tonic-gate {
21920Sstevel@tonic-gate 	struct sockaddr_storage l, r;
21930Sstevel@tonic-gate 	struct sockaddr_in *local, *remote;
21940Sstevel@tonic-gate 	struct sockaddr_in6 *local6, *remote6;
21950Sstevel@tonic-gate 	int error;
21960Sstevel@tonic-gate 
21970Sstevel@tonic-gate 	/*
21980Sstevel@tonic-gate 	 * Abort IPv4 connections.
21990Sstevel@tonic-gate 	 */
22000Sstevel@tonic-gate 	bzero(&l, sizeof (*local));
22010Sstevel@tonic-gate 	local = (struct sockaddr_in *)&l;
22020Sstevel@tonic-gate 	local->sin_family = AF_INET;
22030Sstevel@tonic-gate 	local->sin_addr.s_addr = INADDR_ANY;
22040Sstevel@tonic-gate 	local->sin_port = 0;
22050Sstevel@tonic-gate 
22060Sstevel@tonic-gate 	bzero(&r, sizeof (*remote));
22070Sstevel@tonic-gate 	remote = (struct sockaddr_in *)&r;
22080Sstevel@tonic-gate 	remote->sin_family = AF_INET;
22090Sstevel@tonic-gate 	remote->sin_addr.s_addr = INADDR_ANY;
22100Sstevel@tonic-gate 	remote->sin_port = 0;
22110Sstevel@tonic-gate 
22120Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22130Sstevel@tonic-gate 		return (error);
22140Sstevel@tonic-gate 
22150Sstevel@tonic-gate 	/*
22160Sstevel@tonic-gate 	 * Abort IPv6 connections.
22170Sstevel@tonic-gate 	 */
22180Sstevel@tonic-gate 	bzero(&l, sizeof (*local6));
22190Sstevel@tonic-gate 	local6 = (struct sockaddr_in6 *)&l;
22200Sstevel@tonic-gate 	local6->sin6_family = AF_INET6;
22210Sstevel@tonic-gate 	local6->sin6_port = 0;
22220Sstevel@tonic-gate 	local6->sin6_addr = in6addr_any;
22230Sstevel@tonic-gate 
22240Sstevel@tonic-gate 	bzero(&r, sizeof (*remote6));
22250Sstevel@tonic-gate 	remote6 = (struct sockaddr_in6 *)&r;
22260Sstevel@tonic-gate 	remote6->sin6_family = AF_INET6;
22270Sstevel@tonic-gate 	remote6->sin6_port = 0;
22280Sstevel@tonic-gate 	remote6->sin6_addr = in6addr_any;
22290Sstevel@tonic-gate 
22300Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22310Sstevel@tonic-gate 		return (error);
22320Sstevel@tonic-gate 	return (0);
22330Sstevel@tonic-gate }
22340Sstevel@tonic-gate 
22350Sstevel@tonic-gate static int
22360Sstevel@tonic-gate devfsadm_call(zlog_t *zlogp, const char *arg)
22370Sstevel@tonic-gate {
22380Sstevel@tonic-gate 	char *argv[4];
22390Sstevel@tonic-gate 	int status;
22400Sstevel@tonic-gate 
22410Sstevel@tonic-gate 	argv[0] = DEVFSADM;
22420Sstevel@tonic-gate 	argv[1] = (char *)arg;
22430Sstevel@tonic-gate 	argv[2] = zone_name;
22440Sstevel@tonic-gate 	argv[3] = NULL;
22450Sstevel@tonic-gate 	status = forkexec(zlogp, DEVFSADM_PATH, argv);
22460Sstevel@tonic-gate 	if (status == 0 || status == -1)
22470Sstevel@tonic-gate 		return (status);
22480Sstevel@tonic-gate 	zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d",
2249*766Scarlsonj 		    DEVFSADM, DEVFSADM_PATH, arg, zone_name, status);
22500Sstevel@tonic-gate 	return (-1);
22510Sstevel@tonic-gate }
22520Sstevel@tonic-gate 
22530Sstevel@tonic-gate static int
22540Sstevel@tonic-gate devfsadm_register(zlog_t *zlogp)
22550Sstevel@tonic-gate {
22560Sstevel@tonic-gate 	/*
22570Sstevel@tonic-gate 	 * Ready the zone's devices.
22580Sstevel@tonic-gate 	 */
22590Sstevel@tonic-gate 	return (devfsadm_call(zlogp, "-z"));
22600Sstevel@tonic-gate }
22610Sstevel@tonic-gate 
22620Sstevel@tonic-gate static int
22630Sstevel@tonic-gate devfsadm_unregister(zlog_t *zlogp)
22640Sstevel@tonic-gate {
22650Sstevel@tonic-gate 	return (devfsadm_call(zlogp, "-Z"));
22660Sstevel@tonic-gate }
22670Sstevel@tonic-gate 
22680Sstevel@tonic-gate static int
22690Sstevel@tonic-gate get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
22700Sstevel@tonic-gate {
22710Sstevel@tonic-gate 	nvlist_t *nvl = NULL;
22720Sstevel@tonic-gate 	char *nvl_packed = NULL;
22730Sstevel@tonic-gate 	size_t nvl_size = 0;
22740Sstevel@tonic-gate 	nvlist_t **nvlv = NULL;
22750Sstevel@tonic-gate 	int rctlcount = 0;
22760Sstevel@tonic-gate 	int error = -1;
22770Sstevel@tonic-gate 	zone_dochandle_t handle;
22780Sstevel@tonic-gate 	struct zone_rctltab rctltab;
22790Sstevel@tonic-gate 	rctlblk_t *rctlblk = NULL;
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate 	*bufp = NULL;
22820Sstevel@tonic-gate 	*bufsizep = 0;
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
22850Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
22860Sstevel@tonic-gate 		return (-1);
22870Sstevel@tonic-gate 	}
22880Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
22890Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
22900Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
22910Sstevel@tonic-gate 		return (-1);
22920Sstevel@tonic-gate 	}
22930Sstevel@tonic-gate 
22940Sstevel@tonic-gate 	rctltab.zone_rctl_valptr = NULL;
22950Sstevel@tonic-gate 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
22960Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
22970Sstevel@tonic-gate 		goto out;
22980Sstevel@tonic-gate 	}
22990Sstevel@tonic-gate 
23000Sstevel@tonic-gate 	if (zonecfg_setrctlent(handle) != Z_OK) {
23010Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
23020Sstevel@tonic-gate 		goto out;
23030Sstevel@tonic-gate 	}
23040Sstevel@tonic-gate 
23050Sstevel@tonic-gate 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
23060Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
23070Sstevel@tonic-gate 		goto out;
23080Sstevel@tonic-gate 	}
23090Sstevel@tonic-gate 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
23100Sstevel@tonic-gate 		struct zone_rctlvaltab *rctlval;
23110Sstevel@tonic-gate 		uint_t i, count;
23120Sstevel@tonic-gate 		const char *name = rctltab.zone_rctl_name;
23130Sstevel@tonic-gate 
23140Sstevel@tonic-gate 		/* zoneadm should have already warned about unknown rctls. */
23150Sstevel@tonic-gate 		if (!zonecfg_is_rctl(name)) {
23160Sstevel@tonic-gate 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
23170Sstevel@tonic-gate 			rctltab.zone_rctl_valptr = NULL;
23180Sstevel@tonic-gate 			continue;
23190Sstevel@tonic-gate 		}
23200Sstevel@tonic-gate 		count = 0;
23210Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23220Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next) {
23230Sstevel@tonic-gate 			count++;
23240Sstevel@tonic-gate 		}
23250Sstevel@tonic-gate 		if (count == 0) {	/* ignore */
23260Sstevel@tonic-gate 			continue;	/* Nothing to free */
23270Sstevel@tonic-gate 		}
23280Sstevel@tonic-gate 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
23290Sstevel@tonic-gate 			goto out;
23300Sstevel@tonic-gate 		i = 0;
23310Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23320Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next, i++) {
23330Sstevel@tonic-gate 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
23340Sstevel@tonic-gate 				zerror(zlogp, B_TRUE, "%s failed",
23350Sstevel@tonic-gate 				    "nvlist_alloc");
23360Sstevel@tonic-gate 				goto out;
23370Sstevel@tonic-gate 			}
23380Sstevel@tonic-gate 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
23390Sstevel@tonic-gate 			    != Z_OK) {
23400Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "invalid rctl value: "
23410Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s)",
23420Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
23430Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
23440Sstevel@tonic-gate 				    rctlval->zone_rctlval_action);
23450Sstevel@tonic-gate 				goto out;
23460Sstevel@tonic-gate 			}
23470Sstevel@tonic-gate 			if (!zonecfg_valid_rctl(name, rctlblk)) {
23480Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
23490Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s) is not a "
23500Sstevel@tonic-gate 				    "valid value for rctl '%s'",
23510Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
23520Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
23530Sstevel@tonic-gate 				    rctlval->zone_rctlval_action,
23540Sstevel@tonic-gate 				    name);
23550Sstevel@tonic-gate 				goto out;
23560Sstevel@tonic-gate 			}
23570Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "privilege",
23580Sstevel@tonic-gate 				    rctlblk_get_privilege(rctlblk)) != 0) {
23590Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
23600Sstevel@tonic-gate 				    "nvlist_add_uint64");
23610Sstevel@tonic-gate 				goto out;
23620Sstevel@tonic-gate 			}
23630Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "limit",
23640Sstevel@tonic-gate 				    rctlblk_get_value(rctlblk)) != 0) {
23650Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
23660Sstevel@tonic-gate 				    "nvlist_add_uint64");
23670Sstevel@tonic-gate 				goto out;
23680Sstevel@tonic-gate 			}
23690Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "action",
23700Sstevel@tonic-gate 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
23710Sstevel@tonic-gate 			    != 0) {
23720Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
23730Sstevel@tonic-gate 				    "nvlist_add_uint64");
23740Sstevel@tonic-gate 				goto out;
23750Sstevel@tonic-gate 			}
23760Sstevel@tonic-gate 		}
23770Sstevel@tonic-gate 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
23780Sstevel@tonic-gate 		rctltab.zone_rctl_valptr = NULL;
23790Sstevel@tonic-gate 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
23800Sstevel@tonic-gate 		    != 0) {
23810Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s failed",
23820Sstevel@tonic-gate 			    "nvlist_add_nvlist_array");
23830Sstevel@tonic-gate 			goto out;
23840Sstevel@tonic-gate 		}
23850Sstevel@tonic-gate 		for (i = 0; i < count; i++)
23860Sstevel@tonic-gate 			nvlist_free(nvlv[i]);
23870Sstevel@tonic-gate 		free(nvlv);
23880Sstevel@tonic-gate 		nvlv = NULL;
23890Sstevel@tonic-gate 		rctlcount++;
23900Sstevel@tonic-gate 	}
23910Sstevel@tonic-gate 	(void) zonecfg_endrctlent(handle);
23920Sstevel@tonic-gate 
23930Sstevel@tonic-gate 	if (rctlcount == 0) {
23940Sstevel@tonic-gate 		error = 0;
23950Sstevel@tonic-gate 		goto out;
23960Sstevel@tonic-gate 	}
23970Sstevel@tonic-gate 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
23980Sstevel@tonic-gate 	    != 0) {
23990Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
24000Sstevel@tonic-gate 		goto out;
24010Sstevel@tonic-gate 	}
24020Sstevel@tonic-gate 
24030Sstevel@tonic-gate 	error = 0;
24040Sstevel@tonic-gate 	*bufp = nvl_packed;
24050Sstevel@tonic-gate 	*bufsizep = nvl_size;
24060Sstevel@tonic-gate 
24070Sstevel@tonic-gate out:
24080Sstevel@tonic-gate 	free(rctlblk);
24090Sstevel@tonic-gate 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24100Sstevel@tonic-gate 	if (error && nvl_packed != NULL)
24110Sstevel@tonic-gate 		free(nvl_packed);
24120Sstevel@tonic-gate 	if (nvl != NULL)
24130Sstevel@tonic-gate 		nvlist_free(nvl);
24140Sstevel@tonic-gate 	if (nvlv != NULL)
24150Sstevel@tonic-gate 		free(nvlv);
24160Sstevel@tonic-gate 	if (handle != NULL)
24170Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
24180Sstevel@tonic-gate 	return (error);
24190Sstevel@tonic-gate }
24200Sstevel@tonic-gate 
24210Sstevel@tonic-gate static int
24220Sstevel@tonic-gate get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
24230Sstevel@tonic-gate {
24240Sstevel@tonic-gate 	zone_dochandle_t handle;
24250Sstevel@tonic-gate 	int error;
24260Sstevel@tonic-gate 
24270Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
24280Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
24290Sstevel@tonic-gate 		return (-1);
24300Sstevel@tonic-gate 	}
24310Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
24320Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
24330Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
24340Sstevel@tonic-gate 		return (-1);
24350Sstevel@tonic-gate 	}
24360Sstevel@tonic-gate 	error = zonecfg_get_pool(handle, poolbuf, bufsz);
24370Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
24380Sstevel@tonic-gate 	return (error);
24390Sstevel@tonic-gate }
24400Sstevel@tonic-gate 
24410Sstevel@tonic-gate static int
24420Sstevel@tonic-gate bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
24430Sstevel@tonic-gate {
24440Sstevel@tonic-gate 	pool_conf_t *poolconf;
24450Sstevel@tonic-gate 	pool_t *pool;
24460Sstevel@tonic-gate 	char poolname[MAXPATHLEN];
24470Sstevel@tonic-gate 	int status;
24480Sstevel@tonic-gate 	int error;
24490Sstevel@tonic-gate 
24500Sstevel@tonic-gate 	/*
24510Sstevel@tonic-gate 	 * Find the pool mentioned in the zone configuration, and bind to it.
24520Sstevel@tonic-gate 	 */
24530Sstevel@tonic-gate 	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
24540Sstevel@tonic-gate 	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
24550Sstevel@tonic-gate 		/*
24560Sstevel@tonic-gate 		 * The property is not set on the zone, so the pool
24570Sstevel@tonic-gate 		 * should be bound to the default pool.  But that's
24580Sstevel@tonic-gate 		 * already done by the kernel, so we can just return.
24590Sstevel@tonic-gate 		 */
24600Sstevel@tonic-gate 		return (0);
24610Sstevel@tonic-gate 	}
24620Sstevel@tonic-gate 	if (error != Z_OK) {
24630Sstevel@tonic-gate 		/*
24640Sstevel@tonic-gate 		 * Not an error, even though it shouldn't be happening.
24650Sstevel@tonic-gate 		 */
24660Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
24670Sstevel@tonic-gate 		    "WARNING: unable to retrieve default pool.");
24680Sstevel@tonic-gate 		return (0);
24690Sstevel@tonic-gate 	}
24700Sstevel@tonic-gate 	/*
24710Sstevel@tonic-gate 	 * Don't do anything if pools aren't enabled.
24720Sstevel@tonic-gate 	 */
24730Sstevel@tonic-gate 	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
24740Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
24750Sstevel@tonic-gate 		    "zone will not be bound to pool '%s'.", poolname);
24760Sstevel@tonic-gate 		return (0);
24770Sstevel@tonic-gate 	}
24780Sstevel@tonic-gate 	/*
24790Sstevel@tonic-gate 	 * Try to provide a sane error message if the requested pool doesn't
24800Sstevel@tonic-gate 	 * exist.
24810Sstevel@tonic-gate 	 */
24820Sstevel@tonic-gate 	if ((poolconf = pool_conf_alloc()) == NULL) {
24830Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
24840Sstevel@tonic-gate 		return (-1);
24850Sstevel@tonic-gate 	}
24860Sstevel@tonic-gate 	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
24870Sstevel@tonic-gate 	    PO_SUCCESS) {
24880Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
24890Sstevel@tonic-gate 		pool_conf_free(poolconf);
24900Sstevel@tonic-gate 		return (-1);
24910Sstevel@tonic-gate 	}
24920Sstevel@tonic-gate 	pool = pool_get_pool(poolconf, poolname);
24930Sstevel@tonic-gate 	(void) pool_conf_close(poolconf);
24940Sstevel@tonic-gate 	pool_conf_free(poolconf);
24950Sstevel@tonic-gate 	if (pool == NULL) {
24960Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
24970Sstevel@tonic-gate 		    "using default pool.", poolname);
24980Sstevel@tonic-gate 		return (0);
24990Sstevel@tonic-gate 	}
25000Sstevel@tonic-gate 	/*
25010Sstevel@tonic-gate 	 * Bind the zone to the pool.
25020Sstevel@tonic-gate 	 */
25030Sstevel@tonic-gate 	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
25040Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
25050Sstevel@tonic-gate 		    "using default pool.", poolname);
25060Sstevel@tonic-gate 	}
25070Sstevel@tonic-gate 	return (0);
25080Sstevel@tonic-gate }
25090Sstevel@tonic-gate 
25100Sstevel@tonic-gate int
25110Sstevel@tonic-gate prtmount(const char *fs, void *x) {
25120Sstevel@tonic-gate 	zerror((zlog_t *)x, B_FALSE, "  %s", fs);
25130Sstevel@tonic-gate 	return (0);
25140Sstevel@tonic-gate }
25150Sstevel@tonic-gate 
2516*766Scarlsonj /*
2517*766Scarlsonj  * Look for zones running on the main system that are using this root (or any
2518*766Scarlsonj  * subdirectory of it).  Return B_TRUE and print an error if a conflicting zone
2519*766Scarlsonj  * is found or if we can't tell.
2520*766Scarlsonj  */
2521*766Scarlsonj static boolean_t
2522*766Scarlsonj duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
25230Sstevel@tonic-gate {
2524*766Scarlsonj 	zoneid_t *zids = NULL;
2525*766Scarlsonj 	uint_t nzids = 0;
2526*766Scarlsonj 	boolean_t retv;
2527*766Scarlsonj 	int rlen, zlen;
2528*766Scarlsonj 	char zroot[MAXPATHLEN];
2529*766Scarlsonj 	char zonename[ZONENAME_MAX];
2530*766Scarlsonj 
2531*766Scarlsonj 	for (;;) {
2532*766Scarlsonj 		nzids += 10;
2533*766Scarlsonj 		zids = malloc(nzids * sizeof (*zids));
2534*766Scarlsonj 		if (zids == NULL) {
2535*766Scarlsonj 			zerror(zlogp, B_TRUE, "unable to allocate memory");
2536*766Scarlsonj 			return (B_TRUE);
2537*766Scarlsonj 		}
2538*766Scarlsonj 		if (zone_list(zids, &nzids) == 0)
2539*766Scarlsonj 			break;
2540*766Scarlsonj 		free(zids);
2541*766Scarlsonj 	}
2542*766Scarlsonj 	retv = B_FALSE;
2543*766Scarlsonj 	rlen = strlen(rootpath);
2544*766Scarlsonj 	while (nzids > 0) {
2545*766Scarlsonj 		/*
2546*766Scarlsonj 		 * Ignore errors; they just mean that the zone has disappeared
2547*766Scarlsonj 		 * while we were busy.
2548*766Scarlsonj 		 */
2549*766Scarlsonj 		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
2550*766Scarlsonj 		    sizeof (zroot)) == -1)
2551*766Scarlsonj 			continue;
2552*766Scarlsonj 		zlen = strlen(zroot);
2553*766Scarlsonj 		if (zlen > rlen)
2554*766Scarlsonj 			zlen = rlen;
2555*766Scarlsonj 		if (strncmp(rootpath, zroot, zlen) == 0 &&
2556*766Scarlsonj 		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
2557*766Scarlsonj 		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
2558*766Scarlsonj 			if (getzonenamebyid(zids[nzids], zonename,
2559*766Scarlsonj 			    sizeof (zonename)) == -1)
2560*766Scarlsonj 				(void) snprintf(zonename, sizeof (zonename),
2561*766Scarlsonj 				    "id %d", (int)zids[nzids]);
2562*766Scarlsonj 			zerror(zlogp, B_FALSE,
2563*766Scarlsonj 			    "zone root %s already in use by zone %s",
2564*766Scarlsonj 			    rootpath, zonename);
2565*766Scarlsonj 			retv = B_TRUE;
2566*766Scarlsonj 			break;
2567*766Scarlsonj 		}
2568*766Scarlsonj 	}
2569*766Scarlsonj 	free(zids);
2570*766Scarlsonj 	return (retv);
2571*766Scarlsonj }
2572*766Scarlsonj 
2573*766Scarlsonj /*
2574*766Scarlsonj  * Search for loopback mounts that use this same source node (same device and
2575*766Scarlsonj  * inode).  Return B_TRUE if there is one or if we can't tell.
2576*766Scarlsonj  */
2577*766Scarlsonj static boolean_t
2578*766Scarlsonj duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
2579*766Scarlsonj {
2580*766Scarlsonj 	struct stat64 rst, zst;
2581*766Scarlsonj 	struct mnttab *mnp;
2582*766Scarlsonj 
2583*766Scarlsonj 	if (stat64(rootpath, &rst) == -1) {
2584*766Scarlsonj 		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
2585*766Scarlsonj 		return (B_TRUE);
2586*766Scarlsonj 	}
2587*766Scarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
2588*766Scarlsonj 		return (B_TRUE);
2589*766Scarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
2590*766Scarlsonj 		if (mnp->mnt_fstype == NULL ||
2591*766Scarlsonj 		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
2592*766Scarlsonj 			continue;
2593*766Scarlsonj 		/* We're looking at a loopback mount.  Stat it. */
2594*766Scarlsonj 		if (mnp->mnt_special != NULL &&
2595*766Scarlsonj 		    stat64(mnp->mnt_special, &zst) != -1 &&
2596*766Scarlsonj 		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
2597*766Scarlsonj 			zerror(zlogp, B_FALSE,
2598*766Scarlsonj 			    "zone root %s is reachable through %s",
2599*766Scarlsonj 			    rootpath, mnp->mnt_mountp);
2600*766Scarlsonj 			return (B_TRUE);
2601*766Scarlsonj 		}
2602*766Scarlsonj 	}
2603*766Scarlsonj 	return (B_FALSE);
2604*766Scarlsonj }
2605*766Scarlsonj 
2606*766Scarlsonj zoneid_t
2607*766Scarlsonj vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
2608*766Scarlsonj {
2609*766Scarlsonj 	zoneid_t rval = -1;
26100Sstevel@tonic-gate 	priv_set_t *privs;
26110Sstevel@tonic-gate 	char rootpath[MAXPATHLEN];
26120Sstevel@tonic-gate 	char *rctlbuf = NULL;
2613*766Scarlsonj 	size_t rctlbufsz = 0;
2614*766Scarlsonj 	zoneid_t zoneid = -1;
26150Sstevel@tonic-gate 	int xerr;
2616*766Scarlsonj 	char *kzone;
2617*766Scarlsonj 	FILE *fp = NULL;
26180Sstevel@tonic-gate 
26190Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
26200Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
26210Sstevel@tonic-gate 		return (-1);
26220Sstevel@tonic-gate 	}
2623*766Scarlsonj 	if (zonecfg_in_alt_root())
2624*766Scarlsonj 		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
26250Sstevel@tonic-gate 
26260Sstevel@tonic-gate 	if ((privs = priv_allocset()) == NULL) {
26270Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
26280Sstevel@tonic-gate 		return (-1);
26290Sstevel@tonic-gate 	}
26300Sstevel@tonic-gate 	priv_emptyset(privs);
26310Sstevel@tonic-gate 	if (zonecfg_get_privset(privs) != Z_OK) {
26320Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "Failed to initialize privileges");
26330Sstevel@tonic-gate 		goto error;
26340Sstevel@tonic-gate 	}
2635*766Scarlsonj 	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
26360Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
26370Sstevel@tonic-gate 		goto error;
26380Sstevel@tonic-gate 	}
26390Sstevel@tonic-gate 
2640*766Scarlsonj 	kzone = zone_name;
2641*766Scarlsonj 
2642*766Scarlsonj 	/*
2643*766Scarlsonj 	 * We must do this scan twice.  First, we look for zones running on the
2644*766Scarlsonj 	 * main system that are using this root (or any subdirectory of it).
2645*766Scarlsonj 	 * Next, we reduce to the shortest path and search for loopback mounts
2646*766Scarlsonj 	 * that use this same source node (same device and inode).
2647*766Scarlsonj 	 */
2648*766Scarlsonj 	if (duplicate_zone_root(zlogp, rootpath))
2649*766Scarlsonj 		goto error;
2650*766Scarlsonj 	if (duplicate_reachable_path(zlogp, rootpath))
2651*766Scarlsonj 		goto error;
2652*766Scarlsonj 
2653*766Scarlsonj 	if (mount_cmd) {
2654*766Scarlsonj 		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
2655*766Scarlsonj 
2656*766Scarlsonj 		/*
2657*766Scarlsonj 		 * Forge up a special root for this zone.  When a zone is
2658*766Scarlsonj 		 * mounted, we can't let the zone have its own root because the
2659*766Scarlsonj 		 * tools that will be used in this "scratch zone" need access
2660*766Scarlsonj 		 * to both the zone's resources and the running machine's
2661*766Scarlsonj 		 * executables.
2662*766Scarlsonj 		 *
2663*766Scarlsonj 		 * Note that the mkdir here also catches read-only filesystems.
2664*766Scarlsonj 		 */
2665*766Scarlsonj 		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
2666*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
2667*766Scarlsonj 			goto error;
2668*766Scarlsonj 		}
2669*766Scarlsonj 		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
2670*766Scarlsonj 			goto error;
2671*766Scarlsonj 	}
2672*766Scarlsonj 
2673*766Scarlsonj 	if (zonecfg_in_alt_root()) {
2674*766Scarlsonj 		/*
2675*766Scarlsonj 		 * If we are mounting up a zone in an alternate root partition,
2676*766Scarlsonj 		 * then we have some additional work to do before starting the
2677*766Scarlsonj 		 * zone.  First, resolve the root path down so that we're not
2678*766Scarlsonj 		 * fooled by duplicates.  Then forge up an internal name for
2679*766Scarlsonj 		 * the zone.
2680*766Scarlsonj 		 */
2681*766Scarlsonj 		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
2682*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
2683*766Scarlsonj 			goto error;
2684*766Scarlsonj 		}
2685*766Scarlsonj 		if (zonecfg_lock_scratch(fp) != 0) {
2686*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
2687*766Scarlsonj 			goto error;
2688*766Scarlsonj 		}
2689*766Scarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
2690*766Scarlsonj 		    NULL, 0) == 0) {
2691*766Scarlsonj 			zerror(zlogp, B_FALSE, "scratch zone already running");
2692*766Scarlsonj 			goto error;
2693*766Scarlsonj 		}
2694*766Scarlsonj 		/* This is the preferred name */
2695*766Scarlsonj 		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
2696*766Scarlsonj 		    zone_name);
2697*766Scarlsonj 		srandom(getpid());
2698*766Scarlsonj 		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
2699*766Scarlsonj 		    0) == 0) {
2700*766Scarlsonj 			/* This is just an arbitrary name; note "." usage */
2701*766Scarlsonj 			(void) snprintf(kernzone, sizeof (kernzone),
2702*766Scarlsonj 			    "SUNWlu.%08lX%08lX", random(), random());
2703*766Scarlsonj 		}
2704*766Scarlsonj 		kzone = kernzone;
2705*766Scarlsonj 	}
2706*766Scarlsonj 
27070Sstevel@tonic-gate 	xerr = 0;
2708*766Scarlsonj 	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
27090Sstevel@tonic-gate 	    rctlbufsz, &xerr)) == -1) {
27100Sstevel@tonic-gate 		if (xerr == ZE_AREMOUNTS) {
27110Sstevel@tonic-gate 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
27120Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
27130Sstevel@tonic-gate 				    "An unknown file-system is mounted on "
27140Sstevel@tonic-gate 				    "a subdirectory of %s", rootpath);
27150Sstevel@tonic-gate 			} else {
27160Sstevel@tonic-gate 
27170Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
27180Sstevel@tonic-gate 				    "These file-systems are mounted on "
27190Sstevel@tonic-gate 				    "subdirectories of %s:", rootpath);
27200Sstevel@tonic-gate 				(void) zonecfg_find_mounts(rootpath,
27210Sstevel@tonic-gate 				    prtmount, zlogp);
27220Sstevel@tonic-gate 			}
27230Sstevel@tonic-gate 		} else if (xerr == ZE_CHROOTED) {
27240Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s: "
27250Sstevel@tonic-gate 			    "cannot create a zone from a chrooted "
27260Sstevel@tonic-gate 			    "environment", "zone_create");
27270Sstevel@tonic-gate 		} else {
27280Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s failed", "zone_create");
27290Sstevel@tonic-gate 		}
27300Sstevel@tonic-gate 		goto error;
27310Sstevel@tonic-gate 	}
2732*766Scarlsonj 
2733*766Scarlsonj 	if (zonecfg_in_alt_root() &&
2734*766Scarlsonj 	    zonecfg_add_scratch(fp, zone_name, kernzone,
2735*766Scarlsonj 	    zonecfg_get_root()) == -1) {
2736*766Scarlsonj 		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
2737*766Scarlsonj 		goto error;
2738*766Scarlsonj 	}
2739*766Scarlsonj 
27400Sstevel@tonic-gate 	/*
2741*766Scarlsonj 	 * The following is a warning, not an error, and is not performed when
2742*766Scarlsonj 	 * merely mounting a zone for administrative use.
27430Sstevel@tonic-gate 	 */
2744*766Scarlsonj 	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
27450Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
27460Sstevel@tonic-gate 		    "requested pool; using default pool.");
2747*766Scarlsonj 	rval = zoneid;
2748*766Scarlsonj 	zoneid = -1;
2749*766Scarlsonj 
27500Sstevel@tonic-gate error:
2751*766Scarlsonj 	if (zoneid != -1)
2752*766Scarlsonj 		(void) zone_destroy(zoneid);
27530Sstevel@tonic-gate 	if (rctlbuf != NULL)
27540Sstevel@tonic-gate 		free(rctlbuf);
27550Sstevel@tonic-gate 	priv_freeset(privs);
2756*766Scarlsonj 	if (fp != NULL)
2757*766Scarlsonj 		zonecfg_close_scratch(fp);
2758*766Scarlsonj 	lofs_discard_mnttab();
27590Sstevel@tonic-gate 	return (rval);
27600Sstevel@tonic-gate }
27610Sstevel@tonic-gate 
27620Sstevel@tonic-gate int
2763*766Scarlsonj vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd)
27640Sstevel@tonic-gate {
2765*766Scarlsonj 	if (create_dev_files(zlogp) != 0 ||
2766*766Scarlsonj 	    mount_filesystems(zlogp, mount_cmd) != 0) {
2767*766Scarlsonj 		lofs_discard_mnttab();
27680Sstevel@tonic-gate 		return (-1);
2769*766Scarlsonj 	}
2770*766Scarlsonj 	if (!mount_cmd && (devfsadm_register(zlogp) != 0 ||
2771*766Scarlsonj 	    configure_network_interfaces(zlogp) != 0)) {
2772*766Scarlsonj 		lofs_discard_mnttab();
27730Sstevel@tonic-gate 		return (-1);
2774*766Scarlsonj 	}
2775*766Scarlsonj 	lofs_discard_mnttab();
27760Sstevel@tonic-gate 	return (0);
27770Sstevel@tonic-gate }
27780Sstevel@tonic-gate 
2779*766Scarlsonj static int
2780*766Scarlsonj lu_root_teardown(zlog_t *zlogp)
2781*766Scarlsonj {
2782*766Scarlsonj 	char zroot[MAXPATHLEN];
2783*766Scarlsonj 
2784*766Scarlsonj 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
2785*766Scarlsonj 		zerror(zlogp, B_FALSE, "unable to determine zone root");
2786*766Scarlsonj 		return (-1);
2787*766Scarlsonj 	}
2788*766Scarlsonj 	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
2789*766Scarlsonj 
2790*766Scarlsonj 	/*
2791*766Scarlsonj 	 * At this point, the processes are gone, the filesystems (save the
2792*766Scarlsonj 	 * root) are unmounted, and the zone is on death row.  But there may
2793*766Scarlsonj 	 * still be creds floating about in the system that reference the
2794*766Scarlsonj 	 * zone_t, and which pin down zone_rootvp causing this call to fail
2795*766Scarlsonj 	 * with EBUSY.  Thus, we try for a little while before just giving up.
2796*766Scarlsonj 	 * (How I wish this were not true, and umount2 just did the right
2797*766Scarlsonj 	 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
2798*766Scarlsonj 	 */
2799*766Scarlsonj 	if (umount2(zroot, MS_FORCE) != 0) {
2800*766Scarlsonj 		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
2801*766Scarlsonj 			goto unmounted;
2802*766Scarlsonj 		if (errno == EBUSY) {
2803*766Scarlsonj 			int tries = 10;
2804*766Scarlsonj 
2805*766Scarlsonj 			while (--tries >= 0) {
2806*766Scarlsonj 				(void) sleep(1);
2807*766Scarlsonj 				if (umount2(zroot, 0) == 0)
2808*766Scarlsonj 					goto unmounted;
2809*766Scarlsonj 				if (errno != EBUSY)
2810*766Scarlsonj 					break;
2811*766Scarlsonj 			}
2812*766Scarlsonj 		}
2813*766Scarlsonj 		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
2814*766Scarlsonj 		return (-1);
2815*766Scarlsonj 	}
2816*766Scarlsonj unmounted:
2817*766Scarlsonj 
2818*766Scarlsonj 	/*
2819*766Scarlsonj 	 * Only zones in an alternate root environment have scratch zone
2820*766Scarlsonj 	 * entries.
2821*766Scarlsonj 	 */
2822*766Scarlsonj 	if (zonecfg_in_alt_root()) {
2823*766Scarlsonj 		FILE *fp;
2824*766Scarlsonj 		int retv;
2825*766Scarlsonj 
2826*766Scarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
2827*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
2828*766Scarlsonj 			return (-1);
2829*766Scarlsonj 		}
2830*766Scarlsonj 		retv = -1;
2831*766Scarlsonj 		if (zonecfg_lock_scratch(fp) != 0)
2832*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
2833*766Scarlsonj 		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
2834*766Scarlsonj 			zerror(zlogp, B_TRUE, "cannot delete map entry");
2835*766Scarlsonj 		else
2836*766Scarlsonj 			retv = 0;
2837*766Scarlsonj 		zonecfg_close_scratch(fp);
2838*766Scarlsonj 		return (retv);
2839*766Scarlsonj 	} else {
2840*766Scarlsonj 		return (0);
2841*766Scarlsonj 	}
2842*766Scarlsonj }
2843*766Scarlsonj 
28440Sstevel@tonic-gate int
2845*766Scarlsonj vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
28460Sstevel@tonic-gate {
2847*766Scarlsonj 	char *kzone;
28480Sstevel@tonic-gate 	zoneid_t zoneid;
28490Sstevel@tonic-gate 
2850*766Scarlsonj 	kzone = zone_name;
2851*766Scarlsonj 	if (zonecfg_in_alt_root()) {
2852*766Scarlsonj 		FILE *fp;
2853*766Scarlsonj 
2854*766Scarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
2855*766Scarlsonj 			zerror(zlogp, B_TRUE, "unable to open map file");
2856*766Scarlsonj 			goto error;
2857*766Scarlsonj 		}
2858*766Scarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
2859*766Scarlsonj 		    kernzone, sizeof (kernzone)) != 0) {
2860*766Scarlsonj 			zerror(zlogp, B_FALSE, "unable to find scratch zone");
2861*766Scarlsonj 			zonecfg_close_scratch(fp);
2862*766Scarlsonj 			goto error;
2863*766Scarlsonj 		}
2864*766Scarlsonj 		zonecfg_close_scratch(fp);
2865*766Scarlsonj 		kzone = kernzone;
2866*766Scarlsonj 	}
2867*766Scarlsonj 
2868*766Scarlsonj 	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
28690Sstevel@tonic-gate 		if (!bringup_failure_recovery)
28700Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to get zoneid");
2871*766Scarlsonj 		if (unmount_cmd)
2872*766Scarlsonj 			(void) lu_root_teardown(zlogp);
28730Sstevel@tonic-gate 		goto error;
28740Sstevel@tonic-gate 	}
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate 	if (zone_shutdown(zoneid) != 0) {
28770Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to shutdown zone");
28780Sstevel@tonic-gate 		goto error;
28790Sstevel@tonic-gate 	}
28800Sstevel@tonic-gate 
2881*766Scarlsonj 	if (!unmount_cmd && devfsadm_unregister(zlogp) != 0)
28820Sstevel@tonic-gate 		goto error;
28830Sstevel@tonic-gate 
2884*766Scarlsonj 	if (!unmount_cmd &&
2885*766Scarlsonj 	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
28860Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
28870Sstevel@tonic-gate 		    "unable to unconfigure network interfaces in zone");
28880Sstevel@tonic-gate 		goto error;
28890Sstevel@tonic-gate 	}
28900Sstevel@tonic-gate 
2891*766Scarlsonj 	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
28920Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
28930Sstevel@tonic-gate 		goto error;
28940Sstevel@tonic-gate 	}
28950Sstevel@tonic-gate 
2896*766Scarlsonj 	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
28970Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
28980Sstevel@tonic-gate 		    "unable to unmount file systems in zone");
28990Sstevel@tonic-gate 		goto error;
29000Sstevel@tonic-gate 	}
29010Sstevel@tonic-gate 
29020Sstevel@tonic-gate 	if (zone_destroy(zoneid) != 0) {
29030Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to destroy zone");
29040Sstevel@tonic-gate 		goto error;
29050Sstevel@tonic-gate 	}
29060Sstevel@tonic-gate 
2907*766Scarlsonj 	/*
2908*766Scarlsonj 	 * Special teardown for alternate boot environments: remove the tmpfs
2909*766Scarlsonj 	 * root for the zone and then remove it from the map file.
2910*766Scarlsonj 	 */
2911*766Scarlsonj 	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
2912*766Scarlsonj 		goto error;
2913*766Scarlsonj 
2914*766Scarlsonj 	if (!unmount_cmd)
2915*766Scarlsonj 		destroy_console_slave();
2916*766Scarlsonj 
2917*766Scarlsonj 	lofs_discard_mnttab();
29180Sstevel@tonic-gate 	return (0);
29190Sstevel@tonic-gate 
29200Sstevel@tonic-gate error:
2921*766Scarlsonj 	lofs_discard_mnttab();
29220Sstevel@tonic-gate 	return (-1);
29230Sstevel@tonic-gate }
2924