10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51676Sjpk * Common Development and Distribution License (the "License").
61676Sjpk * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
21390Sraf
220Sstevel@tonic-gate /*
2312273SCasper.Dik@Sun.COM * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * Zones
280Sstevel@tonic-gate *
290Sstevel@tonic-gate * A zone is a named collection of processes, namespace constraints,
300Sstevel@tonic-gate * and other system resources which comprise a secure and manageable
310Sstevel@tonic-gate * application containment facility.
320Sstevel@tonic-gate *
330Sstevel@tonic-gate * Zones (represented by the reference counted zone_t) are tracked in
340Sstevel@tonic-gate * the kernel in the zonehash. Elsewhere in the kernel, Zone IDs
350Sstevel@tonic-gate * (zoneid_t) are used to track zone association. Zone IDs are
360Sstevel@tonic-gate * dynamically generated when the zone is created; if a persistent
370Sstevel@tonic-gate * identifier is needed (core files, accounting logs, audit trail,
380Sstevel@tonic-gate * etc.), the zone name should be used.
390Sstevel@tonic-gate *
400Sstevel@tonic-gate *
410Sstevel@tonic-gate * Global Zone:
420Sstevel@tonic-gate *
430Sstevel@tonic-gate * The global zone (zoneid 0) is automatically associated with all
440Sstevel@tonic-gate * system resources that have not been bound to a user-created zone.
450Sstevel@tonic-gate * This means that even systems where zones are not in active use
460Sstevel@tonic-gate * have a global zone, and all processes, mounts, etc. are
470Sstevel@tonic-gate * associated with that zone. The global zone is generally
480Sstevel@tonic-gate * unconstrained in terms of privileges and access, though the usual
490Sstevel@tonic-gate * credential and privilege based restrictions apply.
500Sstevel@tonic-gate *
510Sstevel@tonic-gate *
520Sstevel@tonic-gate * Zone States:
530Sstevel@tonic-gate *
540Sstevel@tonic-gate * The states a zone may be in, and the transitions between them, are as
550Sstevel@tonic-gate * follows:
560Sstevel@tonic-gate *
570Sstevel@tonic-gate * ZONE_IS_UNINITIALIZED: primordial state for a zone. The partially
580Sstevel@tonic-gate * initialized zone is added to the list of active zones on the system but
590Sstevel@tonic-gate * isn't accessible.
600Sstevel@tonic-gate *
615880Snordmark * ZONE_IS_INITIALIZED: Initialization complete except the ZSD callbacks are
625880Snordmark * not yet completed. Not possible to enter the zone, but attributes can
635880Snordmark * be retrieved.
645880Snordmark *
650Sstevel@tonic-gate * ZONE_IS_READY: zsched (the kernel dummy process for a zone) is
660Sstevel@tonic-gate * ready. The zone is made visible after the ZSD constructor callbacks are
670Sstevel@tonic-gate * executed. A zone remains in this state until it transitions into
680Sstevel@tonic-gate * the ZONE_IS_BOOTING state as a result of a call to zone_boot().
690Sstevel@tonic-gate *
700Sstevel@tonic-gate * ZONE_IS_BOOTING: in this shortlived-state, zsched attempts to start
710Sstevel@tonic-gate * init. Should that fail, the zone proceeds to the ZONE_IS_SHUTTING_DOWN
720Sstevel@tonic-gate * state.
730Sstevel@tonic-gate *
740Sstevel@tonic-gate * ZONE_IS_RUNNING: The zone is open for business: zsched has
750Sstevel@tonic-gate * successfully started init. A zone remains in this state until
760Sstevel@tonic-gate * zone_shutdown() is called.
770Sstevel@tonic-gate *
780Sstevel@tonic-gate * ZONE_IS_SHUTTING_DOWN: zone_shutdown() has been called, the system is
790Sstevel@tonic-gate * killing all processes running in the zone. The zone remains
800Sstevel@tonic-gate * in this state until there are no more user processes running in the zone.
810Sstevel@tonic-gate * zone_create(), zone_enter(), and zone_destroy() on this zone will fail.
820Sstevel@tonic-gate * Since zone_shutdown() is restartable, it may be called successfully
830Sstevel@tonic-gate * multiple times for the same zone_t. Setting of the zone's state to
840Sstevel@tonic-gate * ZONE_IS_SHUTTING_DOWN is synchronized with mounts, so VOP_MOUNT() may check
850Sstevel@tonic-gate * the zone's status without worrying about it being a moving target.
860Sstevel@tonic-gate *
870Sstevel@tonic-gate * ZONE_IS_EMPTY: zone_shutdown() has been called, and there
880Sstevel@tonic-gate * are no more user processes in the zone. The zone remains in this
890Sstevel@tonic-gate * state until there are no more kernel threads associated with the
900Sstevel@tonic-gate * zone. zone_create(), zone_enter(), and zone_destroy() on this zone will
910Sstevel@tonic-gate * fail.
920Sstevel@tonic-gate *
930Sstevel@tonic-gate * ZONE_IS_DOWN: All kernel threads doing work on behalf of the zone
940Sstevel@tonic-gate * have exited. zone_shutdown() returns. Henceforth it is not possible to
950Sstevel@tonic-gate * join the zone or create kernel threads therein.
960Sstevel@tonic-gate *
970Sstevel@tonic-gate * ZONE_IS_DYING: zone_destroy() has been called on the zone; zone
980Sstevel@tonic-gate * remains in this state until zsched exits. Calls to zone_find_by_*()
990Sstevel@tonic-gate * return NULL from now on.
1000Sstevel@tonic-gate *
1010Sstevel@tonic-gate * ZONE_IS_DEAD: zsched has exited (zone_ntasks == 0). There are no
1020Sstevel@tonic-gate * processes or threads doing work on behalf of the zone. The zone is
1030Sstevel@tonic-gate * removed from the list of active zones. zone_destroy() returns, and
1040Sstevel@tonic-gate * the zone can be recreated.
1050Sstevel@tonic-gate *
1060Sstevel@tonic-gate * ZONE_IS_FREE (internal state): zone_ref goes to 0, ZSD destructor
1070Sstevel@tonic-gate * callbacks are executed, and all memory associated with the zone is
1080Sstevel@tonic-gate * freed.
1090Sstevel@tonic-gate *
1100Sstevel@tonic-gate * Threads can wait for the zone to enter a requested state by using
1110Sstevel@tonic-gate * zone_status_wait() or zone_status_timedwait() with the desired
1120Sstevel@tonic-gate * state passed in as an argument. Zone state transitions are
1130Sstevel@tonic-gate * uni-directional; it is not possible to move back to an earlier state.
1140Sstevel@tonic-gate *
1150Sstevel@tonic-gate *
1160Sstevel@tonic-gate * Zone-Specific Data:
1170Sstevel@tonic-gate *
1180Sstevel@tonic-gate * Subsystems needing to maintain zone-specific data can store that
1190Sstevel@tonic-gate * data using the ZSD mechanism. This provides a zone-specific data
1200Sstevel@tonic-gate * store, similar to thread-specific data (see pthread_getspecific(3C)
1210Sstevel@tonic-gate * or the TSD code in uts/common/disp/thread.c). Also, ZSD can be used
1220Sstevel@tonic-gate * to register callbacks to be invoked when a zone is created, shut
1230Sstevel@tonic-gate * down, or destroyed. This can be used to initialize zone-specific
1240Sstevel@tonic-gate * data for new zones and to clean up when zones go away.
1250Sstevel@tonic-gate *
1260Sstevel@tonic-gate *
1270Sstevel@tonic-gate * Data Structures:
1280Sstevel@tonic-gate *
1290Sstevel@tonic-gate * The per-zone structure (zone_t) is reference counted, and freed
1300Sstevel@tonic-gate * when all references are released. zone_hold and zone_rele can be
1310Sstevel@tonic-gate * used to adjust the reference count. In addition, reference counts
1320Sstevel@tonic-gate * associated with the cred_t structure are tracked separately using
1330Sstevel@tonic-gate * zone_cred_hold and zone_cred_rele.
1340Sstevel@tonic-gate *
1350Sstevel@tonic-gate * Pointers to active zone_t's are stored in two hash tables; one
1360Sstevel@tonic-gate * for searching by id, the other for searching by name. Lookups
1370Sstevel@tonic-gate * can be performed on either basis, using zone_find_by_id and
1380Sstevel@tonic-gate * zone_find_by_name. Both return zone_t pointers with the zone
1390Sstevel@tonic-gate * held, so zone_rele should be called when the pointer is no longer
1400Sstevel@tonic-gate * needed. Zones can also be searched by path; zone_find_by_path
1410Sstevel@tonic-gate * returns the zone with which a path name is associated (global
1420Sstevel@tonic-gate * zone if the path is not within some other zone's file system
1430Sstevel@tonic-gate * hierarchy). This currently requires iterating through each zone,
1440Sstevel@tonic-gate * so it is slower than an id or name search via a hash table.
1450Sstevel@tonic-gate *
1460Sstevel@tonic-gate *
1470Sstevel@tonic-gate * Locking:
1480Sstevel@tonic-gate *
1490Sstevel@tonic-gate * zonehash_lock: This is a top-level global lock used to protect the
1500Sstevel@tonic-gate * zone hash tables and lists. Zones cannot be created or destroyed
1510Sstevel@tonic-gate * while this lock is held.
1520Sstevel@tonic-gate * zone_status_lock: This is a global lock protecting zone state.
1530Sstevel@tonic-gate * Zones cannot change state while this lock is held. It also
1540Sstevel@tonic-gate * protects the list of kernel threads associated with a zone.
1550Sstevel@tonic-gate * zone_lock: This is a per-zone lock used to protect several fields of
1560Sstevel@tonic-gate * the zone_t (see <sys/zone.h> for details). In addition, holding
1570Sstevel@tonic-gate * this lock means that the zone cannot go away.
1583247Sgjelinek * zone_nlwps_lock: This is a per-zone lock used to protect the fields
1593247Sgjelinek * related to the zone.max-lwps rctl.
1603247Sgjelinek * zone_mem_lock: This is a per-zone lock used to protect the fields
1613247Sgjelinek * related to the zone.max-locked-memory and zone.max-swap rctls.
16212633Sjohn.levon@sun.com * zone_rctl_lock: This is a per-zone lock used to protect other rctls,
16312633Sjohn.levon@sun.com * currently just max_lofi
1640Sstevel@tonic-gate * zsd_key_lock: This is a global lock protecting the key state for ZSD.
1650Sstevel@tonic-gate * zone_deathrow_lock: This is a global lock protecting the "deathrow"
1660Sstevel@tonic-gate * list (a list of zones in the ZONE_IS_DEAD state).
1670Sstevel@tonic-gate *
1680Sstevel@tonic-gate * Ordering requirements:
1690Sstevel@tonic-gate * pool_lock --> cpu_lock --> zonehash_lock --> zone_status_lock -->
1700Sstevel@tonic-gate * zone_lock --> zsd_key_lock --> pidlock --> p_lock
1710Sstevel@tonic-gate *
1723247Sgjelinek * When taking zone_mem_lock or zone_nlwps_lock, the lock ordering is:
1733247Sgjelinek * zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_mem_lock
17412725SMenno.Lageman@Sun.COM * zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_nlwps_lock
1753247Sgjelinek *
1760Sstevel@tonic-gate * Blocking memory allocations are permitted while holding any of the
1770Sstevel@tonic-gate * zone locks.
1780Sstevel@tonic-gate *
1790Sstevel@tonic-gate *
1800Sstevel@tonic-gate * System Call Interface:
1810Sstevel@tonic-gate *
1820Sstevel@tonic-gate * The zone subsystem can be managed and queried from user level with
1830Sstevel@tonic-gate * the following system calls (all subcodes of the primary "zone"
1840Sstevel@tonic-gate * system call):
1850Sstevel@tonic-gate * - zone_create: creates a zone with selected attributes (name,
186789Sahrens * root path, privileges, resource controls, ZFS datasets)
1870Sstevel@tonic-gate * - zone_enter: allows the current process to enter a zone
1880Sstevel@tonic-gate * - zone_getattr: reports attributes of a zone
1892267Sdp * - zone_setattr: set attributes of a zone
1902267Sdp * - zone_boot: set 'init' running for the zone
1910Sstevel@tonic-gate * - zone_list: lists all zones active in the system
1920Sstevel@tonic-gate * - zone_lookup: looks up zone id based on name
1930Sstevel@tonic-gate * - zone_shutdown: initiates shutdown process (see states above)
1940Sstevel@tonic-gate * - zone_destroy: completes shutdown process (see states above)
1950Sstevel@tonic-gate *
1960Sstevel@tonic-gate */
1970Sstevel@tonic-gate
1980Sstevel@tonic-gate #include <sys/priv_impl.h>
1990Sstevel@tonic-gate #include <sys/cred.h>
2000Sstevel@tonic-gate #include <c2/audit.h>
2010Sstevel@tonic-gate #include <sys/debug.h>
2020Sstevel@tonic-gate #include <sys/file.h>
2030Sstevel@tonic-gate #include <sys/kmem.h>
2043247Sgjelinek #include <sys/kstat.h>
2050Sstevel@tonic-gate #include <sys/mutex.h>
2061676Sjpk #include <sys/note.h>
2070Sstevel@tonic-gate #include <sys/pathname.h>
2080Sstevel@tonic-gate #include <sys/proc.h>
2090Sstevel@tonic-gate #include <sys/project.h>
2101166Sdstaff #include <sys/sysevent.h>
2110Sstevel@tonic-gate #include <sys/task.h>
2120Sstevel@tonic-gate #include <sys/systm.h>
2130Sstevel@tonic-gate #include <sys/types.h>
2140Sstevel@tonic-gate #include <sys/utsname.h>
2150Sstevel@tonic-gate #include <sys/vnode.h>
2160Sstevel@tonic-gate #include <sys/vfs.h>
2170Sstevel@tonic-gate #include <sys/systeminfo.h>
2180Sstevel@tonic-gate #include <sys/policy.h>
2190Sstevel@tonic-gate #include <sys/cred_impl.h>
2200Sstevel@tonic-gate #include <sys/contract_impl.h>
2210Sstevel@tonic-gate #include <sys/contract/process_impl.h>
2220Sstevel@tonic-gate #include <sys/class.h>
2230Sstevel@tonic-gate #include <sys/pool.h>
2240Sstevel@tonic-gate #include <sys/pool_pset.h>
2250Sstevel@tonic-gate #include <sys/pset.h>
226*13096SJordan.Vaughan@Sun.com #include <sys/strlog.h>
2270Sstevel@tonic-gate #include <sys/sysmacros.h>
2280Sstevel@tonic-gate #include <sys/callb.h>
2290Sstevel@tonic-gate #include <sys/vmparam.h>
2300Sstevel@tonic-gate #include <sys/corectl.h>
2312677Sml93401 #include <sys/ipc_impl.h>
23212273SCasper.Dik@Sun.COM #include <sys/klpd.h>
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate #include <sys/door.h>
2350Sstevel@tonic-gate #include <sys/cpuvar.h>
2365880Snordmark #include <sys/sdt.h>
2370Sstevel@tonic-gate
2380Sstevel@tonic-gate #include <sys/uadmin.h>
2390Sstevel@tonic-gate #include <sys/session.h>
2400Sstevel@tonic-gate #include <sys/cmn_err.h>
2410Sstevel@tonic-gate #include <sys/modhash.h>
2422267Sdp #include <sys/sunddi.h>
2430Sstevel@tonic-gate #include <sys/nvpair.h>
2440Sstevel@tonic-gate #include <sys/rctl.h>
2450Sstevel@tonic-gate #include <sys/fss.h>
2462712Snn35248 #include <sys/brand.h>
2470Sstevel@tonic-gate #include <sys/zone.h>
2483448Sdh155122 #include <net/if.h>
2493792Sakolb #include <sys/cpucaps.h>
2503247Sgjelinek #include <vm/seg.h>
25110616SSebastien.Roy@Sun.COM #include <sys/mac.h>
25210616SSebastien.Roy@Sun.COM
/*
 * This constant specifies the number of seconds that threads waiting for
 * subsystems to release a zone's general-purpose references will wait before
 * they log the zone's reference counts.  The constant's value shouldn't
 * be so small that reference counts are unnecessarily reported for zones
 * whose references are slowly released.  On the other hand, it shouldn't be so
 * large that users reboot their systems out of frustration over hung zones
 * before the system logs the zones' reference counts.
 */
#define	ZONE_DESTROY_TIMEOUT_SECS	60
263*13096SJordan.Vaughan@Sun.com
26410616SSebastien.Roy@Sun.COM /* List of data link IDs which are accessible from the zone */
26510616SSebastien.Roy@Sun.COM typedef struct zone_dl {
26610616SSebastien.Roy@Sun.COM datalink_id_t zdl_id;
26712748SSowmini.Varadhan@oracle.COM nvlist_t *zdl_net;
26810616SSebastien.Roy@Sun.COM list_node_t zdl_linkage;
26910616SSebastien.Roy@Sun.COM } zone_dl_t;
2703247Sgjelinek
2710Sstevel@tonic-gate /*
2720Sstevel@tonic-gate * cv used to signal that all references to the zone have been released. This
2730Sstevel@tonic-gate * needs to be global since there may be multiple waiters, and the first to
2740Sstevel@tonic-gate * wake up will free the zone_t, hence we cannot use zone->zone_cv.
2750Sstevel@tonic-gate */
2760Sstevel@tonic-gate static kcondvar_t zone_destroy_cv;
2770Sstevel@tonic-gate /*
2780Sstevel@tonic-gate * Lock used to serialize access to zone_cv. This could have been per-zone,
2790Sstevel@tonic-gate * but then we'd need another lock for zone_destroy_cv, and why bother?
2800Sstevel@tonic-gate */
2810Sstevel@tonic-gate static kmutex_t zone_status_lock;
2820Sstevel@tonic-gate
2830Sstevel@tonic-gate /*
2840Sstevel@tonic-gate * ZSD-related global variables.
2850Sstevel@tonic-gate */
2860Sstevel@tonic-gate static kmutex_t zsd_key_lock; /* protects the following two */
2870Sstevel@tonic-gate /*
2880Sstevel@tonic-gate * The next caller of zone_key_create() will be assigned a key of ++zsd_keyval.
2890Sstevel@tonic-gate */
2900Sstevel@tonic-gate static zone_key_t zsd_keyval = 0;
2910Sstevel@tonic-gate /*
2920Sstevel@tonic-gate * Global list of registered keys. We use this when a new zone is created.
2930Sstevel@tonic-gate */
2940Sstevel@tonic-gate static list_t zsd_registered_keys;
2950Sstevel@tonic-gate
2960Sstevel@tonic-gate int zone_hash_size = 256;
2971676Sjpk static mod_hash_t *zonehashbyname, *zonehashbyid, *zonehashbylabel;
2980Sstevel@tonic-gate static kmutex_t zonehash_lock;
2990Sstevel@tonic-gate static uint_t zonecount;
3000Sstevel@tonic-gate static id_space_t *zoneid_space;
3010Sstevel@tonic-gate
3020Sstevel@tonic-gate /*
3030Sstevel@tonic-gate * The global zone (aka zone0) is the all-seeing, all-knowing zone in which the
3040Sstevel@tonic-gate * kernel proper runs, and which manages all other zones.
3050Sstevel@tonic-gate *
3060Sstevel@tonic-gate * Although not declared as static, the variable "zone0" should not be used
3070Sstevel@tonic-gate * except for by code that needs to reference the global zone early on in boot,
3080Sstevel@tonic-gate * before it is fully initialized. All other consumers should use
3090Sstevel@tonic-gate * 'global_zone'.
3100Sstevel@tonic-gate */
3110Sstevel@tonic-gate zone_t zone0;
3120Sstevel@tonic-gate zone_t *global_zone = NULL; /* Set when the global zone is initialized */
3130Sstevel@tonic-gate
3140Sstevel@tonic-gate /*
3150Sstevel@tonic-gate * List of active zones, protected by zonehash_lock.
3160Sstevel@tonic-gate */
3170Sstevel@tonic-gate static list_t zone_active;
3180Sstevel@tonic-gate
3190Sstevel@tonic-gate /*
3200Sstevel@tonic-gate * List of destroyed zones that still have outstanding cred references.
3210Sstevel@tonic-gate * Used for debugging. Uses a separate lock to avoid lock ordering
3220Sstevel@tonic-gate * problems in zone_free.
3230Sstevel@tonic-gate */
3240Sstevel@tonic-gate static list_t zone_deathrow;
3250Sstevel@tonic-gate static kmutex_t zone_deathrow_lock;
3260Sstevel@tonic-gate
3270Sstevel@tonic-gate /* number of zones is limited by virtual interface limit in IP */
3280Sstevel@tonic-gate uint_t maxzones = 8192;
3290Sstevel@tonic-gate
3301166Sdstaff /* Event channel to sent zone state change notifications */
3311166Sdstaff evchan_t *zone_event_chan;
3321166Sdstaff
3331166Sdstaff /*
3341166Sdstaff * This table holds the mapping from kernel zone states to
3351166Sdstaff * states visible in the state notification API.
3361166Sdstaff * The idea is that we only expose "obvious" states and
3371166Sdstaff * do not expose states which are just implementation details.
3381166Sdstaff */
3391166Sdstaff const char *zone_status_table[] = {
3401166Sdstaff ZONE_EVENT_UNINITIALIZED, /* uninitialized */
3415880Snordmark ZONE_EVENT_INITIALIZED, /* initialized */
3421166Sdstaff ZONE_EVENT_READY, /* ready */
3431166Sdstaff ZONE_EVENT_READY, /* booting */
3441166Sdstaff ZONE_EVENT_RUNNING, /* running */
3451166Sdstaff ZONE_EVENT_SHUTTING_DOWN, /* shutting_down */
3461166Sdstaff ZONE_EVENT_SHUTTING_DOWN, /* empty */
3471166Sdstaff ZONE_EVENT_SHUTTING_DOWN, /* down */
3481166Sdstaff ZONE_EVENT_SHUTTING_DOWN, /* dying */
3491166Sdstaff ZONE_EVENT_UNINITIALIZED, /* dead */
3501166Sdstaff };
3511166Sdstaff
/*
 * This array contains the names of the subsystems listed in zone_ref_subsys_t
 * (see sys/zone.h).  The trailing comment on each entry names the enumerator
 * that the string describes.
 */
static char *zone_ref_subsys_names[] = {
	"NFS",		/* ZONE_REF_NFS */
	"NFSv4",	/* ZONE_REF_NFSV4 */
	"SMBFS",	/* ZONE_REF_SMBFS */
	"MNTFS",	/* ZONE_REF_MNTFS */
	"LOFI",		/* ZONE_REF_LOFI */
	"VFS",		/* ZONE_REF_VFS */
	"IPC"		/* ZONE_REF_IPC */
};
365*13096SJordan.Vaughan@Sun.com
366*13096SJordan.Vaughan@Sun.com /*
3670Sstevel@tonic-gate * This isn't static so lint doesn't complain.
3680Sstevel@tonic-gate */
3690Sstevel@tonic-gate rctl_hndl_t rc_zone_cpu_shares;
3702768Ssl108498 rctl_hndl_t rc_zone_locked_mem;
3713247Sgjelinek rctl_hndl_t rc_zone_max_swap;
37212633Sjohn.levon@sun.com rctl_hndl_t rc_zone_max_lofi;
3733792Sakolb rctl_hndl_t rc_zone_cpu_cap;
3740Sstevel@tonic-gate rctl_hndl_t rc_zone_nlwps;
37512725SMenno.Lageman@Sun.COM rctl_hndl_t rc_zone_nprocs;
3762677Sml93401 rctl_hndl_t rc_zone_shmmax;
3772677Sml93401 rctl_hndl_t rc_zone_shmmni;
3782677Sml93401 rctl_hndl_t rc_zone_semmni;
3792677Sml93401 rctl_hndl_t rc_zone_msgmni;
3800Sstevel@tonic-gate /*
3810Sstevel@tonic-gate * Synchronization primitives used to synchronize between mounts and zone
3820Sstevel@tonic-gate * creation/destruction.
3830Sstevel@tonic-gate */
3840Sstevel@tonic-gate static int mounts_in_progress;
3850Sstevel@tonic-gate static kcondvar_t mount_cv;
3860Sstevel@tonic-gate static kmutex_t mount_lock;
3870Sstevel@tonic-gate
3882267Sdp const char * const zone_default_initname = "/sbin/init";
3891676Sjpk static char * const zone_prefix = "/zone/";
3900Sstevel@tonic-gate static int zone_shutdown(zoneid_t zoneid);
39110616SSebastien.Roy@Sun.COM static int zone_add_datalink(zoneid_t, datalink_id_t);
39210616SSebastien.Roy@Sun.COM static int zone_remove_datalink(zoneid_t, datalink_id_t);
39310616SSebastien.Roy@Sun.COM static int zone_list_datalink(zoneid_t, int *, datalink_id_t *);
39412748SSowmini.Varadhan@oracle.COM static int zone_set_network(zoneid_t, zone_net_data_t *);
39512748SSowmini.Varadhan@oracle.COM static int zone_get_network(zoneid_t, zone_net_data_t *);
3960Sstevel@tonic-gate
3975880Snordmark typedef boolean_t zsd_applyfn_t(kmutex_t *, boolean_t, zone_t *, zone_key_t);
3985880Snordmark
3995880Snordmark static void zsd_apply_all_zones(zsd_applyfn_t *, zone_key_t);
4005880Snordmark static void zsd_apply_all_keys(zsd_applyfn_t *, zone_t *);
4015880Snordmark static boolean_t zsd_apply_create(kmutex_t *, boolean_t, zone_t *, zone_key_t);
4025880Snordmark static boolean_t zsd_apply_shutdown(kmutex_t *, boolean_t, zone_t *,
4035880Snordmark zone_key_t);
4045880Snordmark static boolean_t zsd_apply_destroy(kmutex_t *, boolean_t, zone_t *, zone_key_t);
4055880Snordmark static boolean_t zsd_wait_for_creator(zone_t *, struct zsd_entry *,
4065880Snordmark kmutex_t *);
4075880Snordmark static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
4085880Snordmark kmutex_t *);
4095880Snordmark
/*
 * Bump this number when you alter the zone syscall interfaces; this is
 * because we need to have support for previous API versions in libc
 * to support patching; libc calls into the kernel to determine this number.
 *
 * Version 1 of the API is the version originally shipped with Solaris 10
 * Version 2 alters the zone_create system call in order to support more
 *     arguments by moving the args into a structure; and to do better
 *     error reporting when zone_create() fails.
 * Version 3 alters the zone_create system call in order to support the
 *     import of ZFS datasets to zones.
 * Version 4 alters the zone_create system call in order to support
 *     Trusted Extensions.
 * Version 5 alters the zone_boot system call, and converts its old
 *     bootargs parameter to be set by the zone_setattr API instead.
 * Version 6 adds the flag argument to zone_create.
 */
static const int ZONE_SYSCALL_API_VERSION = 6;
428813Sdp
429813Sdp /*
4300Sstevel@tonic-gate * Certain filesystems (such as NFS and autofs) need to know which zone
4310Sstevel@tonic-gate * the mount is being placed in. Because of this, we need to be able to
4320Sstevel@tonic-gate * ensure that a zone isn't in the process of being created such that
4330Sstevel@tonic-gate * nfs_mount() thinks it is in the global zone, while by the time it
4340Sstevel@tonic-gate * gets added to the list of mounted zones, it ends up on zoneA's mount
4350Sstevel@tonic-gate * list.
4360Sstevel@tonic-gate *
4370Sstevel@tonic-gate * The following functions: block_mounts()/resume_mounts() and
4380Sstevel@tonic-gate * mount_in_progress()/mount_completed() are used by zones and the VFS
4390Sstevel@tonic-gate * layer (respectively) to synchronize zone creation and new mounts.
4400Sstevel@tonic-gate *
4410Sstevel@tonic-gate * The semantics are like a reader-reader lock such that there may
4420Sstevel@tonic-gate * either be multiple mounts (or zone creations, if that weren't
4430Sstevel@tonic-gate * serialized by zonehash_lock) in progress at the same time, but not
4440Sstevel@tonic-gate * both.
4450Sstevel@tonic-gate *
4460Sstevel@tonic-gate * We use cv's so the user can ctrl-C out of the operation if it's
4470Sstevel@tonic-gate * taking too long.
4480Sstevel@tonic-gate *
4490Sstevel@tonic-gate * The semantics are such that there is unfair bias towards the
4500Sstevel@tonic-gate * "current" operation. This means that zone creations may starve if
4510Sstevel@tonic-gate * there is a rapid succession of new mounts coming in to the system, or
4520Sstevel@tonic-gate * there is a remote possibility that zones will be created at such a
4530Sstevel@tonic-gate * rate that new mounts will not be able to proceed.
4540Sstevel@tonic-gate */
4550Sstevel@tonic-gate /*
4560Sstevel@tonic-gate * Prevent new mounts from progressing to the point of calling
4570Sstevel@tonic-gate * VFS_MOUNT(). If there are already mounts in this "region", wait for
4580Sstevel@tonic-gate * them to complete.
4590Sstevel@tonic-gate */
4600Sstevel@tonic-gate static int
block_mounts(void)4610Sstevel@tonic-gate block_mounts(void)
4620Sstevel@tonic-gate {
4630Sstevel@tonic-gate int retval = 0;
4640Sstevel@tonic-gate
4650Sstevel@tonic-gate /*
4660Sstevel@tonic-gate * Since it may block for a long time, block_mounts() shouldn't be
4670Sstevel@tonic-gate * called with zonehash_lock held.
4680Sstevel@tonic-gate */
4690Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
4700Sstevel@tonic-gate mutex_enter(&mount_lock);
4710Sstevel@tonic-gate while (mounts_in_progress > 0) {
4720Sstevel@tonic-gate if (cv_wait_sig(&mount_cv, &mount_lock) == 0)
4730Sstevel@tonic-gate goto signaled;
4740Sstevel@tonic-gate }
4750Sstevel@tonic-gate /*
4760Sstevel@tonic-gate * A negative value of mounts_in_progress indicates that mounts
4770Sstevel@tonic-gate * have been blocked by (-mounts_in_progress) different callers.
4780Sstevel@tonic-gate */
4790Sstevel@tonic-gate mounts_in_progress--;
4800Sstevel@tonic-gate retval = 1;
4810Sstevel@tonic-gate signaled:
4820Sstevel@tonic-gate mutex_exit(&mount_lock);
4830Sstevel@tonic-gate return (retval);
4840Sstevel@tonic-gate }
4850Sstevel@tonic-gate
4860Sstevel@tonic-gate /*
4870Sstevel@tonic-gate * The VFS layer may progress with new mounts as far as we're concerned.
4880Sstevel@tonic-gate * Allow them to progress if we were the last obstacle.
4890Sstevel@tonic-gate */
4900Sstevel@tonic-gate static void
resume_mounts(void)4910Sstevel@tonic-gate resume_mounts(void)
4920Sstevel@tonic-gate {
4930Sstevel@tonic-gate mutex_enter(&mount_lock);
4940Sstevel@tonic-gate if (++mounts_in_progress == 0)
4950Sstevel@tonic-gate cv_broadcast(&mount_cv);
4960Sstevel@tonic-gate mutex_exit(&mount_lock);
4970Sstevel@tonic-gate }
4980Sstevel@tonic-gate
4990Sstevel@tonic-gate /*
5000Sstevel@tonic-gate * The VFS layer is busy with a mount; zones should wait until all
5010Sstevel@tonic-gate * mounts are completed to progress.
5020Sstevel@tonic-gate */
5030Sstevel@tonic-gate void
mount_in_progress(void)5040Sstevel@tonic-gate mount_in_progress(void)
5050Sstevel@tonic-gate {
5060Sstevel@tonic-gate mutex_enter(&mount_lock);
5070Sstevel@tonic-gate while (mounts_in_progress < 0)
5080Sstevel@tonic-gate cv_wait(&mount_cv, &mount_lock);
5090Sstevel@tonic-gate mounts_in_progress++;
5100Sstevel@tonic-gate mutex_exit(&mount_lock);
5110Sstevel@tonic-gate }
5120Sstevel@tonic-gate
5130Sstevel@tonic-gate /*
5140Sstevel@tonic-gate * VFS is done with one mount; wake up any waiting block_mounts()
5150Sstevel@tonic-gate * callers if this is the last mount.
5160Sstevel@tonic-gate */
5170Sstevel@tonic-gate void
mount_completed(void)5180Sstevel@tonic-gate mount_completed(void)
5190Sstevel@tonic-gate {
5200Sstevel@tonic-gate mutex_enter(&mount_lock);
5210Sstevel@tonic-gate if (--mounts_in_progress == 0)
5220Sstevel@tonic-gate cv_broadcast(&mount_cv);
5230Sstevel@tonic-gate mutex_exit(&mount_lock);
5240Sstevel@tonic-gate }
5250Sstevel@tonic-gate
5260Sstevel@tonic-gate /*
5270Sstevel@tonic-gate * ZSD routines.
5280Sstevel@tonic-gate *
5290Sstevel@tonic-gate * Zone Specific Data (ZSD) is modeled after Thread Specific Data as
5300Sstevel@tonic-gate * defined by the pthread_key_create() and related interfaces.
5310Sstevel@tonic-gate *
5320Sstevel@tonic-gate * Kernel subsystems may register one or more data items and/or
5330Sstevel@tonic-gate * callbacks to be executed when a zone is created, shutdown, or
5340Sstevel@tonic-gate * destroyed.
5350Sstevel@tonic-gate *
5360Sstevel@tonic-gate * Unlike the thread counterpart, destructor callbacks will be executed
5370Sstevel@tonic-gate * even if the data pointer is NULL and/or there are no constructor
5380Sstevel@tonic-gate * callbacks, so it is the responsibility of such callbacks to check for
5390Sstevel@tonic-gate * NULL data values if necessary.
5400Sstevel@tonic-gate *
5410Sstevel@tonic-gate * The locking strategy and overall picture is as follows:
5420Sstevel@tonic-gate *
5430Sstevel@tonic-gate * When someone calls zone_key_create(), a template ZSD entry is added to the
5445880Snordmark * global list "zsd_registered_keys", protected by zsd_key_lock. While
5455880Snordmark * holding that lock all the existing zones are marked as
5465880Snordmark * ZSD_CREATE_NEEDED and a copy of the ZSD entry added to the per-zone
5475880Snordmark * zone_zsd list (protected by zone_lock). The global list is updated first
5485880Snordmark * (under zsd_key_lock) to make sure that newly created zones use the
5495880Snordmark * most recent list of keys. Then under zonehash_lock we walk the zones
5505880Snordmark * and mark them. Similar locking is used in zone_key_delete().
5510Sstevel@tonic-gate *
5525880Snordmark * The actual create, shutdown, and destroy callbacks are done without
5535880Snordmark * holding any lock. And zsd_flags are used to ensure that the operations
5545880Snordmark * completed so that when zone_key_create (and zone_create) is done, as well as
5555880Snordmark * zone_key_delete (and zone_destroy) is done, all the necessary callbacks
5565880Snordmark * are completed.
5570Sstevel@tonic-gate *
5580Sstevel@tonic-gate * When new zones are created constructor callbacks for all registered ZSD
5595880Snordmark * entries will be called. That also uses the above two phases of marking
5605880Snordmark * what needs to be done, and then running the callbacks without holding
5615880Snordmark * any locks.
5620Sstevel@tonic-gate *
5630Sstevel@tonic-gate * The framework does not provide any locking around zone_getspecific() and
5640Sstevel@tonic-gate * zone_setspecific() apart from that needed for internal consistency, so
5650Sstevel@tonic-gate * callers interested in atomic "test-and-set" semantics will need to provide
5660Sstevel@tonic-gate * their own locking.
5670Sstevel@tonic-gate */
5680Sstevel@tonic-gate
5690Sstevel@tonic-gate /*
5700Sstevel@tonic-gate * Helper function to find the zsd_entry associated with the key in the
5710Sstevel@tonic-gate * given list.
5720Sstevel@tonic-gate */
5730Sstevel@tonic-gate static struct zsd_entry *
zsd_find(list_t * l,zone_key_t key)5740Sstevel@tonic-gate zsd_find(list_t *l, zone_key_t key)
5750Sstevel@tonic-gate {
5760Sstevel@tonic-gate struct zsd_entry *zsd;
5770Sstevel@tonic-gate
5780Sstevel@tonic-gate for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) {
5790Sstevel@tonic-gate if (zsd->zsd_key == key) {
5805880Snordmark return (zsd);
5815880Snordmark }
5825880Snordmark }
5835880Snordmark return (NULL);
5845880Snordmark }
5855880Snordmark
5865880Snordmark /*
5875880Snordmark * Helper function to find the zsd_entry associated with the key in the
5885880Snordmark * given list. Move it to the front of the list.
5895880Snordmark */
5905880Snordmark static struct zsd_entry *
zsd_find_mru(list_t * l,zone_key_t key)5915880Snordmark zsd_find_mru(list_t *l, zone_key_t key)
5925880Snordmark {
5935880Snordmark struct zsd_entry *zsd;
5945880Snordmark
5955880Snordmark for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) {
5965880Snordmark if (zsd->zsd_key == key) {
5970Sstevel@tonic-gate /*
5980Sstevel@tonic-gate * Move to head of list to keep list in MRU order.
5990Sstevel@tonic-gate */
6000Sstevel@tonic-gate if (zsd != list_head(l)) {
6010Sstevel@tonic-gate list_remove(l, zsd);
6020Sstevel@tonic-gate list_insert_head(l, zsd);
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate return (zsd);
6050Sstevel@tonic-gate }
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate return (NULL);
6080Sstevel@tonic-gate }
6090Sstevel@tonic-gate
6105880Snordmark void
zone_key_create(zone_key_t * keyp,void * (* create)(zoneid_t),void (* shutdown)(zoneid_t,void *),void (* destroy)(zoneid_t,void *))6115880Snordmark zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
6125880Snordmark void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
6135880Snordmark {
6145880Snordmark struct zsd_entry *zsdp;
6155880Snordmark struct zsd_entry *t;
6165880Snordmark struct zone *zone;
6175880Snordmark zone_key_t key;
6185880Snordmark
6195880Snordmark zsdp = kmem_zalloc(sizeof (*zsdp), KM_SLEEP);
6205880Snordmark zsdp->zsd_data = NULL;
6215880Snordmark zsdp->zsd_create = create;
6225880Snordmark zsdp->zsd_shutdown = shutdown;
6235880Snordmark zsdp->zsd_destroy = destroy;
6245880Snordmark
6255880Snordmark /*
6265880Snordmark * Insert in global list of callbacks. Makes future zone creations
6275880Snordmark * see it.
6285880Snordmark */
6295880Snordmark mutex_enter(&zsd_key_lock);
63010865SPramod.Batni@Sun.COM key = zsdp->zsd_key = ++zsd_keyval;
6315880Snordmark ASSERT(zsd_keyval != 0);
6325880Snordmark list_insert_tail(&zsd_registered_keys, zsdp);
6335880Snordmark mutex_exit(&zsd_key_lock);
6345880Snordmark
6355880Snordmark /*
6365880Snordmark * Insert for all existing zones and mark them as needing
6375880Snordmark * a create callback.
6385880Snordmark */
6395880Snordmark mutex_enter(&zonehash_lock); /* stop the world */
6405880Snordmark for (zone = list_head(&zone_active); zone != NULL;
6415880Snordmark zone = list_next(&zone_active, zone)) {
6425880Snordmark zone_status_t status;
6435880Snordmark
6445880Snordmark mutex_enter(&zone->zone_lock);
6455880Snordmark
6465880Snordmark /* Skip zones that are on the way down or not yet up */
6475880Snordmark status = zone_status_get(zone);
6485880Snordmark if (status >= ZONE_IS_DOWN ||
6495880Snordmark status == ZONE_IS_UNINITIALIZED) {
6505880Snordmark mutex_exit(&zone->zone_lock);
6515880Snordmark continue;
6525880Snordmark }
6535880Snordmark
6545880Snordmark t = zsd_find_mru(&zone->zone_zsd, key);
6555880Snordmark if (t != NULL) {
6565880Snordmark /*
6575880Snordmark * A zsd_configure already inserted it after
6585880Snordmark * we dropped zsd_key_lock above.
6595880Snordmark */
6605880Snordmark mutex_exit(&zone->zone_lock);
6615880Snordmark continue;
6625880Snordmark }
6635880Snordmark t = kmem_zalloc(sizeof (*t), KM_SLEEP);
6645880Snordmark t->zsd_key = key;
6655880Snordmark t->zsd_create = create;
6665880Snordmark t->zsd_shutdown = shutdown;
6675880Snordmark t->zsd_destroy = destroy;
6685880Snordmark if (create != NULL) {
6695880Snordmark t->zsd_flags = ZSD_CREATE_NEEDED;
6705880Snordmark DTRACE_PROBE2(zsd__create__needed,
6715880Snordmark zone_t *, zone, zone_key_t, key);
6725880Snordmark }
6735880Snordmark list_insert_tail(&zone->zone_zsd, t);
6745880Snordmark mutex_exit(&zone->zone_lock);
6755880Snordmark }
6765880Snordmark mutex_exit(&zonehash_lock);
6775880Snordmark
6785880Snordmark if (create != NULL) {
6795880Snordmark /* Now call the create callback for this key */
6805880Snordmark zsd_apply_all_zones(zsd_apply_create, key);
6815880Snordmark }
68210865SPramod.Batni@Sun.COM /*
68310910SRobert.Harris@Sun.COM * It is safe for consumers to use the key now, make it
68410910SRobert.Harris@Sun.COM * globally visible. Specifically zone_getspecific() will
68510910SRobert.Harris@Sun.COM * always successfully return the zone specific data associated
68610910SRobert.Harris@Sun.COM * with the key.
68710910SRobert.Harris@Sun.COM */
68810865SPramod.Batni@Sun.COM *keyp = key;
68910865SPramod.Batni@Sun.COM
6905880Snordmark }
6915880Snordmark
6920Sstevel@tonic-gate /*
6930Sstevel@tonic-gate * Function called when a module is being unloaded, or otherwise wishes
6940Sstevel@tonic-gate * to unregister its ZSD key and callbacks.
6955880Snordmark *
6965880Snordmark * Remove from the global list and determine the functions that need to
6975880Snordmark * be called under a global lock. Then call the functions without
6985880Snordmark * holding any locks. Finally free up the zone_zsd entries. (The apply
6995880Snordmark * functions need to access the zone_zsd entries to find zsd_data etc.)
7000Sstevel@tonic-gate */
7010Sstevel@tonic-gate int
zone_key_delete(zone_key_t key)7020Sstevel@tonic-gate zone_key_delete(zone_key_t key)
7030Sstevel@tonic-gate {
7040Sstevel@tonic-gate struct zsd_entry *zsdp = NULL;
7050Sstevel@tonic-gate zone_t *zone;
7060Sstevel@tonic-gate
7070Sstevel@tonic-gate mutex_enter(&zsd_key_lock);
7085880Snordmark zsdp = zsd_find_mru(&zsd_registered_keys, key);
7095880Snordmark if (zsdp == NULL) {
7105880Snordmark mutex_exit(&zsd_key_lock);
7115880Snordmark return (-1);
7125880Snordmark }
7130Sstevel@tonic-gate list_remove(&zsd_registered_keys, zsdp);
7140Sstevel@tonic-gate mutex_exit(&zsd_key_lock);
7150Sstevel@tonic-gate
7165880Snordmark mutex_enter(&zonehash_lock);
7170Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
7180Sstevel@tonic-gate zone = list_next(&zone_active, zone)) {
7190Sstevel@tonic-gate struct zsd_entry *del;
7205880Snordmark
7215880Snordmark mutex_enter(&zone->zone_lock);
7225880Snordmark del = zsd_find_mru(&zone->zone_zsd, key);
7235880Snordmark if (del == NULL) {
7245880Snordmark /*
7255880Snordmark * Somebody else got here first e.g the zone going
7265880Snordmark * away.
7275880Snordmark */
7285880Snordmark mutex_exit(&zone->zone_lock);
7295880Snordmark continue;
7305880Snordmark }
7315880Snordmark ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
7325880Snordmark ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
7335880Snordmark if (del->zsd_shutdown != NULL &&
7345880Snordmark (del->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
7355880Snordmark del->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
7365880Snordmark DTRACE_PROBE2(zsd__shutdown__needed,
7375880Snordmark zone_t *, zone, zone_key_t, key);
7385880Snordmark }
7395880Snordmark if (del->zsd_destroy != NULL &&
7405880Snordmark (del->zsd_flags & ZSD_DESTROY_ALL) == 0) {
7415880Snordmark del->zsd_flags |= ZSD_DESTROY_NEEDED;
7425880Snordmark DTRACE_PROBE2(zsd__destroy__needed,
7435880Snordmark zone_t *, zone, zone_key_t, key);
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
7460Sstevel@tonic-gate }
7470Sstevel@tonic-gate mutex_exit(&zonehash_lock);
7480Sstevel@tonic-gate kmem_free(zsdp, sizeof (*zsdp));
7495880Snordmark
7505880Snordmark /* Now call the shutdown and destroy callback for this key */
7515880Snordmark zsd_apply_all_zones(zsd_apply_shutdown, key);
7525880Snordmark zsd_apply_all_zones(zsd_apply_destroy, key);
7535880Snordmark
7545880Snordmark /* Now we can free up the zsdp structures in each zone */
7555880Snordmark mutex_enter(&zonehash_lock);
7560Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
7575880Snordmark zone = list_next(&zone_active, zone)) {
7585880Snordmark struct zsd_entry *del;
7595880Snordmark
7605880Snordmark mutex_enter(&zone->zone_lock);
7615880Snordmark del = zsd_find(&zone->zone_zsd, key);
7625880Snordmark if (del != NULL) {
7635880Snordmark list_remove(&zone->zone_zsd, del);
7645880Snordmark ASSERT(!(del->zsd_flags & ZSD_ALL_INPROGRESS));
7655880Snordmark kmem_free(del, sizeof (*del));
7665880Snordmark }
7670Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
7685880Snordmark }
7690Sstevel@tonic-gate mutex_exit(&zonehash_lock);
7705880Snordmark
7715880Snordmark return (0);
7720Sstevel@tonic-gate }
7730Sstevel@tonic-gate
7740Sstevel@tonic-gate /*
7750Sstevel@tonic-gate * ZSD counterpart of pthread_setspecific().
7765880Snordmark *
7775880Snordmark * Since all zsd callbacks, including those with no create function,
7785880Snordmark * have an entry in zone_zsd, if the key is registered it is part of
7795880Snordmark * the zone_zsd list.
7805880Snordmark * Return an error if the key wasn't registerd.
7810Sstevel@tonic-gate */
7820Sstevel@tonic-gate int
zone_setspecific(zone_key_t key,zone_t * zone,const void * data)7830Sstevel@tonic-gate zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
7840Sstevel@tonic-gate {
7850Sstevel@tonic-gate struct zsd_entry *t;
7860Sstevel@tonic-gate
7870Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
7885880Snordmark t = zsd_find_mru(&zone->zone_zsd, key);
7890Sstevel@tonic-gate if (t != NULL) {
7900Sstevel@tonic-gate /*
7910Sstevel@tonic-gate * Replace old value with new
7920Sstevel@tonic-gate */
7930Sstevel@tonic-gate t->zsd_data = (void *)data;
7940Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
7950Sstevel@tonic-gate return (0);
7960Sstevel@tonic-gate }
7970Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
7985880Snordmark return (-1);
7990Sstevel@tonic-gate }
8000Sstevel@tonic-gate
8010Sstevel@tonic-gate /*
8020Sstevel@tonic-gate * ZSD counterpart of pthread_getspecific().
8030Sstevel@tonic-gate */
8040Sstevel@tonic-gate void *
zone_getspecific(zone_key_t key,zone_t * zone)8050Sstevel@tonic-gate zone_getspecific(zone_key_t key, zone_t *zone)
8060Sstevel@tonic-gate {
8070Sstevel@tonic-gate struct zsd_entry *t;
8080Sstevel@tonic-gate void *data;
8090Sstevel@tonic-gate
8100Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
8115880Snordmark t = zsd_find_mru(&zone->zone_zsd, key);
8120Sstevel@tonic-gate data = (t == NULL ? NULL : t->zsd_data);
8130Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
8140Sstevel@tonic-gate return (data);
8150Sstevel@tonic-gate }
8160Sstevel@tonic-gate
8170Sstevel@tonic-gate /*
8180Sstevel@tonic-gate * Function used to initialize a zone's list of ZSD callbacks and data
8190Sstevel@tonic-gate * when the zone is being created. The callbacks are initialized from
8205880Snordmark * the template list (zsd_registered_keys). The constructor callback is
8215880Snordmark * executed later (once the zone exists and with locks dropped).
8220Sstevel@tonic-gate */
8230Sstevel@tonic-gate static void
zone_zsd_configure(zone_t * zone)8240Sstevel@tonic-gate zone_zsd_configure(zone_t *zone)
8250Sstevel@tonic-gate {
8260Sstevel@tonic-gate struct zsd_entry *zsdp;
8270Sstevel@tonic-gate struct zsd_entry *t;
8280Sstevel@tonic-gate
8290Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
8300Sstevel@tonic-gate ASSERT(list_head(&zone->zone_zsd) == NULL);
8315880Snordmark mutex_enter(&zone->zone_lock);
8320Sstevel@tonic-gate mutex_enter(&zsd_key_lock);
8330Sstevel@tonic-gate for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL;
8340Sstevel@tonic-gate zsdp = list_next(&zsd_registered_keys, zsdp)) {
8355880Snordmark /*
8365880Snordmark * Since this zone is ZONE_IS_UNCONFIGURED, zone_key_create
8375880Snordmark * should not have added anything to it.
8385880Snordmark */
8395880Snordmark ASSERT(zsd_find(&zone->zone_zsd, zsdp->zsd_key) == NULL);
8405880Snordmark
8415880Snordmark t = kmem_zalloc(sizeof (*t), KM_SLEEP);
8425880Snordmark t->zsd_key = zsdp->zsd_key;
8435880Snordmark t->zsd_create = zsdp->zsd_create;
8445880Snordmark t->zsd_shutdown = zsdp->zsd_shutdown;
8455880Snordmark t->zsd_destroy = zsdp->zsd_destroy;
8460Sstevel@tonic-gate if (zsdp->zsd_create != NULL) {
8475880Snordmark t->zsd_flags = ZSD_CREATE_NEEDED;
8485880Snordmark DTRACE_PROBE2(zsd__create__needed,
8495880Snordmark zone_t *, zone, zone_key_t, zsdp->zsd_key);
8500Sstevel@tonic-gate }
8515880Snordmark list_insert_tail(&zone->zone_zsd, t);
8520Sstevel@tonic-gate }
8530Sstevel@tonic-gate mutex_exit(&zsd_key_lock);
8545880Snordmark mutex_exit(&zone->zone_lock);
8550Sstevel@tonic-gate }
8560Sstevel@tonic-gate
/* Selects which kind of ZSD callback an operation refers to. */
enum zsd_callback_type {
	ZSD_CREATE,
	ZSD_SHUTDOWN,
	ZSD_DESTROY
};
8580Sstevel@tonic-gate
8590Sstevel@tonic-gate /*
8600Sstevel@tonic-gate * Helper function to execute shutdown or destructor callbacks.
8610Sstevel@tonic-gate */
8620Sstevel@tonic-gate static void
zone_zsd_callbacks(zone_t * zone,enum zsd_callback_type ct)8630Sstevel@tonic-gate zone_zsd_callbacks(zone_t *zone, enum zsd_callback_type ct)
8640Sstevel@tonic-gate {
8650Sstevel@tonic-gate struct zsd_entry *t;
8660Sstevel@tonic-gate
8670Sstevel@tonic-gate ASSERT(ct == ZSD_SHUTDOWN || ct == ZSD_DESTROY);
8680Sstevel@tonic-gate ASSERT(ct != ZSD_SHUTDOWN || zone_status_get(zone) >= ZONE_IS_EMPTY);
8690Sstevel@tonic-gate ASSERT(ct != ZSD_DESTROY || zone_status_get(zone) >= ZONE_IS_DOWN);
8700Sstevel@tonic-gate
8715880Snordmark /*
8725880Snordmark * Run the callback solely based on what is registered for the zone
8735880Snordmark * in zone_zsd. The global list can change independently of this
8745880Snordmark * as keys are registered and unregistered and we don't register new
8755880Snordmark * callbacks for a zone that is in the process of going away.
8765880Snordmark */
8770Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
8785880Snordmark for (t = list_head(&zone->zone_zsd); t != NULL;
8795880Snordmark t = list_next(&zone->zone_zsd, t)) {
8805880Snordmark zone_key_t key = t->zsd_key;
8810Sstevel@tonic-gate
8820Sstevel@tonic-gate /* Skip if no callbacks registered */
8835880Snordmark
8845880Snordmark if (ct == ZSD_SHUTDOWN) {
8855880Snordmark if (t->zsd_shutdown != NULL &&
8865880Snordmark (t->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
8875880Snordmark t->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
8885880Snordmark DTRACE_PROBE2(zsd__shutdown__needed,
8895880Snordmark zone_t *, zone, zone_key_t, key);
8900Sstevel@tonic-gate }
8910Sstevel@tonic-gate } else {
8925880Snordmark if (t->zsd_destroy != NULL &&
8935880Snordmark (t->zsd_flags & ZSD_DESTROY_ALL) == 0) {
8945880Snordmark t->zsd_flags |= ZSD_DESTROY_NEEDED;
8955880Snordmark DTRACE_PROBE2(zsd__destroy__needed,
8965880Snordmark zone_t *, zone, zone_key_t, key);
8970Sstevel@tonic-gate }
8980Sstevel@tonic-gate }
8990Sstevel@tonic-gate }
9005880Snordmark mutex_exit(&zone->zone_lock);
9015880Snordmark
9025880Snordmark /* Now call the shutdown and destroy callback for this key */
9035880Snordmark zsd_apply_all_keys(zsd_apply_shutdown, zone);
9045880Snordmark zsd_apply_all_keys(zsd_apply_destroy, zone);
9055880Snordmark
9060Sstevel@tonic-gate }
9070Sstevel@tonic-gate
9080Sstevel@tonic-gate /*
9090Sstevel@tonic-gate * Called when the zone is going away; free ZSD-related memory, and
9100Sstevel@tonic-gate * destroy the zone_zsd list.
9110Sstevel@tonic-gate */
9120Sstevel@tonic-gate static void
zone_free_zsd(zone_t * zone)9130Sstevel@tonic-gate zone_free_zsd(zone_t *zone)
9140Sstevel@tonic-gate {
9150Sstevel@tonic-gate struct zsd_entry *t, *next;
9160Sstevel@tonic-gate
9170Sstevel@tonic-gate /*
9180Sstevel@tonic-gate * Free all the zsd_entry's we had on this zone.
9190Sstevel@tonic-gate */
9205880Snordmark mutex_enter(&zone->zone_lock);
9210Sstevel@tonic-gate for (t = list_head(&zone->zone_zsd); t != NULL; t = next) {
9220Sstevel@tonic-gate next = list_next(&zone->zone_zsd, t);
9230Sstevel@tonic-gate list_remove(&zone->zone_zsd, t);
9245880Snordmark ASSERT(!(t->zsd_flags & ZSD_ALL_INPROGRESS));
9250Sstevel@tonic-gate kmem_free(t, sizeof (*t));
9260Sstevel@tonic-gate }
9270Sstevel@tonic-gate list_destroy(&zone->zone_zsd);
9285880Snordmark mutex_exit(&zone->zone_lock);
9295880Snordmark
9305880Snordmark }
9315880Snordmark
9325880Snordmark /*
9335880Snordmark * Apply a function to all zones for particular key value.
9345880Snordmark *
9355880Snordmark * The applyfn has to drop zonehash_lock if it does some work, and
9365880Snordmark * then reacquire it before it returns.
9375880Snordmark * When the lock is dropped we don't follow list_next even
9385880Snordmark * if it is possible to do so without any hazards. This is
9395880Snordmark * because we want the design to allow for the list of zones
9405880Snordmark * to change in any arbitrary way during the time the
9415880Snordmark * lock was dropped.
9425880Snordmark *
9435880Snordmark * It is safe to restart the loop at list_head since the applyfn
9445880Snordmark * changes the zsd_flags as it does work, so a subsequent
9455880Snordmark * pass through will have no effect in applyfn, hence the loop will terminate
9465880Snordmark * in at worst O(N^2).
9475880Snordmark */
9485880Snordmark static void
zsd_apply_all_zones(zsd_applyfn_t * applyfn,zone_key_t key)9495880Snordmark zsd_apply_all_zones(zsd_applyfn_t *applyfn, zone_key_t key)
9505880Snordmark {
9515880Snordmark zone_t *zone;
9525880Snordmark
9535880Snordmark mutex_enter(&zonehash_lock);
9545880Snordmark zone = list_head(&zone_active);
9555880Snordmark while (zone != NULL) {
9565880Snordmark if ((applyfn)(&zonehash_lock, B_FALSE, zone, key)) {
9575880Snordmark /* Lock dropped - restart at head */
9585880Snordmark zone = list_head(&zone_active);
9595880Snordmark } else {
9605880Snordmark zone = list_next(&zone_active, zone);
9615880Snordmark }
9625880Snordmark }
9635880Snordmark mutex_exit(&zonehash_lock);
9645880Snordmark }
9655880Snordmark
9665880Snordmark /*
9675880Snordmark * Apply a function to all keys for a particular zone.
9685880Snordmark *
9695880Snordmark * The applyfn has to drop zonehash_lock if it does some work, and
9705880Snordmark * then reacquire it before it returns.
9715880Snordmark * When the lock is dropped we don't follow list_next even
9725880Snordmark * if it is possible to do so without any hazards. This is
9735880Snordmark * because we want the design to allow for the list of zsd callbacks
9745880Snordmark * to change in any arbitrary way during the time the
9755880Snordmark * lock was dropped.
9765880Snordmark *
9775880Snordmark * It is safe to restart the loop at list_head since the applyfn
9785880Snordmark * changes the zsd_flags as it does work, so a subsequent
9795880Snordmark * pass through will have no effect in applyfn, hence the loop will terminate
9805880Snordmark * in at worst O(N^2).
9815880Snordmark */
9825880Snordmark static void
zsd_apply_all_keys(zsd_applyfn_t * applyfn,zone_t * zone)9835880Snordmark zsd_apply_all_keys(zsd_applyfn_t *applyfn, zone_t *zone)
9845880Snordmark {
9855880Snordmark struct zsd_entry *t;
9865880Snordmark
9875880Snordmark mutex_enter(&zone->zone_lock);
9885880Snordmark t = list_head(&zone->zone_zsd);
9895880Snordmark while (t != NULL) {
9905880Snordmark if ((applyfn)(NULL, B_TRUE, zone, t->zsd_key)) {
9915880Snordmark /* Lock dropped - restart at head */
9925880Snordmark t = list_head(&zone->zone_zsd);
9935880Snordmark } else {
9945880Snordmark t = list_next(&zone->zone_zsd, t);
9955880Snordmark }
9965880Snordmark }
9975880Snordmark mutex_exit(&zone->zone_lock);
9985880Snordmark }
9995880Snordmark
10005880Snordmark /*
10015880Snordmark * Call the create function for the zone and key if CREATE_NEEDED
10025880Snordmark * is set.
10035880Snordmark * If some other thread gets here first and sets CREATE_INPROGRESS, then
10045880Snordmark * we wait for that thread to complete so that we can ensure that
10055880Snordmark * all the callbacks are done when we've looped over all zones/keys.
10065880Snordmark *
10075880Snordmark * When we call the create function, we drop the global held by the
10085880Snordmark * caller, and return true to tell the caller it needs to re-evalute the
10095880Snordmark * state.
10105880Snordmark * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
10115880Snordmark * remains held on exit.
10125880Snordmark */
10135880Snordmark static boolean_t
zsd_apply_create(kmutex_t * lockp,boolean_t zone_lock_held,zone_t * zone,zone_key_t key)10145880Snordmark zsd_apply_create(kmutex_t *lockp, boolean_t zone_lock_held,
10155880Snordmark zone_t *zone, zone_key_t key)
10165880Snordmark {
10175880Snordmark void *result;
10185880Snordmark struct zsd_entry *t;
10195880Snordmark boolean_t dropped;
10205880Snordmark
10215880Snordmark if (lockp != NULL) {
10225880Snordmark ASSERT(MUTEX_HELD(lockp));
10235880Snordmark }
10245880Snordmark if (zone_lock_held) {
10255880Snordmark ASSERT(MUTEX_HELD(&zone->zone_lock));
10265880Snordmark } else {
10275880Snordmark mutex_enter(&zone->zone_lock);
10285880Snordmark }
10295880Snordmark
10305880Snordmark t = zsd_find(&zone->zone_zsd, key);
10315880Snordmark if (t == NULL) {
10325880Snordmark /*
10335880Snordmark * Somebody else got here first e.g the zone going
10345880Snordmark * away.
10355880Snordmark */
10365880Snordmark if (!zone_lock_held)
10375880Snordmark mutex_exit(&zone->zone_lock);
10385880Snordmark return (B_FALSE);
10395880Snordmark }
10405880Snordmark dropped = B_FALSE;
10415880Snordmark if (zsd_wait_for_inprogress(zone, t, lockp))
10425880Snordmark dropped = B_TRUE;
10435880Snordmark
10445880Snordmark if (t->zsd_flags & ZSD_CREATE_NEEDED) {
10455880Snordmark t->zsd_flags &= ~ZSD_CREATE_NEEDED;
10465880Snordmark t->zsd_flags |= ZSD_CREATE_INPROGRESS;
10475880Snordmark DTRACE_PROBE2(zsd__create__inprogress,
10485880Snordmark zone_t *, zone, zone_key_t, key);
10495880Snordmark mutex_exit(&zone->zone_lock);
10505880Snordmark if (lockp != NULL)
10515880Snordmark mutex_exit(lockp);
10525880Snordmark
10535880Snordmark dropped = B_TRUE;
10545880Snordmark ASSERT(t->zsd_create != NULL);
10555880Snordmark DTRACE_PROBE2(zsd__create__start,
10565880Snordmark zone_t *, zone, zone_key_t, key);
10575880Snordmark
10585880Snordmark result = (*t->zsd_create)(zone->zone_id);
10595880Snordmark
10605880Snordmark DTRACE_PROBE2(zsd__create__end,
10615880Snordmark zone_t *, zone, voidn *, result);
10625880Snordmark
10635880Snordmark ASSERT(result != NULL);
10645880Snordmark if (lockp != NULL)
10655880Snordmark mutex_enter(lockp);
10665880Snordmark mutex_enter(&zone->zone_lock);
10675880Snordmark t->zsd_data = result;
10685880Snordmark t->zsd_flags &= ~ZSD_CREATE_INPROGRESS;
10695880Snordmark t->zsd_flags |= ZSD_CREATE_COMPLETED;
10705880Snordmark cv_broadcast(&t->zsd_cv);
10715880Snordmark DTRACE_PROBE2(zsd__create__completed,
10725880Snordmark zone_t *, zone, zone_key_t, key);
10735880Snordmark }
10745880Snordmark if (!zone_lock_held)
10755880Snordmark mutex_exit(&zone->zone_lock);
10765880Snordmark return (dropped);
10775880Snordmark }
10785880Snordmark
10795880Snordmark /*
10805880Snordmark * Call the shutdown function for the zone and key if SHUTDOWN_NEEDED
10815880Snordmark * is set.
10825880Snordmark * If some other thread gets here first and sets *_INPROGRESS, then
10835880Snordmark * we wait for that thread to complete so that we can ensure that
10845880Snordmark * all the callbacks are done when we've looped over all zones/keys.
10855880Snordmark *
10865880Snordmark * When we call the shutdown function, we drop the global held by the
10875880Snordmark * caller, and return true to tell the caller it needs to re-evalute the
10885880Snordmark * state.
10895880Snordmark * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
10905880Snordmark * remains held on exit.
10915880Snordmark */
10925880Snordmark static boolean_t
zsd_apply_shutdown(kmutex_t * lockp,boolean_t zone_lock_held,zone_t * zone,zone_key_t key)10935880Snordmark zsd_apply_shutdown(kmutex_t *lockp, boolean_t zone_lock_held,
10945880Snordmark zone_t *zone, zone_key_t key)
10955880Snordmark {
10965880Snordmark struct zsd_entry *t;
10975880Snordmark void *data;
10985880Snordmark boolean_t dropped;
10995880Snordmark
11005880Snordmark if (lockp != NULL) {
11015880Snordmark ASSERT(MUTEX_HELD(lockp));
11025880Snordmark }
11035880Snordmark if (zone_lock_held) {
11045880Snordmark ASSERT(MUTEX_HELD(&zone->zone_lock));
11055880Snordmark } else {
11065880Snordmark mutex_enter(&zone->zone_lock);
11075880Snordmark }
11085880Snordmark
11095880Snordmark t = zsd_find(&zone->zone_zsd, key);
11105880Snordmark if (t == NULL) {
11115880Snordmark /*
11125880Snordmark * Somebody else got here first e.g the zone going
11135880Snordmark * away.
11145880Snordmark */
11155880Snordmark if (!zone_lock_held)
11165880Snordmark mutex_exit(&zone->zone_lock);
11175880Snordmark return (B_FALSE);
11185880Snordmark }
11195880Snordmark dropped = B_FALSE;
11205880Snordmark if (zsd_wait_for_creator(zone, t, lockp))
11215880Snordmark dropped = B_TRUE;
11225880Snordmark
11235880Snordmark if (zsd_wait_for_inprogress(zone, t, lockp))
11245880Snordmark dropped = B_TRUE;
11255880Snordmark
11265880Snordmark if (t->zsd_flags & ZSD_SHUTDOWN_NEEDED) {
11275880Snordmark t->zsd_flags &= ~ZSD_SHUTDOWN_NEEDED;
11285880Snordmark t->zsd_flags |= ZSD_SHUTDOWN_INPROGRESS;
11295880Snordmark DTRACE_PROBE2(zsd__shutdown__inprogress,
11305880Snordmark zone_t *, zone, zone_key_t, key);
11315880Snordmark mutex_exit(&zone->zone_lock);
11325880Snordmark if (lockp != NULL)
11335880Snordmark mutex_exit(lockp);
11345880Snordmark dropped = B_TRUE;
11355880Snordmark
11365880Snordmark ASSERT(t->zsd_shutdown != NULL);
11375880Snordmark data = t->zsd_data;
11385880Snordmark
11395880Snordmark DTRACE_PROBE2(zsd__shutdown__start,
11405880Snordmark zone_t *, zone, zone_key_t, key);
11415880Snordmark
11425880Snordmark (t->zsd_shutdown)(zone->zone_id, data);
11435880Snordmark DTRACE_PROBE2(zsd__shutdown__end,
11445880Snordmark zone_t *, zone, zone_key_t, key);
11455880Snordmark
11465880Snordmark if (lockp != NULL)
11475880Snordmark mutex_enter(lockp);
11485880Snordmark mutex_enter(&zone->zone_lock);
11495880Snordmark t->zsd_flags &= ~ZSD_SHUTDOWN_INPROGRESS;
11505880Snordmark t->zsd_flags |= ZSD_SHUTDOWN_COMPLETED;
11515880Snordmark cv_broadcast(&t->zsd_cv);
11525880Snordmark DTRACE_PROBE2(zsd__shutdown__completed,
11535880Snordmark zone_t *, zone, zone_key_t, key);
11545880Snordmark }
11555880Snordmark if (!zone_lock_held)
11565880Snordmark mutex_exit(&zone->zone_lock);
11575880Snordmark return (dropped);
11585880Snordmark }
11595880Snordmark
11605880Snordmark /*
11615880Snordmark * Call the destroy function for the zone and key if DESTROY_NEEDED
11625880Snordmark * is set.
11635880Snordmark * If some other thread gets here first and sets *_INPROGRESS, then
11645880Snordmark * we wait for that thread to complete so that we can ensure that
11655880Snordmark * all the callbacks are done when we've looped over all zones/keys.
11665880Snordmark *
11675880Snordmark * When we call the destroy function, we drop the global held by the
11685880Snordmark * caller, and return true to tell the caller it needs to re-evalute the
11695880Snordmark * state.
11705880Snordmark * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
11715880Snordmark * remains held on exit.
11725880Snordmark */
11735880Snordmark static boolean_t
zsd_apply_destroy(kmutex_t * lockp,boolean_t zone_lock_held,zone_t * zone,zone_key_t key)11745880Snordmark zsd_apply_destroy(kmutex_t *lockp, boolean_t zone_lock_held,
11755880Snordmark zone_t *zone, zone_key_t key)
11765880Snordmark {
11775880Snordmark struct zsd_entry *t;
11785880Snordmark void *data;
11795880Snordmark boolean_t dropped;
11805880Snordmark
11815880Snordmark if (lockp != NULL) {
11825880Snordmark ASSERT(MUTEX_HELD(lockp));
11835880Snordmark }
11845880Snordmark if (zone_lock_held) {
11855880Snordmark ASSERT(MUTEX_HELD(&zone->zone_lock));
11865880Snordmark } else {
11875880Snordmark mutex_enter(&zone->zone_lock);
11885880Snordmark }
11895880Snordmark
11905880Snordmark t = zsd_find(&zone->zone_zsd, key);
11915880Snordmark if (t == NULL) {
11925880Snordmark /*
11935880Snordmark * Somebody else got here first e.g the zone going
11945880Snordmark * away.
11955880Snordmark */
11965880Snordmark if (!zone_lock_held)
11975880Snordmark mutex_exit(&zone->zone_lock);
11985880Snordmark return (B_FALSE);
11995880Snordmark }
12005880Snordmark dropped = B_FALSE;
12015880Snordmark if (zsd_wait_for_creator(zone, t, lockp))
12025880Snordmark dropped = B_TRUE;
12035880Snordmark
12045880Snordmark if (zsd_wait_for_inprogress(zone, t, lockp))
12055880Snordmark dropped = B_TRUE;
12065880Snordmark
12075880Snordmark if (t->zsd_flags & ZSD_DESTROY_NEEDED) {
12085880Snordmark t->zsd_flags &= ~ZSD_DESTROY_NEEDED;
12095880Snordmark t->zsd_flags |= ZSD_DESTROY_INPROGRESS;
12105880Snordmark DTRACE_PROBE2(zsd__destroy__inprogress,
12115880Snordmark zone_t *, zone, zone_key_t, key);
12125880Snordmark mutex_exit(&zone->zone_lock);
12135880Snordmark if (lockp != NULL)
12145880Snordmark mutex_exit(lockp);
12155880Snordmark dropped = B_TRUE;
12165880Snordmark
12175880Snordmark ASSERT(t->zsd_destroy != NULL);
12185880Snordmark data = t->zsd_data;
12195880Snordmark DTRACE_PROBE2(zsd__destroy__start,
12205880Snordmark zone_t *, zone, zone_key_t, key);
12215880Snordmark
12225880Snordmark (t->zsd_destroy)(zone->zone_id, data);
12235880Snordmark DTRACE_PROBE2(zsd__destroy__end,
12245880Snordmark zone_t *, zone, zone_key_t, key);
12255880Snordmark
12265880Snordmark if (lockp != NULL)
12275880Snordmark mutex_enter(lockp);
12285880Snordmark mutex_enter(&zone->zone_lock);
12295880Snordmark t->zsd_data = NULL;
12305880Snordmark t->zsd_flags &= ~ZSD_DESTROY_INPROGRESS;
12315880Snordmark t->zsd_flags |= ZSD_DESTROY_COMPLETED;
12325880Snordmark cv_broadcast(&t->zsd_cv);
12335880Snordmark DTRACE_PROBE2(zsd__destroy__completed,
12345880Snordmark zone_t *, zone, zone_key_t, key);
12355880Snordmark }
12365880Snordmark if (!zone_lock_held)
12375880Snordmark mutex_exit(&zone->zone_lock);
12385880Snordmark return (dropped);
12395880Snordmark }
12405880Snordmark
12415880Snordmark /*
12425880Snordmark * Wait for any CREATE_NEEDED flag to be cleared.
12435880Snordmark * Returns true if lockp was temporarily dropped while waiting.
12445880Snordmark */
12455880Snordmark static boolean_t
zsd_wait_for_creator(zone_t * zone,struct zsd_entry * t,kmutex_t * lockp)12465880Snordmark zsd_wait_for_creator(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
12475880Snordmark {
12485880Snordmark boolean_t dropped = B_FALSE;
12495880Snordmark
12505880Snordmark while (t->zsd_flags & ZSD_CREATE_NEEDED) {
12515880Snordmark DTRACE_PROBE2(zsd__wait__for__creator,
12525880Snordmark zone_t *, zone, struct zsd_entry *, t);
12535880Snordmark if (lockp != NULL) {
12545880Snordmark dropped = B_TRUE;
12555880Snordmark mutex_exit(lockp);
12565880Snordmark }
12575880Snordmark cv_wait(&t->zsd_cv, &zone->zone_lock);
12585880Snordmark if (lockp != NULL) {
12595880Snordmark /* First drop zone_lock to preserve order */
12605880Snordmark mutex_exit(&zone->zone_lock);
12615880Snordmark mutex_enter(lockp);
12625880Snordmark mutex_enter(&zone->zone_lock);
12635880Snordmark }
12645880Snordmark }
12655880Snordmark return (dropped);
12665880Snordmark }
12675880Snordmark
12685880Snordmark /*
12695880Snordmark * Wait for any INPROGRESS flag to be cleared.
12705880Snordmark * Returns true if lockp was temporarily dropped while waiting.
12715880Snordmark */
12725880Snordmark static boolean_t
zsd_wait_for_inprogress(zone_t * zone,struct zsd_entry * t,kmutex_t * lockp)12735880Snordmark zsd_wait_for_inprogress(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
12745880Snordmark {
12755880Snordmark boolean_t dropped = B_FALSE;
12765880Snordmark
12775880Snordmark while (t->zsd_flags & ZSD_ALL_INPROGRESS) {
12785880Snordmark DTRACE_PROBE2(zsd__wait__for__inprogress,
12795880Snordmark zone_t *, zone, struct zsd_entry *, t);
12805880Snordmark if (lockp != NULL) {
12815880Snordmark dropped = B_TRUE;
12825880Snordmark mutex_exit(lockp);
12835880Snordmark }
12845880Snordmark cv_wait(&t->zsd_cv, &zone->zone_lock);
12855880Snordmark if (lockp != NULL) {
12865880Snordmark /* First drop zone_lock to preserve order */
12875880Snordmark mutex_exit(&zone->zone_lock);
12885880Snordmark mutex_enter(lockp);
12895880Snordmark mutex_enter(&zone->zone_lock);
12905880Snordmark }
12915880Snordmark }
12925880Snordmark return (dropped);
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate
12950Sstevel@tonic-gate /*
1296789Sahrens * Frees memory associated with the zone dataset list.
1297789Sahrens */
1298789Sahrens static void
zone_free_datasets(zone_t * zone)1299789Sahrens zone_free_datasets(zone_t *zone)
1300789Sahrens {
1301789Sahrens zone_dataset_t *t, *next;
1302789Sahrens
1303789Sahrens for (t = list_head(&zone->zone_datasets); t != NULL; t = next) {
1304789Sahrens next = list_next(&zone->zone_datasets, t);
1305789Sahrens list_remove(&zone->zone_datasets, t);
1306789Sahrens kmem_free(t->zd_dataset, strlen(t->zd_dataset) + 1);
1307789Sahrens kmem_free(t, sizeof (*t));
1308789Sahrens }
1309789Sahrens list_destroy(&zone->zone_datasets);
1310789Sahrens }
1311789Sahrens
1312789Sahrens /*
13130Sstevel@tonic-gate * zone.cpu-shares resource control support.
13140Sstevel@tonic-gate */
13150Sstevel@tonic-gate /*ARGSUSED*/
13160Sstevel@tonic-gate static rctl_qty_t
zone_cpu_shares_usage(rctl_t * rctl,struct proc * p)13170Sstevel@tonic-gate zone_cpu_shares_usage(rctl_t *rctl, struct proc *p)
13180Sstevel@tonic-gate {
13190Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
13200Sstevel@tonic-gate return (p->p_zone->zone_shares);
13210Sstevel@tonic-gate }
13220Sstevel@tonic-gate
13230Sstevel@tonic-gate /*ARGSUSED*/
13240Sstevel@tonic-gate static int
zone_cpu_shares_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)13250Sstevel@tonic-gate zone_cpu_shares_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
13260Sstevel@tonic-gate rctl_qty_t nv)
13270Sstevel@tonic-gate {
13280Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
13290Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_ZONE);
13300Sstevel@tonic-gate if (e->rcep_p.zone == NULL)
13310Sstevel@tonic-gate return (0);
13320Sstevel@tonic-gate
13330Sstevel@tonic-gate e->rcep_p.zone->zone_shares = nv;
13340Sstevel@tonic-gate return (0);
13350Sstevel@tonic-gate }
13360Sstevel@tonic-gate
13370Sstevel@tonic-gate static rctl_ops_t zone_cpu_shares_ops = {
13380Sstevel@tonic-gate rcop_no_action,
13390Sstevel@tonic-gate zone_cpu_shares_usage,
13400Sstevel@tonic-gate zone_cpu_shares_set,
13410Sstevel@tonic-gate rcop_no_test
13420Sstevel@tonic-gate };
13430Sstevel@tonic-gate
13443792Sakolb /*
13453792Sakolb * zone.cpu-cap resource control support.
13463792Sakolb */
13473792Sakolb /*ARGSUSED*/
13483792Sakolb static rctl_qty_t
zone_cpu_cap_get(rctl_t * rctl,struct proc * p)13493792Sakolb zone_cpu_cap_get(rctl_t *rctl, struct proc *p)
13503792Sakolb {
13513792Sakolb ASSERT(MUTEX_HELD(&p->p_lock));
13523792Sakolb return (cpucaps_zone_get(p->p_zone));
13533792Sakolb }
13543792Sakolb
13553792Sakolb /*ARGSUSED*/
13563792Sakolb static int
zone_cpu_cap_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)13573792Sakolb zone_cpu_cap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
13583792Sakolb rctl_qty_t nv)
13593792Sakolb {
13603792Sakolb zone_t *zone = e->rcep_p.zone;
13613792Sakolb
13623792Sakolb ASSERT(MUTEX_HELD(&p->p_lock));
13633792Sakolb ASSERT(e->rcep_t == RCENTITY_ZONE);
13643792Sakolb
13653792Sakolb if (zone == NULL)
13663792Sakolb return (0);
13673792Sakolb
13683792Sakolb /*
13693792Sakolb * set cap to the new value.
13703792Sakolb */
13713792Sakolb return (cpucaps_zone_set(zone, nv));
13723792Sakolb }
13733792Sakolb
13743792Sakolb static rctl_ops_t zone_cpu_cap_ops = {
13753792Sakolb rcop_no_action,
13763792Sakolb zone_cpu_cap_get,
13773792Sakolb zone_cpu_cap_set,
13783792Sakolb rcop_no_test
13793792Sakolb };
13803792Sakolb
13810Sstevel@tonic-gate /*ARGSUSED*/
13820Sstevel@tonic-gate static rctl_qty_t
zone_lwps_usage(rctl_t * r,proc_t * p)13830Sstevel@tonic-gate zone_lwps_usage(rctl_t *r, proc_t *p)
13840Sstevel@tonic-gate {
13850Sstevel@tonic-gate rctl_qty_t nlwps;
13860Sstevel@tonic-gate zone_t *zone = p->p_zone;
13870Sstevel@tonic-gate
13880Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
13890Sstevel@tonic-gate
13900Sstevel@tonic-gate mutex_enter(&zone->zone_nlwps_lock);
13910Sstevel@tonic-gate nlwps = zone->zone_nlwps;
13920Sstevel@tonic-gate mutex_exit(&zone->zone_nlwps_lock);
13930Sstevel@tonic-gate
13940Sstevel@tonic-gate return (nlwps);
13950Sstevel@tonic-gate }
13960Sstevel@tonic-gate
13970Sstevel@tonic-gate /*ARGSUSED*/
13980Sstevel@tonic-gate static int
zone_lwps_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rcntl,rctl_qty_t incr,uint_t flags)13990Sstevel@tonic-gate zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
14000Sstevel@tonic-gate rctl_qty_t incr, uint_t flags)
14010Sstevel@tonic-gate {
14020Sstevel@tonic-gate rctl_qty_t nlwps;
14030Sstevel@tonic-gate
14040Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
14050Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_ZONE);
14060Sstevel@tonic-gate if (e->rcep_p.zone == NULL)
14070Sstevel@tonic-gate return (0);
14080Sstevel@tonic-gate ASSERT(MUTEX_HELD(&(e->rcep_p.zone->zone_nlwps_lock)));
14090Sstevel@tonic-gate nlwps = e->rcep_p.zone->zone_nlwps;
14100Sstevel@tonic-gate
14110Sstevel@tonic-gate if (nlwps + incr > rcntl->rcv_value)
14120Sstevel@tonic-gate return (1);
14130Sstevel@tonic-gate
14140Sstevel@tonic-gate return (0);
14150Sstevel@tonic-gate }
14160Sstevel@tonic-gate
14170Sstevel@tonic-gate /*ARGSUSED*/
14180Sstevel@tonic-gate static int
zone_lwps_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)14192768Ssl108498 zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
14202768Ssl108498 {
14210Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
14220Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_ZONE);
14230Sstevel@tonic-gate if (e->rcep_p.zone == NULL)
14240Sstevel@tonic-gate return (0);
14250Sstevel@tonic-gate e->rcep_p.zone->zone_nlwps_ctl = nv;
14260Sstevel@tonic-gate return (0);
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate
14290Sstevel@tonic-gate static rctl_ops_t zone_lwps_ops = {
14300Sstevel@tonic-gate rcop_no_action,
14310Sstevel@tonic-gate zone_lwps_usage,
14320Sstevel@tonic-gate zone_lwps_set,
14330Sstevel@tonic-gate zone_lwps_test,
14340Sstevel@tonic-gate };
14350Sstevel@tonic-gate
14362677Sml93401 /*ARGSUSED*/
143712725SMenno.Lageman@Sun.COM static rctl_qty_t
zone_procs_usage(rctl_t * r,proc_t * p)143812725SMenno.Lageman@Sun.COM zone_procs_usage(rctl_t *r, proc_t *p)
143912725SMenno.Lageman@Sun.COM {
144012725SMenno.Lageman@Sun.COM rctl_qty_t nprocs;
144112725SMenno.Lageman@Sun.COM zone_t *zone = p->p_zone;
144212725SMenno.Lageman@Sun.COM
144312725SMenno.Lageman@Sun.COM ASSERT(MUTEX_HELD(&p->p_lock));
144412725SMenno.Lageman@Sun.COM
144512725SMenno.Lageman@Sun.COM mutex_enter(&zone->zone_nlwps_lock);
144612725SMenno.Lageman@Sun.COM nprocs = zone->zone_nprocs;
144712725SMenno.Lageman@Sun.COM mutex_exit(&zone->zone_nlwps_lock);
144812725SMenno.Lageman@Sun.COM
144912725SMenno.Lageman@Sun.COM return (nprocs);
145012725SMenno.Lageman@Sun.COM }
145112725SMenno.Lageman@Sun.COM
145212725SMenno.Lageman@Sun.COM /*ARGSUSED*/
145312725SMenno.Lageman@Sun.COM static int
zone_procs_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rcntl,rctl_qty_t incr,uint_t flags)145412725SMenno.Lageman@Sun.COM zone_procs_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
145512725SMenno.Lageman@Sun.COM rctl_qty_t incr, uint_t flags)
145612725SMenno.Lageman@Sun.COM {
145712725SMenno.Lageman@Sun.COM rctl_qty_t nprocs;
145812725SMenno.Lageman@Sun.COM
145912725SMenno.Lageman@Sun.COM ASSERT(MUTEX_HELD(&p->p_lock));
146012725SMenno.Lageman@Sun.COM ASSERT(e->rcep_t == RCENTITY_ZONE);
146112725SMenno.Lageman@Sun.COM if (e->rcep_p.zone == NULL)
146212725SMenno.Lageman@Sun.COM return (0);
146312725SMenno.Lageman@Sun.COM ASSERT(MUTEX_HELD(&(e->rcep_p.zone->zone_nlwps_lock)));
146412725SMenno.Lageman@Sun.COM nprocs = e->rcep_p.zone->zone_nprocs;
146512725SMenno.Lageman@Sun.COM
146612725SMenno.Lageman@Sun.COM if (nprocs + incr > rcntl->rcv_value)
146712725SMenno.Lageman@Sun.COM return (1);
146812725SMenno.Lageman@Sun.COM
146912725SMenno.Lageman@Sun.COM return (0);
147012725SMenno.Lageman@Sun.COM }
147112725SMenno.Lageman@Sun.COM
147212725SMenno.Lageman@Sun.COM /*ARGSUSED*/
147312725SMenno.Lageman@Sun.COM static int
zone_procs_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)147412725SMenno.Lageman@Sun.COM zone_procs_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
147512725SMenno.Lageman@Sun.COM {
147612725SMenno.Lageman@Sun.COM ASSERT(MUTEX_HELD(&p->p_lock));
147712725SMenno.Lageman@Sun.COM ASSERT(e->rcep_t == RCENTITY_ZONE);
147812725SMenno.Lageman@Sun.COM if (e->rcep_p.zone == NULL)
147912725SMenno.Lageman@Sun.COM return (0);
148012725SMenno.Lageman@Sun.COM e->rcep_p.zone->zone_nprocs_ctl = nv;
148112725SMenno.Lageman@Sun.COM return (0);
148212725SMenno.Lageman@Sun.COM }
148312725SMenno.Lageman@Sun.COM
148412725SMenno.Lageman@Sun.COM static rctl_ops_t zone_procs_ops = {
148512725SMenno.Lageman@Sun.COM rcop_no_action,
148612725SMenno.Lageman@Sun.COM zone_procs_usage,
148712725SMenno.Lageman@Sun.COM zone_procs_set,
148812725SMenno.Lageman@Sun.COM zone_procs_test,
148912725SMenno.Lageman@Sun.COM };
149012725SMenno.Lageman@Sun.COM
149112725SMenno.Lageman@Sun.COM /*ARGSUSED*/
14922677Sml93401 static int
zone_shmmax_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rval,rctl_qty_t incr,uint_t flags)14932677Sml93401 zone_shmmax_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
14942677Sml93401 rctl_qty_t incr, uint_t flags)
14952677Sml93401 {
14962677Sml93401 rctl_qty_t v;
14972677Sml93401 ASSERT(MUTEX_HELD(&p->p_lock));
14982677Sml93401 ASSERT(e->rcep_t == RCENTITY_ZONE);
14992677Sml93401 v = e->rcep_p.zone->zone_shmmax + incr;
15002677Sml93401 if (v > rval->rcv_value)
15012677Sml93401 return (1);
15022677Sml93401 return (0);
15032677Sml93401 }
15042677Sml93401
15052677Sml93401 static rctl_ops_t zone_shmmax_ops = {
15062677Sml93401 rcop_no_action,
15072677Sml93401 rcop_no_usage,
15082677Sml93401 rcop_no_set,
15092677Sml93401 zone_shmmax_test
15102677Sml93401 };
15112677Sml93401
15122677Sml93401 /*ARGSUSED*/
15132677Sml93401 static int
zone_shmmni_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rval,rctl_qty_t incr,uint_t flags)15142677Sml93401 zone_shmmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
15152677Sml93401 rctl_qty_t incr, uint_t flags)
15162677Sml93401 {
15172677Sml93401 rctl_qty_t v;
15182677Sml93401 ASSERT(MUTEX_HELD(&p->p_lock));
15192677Sml93401 ASSERT(e->rcep_t == RCENTITY_ZONE);
15202677Sml93401 v = e->rcep_p.zone->zone_ipc.ipcq_shmmni + incr;
15212677Sml93401 if (v > rval->rcv_value)
15222677Sml93401 return (1);
15232677Sml93401 return (0);
15242677Sml93401 }
15252677Sml93401
15262677Sml93401 static rctl_ops_t zone_shmmni_ops = {
15272677Sml93401 rcop_no_action,
15282677Sml93401 rcop_no_usage,
15292677Sml93401 rcop_no_set,
15302677Sml93401 zone_shmmni_test
15312677Sml93401 };
15322677Sml93401
15332677Sml93401 /*ARGSUSED*/
15342677Sml93401 static int
zone_semmni_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rval,rctl_qty_t incr,uint_t flags)15352677Sml93401 zone_semmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
15362677Sml93401 rctl_qty_t incr, uint_t flags)
15372677Sml93401 {
15382677Sml93401 rctl_qty_t v;
15392677Sml93401 ASSERT(MUTEX_HELD(&p->p_lock));
15402677Sml93401 ASSERT(e->rcep_t == RCENTITY_ZONE);
15412677Sml93401 v = e->rcep_p.zone->zone_ipc.ipcq_semmni + incr;
15422677Sml93401 if (v > rval->rcv_value)
15432677Sml93401 return (1);
15442677Sml93401 return (0);
15452677Sml93401 }
15462677Sml93401
15472677Sml93401 static rctl_ops_t zone_semmni_ops = {
15482677Sml93401 rcop_no_action,
15492677Sml93401 rcop_no_usage,
15502677Sml93401 rcop_no_set,
15512677Sml93401 zone_semmni_test
15522677Sml93401 };
15532677Sml93401
15542677Sml93401 /*ARGSUSED*/
15552677Sml93401 static int
zone_msgmni_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rval,rctl_qty_t incr,uint_t flags)15562677Sml93401 zone_msgmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
15572677Sml93401 rctl_qty_t incr, uint_t flags)
15582677Sml93401 {
15592677Sml93401 rctl_qty_t v;
15602677Sml93401 ASSERT(MUTEX_HELD(&p->p_lock));
15612677Sml93401 ASSERT(e->rcep_t == RCENTITY_ZONE);
15622677Sml93401 v = e->rcep_p.zone->zone_ipc.ipcq_msgmni + incr;
15632677Sml93401 if (v > rval->rcv_value)
15642677Sml93401 return (1);
15652677Sml93401 return (0);
15662677Sml93401 }
15672677Sml93401
15682677Sml93401 static rctl_ops_t zone_msgmni_ops = {
15692677Sml93401 rcop_no_action,
15702677Sml93401 rcop_no_usage,
15712677Sml93401 rcop_no_set,
15722677Sml93401 zone_msgmni_test
15732677Sml93401 };
15742677Sml93401
15752768Ssl108498 /*ARGSUSED*/
15762768Ssl108498 static rctl_qty_t
zone_locked_mem_usage(rctl_t * rctl,struct proc * p)15772768Ssl108498 zone_locked_mem_usage(rctl_t *rctl, struct proc *p)
15782768Ssl108498 {
15792768Ssl108498 rctl_qty_t q;
15802768Ssl108498 ASSERT(MUTEX_HELD(&p->p_lock));
15813247Sgjelinek mutex_enter(&p->p_zone->zone_mem_lock);
15822768Ssl108498 q = p->p_zone->zone_locked_mem;
15833247Sgjelinek mutex_exit(&p->p_zone->zone_mem_lock);
15842768Ssl108498 return (q);
15852768Ssl108498 }
15862768Ssl108498
15872768Ssl108498 /*ARGSUSED*/
15882768Ssl108498 static int
zone_locked_mem_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rcntl,rctl_qty_t incr,uint_t flags)15892768Ssl108498 zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
15902768Ssl108498 rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
15912768Ssl108498 {
15922768Ssl108498 rctl_qty_t q;
15933247Sgjelinek zone_t *z;
15943247Sgjelinek
15953247Sgjelinek z = e->rcep_p.zone;
15962768Ssl108498 ASSERT(MUTEX_HELD(&p->p_lock));
15973247Sgjelinek ASSERT(MUTEX_HELD(&z->zone_mem_lock));
15983247Sgjelinek q = z->zone_locked_mem;
15992768Ssl108498 if (q + incr > rcntl->rcv_value)
16002768Ssl108498 return (1);
16012768Ssl108498 return (0);
16022768Ssl108498 }
16032768Ssl108498
16042768Ssl108498 /*ARGSUSED*/
16052768Ssl108498 static int
zone_locked_mem_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)16062768Ssl108498 zone_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
16072768Ssl108498 rctl_qty_t nv)
16082768Ssl108498 {
16092768Ssl108498 ASSERT(MUTEX_HELD(&p->p_lock));
16102768Ssl108498 ASSERT(e->rcep_t == RCENTITY_ZONE);
16112768Ssl108498 if (e->rcep_p.zone == NULL)
16122768Ssl108498 return (0);
16132768Ssl108498 e->rcep_p.zone->zone_locked_mem_ctl = nv;
16142768Ssl108498 return (0);
16152768Ssl108498 }
16162768Ssl108498
16172768Ssl108498 static rctl_ops_t zone_locked_mem_ops = {
16182768Ssl108498 rcop_no_action,
16192768Ssl108498 zone_locked_mem_usage,
16202768Ssl108498 zone_locked_mem_set,
16212768Ssl108498 zone_locked_mem_test
16222768Ssl108498 };
16232677Sml93401
16243247Sgjelinek /*ARGSUSED*/
16253247Sgjelinek static rctl_qty_t
zone_max_swap_usage(rctl_t * rctl,struct proc * p)16263247Sgjelinek zone_max_swap_usage(rctl_t *rctl, struct proc *p)
16273247Sgjelinek {
16283247Sgjelinek rctl_qty_t q;
16293247Sgjelinek zone_t *z = p->p_zone;
16303247Sgjelinek
16313247Sgjelinek ASSERT(MUTEX_HELD(&p->p_lock));
16323247Sgjelinek mutex_enter(&z->zone_mem_lock);
16333247Sgjelinek q = z->zone_max_swap;
16343247Sgjelinek mutex_exit(&z->zone_mem_lock);
16353247Sgjelinek return (q);
16363247Sgjelinek }
16373247Sgjelinek
16383247Sgjelinek /*ARGSUSED*/
16393247Sgjelinek static int
zone_max_swap_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rcntl,rctl_qty_t incr,uint_t flags)16403247Sgjelinek zone_max_swap_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
16413247Sgjelinek rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
16423247Sgjelinek {
16433247Sgjelinek rctl_qty_t q;
16443247Sgjelinek zone_t *z;
16453247Sgjelinek
16463247Sgjelinek z = e->rcep_p.zone;
16473247Sgjelinek ASSERT(MUTEX_HELD(&p->p_lock));
16483247Sgjelinek ASSERT(MUTEX_HELD(&z->zone_mem_lock));
16493247Sgjelinek q = z->zone_max_swap;
16503247Sgjelinek if (q + incr > rcntl->rcv_value)
16513247Sgjelinek return (1);
16523247Sgjelinek return (0);
16533247Sgjelinek }
16543247Sgjelinek
16553247Sgjelinek /*ARGSUSED*/
16563247Sgjelinek static int
zone_max_swap_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)16573247Sgjelinek zone_max_swap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
16583247Sgjelinek rctl_qty_t nv)
16593247Sgjelinek {
16603247Sgjelinek ASSERT(MUTEX_HELD(&p->p_lock));
16613247Sgjelinek ASSERT(e->rcep_t == RCENTITY_ZONE);
16623247Sgjelinek if (e->rcep_p.zone == NULL)
16633247Sgjelinek return (0);
16643247Sgjelinek e->rcep_p.zone->zone_max_swap_ctl = nv;
16653247Sgjelinek return (0);
16663247Sgjelinek }
16673247Sgjelinek
16683247Sgjelinek static rctl_ops_t zone_max_swap_ops = {
16693247Sgjelinek rcop_no_action,
16703247Sgjelinek zone_max_swap_usage,
16713247Sgjelinek zone_max_swap_set,
16723247Sgjelinek zone_max_swap_test
16733247Sgjelinek };
16743247Sgjelinek
167512633Sjohn.levon@sun.com /*ARGSUSED*/
167612633Sjohn.levon@sun.com static rctl_qty_t
zone_max_lofi_usage(rctl_t * rctl,struct proc * p)167712633Sjohn.levon@sun.com zone_max_lofi_usage(rctl_t *rctl, struct proc *p)
167812633Sjohn.levon@sun.com {
167912633Sjohn.levon@sun.com rctl_qty_t q;
168012633Sjohn.levon@sun.com zone_t *z = p->p_zone;
168112633Sjohn.levon@sun.com
168212633Sjohn.levon@sun.com ASSERT(MUTEX_HELD(&p->p_lock));
168312633Sjohn.levon@sun.com mutex_enter(&z->zone_rctl_lock);
168412633Sjohn.levon@sun.com q = z->zone_max_lofi;
168512633Sjohn.levon@sun.com mutex_exit(&z->zone_rctl_lock);
168612633Sjohn.levon@sun.com return (q);
168712633Sjohn.levon@sun.com }
168812633Sjohn.levon@sun.com
168912633Sjohn.levon@sun.com /*ARGSUSED*/
169012633Sjohn.levon@sun.com static int
zone_max_lofi_test(rctl_t * r,proc_t * p,rctl_entity_p_t * e,rctl_val_t * rcntl,rctl_qty_t incr,uint_t flags)169112633Sjohn.levon@sun.com zone_max_lofi_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
169212633Sjohn.levon@sun.com rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
169312633Sjohn.levon@sun.com {
169412633Sjohn.levon@sun.com rctl_qty_t q;
169512633Sjohn.levon@sun.com zone_t *z;
169612633Sjohn.levon@sun.com
169712633Sjohn.levon@sun.com z = e->rcep_p.zone;
169812633Sjohn.levon@sun.com ASSERT(MUTEX_HELD(&p->p_lock));
169912633Sjohn.levon@sun.com ASSERT(MUTEX_HELD(&z->zone_rctl_lock));
170012633Sjohn.levon@sun.com q = z->zone_max_lofi;
170112633Sjohn.levon@sun.com if (q + incr > rcntl->rcv_value)
170212633Sjohn.levon@sun.com return (1);
170312633Sjohn.levon@sun.com return (0);
170412633Sjohn.levon@sun.com }
170512633Sjohn.levon@sun.com
170612633Sjohn.levon@sun.com /*ARGSUSED*/
170712633Sjohn.levon@sun.com static int
zone_max_lofi_set(rctl_t * rctl,struct proc * p,rctl_entity_p_t * e,rctl_qty_t nv)170812633Sjohn.levon@sun.com zone_max_lofi_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
170912633Sjohn.levon@sun.com rctl_qty_t nv)
171012633Sjohn.levon@sun.com {
171112633Sjohn.levon@sun.com ASSERT(MUTEX_HELD(&p->p_lock));
171212633Sjohn.levon@sun.com ASSERT(e->rcep_t == RCENTITY_ZONE);
171312633Sjohn.levon@sun.com if (e->rcep_p.zone == NULL)
171412633Sjohn.levon@sun.com return (0);
171512633Sjohn.levon@sun.com e->rcep_p.zone->zone_max_lofi_ctl = nv;
171612633Sjohn.levon@sun.com return (0);
171712633Sjohn.levon@sun.com }
171812633Sjohn.levon@sun.com
171912633Sjohn.levon@sun.com static rctl_ops_t zone_max_lofi_ops = {
172012633Sjohn.levon@sun.com rcop_no_action,
172112633Sjohn.levon@sun.com zone_max_lofi_usage,
172212633Sjohn.levon@sun.com zone_max_lofi_set,
172312633Sjohn.levon@sun.com zone_max_lofi_test
172412633Sjohn.levon@sun.com };
172512633Sjohn.levon@sun.com
17260Sstevel@tonic-gate /*
17270Sstevel@tonic-gate * Helper function to brand the zone with a unique ID.
17280Sstevel@tonic-gate */
17290Sstevel@tonic-gate static void
zone_uniqid(zone_t * zone)17300Sstevel@tonic-gate zone_uniqid(zone_t *zone)
17310Sstevel@tonic-gate {
17320Sstevel@tonic-gate static uint64_t uniqid = 0;
17330Sstevel@tonic-gate
17340Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
17350Sstevel@tonic-gate zone->zone_uniqid = uniqid++;
17360Sstevel@tonic-gate }
17370Sstevel@tonic-gate
17380Sstevel@tonic-gate /*
17390Sstevel@tonic-gate * Returns a held pointer to the "kcred" for the specified zone.
17400Sstevel@tonic-gate */
17410Sstevel@tonic-gate struct cred *
zone_get_kcred(zoneid_t zoneid)17420Sstevel@tonic-gate zone_get_kcred(zoneid_t zoneid)
17430Sstevel@tonic-gate {
17440Sstevel@tonic-gate zone_t *zone;
17450Sstevel@tonic-gate cred_t *cr;
17460Sstevel@tonic-gate
17470Sstevel@tonic-gate if ((zone = zone_find_by_id(zoneid)) == NULL)
17480Sstevel@tonic-gate return (NULL);
17490Sstevel@tonic-gate cr = zone->zone_kcred;
17500Sstevel@tonic-gate crhold(cr);
17510Sstevel@tonic-gate zone_rele(zone);
17520Sstevel@tonic-gate return (cr);
17530Sstevel@tonic-gate }
17540Sstevel@tonic-gate
17553247Sgjelinek static int
zone_lockedmem_kstat_update(kstat_t * ksp,int rw)17563247Sgjelinek zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
17573247Sgjelinek {
17583247Sgjelinek zone_t *zone = ksp->ks_private;
17593247Sgjelinek zone_kstat_t *zk = ksp->ks_data;
17603247Sgjelinek
17613247Sgjelinek if (rw == KSTAT_WRITE)
17623247Sgjelinek return (EACCES);
17633247Sgjelinek
17643247Sgjelinek zk->zk_usage.value.ui64 = zone->zone_locked_mem;
17653247Sgjelinek zk->zk_value.value.ui64 = zone->zone_locked_mem_ctl;
17663247Sgjelinek return (0);
17673247Sgjelinek }
17683247Sgjelinek
17693247Sgjelinek static int
zone_nprocs_kstat_update(kstat_t * ksp,int rw)177012725SMenno.Lageman@Sun.COM zone_nprocs_kstat_update(kstat_t *ksp, int rw)
177112725SMenno.Lageman@Sun.COM {
177212725SMenno.Lageman@Sun.COM zone_t *zone = ksp->ks_private;
177312725SMenno.Lageman@Sun.COM zone_kstat_t *zk = ksp->ks_data;
177412725SMenno.Lageman@Sun.COM
177512725SMenno.Lageman@Sun.COM if (rw == KSTAT_WRITE)
177612725SMenno.Lageman@Sun.COM return (EACCES);
177712725SMenno.Lageman@Sun.COM
177812725SMenno.Lageman@Sun.COM zk->zk_usage.value.ui64 = zone->zone_nprocs;
177912725SMenno.Lageman@Sun.COM zk->zk_value.value.ui64 = zone->zone_nprocs_ctl;
178012725SMenno.Lageman@Sun.COM return (0);
178112725SMenno.Lageman@Sun.COM }
178212725SMenno.Lageman@Sun.COM
178312725SMenno.Lageman@Sun.COM static int
zone_swapresv_kstat_update(kstat_t * ksp,int rw)17843247Sgjelinek zone_swapresv_kstat_update(kstat_t *ksp, int rw)
17853247Sgjelinek {
17863247Sgjelinek zone_t *zone = ksp->ks_private;
17873247Sgjelinek zone_kstat_t *zk = ksp->ks_data;
17883247Sgjelinek
17893247Sgjelinek if (rw == KSTAT_WRITE)
17903247Sgjelinek return (EACCES);
17913247Sgjelinek
17923247Sgjelinek zk->zk_usage.value.ui64 = zone->zone_max_swap;
17933247Sgjelinek zk->zk_value.value.ui64 = zone->zone_max_swap_ctl;
17943247Sgjelinek return (0);
17953247Sgjelinek }
17963247Sgjelinek
179712725SMenno.Lageman@Sun.COM static kstat_t *
zone_kstat_create_common(zone_t * zone,char * name,int (* updatefunc)(kstat_t *,int))179812725SMenno.Lageman@Sun.COM zone_kstat_create_common(zone_t *zone, char *name,
179912725SMenno.Lageman@Sun.COM int (*updatefunc) (kstat_t *, int))
18003247Sgjelinek {
18013247Sgjelinek kstat_t *ksp;
18023247Sgjelinek zone_kstat_t *zk;
18033247Sgjelinek
180412725SMenno.Lageman@Sun.COM ksp = rctl_kstat_create_zone(zone, name, KSTAT_TYPE_NAMED,
18053247Sgjelinek sizeof (zone_kstat_t) / sizeof (kstat_named_t),
18063247Sgjelinek KSTAT_FLAG_VIRTUAL);
18073247Sgjelinek
18083247Sgjelinek if (ksp == NULL)
180912725SMenno.Lageman@Sun.COM return (NULL);
18103247Sgjelinek
18113247Sgjelinek zk = ksp->ks_data = kmem_alloc(sizeof (zone_kstat_t), KM_SLEEP);
18123247Sgjelinek ksp->ks_data_size += strlen(zone->zone_name) + 1;
18133247Sgjelinek kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING);
18143247Sgjelinek kstat_named_setstr(&zk->zk_zonename, zone->zone_name);
18153247Sgjelinek kstat_named_init(&zk->zk_usage, "usage", KSTAT_DATA_UINT64);
18163247Sgjelinek kstat_named_init(&zk->zk_value, "value", KSTAT_DATA_UINT64);
181712725SMenno.Lageman@Sun.COM ksp->ks_update = updatefunc;
18183247Sgjelinek ksp->ks_private = zone;
18193247Sgjelinek kstat_install(ksp);
182012725SMenno.Lageman@Sun.COM return (ksp);
182112725SMenno.Lageman@Sun.COM }
182212725SMenno.Lageman@Sun.COM
182312725SMenno.Lageman@Sun.COM static void
zone_kstat_create(zone_t * zone)182412725SMenno.Lageman@Sun.COM zone_kstat_create(zone_t *zone)
182512725SMenno.Lageman@Sun.COM {
182612725SMenno.Lageman@Sun.COM zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
182712725SMenno.Lageman@Sun.COM "lockedmem", zone_lockedmem_kstat_update);
182812725SMenno.Lageman@Sun.COM zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
182912725SMenno.Lageman@Sun.COM "swapresv", zone_swapresv_kstat_update);
183012725SMenno.Lageman@Sun.COM zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
183112725SMenno.Lageman@Sun.COM "nprocs", zone_nprocs_kstat_update);
183212725SMenno.Lageman@Sun.COM }
183312725SMenno.Lageman@Sun.COM
183412725SMenno.Lageman@Sun.COM static void
zone_kstat_delete_common(kstat_t ** pkstat)183512725SMenno.Lageman@Sun.COM zone_kstat_delete_common(kstat_t **pkstat)
183612725SMenno.Lageman@Sun.COM {
183712725SMenno.Lageman@Sun.COM void *data;
183812725SMenno.Lageman@Sun.COM
183912725SMenno.Lageman@Sun.COM if (*pkstat != NULL) {
184012725SMenno.Lageman@Sun.COM data = (*pkstat)->ks_data;
184112725SMenno.Lageman@Sun.COM kstat_delete(*pkstat);
184212725SMenno.Lageman@Sun.COM kmem_free(data, sizeof (zone_kstat_t));
184312725SMenno.Lageman@Sun.COM *pkstat = NULL;
184412725SMenno.Lageman@Sun.COM }
18453247Sgjelinek }
18463247Sgjelinek
18473247Sgjelinek static void
zone_kstat_delete(zone_t * zone)18483247Sgjelinek zone_kstat_delete(zone_t *zone)
18493247Sgjelinek {
185012725SMenno.Lageman@Sun.COM zone_kstat_delete_common(&zone->zone_lockedmem_kstat);
185112725SMenno.Lageman@Sun.COM zone_kstat_delete_common(&zone->zone_swapresv_kstat);
185212725SMenno.Lageman@Sun.COM zone_kstat_delete_common(&zone->zone_nprocs_kstat);
18533247Sgjelinek }
18543247Sgjelinek
18550Sstevel@tonic-gate /*
18560Sstevel@tonic-gate * Called very early on in boot to initialize the ZSD list so that
18570Sstevel@tonic-gate * zone_key_create() can be called before zone_init(). It also initializes
18580Sstevel@tonic-gate * portions of zone0 which may be used before zone_init() is called. The
18590Sstevel@tonic-gate * variable "global_zone" will be set when zone0 is fully initialized by
18600Sstevel@tonic-gate * zone_init().
18610Sstevel@tonic-gate */
18620Sstevel@tonic-gate void
zone_zsd_init(void)18630Sstevel@tonic-gate zone_zsd_init(void)
18640Sstevel@tonic-gate {
18650Sstevel@tonic-gate mutex_init(&zonehash_lock, NULL, MUTEX_DEFAULT, NULL);
18660Sstevel@tonic-gate mutex_init(&zsd_key_lock, NULL, MUTEX_DEFAULT, NULL);
18670Sstevel@tonic-gate list_create(&zsd_registered_keys, sizeof (struct zsd_entry),
18680Sstevel@tonic-gate offsetof(struct zsd_entry, zsd_linkage));
18690Sstevel@tonic-gate list_create(&zone_active, sizeof (zone_t),
18700Sstevel@tonic-gate offsetof(zone_t, zone_linkage));
18710Sstevel@tonic-gate list_create(&zone_deathrow, sizeof (zone_t),
18720Sstevel@tonic-gate offsetof(zone_t, zone_linkage));
18730Sstevel@tonic-gate
18740Sstevel@tonic-gate mutex_init(&zone0.zone_lock, NULL, MUTEX_DEFAULT, NULL);
18750Sstevel@tonic-gate mutex_init(&zone0.zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
18763247Sgjelinek mutex_init(&zone0.zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
18770Sstevel@tonic-gate zone0.zone_shares = 1;
18783247Sgjelinek zone0.zone_nlwps = 0;
18790Sstevel@tonic-gate zone0.zone_nlwps_ctl = INT_MAX;
188012725SMenno.Lageman@Sun.COM zone0.zone_nprocs = 0;
188112725SMenno.Lageman@Sun.COM zone0.zone_nprocs_ctl = INT_MAX;
18823247Sgjelinek zone0.zone_locked_mem = 0;
18833247Sgjelinek zone0.zone_locked_mem_ctl = UINT64_MAX;
18843247Sgjelinek ASSERT(zone0.zone_max_swap == 0);
18853247Sgjelinek zone0.zone_max_swap_ctl = UINT64_MAX;
188612633Sjohn.levon@sun.com zone0.zone_max_lofi = 0;
188712633Sjohn.levon@sun.com zone0.zone_max_lofi_ctl = UINT64_MAX;
18882677Sml93401 zone0.zone_shmmax = 0;
18892677Sml93401 zone0.zone_ipc.ipcq_shmmni = 0;
18902677Sml93401 zone0.zone_ipc.ipcq_semmni = 0;
18912677Sml93401 zone0.zone_ipc.ipcq_msgmni = 0;
18920Sstevel@tonic-gate zone0.zone_name = GLOBAL_ZONENAME;
18930Sstevel@tonic-gate zone0.zone_nodename = utsname.nodename;
18940Sstevel@tonic-gate zone0.zone_domain = srpc_domain;
18958662SJordan.Vaughan@Sun.com zone0.zone_hostid = HW_INVALID_HOSTID;
189612633Sjohn.levon@sun.com zone0.zone_fs_allowed = NULL;
18970Sstevel@tonic-gate zone0.zone_ref = 1;
18980Sstevel@tonic-gate zone0.zone_id = GLOBAL_ZONEID;
18990Sstevel@tonic-gate zone0.zone_status = ZONE_IS_RUNNING;
19000Sstevel@tonic-gate zone0.zone_rootpath = "/";
19010Sstevel@tonic-gate zone0.zone_rootpathlen = 2;
19020Sstevel@tonic-gate zone0.zone_psetid = ZONE_PS_INVAL;
19030Sstevel@tonic-gate zone0.zone_ncpus = 0;
19040Sstevel@tonic-gate zone0.zone_ncpus_online = 0;
19050Sstevel@tonic-gate zone0.zone_proc_initpid = 1;
19062267Sdp zone0.zone_initname = initname;
19073247Sgjelinek zone0.zone_lockedmem_kstat = NULL;
19083247Sgjelinek zone0.zone_swapresv_kstat = NULL;
190912725SMenno.Lageman@Sun.COM zone0.zone_nprocs_kstat = NULL;
1910*13096SJordan.Vaughan@Sun.com list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
1911*13096SJordan.Vaughan@Sun.com offsetof(zone_ref_t, zref_linkage));
19120Sstevel@tonic-gate list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
19130Sstevel@tonic-gate offsetof(struct zsd_entry, zsd_linkage));
19140Sstevel@tonic-gate list_insert_head(&zone_active, &zone0);
19150Sstevel@tonic-gate
19160Sstevel@tonic-gate /*
19170Sstevel@tonic-gate * The root filesystem is not mounted yet, so zone_rootvp cannot be set
19180Sstevel@tonic-gate * to anything meaningful. It is assigned to be 'rootdir' in
19190Sstevel@tonic-gate * vfs_mountroot().
19200Sstevel@tonic-gate */
19210Sstevel@tonic-gate zone0.zone_rootvp = NULL;
19220Sstevel@tonic-gate zone0.zone_vfslist = NULL;
19232267Sdp zone0.zone_bootargs = initargs;
19240Sstevel@tonic-gate zone0.zone_privset = kmem_alloc(sizeof (priv_set_t), KM_SLEEP);
19250Sstevel@tonic-gate /*
19260Sstevel@tonic-gate * The global zone has all privileges
19270Sstevel@tonic-gate */
19280Sstevel@tonic-gate priv_fillset(zone0.zone_privset);
19290Sstevel@tonic-gate /*
19300Sstevel@tonic-gate * Add p0 to the global zone
19310Sstevel@tonic-gate */
19320Sstevel@tonic-gate zone0.zone_zsched = &p0;
19330Sstevel@tonic-gate p0.p_zone = &zone0;
19340Sstevel@tonic-gate }
19350Sstevel@tonic-gate
19360Sstevel@tonic-gate /*
19371676Sjpk * Compute a hash value based on the contents of the label and the DOI. The
19381676Sjpk * hash algorithm is somewhat arbitrary, but is based on the observation that
19391676Sjpk * humans will likely pick labels that differ by amounts that work out to be
19401676Sjpk * multiples of the number of hash chains, and thus stirring in some primes
19411676Sjpk * should help.
19421676Sjpk */
19431676Sjpk static uint_t
hash_bylabel(void * hdata,mod_hash_key_t key)19441676Sjpk hash_bylabel(void *hdata, mod_hash_key_t key)
19451676Sjpk {
19461676Sjpk const ts_label_t *lab = (ts_label_t *)key;
19471676Sjpk const uint32_t *up, *ue;
19481676Sjpk uint_t hash;
19491676Sjpk int i;
19501676Sjpk
19511676Sjpk _NOTE(ARGUNUSED(hdata));
19521676Sjpk
19531676Sjpk hash = lab->tsl_doi + (lab->tsl_doi << 1);
19541676Sjpk /* we depend on alignment of label, but not representation */
19551676Sjpk up = (const uint32_t *)&lab->tsl_label;
19561676Sjpk ue = up + sizeof (lab->tsl_label) / sizeof (*up);
19571676Sjpk i = 1;
19581676Sjpk while (up < ue) {
19591676Sjpk /* using 2^n + 1, 1 <= n <= 16 as source of many primes */
19601676Sjpk hash += *up + (*up << ((i % 16) + 1));
19611676Sjpk up++;
19621676Sjpk i++;
19631676Sjpk }
19641676Sjpk return (hash);
19651676Sjpk }
19661676Sjpk
19671676Sjpk /*
19681676Sjpk * All that mod_hash cares about here is zero (equal) versus non-zero (not
19691676Sjpk * equal). This may need to be changed if less than / greater than is ever
19701676Sjpk * needed.
19711676Sjpk */
19721676Sjpk static int
hash_labelkey_cmp(mod_hash_key_t key1,mod_hash_key_t key2)19731676Sjpk hash_labelkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
19741676Sjpk {
19751676Sjpk ts_label_t *lab1 = (ts_label_t *)key1;
19761676Sjpk ts_label_t *lab2 = (ts_label_t *)key2;
19771676Sjpk
19781676Sjpk return (label_equal(lab1, lab2) ? 0 : 1);
19791676Sjpk }
19801676Sjpk
19811676Sjpk /*
19820Sstevel@tonic-gate * Called by main() to initialize the zones framework.
19830Sstevel@tonic-gate */
19840Sstevel@tonic-gate void
zone_init(void)19850Sstevel@tonic-gate zone_init(void)
19860Sstevel@tonic-gate {
19870Sstevel@tonic-gate rctl_dict_entry_t *rde;
19880Sstevel@tonic-gate rctl_val_t *dval;
19890Sstevel@tonic-gate rctl_set_t *set;
19900Sstevel@tonic-gate rctl_alloc_gp_t *gp;
19910Sstevel@tonic-gate rctl_entity_p_t e;
19921166Sdstaff int res;
19930Sstevel@tonic-gate
19940Sstevel@tonic-gate ASSERT(curproc == &p0);
19950Sstevel@tonic-gate
19960Sstevel@tonic-gate /*
19970Sstevel@tonic-gate * Create ID space for zone IDs. ID 0 is reserved for the
19980Sstevel@tonic-gate * global zone.
19990Sstevel@tonic-gate */
20000Sstevel@tonic-gate zoneid_space = id_space_create("zoneid_space", 1, MAX_ZONEID);
20010Sstevel@tonic-gate
20020Sstevel@tonic-gate /*
20030Sstevel@tonic-gate * Initialize generic zone resource controls, if any.
20040Sstevel@tonic-gate */
20050Sstevel@tonic-gate rc_zone_cpu_shares = rctl_register("zone.cpu-shares",
20060Sstevel@tonic-gate RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
20071996Sml93401 RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
20083792Sakolb FSS_MAXSHARES, FSS_MAXSHARES, &zone_cpu_shares_ops);
20093792Sakolb
20103792Sakolb rc_zone_cpu_cap = rctl_register("zone.cpu-cap",
20113792Sakolb RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_ALWAYS |
20123792Sakolb RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |RCTL_GLOBAL_SYSLOG_NEVER |
20133792Sakolb RCTL_GLOBAL_INFINITE,
20143792Sakolb MAXCAP, MAXCAP, &zone_cpu_cap_ops);
20150Sstevel@tonic-gate
20160Sstevel@tonic-gate rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
20170Sstevel@tonic-gate RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
20180Sstevel@tonic-gate INT_MAX, INT_MAX, &zone_lwps_ops);
201912725SMenno.Lageman@Sun.COM
202012725SMenno.Lageman@Sun.COM rc_zone_nprocs = rctl_register("zone.max-processes", RCENTITY_ZONE,
202112725SMenno.Lageman@Sun.COM RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
202212725SMenno.Lageman@Sun.COM INT_MAX, INT_MAX, &zone_procs_ops);
202312725SMenno.Lageman@Sun.COM
20240Sstevel@tonic-gate /*
20252677Sml93401 * System V IPC resource controls
20262677Sml93401 */
20272677Sml93401 rc_zone_msgmni = rctl_register("zone.max-msg-ids",
20282677Sml93401 RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
20292677Sml93401 RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_msgmni_ops);
20302677Sml93401
20312677Sml93401 rc_zone_semmni = rctl_register("zone.max-sem-ids",
20322677Sml93401 RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
20332677Sml93401 RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_semmni_ops);
20342677Sml93401
20352677Sml93401 rc_zone_shmmni = rctl_register("zone.max-shm-ids",
20362677Sml93401 RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
20372677Sml93401 RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_shmmni_ops);
20382677Sml93401
20392677Sml93401 rc_zone_shmmax = rctl_register("zone.max-shm-memory",
20402677Sml93401 RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
20412677Sml93401 RCTL_GLOBAL_BYTES, UINT64_MAX, UINT64_MAX, &zone_shmmax_ops);
20422677Sml93401
20432677Sml93401 /*
20440Sstevel@tonic-gate * Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach
20450Sstevel@tonic-gate * this at the head of the rctl_dict_entry for ``zone.cpu-shares''.
20460Sstevel@tonic-gate */
20470Sstevel@tonic-gate dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
20480Sstevel@tonic-gate bzero(dval, sizeof (rctl_val_t));
20490Sstevel@tonic-gate dval->rcv_value = 1;
20500Sstevel@tonic-gate dval->rcv_privilege = RCPRIV_PRIVILEGED;
20510Sstevel@tonic-gate dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
20520Sstevel@tonic-gate dval->rcv_action_recip_pid = -1;
20530Sstevel@tonic-gate
20540Sstevel@tonic-gate rde = rctl_dict_lookup("zone.cpu-shares");
20550Sstevel@tonic-gate (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
20560Sstevel@tonic-gate
20572768Ssl108498 rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
20582768Ssl108498 RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
20592768Ssl108498 RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
20602768Ssl108498 &zone_locked_mem_ops);
20613247Sgjelinek
20623247Sgjelinek rc_zone_max_swap = rctl_register("zone.max-swap",
20633247Sgjelinek RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
20643247Sgjelinek RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
20653247Sgjelinek &zone_max_swap_ops);
20663247Sgjelinek
206712633Sjohn.levon@sun.com rc_zone_max_lofi = rctl_register("zone.max-lofi",
206812633Sjohn.levon@sun.com RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |
206912633Sjohn.levon@sun.com RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
207012633Sjohn.levon@sun.com &zone_max_lofi_ops);
207112633Sjohn.levon@sun.com
20720Sstevel@tonic-gate /*
20730Sstevel@tonic-gate * Initialize the ``global zone''.
20740Sstevel@tonic-gate */
20750Sstevel@tonic-gate set = rctl_set_create();
20760Sstevel@tonic-gate gp = rctl_set_init_prealloc(RCENTITY_ZONE);
20770Sstevel@tonic-gate mutex_enter(&p0.p_lock);
20780Sstevel@tonic-gate e.rcep_p.zone = &zone0;
20790Sstevel@tonic-gate e.rcep_t = RCENTITY_ZONE;
20800Sstevel@tonic-gate zone0.zone_rctls = rctl_set_init(RCENTITY_ZONE, &p0, &e, set,
20810Sstevel@tonic-gate gp);
20820Sstevel@tonic-gate
20830Sstevel@tonic-gate zone0.zone_nlwps = p0.p_lwpcnt;
208412725SMenno.Lageman@Sun.COM zone0.zone_nprocs = 1;
20850Sstevel@tonic-gate zone0.zone_ntasks = 1;
20860Sstevel@tonic-gate mutex_exit(&p0.p_lock);
20872712Snn35248 zone0.zone_restart_init = B_TRUE;
20882712Snn35248 zone0.zone_brand = &native_brand;
20890Sstevel@tonic-gate rctl_prealloc_destroy(gp);
20900Sstevel@tonic-gate /*
20913247Sgjelinek * pool_default hasn't been initialized yet, so we let pool_init()
20923247Sgjelinek * take care of making sure the global zone is in the default pool.
20930Sstevel@tonic-gate */
20941676Sjpk
20951676Sjpk /*
20963247Sgjelinek * Initialize global zone kstats
20973247Sgjelinek */
20983247Sgjelinek zone_kstat_create(&zone0);
20993247Sgjelinek
21003247Sgjelinek /*
21011676Sjpk * Initialize zone label.
21021676Sjpk * mlp are initialized when tnzonecfg is loaded.
21031676Sjpk */
21041676Sjpk zone0.zone_slabel = l_admin_low;
21051676Sjpk rw_init(&zone0.zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
21061676Sjpk label_hold(l_admin_low);
21071676Sjpk
210810910SRobert.Harris@Sun.COM /*
210910910SRobert.Harris@Sun.COM * Initialise the lock for the database structure used by mntfs.
211010910SRobert.Harris@Sun.COM */
211110910SRobert.Harris@Sun.COM rw_init(&zone0.zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);
211210910SRobert.Harris@Sun.COM
21130Sstevel@tonic-gate mutex_enter(&zonehash_lock);
21140Sstevel@tonic-gate zone_uniqid(&zone0);
21150Sstevel@tonic-gate ASSERT(zone0.zone_uniqid == GLOBAL_ZONEUNIQID);
21161676Sjpk
21170Sstevel@tonic-gate zonehashbyid = mod_hash_create_idhash("zone_by_id", zone_hash_size,
21180Sstevel@tonic-gate mod_hash_null_valdtor);
21190Sstevel@tonic-gate zonehashbyname = mod_hash_create_strhash("zone_by_name",
21200Sstevel@tonic-gate zone_hash_size, mod_hash_null_valdtor);
21211676Sjpk /*
21221676Sjpk * maintain zonehashbylabel only for labeled systems
21231676Sjpk */
21241676Sjpk if (is_system_labeled())
21251676Sjpk zonehashbylabel = mod_hash_create_extended("zone_by_label",
21261676Sjpk zone_hash_size, mod_hash_null_keydtor,
21271676Sjpk mod_hash_null_valdtor, hash_bylabel, NULL,
21281676Sjpk hash_labelkey_cmp, KM_SLEEP);
21290Sstevel@tonic-gate zonecount = 1;
21300Sstevel@tonic-gate
21310Sstevel@tonic-gate (void) mod_hash_insert(zonehashbyid, (mod_hash_key_t)GLOBAL_ZONEID,
21320Sstevel@tonic-gate (mod_hash_val_t)&zone0);
21330Sstevel@tonic-gate (void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)zone0.zone_name,
21340Sstevel@tonic-gate (mod_hash_val_t)&zone0);
21351769Scarlsonj if (is_system_labeled()) {
21361769Scarlsonj zone0.zone_flags |= ZF_HASHED_LABEL;
21371676Sjpk (void) mod_hash_insert(zonehashbylabel,
21381676Sjpk (mod_hash_key_t)zone0.zone_slabel, (mod_hash_val_t)&zone0);
21391769Scarlsonj }
21401676Sjpk mutex_exit(&zonehash_lock);
21411676Sjpk
21420Sstevel@tonic-gate /*
21430Sstevel@tonic-gate * We avoid setting zone_kcred until now, since kcred is initialized
21440Sstevel@tonic-gate * sometime after zone_zsd_init() and before zone_init().
21450Sstevel@tonic-gate */
21460Sstevel@tonic-gate zone0.zone_kcred = kcred;
21470Sstevel@tonic-gate /*
21480Sstevel@tonic-gate * The global zone is fully initialized (except for zone_rootvp which
21490Sstevel@tonic-gate * will be set when the root filesystem is mounted).
21500Sstevel@tonic-gate */
21510Sstevel@tonic-gate global_zone = &zone0;
21521166Sdstaff
21531166Sdstaff /*
21541166Sdstaff * Setup an event channel to send zone status change notifications on
21551166Sdstaff */
21561166Sdstaff res = sysevent_evc_bind(ZONE_EVENT_CHANNEL, &zone_event_chan,
21571166Sdstaff EVCH_CREAT);
21581166Sdstaff
21591166Sdstaff if (res)
21601166Sdstaff panic("Sysevent_evc_bind failed during zone setup.\n");
21613247Sgjelinek
21620Sstevel@tonic-gate }
21630Sstevel@tonic-gate
21640Sstevel@tonic-gate static void
zone_free(zone_t * zone)21650Sstevel@tonic-gate zone_free(zone_t *zone)
21660Sstevel@tonic-gate {
21670Sstevel@tonic-gate ASSERT(zone != global_zone);
21680Sstevel@tonic-gate ASSERT(zone->zone_ntasks == 0);
21690Sstevel@tonic-gate ASSERT(zone->zone_nlwps == 0);
217012725SMenno.Lageman@Sun.COM ASSERT(zone->zone_nprocs == 0);
21710Sstevel@tonic-gate ASSERT(zone->zone_cred_ref == 0);
21720Sstevel@tonic-gate ASSERT(zone->zone_kcred == NULL);
21730Sstevel@tonic-gate ASSERT(zone_status_get(zone) == ZONE_IS_DEAD ||
21740Sstevel@tonic-gate zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
2175*13096SJordan.Vaughan@Sun.com ASSERT(list_is_empty(&zone->zone_ref_list));
21760Sstevel@tonic-gate
21773792Sakolb /*
21783792Sakolb * Remove any zone caps.
21793792Sakolb */
21803792Sakolb cpucaps_zone_remove(zone);
21813792Sakolb
21823792Sakolb ASSERT(zone->zone_cpucap == NULL);
21833792Sakolb
21840Sstevel@tonic-gate /* remove from deathrow list */
21850Sstevel@tonic-gate if (zone_status_get(zone) == ZONE_IS_DEAD) {
21860Sstevel@tonic-gate ASSERT(zone->zone_ref == 0);
21870Sstevel@tonic-gate mutex_enter(&zone_deathrow_lock);
21880Sstevel@tonic-gate list_remove(&zone_deathrow, zone);
21890Sstevel@tonic-gate mutex_exit(&zone_deathrow_lock);
21900Sstevel@tonic-gate }
21910Sstevel@tonic-gate
2192*13096SJordan.Vaughan@Sun.com list_destroy(&zone->zone_ref_list);
21930Sstevel@tonic-gate zone_free_zsd(zone);
2194789Sahrens zone_free_datasets(zone);
219510616SSebastien.Roy@Sun.COM list_destroy(&zone->zone_dl_list);
21960Sstevel@tonic-gate
21970Sstevel@tonic-gate if (zone->zone_rootvp != NULL)
21980Sstevel@tonic-gate VN_RELE(zone->zone_rootvp);
21990Sstevel@tonic-gate if (zone->zone_rootpath)
22000Sstevel@tonic-gate kmem_free(zone->zone_rootpath, zone->zone_rootpathlen);
22010Sstevel@tonic-gate if (zone->zone_name != NULL)
22020Sstevel@tonic-gate kmem_free(zone->zone_name, ZONENAME_MAX);
22031676Sjpk if (zone->zone_slabel != NULL)
22041676Sjpk label_rele(zone->zone_slabel);
22050Sstevel@tonic-gate if (zone->zone_nodename != NULL)
22060Sstevel@tonic-gate kmem_free(zone->zone_nodename, _SYS_NMLN);
22070Sstevel@tonic-gate if (zone->zone_domain != NULL)
22080Sstevel@tonic-gate kmem_free(zone->zone_domain, _SYS_NMLN);
22090Sstevel@tonic-gate if (zone->zone_privset != NULL)
22100Sstevel@tonic-gate kmem_free(zone->zone_privset, sizeof (priv_set_t));
22110Sstevel@tonic-gate if (zone->zone_rctls != NULL)
22120Sstevel@tonic-gate rctl_set_free(zone->zone_rctls);
22130Sstevel@tonic-gate if (zone->zone_bootargs != NULL)
221412633Sjohn.levon@sun.com strfree(zone->zone_bootargs);
22152267Sdp if (zone->zone_initname != NULL)
221612633Sjohn.levon@sun.com strfree(zone->zone_initname);
221712633Sjohn.levon@sun.com if (zone->zone_fs_allowed != NULL)
221812633Sjohn.levon@sun.com strfree(zone->zone_fs_allowed);
221912273SCasper.Dik@Sun.COM if (zone->zone_pfexecd != NULL)
222012273SCasper.Dik@Sun.COM klpd_freelist(&zone->zone_pfexecd);
22210Sstevel@tonic-gate id_free(zoneid_space, zone->zone_id);
22220Sstevel@tonic-gate mutex_destroy(&zone->zone_lock);
22230Sstevel@tonic-gate cv_destroy(&zone->zone_cv);
22241676Sjpk rw_destroy(&zone->zone_mlps.mlpl_rwlock);
222510910SRobert.Harris@Sun.COM rw_destroy(&zone->zone_mntfs_db_lock);
22260Sstevel@tonic-gate kmem_free(zone, sizeof (zone_t));
22270Sstevel@tonic-gate }
22280Sstevel@tonic-gate
22290Sstevel@tonic-gate /*
22300Sstevel@tonic-gate * See block comment at the top of this file for information about zone
22310Sstevel@tonic-gate * status values.
22320Sstevel@tonic-gate */
22330Sstevel@tonic-gate /*
22340Sstevel@tonic-gate * Convenience function for setting zone status.
22350Sstevel@tonic-gate */
22360Sstevel@tonic-gate static void
zone_status_set(zone_t * zone,zone_status_t status)22370Sstevel@tonic-gate zone_status_set(zone_t *zone, zone_status_t status)
22380Sstevel@tonic-gate {
22391166Sdstaff
22401166Sdstaff nvlist_t *nvl = NULL;
22410Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zone_status_lock));
22420Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
22430Sstevel@tonic-gate status >= zone_status_get(zone));
22441166Sdstaff
22451166Sdstaff if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
22461166Sdstaff nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
22471166Sdstaff nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
22482267Sdp zone_status_table[status]) ||
22491166Sdstaff nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
22502267Sdp zone_status_table[zone->zone_status]) ||
22511166Sdstaff nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
22521166Sdstaff nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
22531166Sdstaff sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
22542267Sdp ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
22551166Sdstaff #ifdef DEBUG
22561166Sdstaff (void) printf(
22571166Sdstaff "Failed to allocate and send zone state change event.\n");
22581166Sdstaff #endif
22591166Sdstaff }
22601166Sdstaff nvlist_free(nvl);
22611166Sdstaff
22620Sstevel@tonic-gate zone->zone_status = status;
22631166Sdstaff
22640Sstevel@tonic-gate cv_broadcast(&zone->zone_cv);
22650Sstevel@tonic-gate }
22660Sstevel@tonic-gate
22670Sstevel@tonic-gate /*
22680Sstevel@tonic-gate * Public function to retrieve the zone status. The zone status may
22690Sstevel@tonic-gate * change after it is retrieved.
22700Sstevel@tonic-gate */
22710Sstevel@tonic-gate zone_status_t
zone_status_get(zone_t * zone)22720Sstevel@tonic-gate zone_status_get(zone_t *zone)
22730Sstevel@tonic-gate {
22740Sstevel@tonic-gate return (zone->zone_status);
22750Sstevel@tonic-gate }
22760Sstevel@tonic-gate
22770Sstevel@tonic-gate static int
zone_set_bootargs(zone_t * zone,const char * zone_bootargs)22780Sstevel@tonic-gate zone_set_bootargs(zone_t *zone, const char *zone_bootargs)
22790Sstevel@tonic-gate {
228012633Sjohn.levon@sun.com char *buf = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
22812267Sdp int err = 0;
22822267Sdp
22832267Sdp ASSERT(zone != global_zone);
228412633Sjohn.levon@sun.com if ((err = copyinstr(zone_bootargs, buf, BOOTARGS_MAX, NULL)) != 0)
22852267Sdp goto done; /* EFAULT or ENAMETOOLONG */
22862267Sdp
22872267Sdp if (zone->zone_bootargs != NULL)
228812633Sjohn.levon@sun.com strfree(zone->zone_bootargs);
228912633Sjohn.levon@sun.com
229012633Sjohn.levon@sun.com zone->zone_bootargs = strdup(buf);
22912267Sdp
22922267Sdp done:
229312633Sjohn.levon@sun.com kmem_free(buf, BOOTARGS_MAX);
22942267Sdp return (err);
22952267Sdp }
22962267Sdp
22972267Sdp static int
zone_set_brand(zone_t * zone,const char * brand)22984141Sedp zone_set_brand(zone_t *zone, const char *brand)
22994141Sedp {
23004141Sedp struct brand_attr *attrp;
23014141Sedp brand_t *bp;
23024141Sedp
23034141Sedp attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP);
23044141Sedp if (copyin(brand, attrp, sizeof (struct brand_attr)) != 0) {
23054141Sedp kmem_free(attrp, sizeof (struct brand_attr));
23064141Sedp return (EFAULT);
23074141Sedp }
23084141Sedp
23094141Sedp bp = brand_register_zone(attrp);
23104141Sedp kmem_free(attrp, sizeof (struct brand_attr));
23114141Sedp if (bp == NULL)
23124141Sedp return (EINVAL);
23134141Sedp
23144141Sedp /*
23154141Sedp * This is the only place where a zone can change it's brand.
23164141Sedp * We already need to hold zone_status_lock to check the zone
23174141Sedp * status, so we'll just use that lock to serialize zone
23184141Sedp * branding requests as well.
23194141Sedp */
23204141Sedp mutex_enter(&zone_status_lock);
23214141Sedp
23224141Sedp /* Re-Branding is not allowed and the zone can't be booted yet */
23234141Sedp if ((ZONE_IS_BRANDED(zone)) ||
23244141Sedp (zone_status_get(zone) >= ZONE_IS_BOOTING)) {
23254141Sedp mutex_exit(&zone_status_lock);
23264141Sedp brand_unregister_zone(bp);
23274141Sedp return (EINVAL);
23284141Sedp }
23294141Sedp
23304888Seh208807 /* set up the brand specific data */
23314141Sedp zone->zone_brand = bp;
23324888Seh208807 ZBROP(zone)->b_init_brand_data(zone);
23334888Seh208807
23344141Sedp mutex_exit(&zone_status_lock);
23354141Sedp return (0);
23364141Sedp }
23374141Sedp
23384141Sedp static int
zone_set_fs_allowed(zone_t * zone,const char * zone_fs_allowed)233912633Sjohn.levon@sun.com zone_set_fs_allowed(zone_t *zone, const char *zone_fs_allowed)
234012633Sjohn.levon@sun.com {
234112633Sjohn.levon@sun.com char *buf = kmem_zalloc(ZONE_FS_ALLOWED_MAX, KM_SLEEP);
234212633Sjohn.levon@sun.com int err = 0;
234312633Sjohn.levon@sun.com
234412633Sjohn.levon@sun.com ASSERT(zone != global_zone);
234512633Sjohn.levon@sun.com if ((err = copyinstr(zone_fs_allowed, buf,
234612633Sjohn.levon@sun.com ZONE_FS_ALLOWED_MAX, NULL)) != 0)
234712633Sjohn.levon@sun.com goto done;
234812633Sjohn.levon@sun.com
234912633Sjohn.levon@sun.com if (zone->zone_fs_allowed != NULL)
235012633Sjohn.levon@sun.com strfree(zone->zone_fs_allowed);
235112633Sjohn.levon@sun.com
235212633Sjohn.levon@sun.com zone->zone_fs_allowed = strdup(buf);
235312633Sjohn.levon@sun.com
235412633Sjohn.levon@sun.com done:
235512633Sjohn.levon@sun.com kmem_free(buf, ZONE_FS_ALLOWED_MAX);
235612633Sjohn.levon@sun.com return (err);
235712633Sjohn.levon@sun.com }
235812633Sjohn.levon@sun.com
235912633Sjohn.levon@sun.com static int
zone_set_initname(zone_t * zone,const char * zone_initname)23602267Sdp zone_set_initname(zone_t *zone, const char *zone_initname)
23612267Sdp {
23622267Sdp char initname[INITNAME_SZ];
23630Sstevel@tonic-gate size_t len;
23642267Sdp int err = 0;
23652267Sdp
23662267Sdp ASSERT(zone != global_zone);
23672267Sdp if ((err = copyinstr(zone_initname, initname, INITNAME_SZ, &len)) != 0)
23680Sstevel@tonic-gate return (err); /* EFAULT or ENAMETOOLONG */
23692267Sdp
23702267Sdp if (zone->zone_initname != NULL)
237112633Sjohn.levon@sun.com strfree(zone->zone_initname);
23722267Sdp
23732267Sdp zone->zone_initname = kmem_alloc(strlen(initname) + 1, KM_SLEEP);
23742267Sdp (void) strcpy(zone->zone_initname, initname);
23750Sstevel@tonic-gate return (0);
23760Sstevel@tonic-gate }
23770Sstevel@tonic-gate
23783247Sgjelinek static int
zone_set_phys_mcap(zone_t * zone,const uint64_t * zone_mcap)23793247Sgjelinek zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
23803247Sgjelinek {
23813247Sgjelinek uint64_t mcap;
23823247Sgjelinek int err = 0;
23833247Sgjelinek
23843247Sgjelinek if ((err = copyin(zone_mcap, &mcap, sizeof (uint64_t))) == 0)
23853247Sgjelinek zone->zone_phys_mcap = mcap;
23863247Sgjelinek
23873247Sgjelinek return (err);
23883247Sgjelinek }
23893247Sgjelinek
23903247Sgjelinek static int
zone_set_sched_class(zone_t * zone,const char * new_class)23913247Sgjelinek zone_set_sched_class(zone_t *zone, const char *new_class)
23923247Sgjelinek {
23933247Sgjelinek char sched_class[PC_CLNMSZ];
23943247Sgjelinek id_t classid;
23953247Sgjelinek int err;
23963247Sgjelinek
23973247Sgjelinek ASSERT(zone != global_zone);
23983247Sgjelinek if ((err = copyinstr(new_class, sched_class, PC_CLNMSZ, NULL)) != 0)
23993247Sgjelinek return (err); /* EFAULT or ENAMETOOLONG */
24003247Sgjelinek
240111173SJonathan.Adams@Sun.COM if (getcid(sched_class, &classid) != 0 || CLASS_KERNEL(classid))
24023247Sgjelinek return (set_errno(EINVAL));
24033247Sgjelinek zone->zone_defaultcid = classid;
24043247Sgjelinek ASSERT(zone->zone_defaultcid > 0 &&
24053247Sgjelinek zone->zone_defaultcid < loaded_classes);
24063247Sgjelinek
24073247Sgjelinek return (0);
24083247Sgjelinek }
24093247Sgjelinek
24100Sstevel@tonic-gate /*
24110Sstevel@tonic-gate * Block indefinitely waiting for (zone_status >= status)
24120Sstevel@tonic-gate */
24130Sstevel@tonic-gate void
zone_status_wait(zone_t * zone,zone_status_t status)24140Sstevel@tonic-gate zone_status_wait(zone_t *zone, zone_status_t status)
24150Sstevel@tonic-gate {
24160Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
24170Sstevel@tonic-gate
24180Sstevel@tonic-gate mutex_enter(&zone_status_lock);
24190Sstevel@tonic-gate while (zone->zone_status < status) {
24200Sstevel@tonic-gate cv_wait(&zone->zone_cv, &zone_status_lock);
24210Sstevel@tonic-gate }
24220Sstevel@tonic-gate mutex_exit(&zone_status_lock);
24230Sstevel@tonic-gate }
24240Sstevel@tonic-gate
24250Sstevel@tonic-gate /*
24260Sstevel@tonic-gate * Private CPR-safe version of zone_status_wait().
24270Sstevel@tonic-gate */
24280Sstevel@tonic-gate static void
zone_status_wait_cpr(zone_t * zone,zone_status_t status,char * str)24290Sstevel@tonic-gate zone_status_wait_cpr(zone_t *zone, zone_status_t status, char *str)
24300Sstevel@tonic-gate {
24310Sstevel@tonic-gate callb_cpr_t cprinfo;
24320Sstevel@tonic-gate
24330Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
24340Sstevel@tonic-gate
24350Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &zone_status_lock, callb_generic_cpr,
24360Sstevel@tonic-gate str);
24370Sstevel@tonic-gate mutex_enter(&zone_status_lock);
24380Sstevel@tonic-gate while (zone->zone_status < status) {
24390Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
24400Sstevel@tonic-gate cv_wait(&zone->zone_cv, &zone_status_lock);
24410Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &zone_status_lock);
24420Sstevel@tonic-gate }
24430Sstevel@tonic-gate /*
24440Sstevel@tonic-gate * zone_status_lock is implicitly released by the following.
24450Sstevel@tonic-gate */
24460Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
24470Sstevel@tonic-gate }
24480Sstevel@tonic-gate
24490Sstevel@tonic-gate /*
24500Sstevel@tonic-gate * Block until zone enters requested state or signal is received. Return (0)
24510Sstevel@tonic-gate * if signaled, non-zero otherwise.
24520Sstevel@tonic-gate */
24530Sstevel@tonic-gate int
zone_status_wait_sig(zone_t * zone,zone_status_t status)24540Sstevel@tonic-gate zone_status_wait_sig(zone_t *zone, zone_status_t status)
24550Sstevel@tonic-gate {
24560Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
24570Sstevel@tonic-gate
24580Sstevel@tonic-gate mutex_enter(&zone_status_lock);
24590Sstevel@tonic-gate while (zone->zone_status < status) {
24600Sstevel@tonic-gate if (!cv_wait_sig(&zone->zone_cv, &zone_status_lock)) {
24610Sstevel@tonic-gate mutex_exit(&zone_status_lock);
24620Sstevel@tonic-gate return (0);
24630Sstevel@tonic-gate }
24640Sstevel@tonic-gate }
24650Sstevel@tonic-gate mutex_exit(&zone_status_lock);
24660Sstevel@tonic-gate return (1);
24670Sstevel@tonic-gate }
24680Sstevel@tonic-gate
24690Sstevel@tonic-gate /*
24700Sstevel@tonic-gate * Block until the zone enters the requested state or the timeout expires,
24710Sstevel@tonic-gate * whichever happens first. Return (-1) if operation timed out, time remaining
24720Sstevel@tonic-gate * otherwise.
24730Sstevel@tonic-gate */
24740Sstevel@tonic-gate clock_t
zone_status_timedwait(zone_t * zone,clock_t tim,zone_status_t status)24750Sstevel@tonic-gate zone_status_timedwait(zone_t *zone, clock_t tim, zone_status_t status)
24760Sstevel@tonic-gate {
24770Sstevel@tonic-gate clock_t timeleft = 0;
24780Sstevel@tonic-gate
24790Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
24800Sstevel@tonic-gate
24810Sstevel@tonic-gate mutex_enter(&zone_status_lock);
24820Sstevel@tonic-gate while (zone->zone_status < status && timeleft != -1) {
24830Sstevel@tonic-gate timeleft = cv_timedwait(&zone->zone_cv, &zone_status_lock, tim);
24840Sstevel@tonic-gate }
24850Sstevel@tonic-gate mutex_exit(&zone_status_lock);
24860Sstevel@tonic-gate return (timeleft);
24870Sstevel@tonic-gate }
24880Sstevel@tonic-gate
24890Sstevel@tonic-gate /*
24900Sstevel@tonic-gate * Block until the zone enters the requested state, the current process is
24910Sstevel@tonic-gate * signaled, or the timeout expires, whichever happens first. Return (-1) if
24920Sstevel@tonic-gate * operation timed out, 0 if signaled, time remaining otherwise.
24930Sstevel@tonic-gate */
24940Sstevel@tonic-gate clock_t
zone_status_timedwait_sig(zone_t * zone,clock_t tim,zone_status_t status)24950Sstevel@tonic-gate zone_status_timedwait_sig(zone_t *zone, clock_t tim, zone_status_t status)
24960Sstevel@tonic-gate {
249711066Srafael.vanoni@sun.com clock_t timeleft = tim - ddi_get_lbolt();
24980Sstevel@tonic-gate
24990Sstevel@tonic-gate ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
25000Sstevel@tonic-gate
25010Sstevel@tonic-gate mutex_enter(&zone_status_lock);
25020Sstevel@tonic-gate while (zone->zone_status < status) {
25030Sstevel@tonic-gate timeleft = cv_timedwait_sig(&zone->zone_cv, &zone_status_lock,
25040Sstevel@tonic-gate tim);
25050Sstevel@tonic-gate if (timeleft <= 0)
25060Sstevel@tonic-gate break;
25070Sstevel@tonic-gate }
25080Sstevel@tonic-gate mutex_exit(&zone_status_lock);
25090Sstevel@tonic-gate return (timeleft);
25100Sstevel@tonic-gate }
25110Sstevel@tonic-gate
/*
 * Zones have two reference counts: one for references from credential
 * structures (zone_cred_ref), and one (zone_ref) for everything else.
 * This is so we can allow a zone to be rebooted while there are still
 * outstanding cred references, since certain drivers cache dblks (which
 * implicitly results in cached creds).  We wait for zone_ref to drop to
 * 0 (actually 1), but not zone_cred_ref.  The zone structure itself is
 * later freed when the zone_cred_ref drops to 0, though nothing other
 * than the zone id and privilege set should be accessed once the zone
 * is "dead".
 *
 * A debugging flag, zone_wait_for_cred, can be set to a non-zero value
 * to force halt/reboot to block waiting for the zone_cred_ref to drop
 * to 0.  This can be useful to flush out other sources of cached creds
 * that may be less innocuous than the driver case.
 *
 * Zones also provide a tracked reference counting mechanism in which zone
 * references are represented by "crumbs" (zone_ref structures).  Crumbs help
 * debuggers determine the sources of leaked zone references.  See
 * zone_hold_ref() and zone_rele_ref() below for more information.
 */

/* Debugging flag described above; off (0) by default. */
int zone_wait_for_cred = 0;
25350Sstevel@tonic-gate
25360Sstevel@tonic-gate static void
zone_hold_locked(zone_t * z)25370Sstevel@tonic-gate zone_hold_locked(zone_t *z)
25380Sstevel@tonic-gate {
25390Sstevel@tonic-gate ASSERT(MUTEX_HELD(&z->zone_lock));
25400Sstevel@tonic-gate z->zone_ref++;
25410Sstevel@tonic-gate ASSERT(z->zone_ref != 0);
25420Sstevel@tonic-gate }
25430Sstevel@tonic-gate
2544*13096SJordan.Vaughan@Sun.com /*
2545*13096SJordan.Vaughan@Sun.com * Increment the specified zone's reference count. The zone's zone_t structure
2546*13096SJordan.Vaughan@Sun.com * will not be freed as long as the zone's reference count is nonzero.
2547*13096SJordan.Vaughan@Sun.com * Decrement the zone's reference count via zone_rele().
2548*13096SJordan.Vaughan@Sun.com *
2549*13096SJordan.Vaughan@Sun.com * NOTE: This function should only be used to hold zones for short periods of
2550*13096SJordan.Vaughan@Sun.com * time. Use zone_hold_ref() if the zone must be held for a long time.
2551*13096SJordan.Vaughan@Sun.com */
25520Sstevel@tonic-gate void
zone_hold(zone_t * z)25530Sstevel@tonic-gate zone_hold(zone_t *z)
25540Sstevel@tonic-gate {
25550Sstevel@tonic-gate mutex_enter(&z->zone_lock);
25560Sstevel@tonic-gate zone_hold_locked(z);
25570Sstevel@tonic-gate mutex_exit(&z->zone_lock);
25580Sstevel@tonic-gate }
25590Sstevel@tonic-gate
/*
 * A zone is ready to be destroyed once its non-cred reference count has
 * dropped to 1 and either we are not waiting for cred references
 * (zone_wait_for_cred is zero) or the cred reference count is 0.
 */
#define	ZONE_IS_UNREF(zone)						\
	((zone)->zone_ref == 1 &&					\
	(!zone_wait_for_cred || (zone)->zone_cred_ref == 0))
25670Sstevel@tonic-gate
2568*13096SJordan.Vaughan@Sun.com /*
2569*13096SJordan.Vaughan@Sun.com * Common zone reference release function invoked by zone_rele() and
2570*13096SJordan.Vaughan@Sun.com * zone_rele_ref(). If subsys is ZONE_REF_NUM_SUBSYS, then the specified
2571*13096SJordan.Vaughan@Sun.com * zone's subsystem-specific reference counters are not affected by the
2572*13096SJordan.Vaughan@Sun.com * release. If ref is not NULL, then the zone_ref_t to which it refers is
2573*13096SJordan.Vaughan@Sun.com * removed from the specified zone's reference list. ref must be non-NULL iff
2574*13096SJordan.Vaughan@Sun.com * subsys is not ZONE_REF_NUM_SUBSYS.
2575*13096SJordan.Vaughan@Sun.com */
2576*13096SJordan.Vaughan@Sun.com static void
zone_rele_common(zone_t * z,zone_ref_t * ref,zone_ref_subsys_t subsys)2577*13096SJordan.Vaughan@Sun.com zone_rele_common(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys)
25780Sstevel@tonic-gate {
25790Sstevel@tonic-gate boolean_t wakeup;
25800Sstevel@tonic-gate
25810Sstevel@tonic-gate mutex_enter(&z->zone_lock);
25820Sstevel@tonic-gate ASSERT(z->zone_ref != 0);
25830Sstevel@tonic-gate z->zone_ref--;
2584*13096SJordan.Vaughan@Sun.com if (subsys != ZONE_REF_NUM_SUBSYS) {
2585*13096SJordan.Vaughan@Sun.com ASSERT(z->zone_subsys_ref[subsys] != 0);
2586*13096SJordan.Vaughan@Sun.com z->zone_subsys_ref[subsys]--;
2587*13096SJordan.Vaughan@Sun.com list_remove(&z->zone_ref_list, ref);
2588*13096SJordan.Vaughan@Sun.com }
25890Sstevel@tonic-gate if (z->zone_ref == 0 && z->zone_cred_ref == 0) {
25900Sstevel@tonic-gate /* no more refs, free the structure */
25910Sstevel@tonic-gate mutex_exit(&z->zone_lock);
25920Sstevel@tonic-gate zone_free(z);
25930Sstevel@tonic-gate return;
25940Sstevel@tonic-gate }
25950Sstevel@tonic-gate /* signal zone_destroy so the zone can finish halting */
25960Sstevel@tonic-gate wakeup = (ZONE_IS_UNREF(z) && zone_status_get(z) >= ZONE_IS_DEAD);
25970Sstevel@tonic-gate mutex_exit(&z->zone_lock);
25980Sstevel@tonic-gate
25990Sstevel@tonic-gate if (wakeup) {
26000Sstevel@tonic-gate /*
26010Sstevel@tonic-gate * Grabbing zonehash_lock here effectively synchronizes with
26020Sstevel@tonic-gate * zone_destroy() to avoid missed signals.
26030Sstevel@tonic-gate */
26040Sstevel@tonic-gate mutex_enter(&zonehash_lock);
26050Sstevel@tonic-gate cv_broadcast(&zone_destroy_cv);
26060Sstevel@tonic-gate mutex_exit(&zonehash_lock);
26070Sstevel@tonic-gate }
26080Sstevel@tonic-gate }
26090Sstevel@tonic-gate
2610*13096SJordan.Vaughan@Sun.com /*
2611*13096SJordan.Vaughan@Sun.com * Decrement the specified zone's reference count. The specified zone will
2612*13096SJordan.Vaughan@Sun.com * cease to exist after this function returns if the reference count drops to
2613*13096SJordan.Vaughan@Sun.com * zero. This function should be paired with zone_hold().
2614*13096SJordan.Vaughan@Sun.com */
2615*13096SJordan.Vaughan@Sun.com void
zone_rele(zone_t * z)2616*13096SJordan.Vaughan@Sun.com zone_rele(zone_t *z)
2617*13096SJordan.Vaughan@Sun.com {
2618*13096SJordan.Vaughan@Sun.com zone_rele_common(z, NULL, ZONE_REF_NUM_SUBSYS);
2619*13096SJordan.Vaughan@Sun.com }
2620*13096SJordan.Vaughan@Sun.com
2621*13096SJordan.Vaughan@Sun.com /*
2622*13096SJordan.Vaughan@Sun.com * Initialize a zone reference structure. This function must be invoked for
2623*13096SJordan.Vaughan@Sun.com * a reference structure before the structure is passed to zone_hold_ref().
2624*13096SJordan.Vaughan@Sun.com */
2625*13096SJordan.Vaughan@Sun.com void
zone_init_ref(zone_ref_t * ref)2626*13096SJordan.Vaughan@Sun.com zone_init_ref(zone_ref_t *ref)
2627*13096SJordan.Vaughan@Sun.com {
2628*13096SJordan.Vaughan@Sun.com ref->zref_zone = NULL;
2629*13096SJordan.Vaughan@Sun.com list_link_init(&ref->zref_linkage);
2630*13096SJordan.Vaughan@Sun.com }
2631*13096SJordan.Vaughan@Sun.com
2632*13096SJordan.Vaughan@Sun.com /*
2633*13096SJordan.Vaughan@Sun.com * Acquire a reference to zone z. The caller must specify the
2634*13096SJordan.Vaughan@Sun.com * zone_ref_subsys_t constant associated with its subsystem. The specified
2635*13096SJordan.Vaughan@Sun.com * zone_ref_t structure will represent a reference to the specified zone. Use
2636*13096SJordan.Vaughan@Sun.com * zone_rele_ref() to release the reference.
2637*13096SJordan.Vaughan@Sun.com *
2638*13096SJordan.Vaughan@Sun.com * The referenced zone_t structure will not be freed as long as the zone_t's
2639*13096SJordan.Vaughan@Sun.com * zone_status field is not ZONE_IS_DEAD and the zone has outstanding
2640*13096SJordan.Vaughan@Sun.com * references.
2641*13096SJordan.Vaughan@Sun.com *
2642*13096SJordan.Vaughan@Sun.com * NOTE: The zone_ref_t structure must be initialized before it is used.
2643*13096SJordan.Vaughan@Sun.com * See zone_init_ref() above.
2644*13096SJordan.Vaughan@Sun.com */
2645*13096SJordan.Vaughan@Sun.com void
zone_hold_ref(zone_t * z,zone_ref_t * ref,zone_ref_subsys_t subsys)2646*13096SJordan.Vaughan@Sun.com zone_hold_ref(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys)
2647*13096SJordan.Vaughan@Sun.com {
2648*13096SJordan.Vaughan@Sun.com ASSERT(subsys >= 0 && subsys < ZONE_REF_NUM_SUBSYS);
2649*13096SJordan.Vaughan@Sun.com
2650*13096SJordan.Vaughan@Sun.com /*
2651*13096SJordan.Vaughan@Sun.com * Prevent consumers from reusing a reference structure before
2652*13096SJordan.Vaughan@Sun.com * releasing it.
2653*13096SJordan.Vaughan@Sun.com */
2654*13096SJordan.Vaughan@Sun.com VERIFY(ref->zref_zone == NULL);
2655*13096SJordan.Vaughan@Sun.com
2656*13096SJordan.Vaughan@Sun.com ref->zref_zone = z;
2657*13096SJordan.Vaughan@Sun.com mutex_enter(&z->zone_lock);
2658*13096SJordan.Vaughan@Sun.com zone_hold_locked(z);
2659*13096SJordan.Vaughan@Sun.com z->zone_subsys_ref[subsys]++;
2660*13096SJordan.Vaughan@Sun.com ASSERT(z->zone_subsys_ref[subsys] != 0);
2661*13096SJordan.Vaughan@Sun.com list_insert_head(&z->zone_ref_list, ref);
2662*13096SJordan.Vaughan@Sun.com mutex_exit(&z->zone_lock);
2663*13096SJordan.Vaughan@Sun.com }
2664*13096SJordan.Vaughan@Sun.com
2665*13096SJordan.Vaughan@Sun.com /*
2666*13096SJordan.Vaughan@Sun.com * Release the zone reference represented by the specified zone_ref_t.
2667*13096SJordan.Vaughan@Sun.com * The reference is invalid after it's released; however, the zone_ref_t
2668*13096SJordan.Vaughan@Sun.com * structure can be reused without having to invoke zone_init_ref().
2669*13096SJordan.Vaughan@Sun.com * subsys should be the same value that was passed to zone_hold_ref()
2670*13096SJordan.Vaughan@Sun.com * when the reference was acquired.
2671*13096SJordan.Vaughan@Sun.com */
2672*13096SJordan.Vaughan@Sun.com void
zone_rele_ref(zone_ref_t * ref,zone_ref_subsys_t subsys)2673*13096SJordan.Vaughan@Sun.com zone_rele_ref(zone_ref_t *ref, zone_ref_subsys_t subsys)
2674*13096SJordan.Vaughan@Sun.com {
2675*13096SJordan.Vaughan@Sun.com zone_rele_common(ref->zref_zone, ref, subsys);
2676*13096SJordan.Vaughan@Sun.com
2677*13096SJordan.Vaughan@Sun.com /*
2678*13096SJordan.Vaughan@Sun.com * Set the zone_ref_t's zref_zone field to NULL to generate panics
2679*13096SJordan.Vaughan@Sun.com * when consumers dereference the reference. This helps us catch
2680*13096SJordan.Vaughan@Sun.com * consumers who use released references. Furthermore, this lets
2681*13096SJordan.Vaughan@Sun.com * consumers reuse the zone_ref_t structure without having to
2682*13096SJordan.Vaughan@Sun.com * invoke zone_init_ref().
2683*13096SJordan.Vaughan@Sun.com */
2684*13096SJordan.Vaughan@Sun.com ref->zref_zone = NULL;
2685*13096SJordan.Vaughan@Sun.com }
2686*13096SJordan.Vaughan@Sun.com
26870Sstevel@tonic-gate void
zone_cred_hold(zone_t * z)26880Sstevel@tonic-gate zone_cred_hold(zone_t *z)
26890Sstevel@tonic-gate {
26900Sstevel@tonic-gate mutex_enter(&z->zone_lock);
26910Sstevel@tonic-gate z->zone_cred_ref++;
26920Sstevel@tonic-gate ASSERT(z->zone_cred_ref != 0);
26930Sstevel@tonic-gate mutex_exit(&z->zone_lock);
26940Sstevel@tonic-gate }
26950Sstevel@tonic-gate
26960Sstevel@tonic-gate void
zone_cred_rele(zone_t * z)26970Sstevel@tonic-gate zone_cred_rele(zone_t *z)
26980Sstevel@tonic-gate {
26990Sstevel@tonic-gate boolean_t wakeup;
27000Sstevel@tonic-gate
27010Sstevel@tonic-gate mutex_enter(&z->zone_lock);
27020Sstevel@tonic-gate ASSERT(z->zone_cred_ref != 0);
27030Sstevel@tonic-gate z->zone_cred_ref--;
27040Sstevel@tonic-gate if (z->zone_ref == 0 && z->zone_cred_ref == 0) {
27050Sstevel@tonic-gate /* no more refs, free the structure */
27060Sstevel@tonic-gate mutex_exit(&z->zone_lock);
27070Sstevel@tonic-gate zone_free(z);
27080Sstevel@tonic-gate return;
27090Sstevel@tonic-gate }
27100Sstevel@tonic-gate /*
27110Sstevel@tonic-gate * If zone_destroy is waiting for the cred references to drain
27120Sstevel@tonic-gate * out, and they have, signal it.
27130Sstevel@tonic-gate */
27140Sstevel@tonic-gate wakeup = (zone_wait_for_cred && ZONE_IS_UNREF(z) &&
27150Sstevel@tonic-gate zone_status_get(z) >= ZONE_IS_DEAD);
27160Sstevel@tonic-gate mutex_exit(&z->zone_lock);
27170Sstevel@tonic-gate
27180Sstevel@tonic-gate if (wakeup) {
27190Sstevel@tonic-gate /*
27200Sstevel@tonic-gate * Grabbing zonehash_lock here effectively synchronizes with
27210Sstevel@tonic-gate * zone_destroy() to avoid missed signals.
27220Sstevel@tonic-gate */
27230Sstevel@tonic-gate mutex_enter(&zonehash_lock);
27240Sstevel@tonic-gate cv_broadcast(&zone_destroy_cv);
27250Sstevel@tonic-gate mutex_exit(&zonehash_lock);
27260Sstevel@tonic-gate }
27270Sstevel@tonic-gate }
27280Sstevel@tonic-gate
27290Sstevel@tonic-gate void
zone_task_hold(zone_t * z)27300Sstevel@tonic-gate zone_task_hold(zone_t *z)
27310Sstevel@tonic-gate {
27320Sstevel@tonic-gate mutex_enter(&z->zone_lock);
27330Sstevel@tonic-gate z->zone_ntasks++;
27340Sstevel@tonic-gate ASSERT(z->zone_ntasks != 0);
27350Sstevel@tonic-gate mutex_exit(&z->zone_lock);
27360Sstevel@tonic-gate }
27370Sstevel@tonic-gate
27380Sstevel@tonic-gate void
zone_task_rele(zone_t * zone)27390Sstevel@tonic-gate zone_task_rele(zone_t *zone)
27400Sstevel@tonic-gate {
27410Sstevel@tonic-gate uint_t refcnt;
27420Sstevel@tonic-gate
27430Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
27440Sstevel@tonic-gate ASSERT(zone->zone_ntasks != 0);
27450Sstevel@tonic-gate refcnt = --zone->zone_ntasks;
27460Sstevel@tonic-gate if (refcnt > 1) { /* Common case */
27470Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
27480Sstevel@tonic-gate return;
27490Sstevel@tonic-gate }
27500Sstevel@tonic-gate zone_hold_locked(zone); /* so we can use the zone_t later */
27510Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
27520Sstevel@tonic-gate if (refcnt == 1) {
27530Sstevel@tonic-gate /*
27540Sstevel@tonic-gate * See if the zone is shutting down.
27550Sstevel@tonic-gate */
27560Sstevel@tonic-gate mutex_enter(&zone_status_lock);
27570Sstevel@tonic-gate if (zone_status_get(zone) != ZONE_IS_SHUTTING_DOWN) {
27580Sstevel@tonic-gate goto out;
27590Sstevel@tonic-gate }
27600Sstevel@tonic-gate
27610Sstevel@tonic-gate /*
27620Sstevel@tonic-gate * Make sure the ntasks didn't change since we
27630Sstevel@tonic-gate * dropped zone_lock.
27640Sstevel@tonic-gate */
27650Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
27660Sstevel@tonic-gate if (refcnt != zone->zone_ntasks) {
27670Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
27680Sstevel@tonic-gate goto out;
27690Sstevel@tonic-gate }
27700Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
27710Sstevel@tonic-gate
27720Sstevel@tonic-gate /*
27730Sstevel@tonic-gate * No more user processes in the zone. The zone is empty.
27740Sstevel@tonic-gate */
27750Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_EMPTY);
27760Sstevel@tonic-gate goto out;
27770Sstevel@tonic-gate }
27780Sstevel@tonic-gate
27790Sstevel@tonic-gate ASSERT(refcnt == 0);
27800Sstevel@tonic-gate /*
27810Sstevel@tonic-gate * zsched has exited; the zone is dead.
27820Sstevel@tonic-gate */
27830Sstevel@tonic-gate zone->zone_zsched = NULL; /* paranoia */
27840Sstevel@tonic-gate mutex_enter(&zone_status_lock);
27850Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_DEAD);
27860Sstevel@tonic-gate out:
27870Sstevel@tonic-gate mutex_exit(&zone_status_lock);
27880Sstevel@tonic-gate zone_rele(zone);
27890Sstevel@tonic-gate }
27900Sstevel@tonic-gate
27910Sstevel@tonic-gate zoneid_t
getzoneid(void)27920Sstevel@tonic-gate getzoneid(void)
27930Sstevel@tonic-gate {
27940Sstevel@tonic-gate return (curproc->p_zone->zone_id);
27950Sstevel@tonic-gate }
27960Sstevel@tonic-gate
27970Sstevel@tonic-gate /*
27980Sstevel@tonic-gate * Internal versions of zone_find_by_*(). These don't zone_hold() or
27990Sstevel@tonic-gate * check the validity of a zone's state.
28000Sstevel@tonic-gate */
28010Sstevel@tonic-gate static zone_t *
zone_find_all_by_id(zoneid_t zoneid)28020Sstevel@tonic-gate zone_find_all_by_id(zoneid_t zoneid)
28030Sstevel@tonic-gate {
28040Sstevel@tonic-gate mod_hash_val_t hv;
28050Sstevel@tonic-gate zone_t *zone = NULL;
28060Sstevel@tonic-gate
28070Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
28080Sstevel@tonic-gate
28090Sstevel@tonic-gate if (mod_hash_find(zonehashbyid,
28100Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)zoneid, &hv) == 0)
28110Sstevel@tonic-gate zone = (zone_t *)hv;
28120Sstevel@tonic-gate return (zone);
28130Sstevel@tonic-gate }
28140Sstevel@tonic-gate
28150Sstevel@tonic-gate static zone_t *
zone_find_all_by_label(const ts_label_t * label)28161676Sjpk zone_find_all_by_label(const ts_label_t *label)
28171676Sjpk {
28181676Sjpk mod_hash_val_t hv;
28191676Sjpk zone_t *zone = NULL;
28201676Sjpk
28211676Sjpk ASSERT(MUTEX_HELD(&zonehash_lock));
28221676Sjpk
28231676Sjpk /*
28241676Sjpk * zonehashbylabel is not maintained for unlabeled systems
28251676Sjpk */
28261676Sjpk if (!is_system_labeled())
28271676Sjpk return (NULL);
28281676Sjpk if (mod_hash_find(zonehashbylabel, (mod_hash_key_t)label, &hv) == 0)
28291676Sjpk zone = (zone_t *)hv;
28301676Sjpk return (zone);
28311676Sjpk }
28321676Sjpk
28331676Sjpk static zone_t *
zone_find_all_by_name(char * name)28340Sstevel@tonic-gate zone_find_all_by_name(char *name)
28350Sstevel@tonic-gate {
28360Sstevel@tonic-gate mod_hash_val_t hv;
28370Sstevel@tonic-gate zone_t *zone = NULL;
28380Sstevel@tonic-gate
28390Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
28400Sstevel@tonic-gate
28410Sstevel@tonic-gate if (mod_hash_find(zonehashbyname, (mod_hash_key_t)name, &hv) == 0)
28420Sstevel@tonic-gate zone = (zone_t *)hv;
28430Sstevel@tonic-gate return (zone);
28440Sstevel@tonic-gate }
28450Sstevel@tonic-gate
28460Sstevel@tonic-gate /*
28470Sstevel@tonic-gate * Public interface for looking up a zone by zoneid. Only returns the zone if
28480Sstevel@tonic-gate * it is fully initialized, and has not yet begun the zone_destroy() sequence.
28490Sstevel@tonic-gate * Caller must call zone_rele() once it is done with the zone.
28500Sstevel@tonic-gate *
28510Sstevel@tonic-gate * The zone may begin the zone_destroy() sequence immediately after this
28520Sstevel@tonic-gate * function returns, but may be safely used until zone_rele() is called.
28530Sstevel@tonic-gate */
28540Sstevel@tonic-gate zone_t *
zone_find_by_id(zoneid_t zoneid)28550Sstevel@tonic-gate zone_find_by_id(zoneid_t zoneid)
28560Sstevel@tonic-gate {
28570Sstevel@tonic-gate zone_t *zone;
28580Sstevel@tonic-gate zone_status_t status;
28590Sstevel@tonic-gate
28600Sstevel@tonic-gate mutex_enter(&zonehash_lock);
28610Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
28620Sstevel@tonic-gate mutex_exit(&zonehash_lock);
28630Sstevel@tonic-gate return (NULL);
28640Sstevel@tonic-gate }
28650Sstevel@tonic-gate status = zone_status_get(zone);
28660Sstevel@tonic-gate if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) {
28670Sstevel@tonic-gate /*
28680Sstevel@tonic-gate * For all practical purposes the zone doesn't exist.
28690Sstevel@tonic-gate */
28700Sstevel@tonic-gate mutex_exit(&zonehash_lock);
28710Sstevel@tonic-gate return (NULL);
28720Sstevel@tonic-gate }
28730Sstevel@tonic-gate zone_hold(zone);
28740Sstevel@tonic-gate mutex_exit(&zonehash_lock);
28750Sstevel@tonic-gate return (zone);
28760Sstevel@tonic-gate }
28770Sstevel@tonic-gate
28780Sstevel@tonic-gate /*
28791676Sjpk * Similar to zone_find_by_id, but using zone label as the key.
28801676Sjpk */
28811676Sjpk zone_t *
zone_find_by_label(const ts_label_t * label)28821676Sjpk zone_find_by_label(const ts_label_t *label)
28831676Sjpk {
28841676Sjpk zone_t *zone;
28852110Srica zone_status_t status;
28861676Sjpk
28871676Sjpk mutex_enter(&zonehash_lock);
28881676Sjpk if ((zone = zone_find_all_by_label(label)) == NULL) {
28891676Sjpk mutex_exit(&zonehash_lock);
28901676Sjpk return (NULL);
28911676Sjpk }
28922110Srica
28932110Srica status = zone_status_get(zone);
28942110Srica if (status > ZONE_IS_DOWN) {
28951676Sjpk /*
28961676Sjpk * For all practical purposes the zone doesn't exist.
28971676Sjpk */
28982110Srica mutex_exit(&zonehash_lock);
28992110Srica return (NULL);
29001676Sjpk }
29012110Srica zone_hold(zone);
29021676Sjpk mutex_exit(&zonehash_lock);
29031676Sjpk return (zone);
29041676Sjpk }
29051676Sjpk
29061676Sjpk /*
29070Sstevel@tonic-gate * Similar to zone_find_by_id, but using zone name as the key.
29080Sstevel@tonic-gate */
29090Sstevel@tonic-gate zone_t *
zone_find_by_name(char * name)29100Sstevel@tonic-gate zone_find_by_name(char *name)
29110Sstevel@tonic-gate {
29120Sstevel@tonic-gate zone_t *zone;
29130Sstevel@tonic-gate zone_status_t status;
29140Sstevel@tonic-gate
29150Sstevel@tonic-gate mutex_enter(&zonehash_lock);
29160Sstevel@tonic-gate if ((zone = zone_find_all_by_name(name)) == NULL) {
29170Sstevel@tonic-gate mutex_exit(&zonehash_lock);
29180Sstevel@tonic-gate return (NULL);
29190Sstevel@tonic-gate }
29200Sstevel@tonic-gate status = zone_status_get(zone);
29210Sstevel@tonic-gate if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) {
29220Sstevel@tonic-gate /*
29230Sstevel@tonic-gate * For all practical purposes the zone doesn't exist.
29240Sstevel@tonic-gate */
29250Sstevel@tonic-gate mutex_exit(&zonehash_lock);
29260Sstevel@tonic-gate return (NULL);
29270Sstevel@tonic-gate }
29280Sstevel@tonic-gate zone_hold(zone);
29290Sstevel@tonic-gate mutex_exit(&zonehash_lock);
29300Sstevel@tonic-gate return (zone);
29310Sstevel@tonic-gate }
29320Sstevel@tonic-gate
29330Sstevel@tonic-gate /*
29340Sstevel@tonic-gate * Similar to zone_find_by_id(), using the path as a key. For instance,
29350Sstevel@tonic-gate * if there is a zone "foo" rooted at /foo/root, and the path argument
29360Sstevel@tonic-gate * is "/foo/root/proc", it will return the held zone_t corresponding to
29370Sstevel@tonic-gate * zone "foo".
29380Sstevel@tonic-gate *
29390Sstevel@tonic-gate * zone_find_by_path() always returns a non-NULL value, since at the
29400Sstevel@tonic-gate * very least every path will be contained in the global zone.
29410Sstevel@tonic-gate *
29420Sstevel@tonic-gate * As with the other zone_find_by_*() functions, the caller is
29430Sstevel@tonic-gate * responsible for zone_rele()ing the return value of this function.
29440Sstevel@tonic-gate */
29450Sstevel@tonic-gate zone_t *
zone_find_by_path(const char * path)29460Sstevel@tonic-gate zone_find_by_path(const char *path)
29470Sstevel@tonic-gate {
29480Sstevel@tonic-gate zone_t *zone;
29490Sstevel@tonic-gate zone_t *zret = NULL;
29500Sstevel@tonic-gate zone_status_t status;
29510Sstevel@tonic-gate
29520Sstevel@tonic-gate if (path == NULL) {
29530Sstevel@tonic-gate /*
29540Sstevel@tonic-gate * Call from rootconf().
29550Sstevel@tonic-gate */
29560Sstevel@tonic-gate zone_hold(global_zone);
29570Sstevel@tonic-gate return (global_zone);
29580Sstevel@tonic-gate }
29590Sstevel@tonic-gate ASSERT(*path == '/');
29600Sstevel@tonic-gate mutex_enter(&zonehash_lock);
29610Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
29620Sstevel@tonic-gate zone = list_next(&zone_active, zone)) {
29630Sstevel@tonic-gate if (ZONE_PATH_VISIBLE(path, zone))
29640Sstevel@tonic-gate zret = zone;
29650Sstevel@tonic-gate }
29660Sstevel@tonic-gate ASSERT(zret != NULL);
29670Sstevel@tonic-gate status = zone_status_get(zret);
29680Sstevel@tonic-gate if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) {
29690Sstevel@tonic-gate /*
29700Sstevel@tonic-gate * Zone practically doesn't exist.
29710Sstevel@tonic-gate */
29720Sstevel@tonic-gate zret = global_zone;
29730Sstevel@tonic-gate }
29740Sstevel@tonic-gate zone_hold(zret);
29750Sstevel@tonic-gate mutex_exit(&zonehash_lock);
29760Sstevel@tonic-gate return (zret);
29770Sstevel@tonic-gate }
29780Sstevel@tonic-gate
29790Sstevel@tonic-gate /*
29800Sstevel@tonic-gate * Get the number of cpus visible to this zone. The system-wide global
29810Sstevel@tonic-gate * 'ncpus' is returned if pools are disabled, the caller is in the
29820Sstevel@tonic-gate * global zone, or a NULL zone argument is passed in.
29830Sstevel@tonic-gate */
29840Sstevel@tonic-gate int
zone_ncpus_get(zone_t * zone)29850Sstevel@tonic-gate zone_ncpus_get(zone_t *zone)
29860Sstevel@tonic-gate {
29870Sstevel@tonic-gate int myncpus = zone == NULL ? 0 : zone->zone_ncpus;
29880Sstevel@tonic-gate
29890Sstevel@tonic-gate return (myncpus != 0 ? myncpus : ncpus);
29900Sstevel@tonic-gate }
29910Sstevel@tonic-gate
29920Sstevel@tonic-gate /*
29930Sstevel@tonic-gate * Get the number of online cpus visible to this zone. The system-wide
29940Sstevel@tonic-gate * global 'ncpus_online' is returned if pools are disabled, the caller
29950Sstevel@tonic-gate * is in the global zone, or a NULL zone argument is passed in.
29960Sstevel@tonic-gate */
29970Sstevel@tonic-gate int
zone_ncpus_online_get(zone_t * zone)29980Sstevel@tonic-gate zone_ncpus_online_get(zone_t *zone)
29990Sstevel@tonic-gate {
30000Sstevel@tonic-gate int myncpus_online = zone == NULL ? 0 : zone->zone_ncpus_online;
30010Sstevel@tonic-gate
30020Sstevel@tonic-gate return (myncpus_online != 0 ? myncpus_online : ncpus_online);
30030Sstevel@tonic-gate }
30040Sstevel@tonic-gate
30050Sstevel@tonic-gate /*
30060Sstevel@tonic-gate * Return the pool to which the zone is currently bound.
30070Sstevel@tonic-gate */
30080Sstevel@tonic-gate pool_t *
zone_pool_get(zone_t * zone)30090Sstevel@tonic-gate zone_pool_get(zone_t *zone)
30100Sstevel@tonic-gate {
30110Sstevel@tonic-gate ASSERT(pool_lock_held());
30120Sstevel@tonic-gate
30130Sstevel@tonic-gate return (zone->zone_pool);
30140Sstevel@tonic-gate }
30150Sstevel@tonic-gate
30160Sstevel@tonic-gate /*
30170Sstevel@tonic-gate * Set the zone's pool pointer and update the zone's visibility to match
30180Sstevel@tonic-gate * the resources in the new pool.
30190Sstevel@tonic-gate */
30200Sstevel@tonic-gate void
zone_pool_set(zone_t * zone,pool_t * pool)30210Sstevel@tonic-gate zone_pool_set(zone_t *zone, pool_t *pool)
30220Sstevel@tonic-gate {
30230Sstevel@tonic-gate ASSERT(pool_lock_held());
30240Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
30250Sstevel@tonic-gate
30260Sstevel@tonic-gate zone->zone_pool = pool;
30270Sstevel@tonic-gate zone_pset_set(zone, pool->pool_pset->pset_id);
30280Sstevel@tonic-gate }
30290Sstevel@tonic-gate
30300Sstevel@tonic-gate /*
30310Sstevel@tonic-gate * Return the cached value of the id of the processor set to which the
30320Sstevel@tonic-gate * zone is currently bound. The value will be ZONE_PS_INVAL if the pools
30330Sstevel@tonic-gate * facility is disabled.
30340Sstevel@tonic-gate */
30350Sstevel@tonic-gate psetid_t
zone_pset_get(zone_t * zone)30360Sstevel@tonic-gate zone_pset_get(zone_t *zone)
30370Sstevel@tonic-gate {
30380Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
30390Sstevel@tonic-gate
30400Sstevel@tonic-gate return (zone->zone_psetid);
30410Sstevel@tonic-gate }
30420Sstevel@tonic-gate
30430Sstevel@tonic-gate /*
30440Sstevel@tonic-gate * Set the cached value of the id of the processor set to which the zone
30450Sstevel@tonic-gate * is currently bound. Also update the zone's visibility to match the
30460Sstevel@tonic-gate * resources in the new processor set.
30470Sstevel@tonic-gate */
30480Sstevel@tonic-gate void
zone_pset_set(zone_t * zone,psetid_t newpsetid)30490Sstevel@tonic-gate zone_pset_set(zone_t *zone, psetid_t newpsetid)
30500Sstevel@tonic-gate {
30510Sstevel@tonic-gate psetid_t oldpsetid;
30520Sstevel@tonic-gate
30530Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
30540Sstevel@tonic-gate oldpsetid = zone_pset_get(zone);
30550Sstevel@tonic-gate
30560Sstevel@tonic-gate if (oldpsetid == newpsetid)
30570Sstevel@tonic-gate return;
30580Sstevel@tonic-gate /*
30590Sstevel@tonic-gate * Global zone sees all.
30600Sstevel@tonic-gate */
30610Sstevel@tonic-gate if (zone != global_zone) {
30620Sstevel@tonic-gate zone->zone_psetid = newpsetid;
30630Sstevel@tonic-gate if (newpsetid != ZONE_PS_INVAL)
30640Sstevel@tonic-gate pool_pset_visibility_add(newpsetid, zone);
30650Sstevel@tonic-gate if (oldpsetid != ZONE_PS_INVAL)
30660Sstevel@tonic-gate pool_pset_visibility_remove(oldpsetid, zone);
30670Sstevel@tonic-gate }
30680Sstevel@tonic-gate /*
30690Sstevel@tonic-gate * Disabling pools, so we should start using the global values
30700Sstevel@tonic-gate * for ncpus and ncpus_online.
30710Sstevel@tonic-gate */
30720Sstevel@tonic-gate if (newpsetid == ZONE_PS_INVAL) {
30730Sstevel@tonic-gate zone->zone_ncpus = 0;
30740Sstevel@tonic-gate zone->zone_ncpus_online = 0;
30750Sstevel@tonic-gate }
30760Sstevel@tonic-gate }
30770Sstevel@tonic-gate
30780Sstevel@tonic-gate /*
30790Sstevel@tonic-gate * Walk the list of active zones and issue the provided callback for
30800Sstevel@tonic-gate * each of them.
30810Sstevel@tonic-gate *
30820Sstevel@tonic-gate * Caller must not be holding any locks that may be acquired under
30830Sstevel@tonic-gate * zonehash_lock. See comment at the beginning of the file for a list of
30840Sstevel@tonic-gate * common locks and their interactions with zones.
30850Sstevel@tonic-gate */
30860Sstevel@tonic-gate int
zone_walk(int (* cb)(zone_t *,void *),void * data)30870Sstevel@tonic-gate zone_walk(int (*cb)(zone_t *, void *), void *data)
30880Sstevel@tonic-gate {
30890Sstevel@tonic-gate zone_t *zone;
30900Sstevel@tonic-gate int ret = 0;
30910Sstevel@tonic-gate zone_status_t status;
30920Sstevel@tonic-gate
30930Sstevel@tonic-gate mutex_enter(&zonehash_lock);
30940Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
30950Sstevel@tonic-gate zone = list_next(&zone_active, zone)) {
30960Sstevel@tonic-gate /*
30970Sstevel@tonic-gate * Skip zones that shouldn't be externally visible.
30980Sstevel@tonic-gate */
30990Sstevel@tonic-gate status = zone_status_get(zone);
31000Sstevel@tonic-gate if (status < ZONE_IS_READY || status > ZONE_IS_DOWN)
31010Sstevel@tonic-gate continue;
31020Sstevel@tonic-gate /*
31030Sstevel@tonic-gate * Bail immediately if any callback invocation returns a
31040Sstevel@tonic-gate * non-zero value.
31050Sstevel@tonic-gate */
31060Sstevel@tonic-gate ret = (*cb)(zone, data);
31070Sstevel@tonic-gate if (ret != 0)
31080Sstevel@tonic-gate break;
31090Sstevel@tonic-gate }
31100Sstevel@tonic-gate mutex_exit(&zonehash_lock);
31110Sstevel@tonic-gate return (ret);
31120Sstevel@tonic-gate }
31130Sstevel@tonic-gate
31140Sstevel@tonic-gate static int
zone_set_root(zone_t * zone,const char * upath)31150Sstevel@tonic-gate zone_set_root(zone_t *zone, const char *upath)
31160Sstevel@tonic-gate {
31170Sstevel@tonic-gate vnode_t *vp;
31180Sstevel@tonic-gate int trycount;
31190Sstevel@tonic-gate int error = 0;
31200Sstevel@tonic-gate char *path;
31210Sstevel@tonic-gate struct pathname upn, pn;
31220Sstevel@tonic-gate size_t pathlen;
31230Sstevel@tonic-gate
31240Sstevel@tonic-gate if ((error = pn_get((char *)upath, UIO_USERSPACE, &upn)) != 0)
31250Sstevel@tonic-gate return (error);
31260Sstevel@tonic-gate
31270Sstevel@tonic-gate pn_alloc(&pn);
31280Sstevel@tonic-gate
31290Sstevel@tonic-gate /* prevent infinite loop */
31300Sstevel@tonic-gate trycount = 10;
31310Sstevel@tonic-gate for (;;) {
31320Sstevel@tonic-gate if (--trycount <= 0) {
31330Sstevel@tonic-gate error = ESTALE;
31340Sstevel@tonic-gate goto out;
31350Sstevel@tonic-gate }
31360Sstevel@tonic-gate
31370Sstevel@tonic-gate if ((error = lookuppn(&upn, &pn, FOLLOW, NULLVPP, &vp)) == 0) {
31380Sstevel@tonic-gate /*
31390Sstevel@tonic-gate * VOP_ACCESS() may cover 'vp' with a new
31400Sstevel@tonic-gate * filesystem, if 'vp' is an autoFS vnode.
31410Sstevel@tonic-gate * Get the new 'vp' if so.
31420Sstevel@tonic-gate */
31435331Samw if ((error =
31445331Samw VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL)) == 0 &&
31454417Seh208807 (!vn_ismntpt(vp) ||
31460Sstevel@tonic-gate (error = traverse(&vp)) == 0)) {
31470Sstevel@tonic-gate pathlen = pn.pn_pathlen + 2;
31480Sstevel@tonic-gate path = kmem_alloc(pathlen, KM_SLEEP);
31490Sstevel@tonic-gate (void) strncpy(path, pn.pn_path,
31500Sstevel@tonic-gate pn.pn_pathlen + 1);
31510Sstevel@tonic-gate path[pathlen - 2] = '/';
31520Sstevel@tonic-gate path[pathlen - 1] = '\0';
31530Sstevel@tonic-gate pn_free(&pn);
31540Sstevel@tonic-gate pn_free(&upn);
31550Sstevel@tonic-gate
31560Sstevel@tonic-gate /* Success! */
31570Sstevel@tonic-gate break;
31580Sstevel@tonic-gate }
31590Sstevel@tonic-gate VN_RELE(vp);
31600Sstevel@tonic-gate }
31610Sstevel@tonic-gate if (error != ESTALE)
31620Sstevel@tonic-gate goto out;
31630Sstevel@tonic-gate }
31640Sstevel@tonic-gate
31650Sstevel@tonic-gate ASSERT(error == 0);
31660Sstevel@tonic-gate zone->zone_rootvp = vp; /* we hold a reference to vp */
31670Sstevel@tonic-gate zone->zone_rootpath = path;
31680Sstevel@tonic-gate zone->zone_rootpathlen = pathlen;
31691769Scarlsonj if (pathlen > 5 && strcmp(path + pathlen - 5, "/lu/") == 0)
31701769Scarlsonj zone->zone_flags |= ZF_IS_SCRATCH;
31710Sstevel@tonic-gate return (0);
31720Sstevel@tonic-gate
31730Sstevel@tonic-gate out:
31740Sstevel@tonic-gate pn_free(&pn);
31750Sstevel@tonic-gate pn_free(&upn);
31760Sstevel@tonic-gate return (error);
31770Sstevel@tonic-gate }
31780Sstevel@tonic-gate
/*
 * True when c is an ASCII digit or an ASCII letter of either case.
 * The argument is evaluated more than once.
 */
#define	isalnum(c)	(('0' <= (c) && (c) <= '9') || \
			('a' <= (c) && (c) <= 'z') || \
			('A' <= (c) && (c) <= 'Z'))
31820Sstevel@tonic-gate
31830Sstevel@tonic-gate static int
zone_set_name(zone_t * zone,const char * uname)31840Sstevel@tonic-gate zone_set_name(zone_t *zone, const char *uname)
31850Sstevel@tonic-gate {
31860Sstevel@tonic-gate char *kname = kmem_zalloc(ZONENAME_MAX, KM_SLEEP);
31870Sstevel@tonic-gate size_t len;
31880Sstevel@tonic-gate int i, err;
31890Sstevel@tonic-gate
31900Sstevel@tonic-gate if ((err = copyinstr(uname, kname, ZONENAME_MAX, &len)) != 0) {
31910Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
31920Sstevel@tonic-gate return (err); /* EFAULT or ENAMETOOLONG */
31930Sstevel@tonic-gate }
31940Sstevel@tonic-gate
31950Sstevel@tonic-gate /* must be less than ZONENAME_MAX */
31960Sstevel@tonic-gate if (len == ZONENAME_MAX && kname[ZONENAME_MAX - 1] != '\0') {
31970Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
31980Sstevel@tonic-gate return (EINVAL);
31990Sstevel@tonic-gate }
32000Sstevel@tonic-gate
32010Sstevel@tonic-gate /*
32020Sstevel@tonic-gate * Name must start with an alphanumeric and must contain only
32030Sstevel@tonic-gate * alphanumerics, '-', '_' and '.'.
32040Sstevel@tonic-gate */
32050Sstevel@tonic-gate if (!isalnum(kname[0])) {
32060Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
32070Sstevel@tonic-gate return (EINVAL);
32080Sstevel@tonic-gate }
32090Sstevel@tonic-gate for (i = 1; i < len - 1; i++) {
32100Sstevel@tonic-gate if (!isalnum(kname[i]) && kname[i] != '-' && kname[i] != '_' &&
32110Sstevel@tonic-gate kname[i] != '.') {
32120Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
32130Sstevel@tonic-gate return (EINVAL);
32140Sstevel@tonic-gate }
32150Sstevel@tonic-gate }
32160Sstevel@tonic-gate
32170Sstevel@tonic-gate zone->zone_name = kname;
32180Sstevel@tonic-gate return (0);
32190Sstevel@tonic-gate }
32200Sstevel@tonic-gate
32210Sstevel@tonic-gate /*
32228662SJordan.Vaughan@Sun.com * Gets the 32-bit hostid of the specified zone as an unsigned int. If 'zonep'
32238662SJordan.Vaughan@Sun.com * is NULL or it points to a zone with no hostid emulation, then the machine's
32248662SJordan.Vaughan@Sun.com * hostid (i.e., the global zone's hostid) is returned. This function returns
32258662SJordan.Vaughan@Sun.com * zero if neither the zone nor the host machine (global zone) have hostids. It
32268662SJordan.Vaughan@Sun.com * returns HW_INVALID_HOSTID if the function attempts to return the machine's
32278662SJordan.Vaughan@Sun.com * hostid and the machine's hostid is invalid.
32288662SJordan.Vaughan@Sun.com */
32298662SJordan.Vaughan@Sun.com uint32_t
zone_get_hostid(zone_t * zonep)32308662SJordan.Vaughan@Sun.com zone_get_hostid(zone_t *zonep)
32318662SJordan.Vaughan@Sun.com {
32328662SJordan.Vaughan@Sun.com unsigned long machine_hostid;
32338662SJordan.Vaughan@Sun.com
32348662SJordan.Vaughan@Sun.com if (zonep == NULL || zonep->zone_hostid == HW_INVALID_HOSTID) {
32358662SJordan.Vaughan@Sun.com if (ddi_strtoul(hw_serial, NULL, 10, &machine_hostid) != 0)
32368662SJordan.Vaughan@Sun.com return (HW_INVALID_HOSTID);
32378662SJordan.Vaughan@Sun.com return ((uint32_t)machine_hostid);
32388662SJordan.Vaughan@Sun.com }
32398662SJordan.Vaughan@Sun.com return (zonep->zone_hostid);
32408662SJordan.Vaughan@Sun.com }
32418662SJordan.Vaughan@Sun.com
32428662SJordan.Vaughan@Sun.com /*
32430Sstevel@tonic-gate * Similar to thread_create(), but makes sure the thread is in the appropriate
32440Sstevel@tonic-gate * zone's zsched process (curproc->p_zone->zone_zsched) before returning.
32450Sstevel@tonic-gate */
32460Sstevel@tonic-gate /*ARGSUSED*/
32470Sstevel@tonic-gate kthread_t *
zthread_create(caddr_t stk,size_t stksize,void (* proc)(),void * arg,size_t len,pri_t pri)32480Sstevel@tonic-gate zthread_create(
32490Sstevel@tonic-gate caddr_t stk,
32500Sstevel@tonic-gate size_t stksize,
32510Sstevel@tonic-gate void (*proc)(),
32520Sstevel@tonic-gate void *arg,
32530Sstevel@tonic-gate size_t len,
32540Sstevel@tonic-gate pri_t pri)
32550Sstevel@tonic-gate {
32560Sstevel@tonic-gate kthread_t *t;
32570Sstevel@tonic-gate zone_t *zone = curproc->p_zone;
32580Sstevel@tonic-gate proc_t *pp = zone->zone_zsched;
32590Sstevel@tonic-gate
32600Sstevel@tonic-gate zone_hold(zone); /* Reference to be dropped when thread exits */
32610Sstevel@tonic-gate
32620Sstevel@tonic-gate /*
32630Sstevel@tonic-gate * No-one should be trying to create threads if the zone is shutting
32640Sstevel@tonic-gate * down and there aren't any kernel threads around. See comment
32650Sstevel@tonic-gate * in zthread_exit().
32660Sstevel@tonic-gate */
32670Sstevel@tonic-gate ASSERT(!(zone->zone_kthreads == NULL &&
32680Sstevel@tonic-gate zone_status_get(zone) >= ZONE_IS_EMPTY));
32690Sstevel@tonic-gate /*
32700Sstevel@tonic-gate * Create a thread, but don't let it run until we've finished setting
32710Sstevel@tonic-gate * things up.
32720Sstevel@tonic-gate */
32730Sstevel@tonic-gate t = thread_create(stk, stksize, proc, arg, len, pp, TS_STOPPED, pri);
32740Sstevel@tonic-gate ASSERT(t->t_forw == NULL);
32750Sstevel@tonic-gate mutex_enter(&zone_status_lock);
32760Sstevel@tonic-gate if (zone->zone_kthreads == NULL) {
32770Sstevel@tonic-gate t->t_forw = t->t_back = t;
32780Sstevel@tonic-gate } else {
32790Sstevel@tonic-gate kthread_t *tx = zone->zone_kthreads;
32800Sstevel@tonic-gate
32810Sstevel@tonic-gate t->t_forw = tx;
32820Sstevel@tonic-gate t->t_back = tx->t_back;
32830Sstevel@tonic-gate tx->t_back->t_forw = t;
32840Sstevel@tonic-gate tx->t_back = t;
32850Sstevel@tonic-gate }
32860Sstevel@tonic-gate zone->zone_kthreads = t;
32870Sstevel@tonic-gate mutex_exit(&zone_status_lock);
32880Sstevel@tonic-gate
32890Sstevel@tonic-gate mutex_enter(&pp->p_lock);
32900Sstevel@tonic-gate t->t_proc_flag |= TP_ZTHREAD;
32910Sstevel@tonic-gate project_rele(t->t_proj);
32920Sstevel@tonic-gate t->t_proj = project_hold(pp->p_task->tk_proj);
32930Sstevel@tonic-gate
32940Sstevel@tonic-gate /*
32950Sstevel@tonic-gate * Setup complete, let it run.
32960Sstevel@tonic-gate */
32970Sstevel@tonic-gate thread_lock(t);
32980Sstevel@tonic-gate t->t_schedflag |= TS_ALLSTART;
32990Sstevel@tonic-gate setrun_locked(t);
33000Sstevel@tonic-gate thread_unlock(t);
33010Sstevel@tonic-gate
33020Sstevel@tonic-gate mutex_exit(&pp->p_lock);
33030Sstevel@tonic-gate
33040Sstevel@tonic-gate return (t);
33050Sstevel@tonic-gate }
33060Sstevel@tonic-gate
/*
 * Similar to thread_exit().  Must be called by threads created via
 * zthread_create().
 */
33110Sstevel@tonic-gate void
zthread_exit(void)33120Sstevel@tonic-gate zthread_exit(void)
33130Sstevel@tonic-gate {
33140Sstevel@tonic-gate kthread_t *t = curthread;
33150Sstevel@tonic-gate proc_t *pp = curproc;
33160Sstevel@tonic-gate zone_t *zone = pp->p_zone;
33170Sstevel@tonic-gate
33180Sstevel@tonic-gate mutex_enter(&zone_status_lock);
33190Sstevel@tonic-gate
33200Sstevel@tonic-gate /*
33210Sstevel@tonic-gate * Reparent to p0
33220Sstevel@tonic-gate */
33231075Sjosephb kpreempt_disable();
33240Sstevel@tonic-gate mutex_enter(&pp->p_lock);
33250Sstevel@tonic-gate t->t_proc_flag &= ~TP_ZTHREAD;
33260Sstevel@tonic-gate t->t_procp = &p0;
33270Sstevel@tonic-gate hat_thread_exit(t);
33280Sstevel@tonic-gate mutex_exit(&pp->p_lock);
33291075Sjosephb kpreempt_enable();
33300Sstevel@tonic-gate
33310Sstevel@tonic-gate if (t->t_back == t) {
33320Sstevel@tonic-gate ASSERT(t->t_forw == t);
33330Sstevel@tonic-gate /*
33340Sstevel@tonic-gate * If the zone is empty, once the thread count
33350Sstevel@tonic-gate * goes to zero no further kernel threads can be
33360Sstevel@tonic-gate * created. This is because if the creator is a process
33370Sstevel@tonic-gate * in the zone, then it must have exited before the zone
33380Sstevel@tonic-gate * state could be set to ZONE_IS_EMPTY.
33390Sstevel@tonic-gate * Otherwise, if the creator is a kernel thread in the
33400Sstevel@tonic-gate * zone, the thread count is non-zero.
33410Sstevel@tonic-gate *
33420Sstevel@tonic-gate * This really means that non-zone kernel threads should
33430Sstevel@tonic-gate * not create zone kernel threads.
33440Sstevel@tonic-gate */
33450Sstevel@tonic-gate zone->zone_kthreads = NULL;
33460Sstevel@tonic-gate if (zone_status_get(zone) == ZONE_IS_EMPTY) {
33470Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_DOWN);
33483792Sakolb /*
33493792Sakolb * Remove any CPU caps on this zone.
33503792Sakolb */
33513792Sakolb cpucaps_zone_remove(zone);
33520Sstevel@tonic-gate }
33530Sstevel@tonic-gate } else {
33540Sstevel@tonic-gate t->t_forw->t_back = t->t_back;
33550Sstevel@tonic-gate t->t_back->t_forw = t->t_forw;
33560Sstevel@tonic-gate if (zone->zone_kthreads == t)
33570Sstevel@tonic-gate zone->zone_kthreads = t->t_forw;
33580Sstevel@tonic-gate }
33590Sstevel@tonic-gate mutex_exit(&zone_status_lock);
33600Sstevel@tonic-gate zone_rele(zone);
33610Sstevel@tonic-gate thread_exit();
33620Sstevel@tonic-gate /* NOTREACHED */
33630Sstevel@tonic-gate }
33640Sstevel@tonic-gate
33650Sstevel@tonic-gate static void
zone_chdir(vnode_t * vp,vnode_t ** vpp,proc_t * pp)33660Sstevel@tonic-gate zone_chdir(vnode_t *vp, vnode_t **vpp, proc_t *pp)
33670Sstevel@tonic-gate {
33680Sstevel@tonic-gate vnode_t *oldvp;
33690Sstevel@tonic-gate
33700Sstevel@tonic-gate /* we're going to hold a reference here to the directory */
33710Sstevel@tonic-gate VN_HOLD(vp);
33720Sstevel@tonic-gate
337311861SMarek.Pospisil@Sun.COM /* update abs cwd/root path see c2/audit.c */
337411861SMarek.Pospisil@Sun.COM if (AU_AUDITING())
33750Sstevel@tonic-gate audit_chdirec(vp, vpp);
33760Sstevel@tonic-gate
33770Sstevel@tonic-gate mutex_enter(&pp->p_lock);
33780Sstevel@tonic-gate oldvp = *vpp;
33790Sstevel@tonic-gate *vpp = vp;
33800Sstevel@tonic-gate mutex_exit(&pp->p_lock);
33810Sstevel@tonic-gate if (oldvp != NULL)
33820Sstevel@tonic-gate VN_RELE(oldvp);
33830Sstevel@tonic-gate }
33840Sstevel@tonic-gate
33850Sstevel@tonic-gate /*
33860Sstevel@tonic-gate * Convert an rctl value represented by an nvlist_t into an rctl_val_t.
33870Sstevel@tonic-gate */
33880Sstevel@tonic-gate static int
nvlist2rctlval(nvlist_t * nvl,rctl_val_t * rv)33890Sstevel@tonic-gate nvlist2rctlval(nvlist_t *nvl, rctl_val_t *rv)
33900Sstevel@tonic-gate {
33910Sstevel@tonic-gate nvpair_t *nvp = NULL;
33920Sstevel@tonic-gate boolean_t priv_set = B_FALSE;
33930Sstevel@tonic-gate boolean_t limit_set = B_FALSE;
33940Sstevel@tonic-gate boolean_t action_set = B_FALSE;
33950Sstevel@tonic-gate
33960Sstevel@tonic-gate while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
33970Sstevel@tonic-gate const char *name;
33980Sstevel@tonic-gate uint64_t ui64;
33990Sstevel@tonic-gate
34000Sstevel@tonic-gate name = nvpair_name(nvp);
34010Sstevel@tonic-gate if (nvpair_type(nvp) != DATA_TYPE_UINT64)
34020Sstevel@tonic-gate return (EINVAL);
34030Sstevel@tonic-gate (void) nvpair_value_uint64(nvp, &ui64);
34040Sstevel@tonic-gate if (strcmp(name, "privilege") == 0) {
34050Sstevel@tonic-gate /*
34060Sstevel@tonic-gate * Currently only privileged values are allowed, but
34070Sstevel@tonic-gate * this may change in the future.
34080Sstevel@tonic-gate */
34090Sstevel@tonic-gate if (ui64 != RCPRIV_PRIVILEGED)
34100Sstevel@tonic-gate return (EINVAL);
34110Sstevel@tonic-gate rv->rcv_privilege = ui64;
34120Sstevel@tonic-gate priv_set = B_TRUE;
34130Sstevel@tonic-gate } else if (strcmp(name, "limit") == 0) {
34140Sstevel@tonic-gate rv->rcv_value = ui64;
34150Sstevel@tonic-gate limit_set = B_TRUE;
34160Sstevel@tonic-gate } else if (strcmp(name, "action") == 0) {
34170Sstevel@tonic-gate if (ui64 != RCTL_LOCAL_NOACTION &&
34180Sstevel@tonic-gate ui64 != RCTL_LOCAL_DENY)
34190Sstevel@tonic-gate return (EINVAL);
34200Sstevel@tonic-gate rv->rcv_flagaction = ui64;
34210Sstevel@tonic-gate action_set = B_TRUE;
34220Sstevel@tonic-gate } else {
34230Sstevel@tonic-gate return (EINVAL);
34240Sstevel@tonic-gate }
34250Sstevel@tonic-gate }
34260Sstevel@tonic-gate
34270Sstevel@tonic-gate if (!(priv_set && limit_set && action_set))
34280Sstevel@tonic-gate return (EINVAL);
34290Sstevel@tonic-gate rv->rcv_action_signal = 0;
34300Sstevel@tonic-gate rv->rcv_action_recipient = NULL;
34310Sstevel@tonic-gate rv->rcv_action_recip_pid = -1;
34320Sstevel@tonic-gate rv->rcv_firing_time = 0;
34330Sstevel@tonic-gate
34340Sstevel@tonic-gate return (0);
34350Sstevel@tonic-gate }
34360Sstevel@tonic-gate
34372267Sdp /*
34382267Sdp * Non-global zone version of start_init.
34392267Sdp */
34400Sstevel@tonic-gate void
zone_start_init(void)34412267Sdp zone_start_init(void)
34420Sstevel@tonic-gate {
34430Sstevel@tonic-gate proc_t *p = ttoproc(curthread);
34442712Snn35248 zone_t *z = p->p_zone;
34452267Sdp
34462267Sdp ASSERT(!INGLOBALZONE(curproc));
34470Sstevel@tonic-gate
34480Sstevel@tonic-gate /*
34492712Snn35248 * For all purposes (ZONE_ATTR_INITPID and restart_init),
34502712Snn35248 * storing just the pid of init is sufficient.
34512712Snn35248 */
34522712Snn35248 z->zone_proc_initpid = p->p_pid;
34532712Snn35248
34542712Snn35248 /*
34552267Sdp * We maintain zone_boot_err so that we can return the cause of the
34562267Sdp * failure back to the caller of the zone_boot syscall.
34570Sstevel@tonic-gate */
34582267Sdp p->p_zone->zone_boot_err = start_init_common();
34590Sstevel@tonic-gate
34608364SJordan.Vaughan@Sun.com /*
34618364SJordan.Vaughan@Sun.com * We will prevent booting zones from becoming running zones if the
34628364SJordan.Vaughan@Sun.com * global zone is shutting down.
34638364SJordan.Vaughan@Sun.com */
34640Sstevel@tonic-gate mutex_enter(&zone_status_lock);
34658364SJordan.Vaughan@Sun.com if (z->zone_boot_err != 0 || zone_status_get(global_zone) >=
34668364SJordan.Vaughan@Sun.com ZONE_IS_SHUTTING_DOWN) {
34670Sstevel@tonic-gate /*
34680Sstevel@tonic-gate * Make sure we are still in the booting state-- we could have
34690Sstevel@tonic-gate * raced and already be shutting down, or even further along.
34700Sstevel@tonic-gate */
34713792Sakolb if (zone_status_get(z) == ZONE_IS_BOOTING) {
34722712Snn35248 zone_status_set(z, ZONE_IS_SHUTTING_DOWN);
34733792Sakolb }
34740Sstevel@tonic-gate mutex_exit(&zone_status_lock);
34750Sstevel@tonic-gate /* It's gone bad, dispose of the process */
34762712Snn35248 if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) {
3477390Sraf mutex_enter(&p->p_lock);
3478390Sraf ASSERT(p->p_flag & SEXITLWPS);
34790Sstevel@tonic-gate lwp_exit();
34800Sstevel@tonic-gate }
34810Sstevel@tonic-gate } else {
34822712Snn35248 if (zone_status_get(z) == ZONE_IS_BOOTING)
34832712Snn35248 zone_status_set(z, ZONE_IS_RUNNING);
34840Sstevel@tonic-gate mutex_exit(&zone_status_lock);
34850Sstevel@tonic-gate /* cause the process to return to userland. */
34860Sstevel@tonic-gate lwp_rtt();
34870Sstevel@tonic-gate }
34880Sstevel@tonic-gate }
34890Sstevel@tonic-gate
34900Sstevel@tonic-gate struct zsched_arg {
34910Sstevel@tonic-gate zone_t *zone;
34920Sstevel@tonic-gate nvlist_t *nvlist;
34930Sstevel@tonic-gate };
34940Sstevel@tonic-gate
34950Sstevel@tonic-gate /*
34960Sstevel@tonic-gate * Per-zone "sched" workalike. The similarity to "sched" doesn't have
34970Sstevel@tonic-gate * anything to do with scheduling, but rather with the fact that
34980Sstevel@tonic-gate * per-zone kernel threads are parented to zsched, just like regular
34990Sstevel@tonic-gate * kernel threads are parented to sched (p0).
35000Sstevel@tonic-gate *
35010Sstevel@tonic-gate * zsched is also responsible for launching init for the zone.
35020Sstevel@tonic-gate */
35030Sstevel@tonic-gate static void
zsched(void * arg)35040Sstevel@tonic-gate zsched(void *arg)
35050Sstevel@tonic-gate {
35060Sstevel@tonic-gate struct zsched_arg *za = arg;
35070Sstevel@tonic-gate proc_t *pp = curproc;
35080Sstevel@tonic-gate proc_t *initp = proc_init;
35090Sstevel@tonic-gate zone_t *zone = za->zone;
35100Sstevel@tonic-gate cred_t *cr, *oldcred;
35110Sstevel@tonic-gate rctl_set_t *set;
35120Sstevel@tonic-gate rctl_alloc_gp_t *gp;
35130Sstevel@tonic-gate contract_t *ct = NULL;
35140Sstevel@tonic-gate task_t *tk, *oldtk;
35150Sstevel@tonic-gate rctl_entity_p_t e;
35160Sstevel@tonic-gate kproject_t *pj;
35170Sstevel@tonic-gate
35180Sstevel@tonic-gate nvlist_t *nvl = za->nvlist;
35190Sstevel@tonic-gate nvpair_t *nvp = NULL;
35200Sstevel@tonic-gate
35213446Smrj bcopy("zsched", PTOU(pp)->u_psargs, sizeof ("zsched"));
35223446Smrj bcopy("zsched", PTOU(pp)->u_comm, sizeof ("zsched"));
35233446Smrj PTOU(pp)->u_argc = 0;
35243446Smrj PTOU(pp)->u_argv = NULL;
35253446Smrj PTOU(pp)->u_envp = NULL;
35260Sstevel@tonic-gate closeall(P_FINFO(pp));
35270Sstevel@tonic-gate
35280Sstevel@tonic-gate /*
35290Sstevel@tonic-gate * We are this zone's "zsched" process. As the zone isn't generally
35300Sstevel@tonic-gate * visible yet we don't need to grab any locks before initializing its
35310Sstevel@tonic-gate * zone_proc pointer.
35320Sstevel@tonic-gate */
35330Sstevel@tonic-gate zone_hold(zone); /* this hold is released by zone_destroy() */
35340Sstevel@tonic-gate zone->zone_zsched = pp;
35350Sstevel@tonic-gate mutex_enter(&pp->p_lock);
35360Sstevel@tonic-gate pp->p_zone = zone;
35370Sstevel@tonic-gate mutex_exit(&pp->p_lock);
35380Sstevel@tonic-gate
35390Sstevel@tonic-gate /*
35400Sstevel@tonic-gate * Disassociate process from its 'parent'; parent ourselves to init
35410Sstevel@tonic-gate * (pid 1) and change other values as needed.
35420Sstevel@tonic-gate */
35430Sstevel@tonic-gate sess_create();
35440Sstevel@tonic-gate
35450Sstevel@tonic-gate mutex_enter(&pidlock);
35460Sstevel@tonic-gate proc_detach(pp);
35470Sstevel@tonic-gate pp->p_ppid = 1;
35480Sstevel@tonic-gate pp->p_flag |= SZONETOP;
35490Sstevel@tonic-gate pp->p_ancpid = 1;
35500Sstevel@tonic-gate pp->p_parent = initp;
35510Sstevel@tonic-gate pp->p_psibling = NULL;
35520Sstevel@tonic-gate if (initp->p_child)
35530Sstevel@tonic-gate initp->p_child->p_psibling = pp;
35540Sstevel@tonic-gate pp->p_sibling = initp->p_child;
35550Sstevel@tonic-gate initp->p_child = pp;
35560Sstevel@tonic-gate
35570Sstevel@tonic-gate /* Decrement what newproc() incremented. */
35580Sstevel@tonic-gate upcount_dec(crgetruid(CRED()), GLOBAL_ZONEID);
35590Sstevel@tonic-gate /*
35600Sstevel@tonic-gate * Our credentials are about to become kcred-like, so we don't care
35610Sstevel@tonic-gate * about the caller's ruid.
35620Sstevel@tonic-gate */
35630Sstevel@tonic-gate upcount_inc(crgetruid(kcred), zone->zone_id);
35640Sstevel@tonic-gate mutex_exit(&pidlock);
35650Sstevel@tonic-gate
35660Sstevel@tonic-gate /*
356712725SMenno.Lageman@Sun.COM * getting out of global zone, so decrement lwp and process counts
35680Sstevel@tonic-gate */
35690Sstevel@tonic-gate pj = pp->p_task->tk_proj;
35700Sstevel@tonic-gate mutex_enter(&global_zone->zone_nlwps_lock);
35710Sstevel@tonic-gate pj->kpj_nlwps -= pp->p_lwpcnt;
35720Sstevel@tonic-gate global_zone->zone_nlwps -= pp->p_lwpcnt;
357312725SMenno.Lageman@Sun.COM pj->kpj_nprocs--;
357412725SMenno.Lageman@Sun.COM global_zone->zone_nprocs--;
35750Sstevel@tonic-gate mutex_exit(&global_zone->zone_nlwps_lock);
35760Sstevel@tonic-gate
35770Sstevel@tonic-gate /*
35782768Ssl108498 * Decrement locked memory counts on old zone and project.
35792768Ssl108498 */
35803247Sgjelinek mutex_enter(&global_zone->zone_mem_lock);
35812768Ssl108498 global_zone->zone_locked_mem -= pp->p_locked_mem;
35822768Ssl108498 pj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
35833247Sgjelinek mutex_exit(&global_zone->zone_mem_lock);
35842768Ssl108498
35852768Ssl108498 /*
35860Sstevel@tonic-gate * Create and join a new task in project '0' of this zone.
35870Sstevel@tonic-gate *
35880Sstevel@tonic-gate * We don't need to call holdlwps() since we know we're the only lwp in
35890Sstevel@tonic-gate * this process.
35900Sstevel@tonic-gate *
35910Sstevel@tonic-gate * task_join() returns with p_lock held.
35920Sstevel@tonic-gate */
35930Sstevel@tonic-gate tk = task_create(0, zone);
35940Sstevel@tonic-gate mutex_enter(&cpu_lock);
35950Sstevel@tonic-gate oldtk = task_join(tk, 0);
35962768Ssl108498
35972768Ssl108498 pj = pp->p_task->tk_proj;
35982768Ssl108498
35993247Sgjelinek mutex_enter(&zone->zone_mem_lock);
36002768Ssl108498 zone->zone_locked_mem += pp->p_locked_mem;
36012768Ssl108498 pj->kpj_data.kpd_locked_mem += pp->p_locked_mem;
36023247Sgjelinek mutex_exit(&zone->zone_mem_lock);
36030Sstevel@tonic-gate
36040Sstevel@tonic-gate /*
360512725SMenno.Lageman@Sun.COM * add lwp and process counts to zsched's zone, and increment
360612725SMenno.Lageman@Sun.COM * project's task and process count due to the task created in
360712725SMenno.Lageman@Sun.COM * the above task_create.
36080Sstevel@tonic-gate */
36090Sstevel@tonic-gate mutex_enter(&zone->zone_nlwps_lock);
36100Sstevel@tonic-gate pj->kpj_nlwps += pp->p_lwpcnt;
36110Sstevel@tonic-gate pj->kpj_ntasks += 1;
36120Sstevel@tonic-gate zone->zone_nlwps += pp->p_lwpcnt;
361312725SMenno.Lageman@Sun.COM pj->kpj_nprocs++;
361412725SMenno.Lageman@Sun.COM zone->zone_nprocs++;
36150Sstevel@tonic-gate mutex_exit(&zone->zone_nlwps_lock);
36160Sstevel@tonic-gate
36172768Ssl108498 mutex_exit(&curproc->p_lock);
36182768Ssl108498 mutex_exit(&cpu_lock);
36192768Ssl108498 task_rele(oldtk);
36202768Ssl108498
36210Sstevel@tonic-gate /*
36220Sstevel@tonic-gate * The process was created by a process in the global zone, hence the
36230Sstevel@tonic-gate * credentials are wrong. We might as well have kcred-ish credentials.
36240Sstevel@tonic-gate */
36250Sstevel@tonic-gate cr = zone->zone_kcred;
36260Sstevel@tonic-gate crhold(cr);
36270Sstevel@tonic-gate mutex_enter(&pp->p_crlock);
36280Sstevel@tonic-gate oldcred = pp->p_cred;
36290Sstevel@tonic-gate pp->p_cred = cr;
36300Sstevel@tonic-gate mutex_exit(&pp->p_crlock);
36310Sstevel@tonic-gate crfree(oldcred);
36320Sstevel@tonic-gate
36330Sstevel@tonic-gate /*
36340Sstevel@tonic-gate * Hold credentials again (for thread)
36350Sstevel@tonic-gate */
36360Sstevel@tonic-gate crhold(cr);
36370Sstevel@tonic-gate
36380Sstevel@tonic-gate /*
36390Sstevel@tonic-gate * p_lwpcnt can't change since this is a kernel process.
36400Sstevel@tonic-gate */
36410Sstevel@tonic-gate crset(pp, cr);
36420Sstevel@tonic-gate
36430Sstevel@tonic-gate /*
36440Sstevel@tonic-gate * Chroot
36450Sstevel@tonic-gate */
36460Sstevel@tonic-gate zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_cdir, pp);
36470Sstevel@tonic-gate zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_rdir, pp);
36480Sstevel@tonic-gate
36490Sstevel@tonic-gate /*
36500Sstevel@tonic-gate * Initialize zone's rctl set.
36510Sstevel@tonic-gate */
36520Sstevel@tonic-gate set = rctl_set_create();
36530Sstevel@tonic-gate gp = rctl_set_init_prealloc(RCENTITY_ZONE);
36540Sstevel@tonic-gate mutex_enter(&pp->p_lock);
36550Sstevel@tonic-gate e.rcep_p.zone = zone;
36560Sstevel@tonic-gate e.rcep_t = RCENTITY_ZONE;
36570Sstevel@tonic-gate zone->zone_rctls = rctl_set_init(RCENTITY_ZONE, pp, &e, set, gp);
36580Sstevel@tonic-gate mutex_exit(&pp->p_lock);
36590Sstevel@tonic-gate rctl_prealloc_destroy(gp);
36600Sstevel@tonic-gate
36610Sstevel@tonic-gate /*
36620Sstevel@tonic-gate * Apply the rctls passed in to zone_create(). This is basically a list
36630Sstevel@tonic-gate * assignment: all of the old values are removed and the new ones
36640Sstevel@tonic-gate * inserted. That is, if an empty list is passed in, all values are
36650Sstevel@tonic-gate * removed.
36660Sstevel@tonic-gate */
36670Sstevel@tonic-gate while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
36680Sstevel@tonic-gate rctl_dict_entry_t *rde;
36690Sstevel@tonic-gate rctl_hndl_t hndl;
36700Sstevel@tonic-gate char *name;
36710Sstevel@tonic-gate nvlist_t **nvlarray;
36720Sstevel@tonic-gate uint_t i, nelem;
36730Sstevel@tonic-gate int error; /* For ASSERT()s */
36740Sstevel@tonic-gate
36750Sstevel@tonic-gate name = nvpair_name(nvp);
36760Sstevel@tonic-gate hndl = rctl_hndl_lookup(name);
36770Sstevel@tonic-gate ASSERT(hndl != -1);
36780Sstevel@tonic-gate rde = rctl_dict_lookup_hndl(hndl);
36790Sstevel@tonic-gate ASSERT(rde != NULL);
36800Sstevel@tonic-gate
36810Sstevel@tonic-gate for (; /* ever */; ) {
36820Sstevel@tonic-gate rctl_val_t oval;
36830Sstevel@tonic-gate
36840Sstevel@tonic-gate mutex_enter(&pp->p_lock);
36850Sstevel@tonic-gate error = rctl_local_get(hndl, NULL, &oval, pp);
36860Sstevel@tonic-gate mutex_exit(&pp->p_lock);
36870Sstevel@tonic-gate ASSERT(error == 0); /* Can't fail for RCTL_FIRST */
36880Sstevel@tonic-gate ASSERT(oval.rcv_privilege != RCPRIV_BASIC);
36890Sstevel@tonic-gate if (oval.rcv_privilege == RCPRIV_SYSTEM)
36900Sstevel@tonic-gate break;
36910Sstevel@tonic-gate mutex_enter(&pp->p_lock);
36920Sstevel@tonic-gate error = rctl_local_delete(hndl, &oval, pp);
36930Sstevel@tonic-gate mutex_exit(&pp->p_lock);
36940Sstevel@tonic-gate ASSERT(error == 0);
36950Sstevel@tonic-gate }
36960Sstevel@tonic-gate error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
36970Sstevel@tonic-gate ASSERT(error == 0);
36980Sstevel@tonic-gate for (i = 0; i < nelem; i++) {
36990Sstevel@tonic-gate rctl_val_t *nvalp;
37000Sstevel@tonic-gate
37010Sstevel@tonic-gate nvalp = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
37020Sstevel@tonic-gate error = nvlist2rctlval(nvlarray[i], nvalp);
37030Sstevel@tonic-gate ASSERT(error == 0);
37040Sstevel@tonic-gate /*
37050Sstevel@tonic-gate * rctl_local_insert can fail if the value being
37060Sstevel@tonic-gate * inserted is a duplicate; this is OK.
37070Sstevel@tonic-gate */
37080Sstevel@tonic-gate mutex_enter(&pp->p_lock);
37090Sstevel@tonic-gate if (rctl_local_insert(hndl, nvalp, pp) != 0)
37100Sstevel@tonic-gate kmem_cache_free(rctl_val_cache, nvalp);
37110Sstevel@tonic-gate mutex_exit(&pp->p_lock);
37120Sstevel@tonic-gate }
37130Sstevel@tonic-gate }
37140Sstevel@tonic-gate /*
37150Sstevel@tonic-gate * Tell the world that we're done setting up.
37160Sstevel@tonic-gate *
37175880Snordmark * At this point we want to set the zone status to ZONE_IS_INITIALIZED
37180Sstevel@tonic-gate * and atomically set the zone's processor set visibility. Once
37190Sstevel@tonic-gate * we drop pool_lock() this zone will automatically get updated
37200Sstevel@tonic-gate * to reflect any future changes to the pools configuration.
37215880Snordmark *
37225880Snordmark * Note that after we drop the locks below (zonehash_lock in
37235880Snordmark * particular) other operations such as a zone_getattr call can
37245880Snordmark * now proceed and observe the zone. That is the reason for doing a
37255880Snordmark * state transition to the INITIALIZED state.
37260Sstevel@tonic-gate */
37270Sstevel@tonic-gate pool_lock();
37280Sstevel@tonic-gate mutex_enter(&cpu_lock);
37290Sstevel@tonic-gate mutex_enter(&zonehash_lock);
37300Sstevel@tonic-gate zone_uniqid(zone);
37310Sstevel@tonic-gate zone_zsd_configure(zone);
37320Sstevel@tonic-gate if (pool_state == POOL_ENABLED)
37330Sstevel@tonic-gate zone_pset_set(zone, pool_default->pool_pset->pset_id);
37340Sstevel@tonic-gate mutex_enter(&zone_status_lock);
37350Sstevel@tonic-gate ASSERT(zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
37365880Snordmark zone_status_set(zone, ZONE_IS_INITIALIZED);
37370Sstevel@tonic-gate mutex_exit(&zone_status_lock);
37380Sstevel@tonic-gate mutex_exit(&zonehash_lock);
37390Sstevel@tonic-gate mutex_exit(&cpu_lock);
37400Sstevel@tonic-gate pool_unlock();
37410Sstevel@tonic-gate
37425880Snordmark /* Now call the create callback for this key */
37435880Snordmark zsd_apply_all_keys(zsd_apply_create, zone);
37445880Snordmark
37455880Snordmark /* The callbacks are complete. Mark ZONE_IS_READY */
37465880Snordmark mutex_enter(&zone_status_lock);
37475880Snordmark ASSERT(zone_status_get(zone) == ZONE_IS_INITIALIZED);
37485880Snordmark zone_status_set(zone, ZONE_IS_READY);
37495880Snordmark mutex_exit(&zone_status_lock);
37505880Snordmark
37510Sstevel@tonic-gate /*
37520Sstevel@tonic-gate * Once we see the zone transition to the ZONE_IS_BOOTING state,
37530Sstevel@tonic-gate * we launch init, and set the state to running.
37540Sstevel@tonic-gate */
37550Sstevel@tonic-gate zone_status_wait_cpr(zone, ZONE_IS_BOOTING, "zsched");
37560Sstevel@tonic-gate
37570Sstevel@tonic-gate if (zone_status_get(zone) == ZONE_IS_BOOTING) {
37580Sstevel@tonic-gate id_t cid;
37590Sstevel@tonic-gate
37600Sstevel@tonic-gate /*
37610Sstevel@tonic-gate * Ok, this is a little complicated. We need to grab the
37620Sstevel@tonic-gate * zone's pool's scheduling class ID; note that by now, we
37630Sstevel@tonic-gate * are already bound to a pool if we need to be (zoneadmd
37640Sstevel@tonic-gate * will have done that to us while we're in the READY
37650Sstevel@tonic-gate * state). *But* the scheduling class for the zone's 'init'
37660Sstevel@tonic-gate * must be explicitly passed to newproc, which doesn't
37670Sstevel@tonic-gate * respect pool bindings.
37680Sstevel@tonic-gate *
37690Sstevel@tonic-gate * We hold the pool_lock across the call to newproc() to
37700Sstevel@tonic-gate * close the obvious race: the pool's scheduling class
37710Sstevel@tonic-gate * could change before we manage to create the LWP with
37720Sstevel@tonic-gate * classid 'cid'.
37730Sstevel@tonic-gate */
37740Sstevel@tonic-gate pool_lock();
37753247Sgjelinek if (zone->zone_defaultcid > 0)
37763247Sgjelinek cid = zone->zone_defaultcid;
37773247Sgjelinek else
37783247Sgjelinek cid = pool_get_class(zone->zone_pool);
37790Sstevel@tonic-gate if (cid == -1)
37800Sstevel@tonic-gate cid = defaultcid;
37810Sstevel@tonic-gate
37820Sstevel@tonic-gate /*
37830Sstevel@tonic-gate * If this fails, zone_boot will ultimately fail. The
37840Sstevel@tonic-gate * state of the zone will be set to SHUTTING_DOWN-- userland
37850Sstevel@tonic-gate * will have to tear down the zone, and fail, or try again.
37860Sstevel@tonic-gate */
37872267Sdp if ((zone->zone_boot_err = newproc(zone_start_init, NULL, cid,
378811173SJonathan.Adams@Sun.COM minclsyspri - 1, &ct, 0)) != 0) {
37890Sstevel@tonic-gate mutex_enter(&zone_status_lock);
37900Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
37910Sstevel@tonic-gate mutex_exit(&zone_status_lock);
37920Sstevel@tonic-gate }
37930Sstevel@tonic-gate pool_unlock();
37940Sstevel@tonic-gate }
37950Sstevel@tonic-gate
37960Sstevel@tonic-gate /*
37970Sstevel@tonic-gate * Wait for zone_destroy() to be called. This is what we spend
37980Sstevel@tonic-gate * most of our life doing.
37990Sstevel@tonic-gate */
38000Sstevel@tonic-gate zone_status_wait_cpr(zone, ZONE_IS_DYING, "zsched");
38010Sstevel@tonic-gate
38020Sstevel@tonic-gate if (ct)
38030Sstevel@tonic-gate /*
38040Sstevel@tonic-gate * At this point the process contract should be empty.
38050Sstevel@tonic-gate * (Though if it isn't, it's not the end of the world.)
38060Sstevel@tonic-gate */
38070Sstevel@tonic-gate VERIFY(contract_abandon(ct, curproc, B_TRUE) == 0);
38080Sstevel@tonic-gate
38090Sstevel@tonic-gate /*
38100Sstevel@tonic-gate * Allow kcred to be freed when all referring processes
38110Sstevel@tonic-gate * (including this one) go away. We can't just do this in
38120Sstevel@tonic-gate * zone_free because we need to wait for the zone_cred_ref to
38130Sstevel@tonic-gate * drop to 0 before calling zone_free, and the existence of
38140Sstevel@tonic-gate * zone_kcred will prevent that. Thus, we call crfree here to
38150Sstevel@tonic-gate * balance the crdup in zone_create. The crhold calls earlier
38160Sstevel@tonic-gate * in zsched will be dropped when the thread and process exit.
38170Sstevel@tonic-gate */
38180Sstevel@tonic-gate crfree(zone->zone_kcred);
38190Sstevel@tonic-gate zone->zone_kcred = NULL;
38200Sstevel@tonic-gate
38210Sstevel@tonic-gate exit(CLD_EXITED, 0);
38220Sstevel@tonic-gate }
38230Sstevel@tonic-gate
38240Sstevel@tonic-gate /*
38250Sstevel@tonic-gate * Helper function to determine if there are any submounts of the
38260Sstevel@tonic-gate * provided path. Used to make sure the zone doesn't "inherit" any
38270Sstevel@tonic-gate * mounts from before it is created.
38280Sstevel@tonic-gate */
38290Sstevel@tonic-gate static uint_t
zone_mount_count(const char * rootpath)38300Sstevel@tonic-gate zone_mount_count(const char *rootpath)
38310Sstevel@tonic-gate {
38320Sstevel@tonic-gate vfs_t *vfsp;
38330Sstevel@tonic-gate uint_t count = 0;
38340Sstevel@tonic-gate size_t rootpathlen = strlen(rootpath);
38350Sstevel@tonic-gate
38360Sstevel@tonic-gate /*
38370Sstevel@tonic-gate * Holding zonehash_lock prevents race conditions with
38380Sstevel@tonic-gate * vfs_list_add()/vfs_list_remove() since we serialize with
38390Sstevel@tonic-gate * zone_find_by_path().
38400Sstevel@tonic-gate */
38410Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
38420Sstevel@tonic-gate /*
38430Sstevel@tonic-gate * The rootpath must end with a '/'
38440Sstevel@tonic-gate */
38450Sstevel@tonic-gate ASSERT(rootpath[rootpathlen - 1] == '/');
38460Sstevel@tonic-gate
38470Sstevel@tonic-gate /*
38480Sstevel@tonic-gate * This intentionally does not count the rootpath itself if that
38490Sstevel@tonic-gate * happens to be a mount point.
38500Sstevel@tonic-gate */
38510Sstevel@tonic-gate vfs_list_read_lock();
38520Sstevel@tonic-gate vfsp = rootvfs;
38530Sstevel@tonic-gate do {
38540Sstevel@tonic-gate if (strncmp(rootpath, refstr_value(vfsp->vfs_mntpt),
38550Sstevel@tonic-gate rootpathlen) == 0)
38560Sstevel@tonic-gate count++;
38570Sstevel@tonic-gate vfsp = vfsp->vfs_next;
38580Sstevel@tonic-gate } while (vfsp != rootvfs);
38590Sstevel@tonic-gate vfs_list_unlock();
38600Sstevel@tonic-gate return (count);
38610Sstevel@tonic-gate }
38620Sstevel@tonic-gate
38630Sstevel@tonic-gate /*
38640Sstevel@tonic-gate * Helper function to make sure that a zone created on 'rootpath'
38650Sstevel@tonic-gate * wouldn't end up containing other zones' rootpaths.
38660Sstevel@tonic-gate */
38670Sstevel@tonic-gate static boolean_t
zone_is_nested(const char * rootpath)38680Sstevel@tonic-gate zone_is_nested(const char *rootpath)
38690Sstevel@tonic-gate {
38700Sstevel@tonic-gate zone_t *zone;
38710Sstevel@tonic-gate size_t rootpathlen = strlen(rootpath);
38720Sstevel@tonic-gate size_t len;
38730Sstevel@tonic-gate
38740Sstevel@tonic-gate ASSERT(MUTEX_HELD(&zonehash_lock));
38750Sstevel@tonic-gate
38768799SDhanaraj.M@Sun.COM /*
38778799SDhanaraj.M@Sun.COM * zone_set_root() appended '/' and '\0' at the end of rootpath
38788799SDhanaraj.M@Sun.COM */
38798799SDhanaraj.M@Sun.COM if ((rootpathlen <= 3) && (rootpath[0] == '/') &&
38808799SDhanaraj.M@Sun.COM (rootpath[1] == '/') && (rootpath[2] == '\0'))
38818799SDhanaraj.M@Sun.COM return (B_TRUE);
38828799SDhanaraj.M@Sun.COM
38830Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
38840Sstevel@tonic-gate zone = list_next(&zone_active, zone)) {
38850Sstevel@tonic-gate if (zone == global_zone)
38860Sstevel@tonic-gate continue;
38870Sstevel@tonic-gate len = strlen(zone->zone_rootpath);
38880Sstevel@tonic-gate if (strncmp(rootpath, zone->zone_rootpath,
38890Sstevel@tonic-gate MIN(rootpathlen, len)) == 0)
38900Sstevel@tonic-gate return (B_TRUE);
38910Sstevel@tonic-gate }
38920Sstevel@tonic-gate return (B_FALSE);
38930Sstevel@tonic-gate }
38940Sstevel@tonic-gate
38950Sstevel@tonic-gate static int
zone_set_privset(zone_t * zone,const priv_set_t * zone_privs,size_t zone_privssz)3896813Sdp zone_set_privset(zone_t *zone, const priv_set_t *zone_privs,
3897813Sdp size_t zone_privssz)
38980Sstevel@tonic-gate {
389912820Sdp@eng.sun.com priv_set_t *privs;
39000Sstevel@tonic-gate
3901813Sdp if (zone_privssz < sizeof (priv_set_t))
390212820Sdp@eng.sun.com return (ENOMEM);
390312820Sdp@eng.sun.com
390412820Sdp@eng.sun.com privs = kmem_alloc(sizeof (priv_set_t), KM_SLEEP);
3905813Sdp
39060Sstevel@tonic-gate if (copyin(zone_privs, privs, sizeof (priv_set_t))) {
39070Sstevel@tonic-gate kmem_free(privs, sizeof (priv_set_t));
39080Sstevel@tonic-gate return (EFAULT);
39090Sstevel@tonic-gate }
39100Sstevel@tonic-gate
39110Sstevel@tonic-gate zone->zone_privset = privs;
39120Sstevel@tonic-gate return (0);
39130Sstevel@tonic-gate }
39140Sstevel@tonic-gate
39150Sstevel@tonic-gate /*
39160Sstevel@tonic-gate * We make creative use of nvlists to pass in rctls from userland. The list is
39170Sstevel@tonic-gate * a list of the following structures:
39180Sstevel@tonic-gate *
39190Sstevel@tonic-gate * (name = rctl_name, value = nvpair_list_array)
39200Sstevel@tonic-gate *
39210Sstevel@tonic-gate * Where each element of the nvpair_list_array is of the form:
39220Sstevel@tonic-gate *
39230Sstevel@tonic-gate * [(name = "privilege", value = RCPRIV_PRIVILEGED),
39240Sstevel@tonic-gate * (name = "limit", value = uint64_t),
39250Sstevel@tonic-gate * (name = "action", value = (RCTL_LOCAL_NOACTION || RCTL_LOCAL_DENY))]
39260Sstevel@tonic-gate */
39270Sstevel@tonic-gate static int
parse_rctls(caddr_t ubuf,size_t buflen,nvlist_t ** nvlp)39280Sstevel@tonic-gate parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
39290Sstevel@tonic-gate {
39300Sstevel@tonic-gate nvpair_t *nvp = NULL;
39310Sstevel@tonic-gate nvlist_t *nvl = NULL;
39320Sstevel@tonic-gate char *kbuf;
39330Sstevel@tonic-gate int error;
39340Sstevel@tonic-gate rctl_val_t rv;
39350Sstevel@tonic-gate
39360Sstevel@tonic-gate *nvlp = NULL;
39370Sstevel@tonic-gate
39380Sstevel@tonic-gate if (buflen == 0)
39390Sstevel@tonic-gate return (0);
39400Sstevel@tonic-gate
39410Sstevel@tonic-gate if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
39420Sstevel@tonic-gate return (ENOMEM);
39430Sstevel@tonic-gate if (copyin(ubuf, kbuf, buflen)) {
39440Sstevel@tonic-gate error = EFAULT;
39450Sstevel@tonic-gate goto out;
39460Sstevel@tonic-gate }
39470Sstevel@tonic-gate if (nvlist_unpack(kbuf, buflen, &nvl, KM_SLEEP) != 0) {
39480Sstevel@tonic-gate /*
39490Sstevel@tonic-gate * nvl may have been allocated/free'd, but the value set to
39500Sstevel@tonic-gate * non-NULL, so we reset it here.
39510Sstevel@tonic-gate */
39520Sstevel@tonic-gate nvl = NULL;
39530Sstevel@tonic-gate error = EINVAL;
39540Sstevel@tonic-gate goto out;
39550Sstevel@tonic-gate }
39560Sstevel@tonic-gate while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
39570Sstevel@tonic-gate rctl_dict_entry_t *rde;
39580Sstevel@tonic-gate rctl_hndl_t hndl;
39590Sstevel@tonic-gate nvlist_t **nvlarray;
39600Sstevel@tonic-gate uint_t i, nelem;
39610Sstevel@tonic-gate char *name;
39620Sstevel@tonic-gate
39630Sstevel@tonic-gate error = EINVAL;
39640Sstevel@tonic-gate name = nvpair_name(nvp);
39650Sstevel@tonic-gate if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1)
39660Sstevel@tonic-gate != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
39670Sstevel@tonic-gate goto out;
39680Sstevel@tonic-gate }
39690Sstevel@tonic-gate if ((hndl = rctl_hndl_lookup(name)) == -1) {
39700Sstevel@tonic-gate goto out;
39710Sstevel@tonic-gate }
39720Sstevel@tonic-gate rde = rctl_dict_lookup_hndl(hndl);
39730Sstevel@tonic-gate error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
39740Sstevel@tonic-gate ASSERT(error == 0);
39750Sstevel@tonic-gate for (i = 0; i < nelem; i++) {
39760Sstevel@tonic-gate if (error = nvlist2rctlval(nvlarray[i], &rv))
39770Sstevel@tonic-gate goto out;
39780Sstevel@tonic-gate }
39790Sstevel@tonic-gate if (rctl_invalid_value(rde, &rv)) {
39800Sstevel@tonic-gate error = EINVAL;
39810Sstevel@tonic-gate goto out;
39820Sstevel@tonic-gate }
39830Sstevel@tonic-gate }
39840Sstevel@tonic-gate error = 0;
39850Sstevel@tonic-gate *nvlp = nvl;
39860Sstevel@tonic-gate out:
39870Sstevel@tonic-gate kmem_free(kbuf, buflen);
39880Sstevel@tonic-gate if (error && nvl != NULL)
39890Sstevel@tonic-gate nvlist_free(nvl);
39900Sstevel@tonic-gate return (error);
39910Sstevel@tonic-gate }
39920Sstevel@tonic-gate
39930Sstevel@tonic-gate int
zone_create_error(int er_error,int er_ext,int * er_out)39940Sstevel@tonic-gate zone_create_error(int er_error, int er_ext, int *er_out) {
39950Sstevel@tonic-gate if (er_out != NULL) {
39960Sstevel@tonic-gate if (copyout(&er_ext, er_out, sizeof (int))) {
39970Sstevel@tonic-gate return (set_errno(EFAULT));
39980Sstevel@tonic-gate }
39990Sstevel@tonic-gate }
40000Sstevel@tonic-gate return (set_errno(er_error));
40010Sstevel@tonic-gate }
40020Sstevel@tonic-gate
40031676Sjpk static int
zone_set_label(zone_t * zone,const bslabel_t * lab,uint32_t doi)40041676Sjpk zone_set_label(zone_t *zone, const bslabel_t *lab, uint32_t doi)
40051676Sjpk {
40061676Sjpk ts_label_t *tsl;
40071676Sjpk bslabel_t blab;
40081676Sjpk
40091676Sjpk /* Get label from user */
40101676Sjpk if (copyin(lab, &blab, sizeof (blab)) != 0)
40111676Sjpk return (EFAULT);
40121676Sjpk tsl = labelalloc(&blab, doi, KM_NOSLEEP);
40131676Sjpk if (tsl == NULL)
40141676Sjpk return (ENOMEM);
40151676Sjpk
40161676Sjpk zone->zone_slabel = tsl;
40171676Sjpk return (0);
40181676Sjpk }
40191676Sjpk
40200Sstevel@tonic-gate /*
4021789Sahrens * Parses a comma-separated list of ZFS datasets into a per-zone dictionary.
4022789Sahrens */
4023789Sahrens static int
parse_zfs(zone_t * zone,caddr_t ubuf,size_t buflen)4024789Sahrens parse_zfs(zone_t *zone, caddr_t ubuf, size_t buflen)
4025789Sahrens {
4026789Sahrens char *kbuf;
4027789Sahrens char *dataset, *next;
4028789Sahrens zone_dataset_t *zd;
4029789Sahrens size_t len;
4030789Sahrens
4031789Sahrens if (ubuf == NULL || buflen == 0)
4032789Sahrens return (0);
4033789Sahrens
4034789Sahrens if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
4035789Sahrens return (ENOMEM);
4036789Sahrens
4037789Sahrens if (copyin(ubuf, kbuf, buflen) != 0) {
4038789Sahrens kmem_free(kbuf, buflen);
4039789Sahrens return (EFAULT);
4040789Sahrens }
4041789Sahrens
4042789Sahrens dataset = next = kbuf;
4043789Sahrens for (;;) {
4044789Sahrens zd = kmem_alloc(sizeof (zone_dataset_t), KM_SLEEP);
4045789Sahrens
4046789Sahrens next = strchr(dataset, ',');
4047789Sahrens
4048789Sahrens if (next == NULL)
4049789Sahrens len = strlen(dataset);
4050789Sahrens else
4051789Sahrens len = next - dataset;
4052789Sahrens
4053789Sahrens zd->zd_dataset = kmem_alloc(len + 1, KM_SLEEP);
4054789Sahrens bcopy(dataset, zd->zd_dataset, len);
4055789Sahrens zd->zd_dataset[len] = '\0';
4056789Sahrens
4057789Sahrens list_insert_head(&zone->zone_datasets, zd);
4058789Sahrens
4059789Sahrens if (next == NULL)
4060789Sahrens break;
4061789Sahrens
4062789Sahrens dataset = next + 1;
4063789Sahrens }
4064789Sahrens
4065789Sahrens kmem_free(kbuf, buflen);
4066789Sahrens return (0);
4067789Sahrens }
4068789Sahrens
4069789Sahrens /*
40700Sstevel@tonic-gate * System call to create/initialize a new zone named 'zone_name', rooted
40710Sstevel@tonic-gate * at 'zone_root', with a zone-wide privilege limit set of 'zone_privs',
40721676Sjpk * and initialized with the zone-wide rctls described in 'rctlbuf', and
40731676Sjpk * with labeling set by 'match', 'doi', and 'label'.
40740Sstevel@tonic-gate *
40750Sstevel@tonic-gate * If extended error is non-null, we may use it to return more detailed
40760Sstevel@tonic-gate * error information.
40770Sstevel@tonic-gate */
40780Sstevel@tonic-gate static zoneid_t
zone_create(const char * zone_name,const char * zone_root,const priv_set_t * zone_privs,size_t zone_privssz,caddr_t rctlbuf,size_t rctlbufsz,caddr_t zfsbuf,size_t zfsbufsz,int * extended_error,int match,uint32_t doi,const bslabel_t * label,int flags)40790Sstevel@tonic-gate zone_create(const char *zone_name, const char *zone_root,
4080813Sdp const priv_set_t *zone_privs, size_t zone_privssz,
4081813Sdp caddr_t rctlbuf, size_t rctlbufsz,
40821676Sjpk caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
40833448Sdh155122 int match, uint32_t doi, const bslabel_t *label,
40843448Sdh155122 int flags)
40850Sstevel@tonic-gate {
40860Sstevel@tonic-gate struct zsched_arg zarg;
40870Sstevel@tonic-gate nvlist_t *rctls = NULL;
40880Sstevel@tonic-gate proc_t *pp = curproc;
40890Sstevel@tonic-gate zone_t *zone, *ztmp;
40900Sstevel@tonic-gate zoneid_t zoneid;
40910Sstevel@tonic-gate int error;
40920Sstevel@tonic-gate int error2 = 0;
40930Sstevel@tonic-gate char *str;
40940Sstevel@tonic-gate cred_t *zkcr;
40951769Scarlsonj boolean_t insert_label_hash;
40960Sstevel@tonic-gate
40970Sstevel@tonic-gate if (secpolicy_zone_config(CRED()) != 0)
40980Sstevel@tonic-gate return (set_errno(EPERM));
40990Sstevel@tonic-gate
41000Sstevel@tonic-gate /* can't boot zone from within chroot environment */
41010Sstevel@tonic-gate if (PTOU(pp)->u_rdir != NULL && PTOU(pp)->u_rdir != rootdir)
41020Sstevel@tonic-gate return (zone_create_error(ENOTSUP, ZE_CHROOTED,
4103813Sdp extended_error));
41040Sstevel@tonic-gate
41050Sstevel@tonic-gate zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
41060Sstevel@tonic-gate zoneid = zone->zone_id = id_alloc(zoneid_space);
41070Sstevel@tonic-gate zone->zone_status = ZONE_IS_UNINITIALIZED;
41080Sstevel@tonic-gate zone->zone_pool = pool_default;
41090Sstevel@tonic-gate zone->zone_pool_mod = gethrtime();
41100Sstevel@tonic-gate zone->zone_psetid = ZONE_PS_INVAL;
41110Sstevel@tonic-gate zone->zone_ncpus = 0;
41120Sstevel@tonic-gate zone->zone_ncpus_online = 0;
41132712Snn35248 zone->zone_restart_init = B_TRUE;
41142712Snn35248 zone->zone_brand = &native_brand;
41152712Snn35248 zone->zone_initname = NULL;
41160Sstevel@tonic-gate mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
41170Sstevel@tonic-gate mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
41183247Sgjelinek mutex_init(&zone->zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
41190Sstevel@tonic-gate cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
4120*13096SJordan.Vaughan@Sun.com list_create(&zone->zone_ref_list, sizeof (zone_ref_t),
4121*13096SJordan.Vaughan@Sun.com offsetof(zone_ref_t, zref_linkage));
41220Sstevel@tonic-gate list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
41230Sstevel@tonic-gate offsetof(struct zsd_entry, zsd_linkage));
4124789Sahrens list_create(&zone->zone_datasets, sizeof (zone_dataset_t),
4125789Sahrens offsetof(zone_dataset_t, zd_linkage));
412610616SSebastien.Roy@Sun.COM list_create(&zone->zone_dl_list, sizeof (zone_dl_t),
412710616SSebastien.Roy@Sun.COM offsetof(zone_dl_t, zdl_linkage));
41281676Sjpk rw_init(&zone->zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
412910910SRobert.Harris@Sun.COM rw_init(&zone->zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);
41300Sstevel@tonic-gate
41313448Sdh155122 if (flags & ZCF_NET_EXCL) {
41323448Sdh155122 zone->zone_flags |= ZF_NET_EXCL;
41333448Sdh155122 }
41343448Sdh155122
41350Sstevel@tonic-gate if ((error = zone_set_name(zone, zone_name)) != 0) {
41360Sstevel@tonic-gate zone_free(zone);
41370Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
41380Sstevel@tonic-gate }
41390Sstevel@tonic-gate
41400Sstevel@tonic-gate if ((error = zone_set_root(zone, zone_root)) != 0) {
41410Sstevel@tonic-gate zone_free(zone);
41420Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
41430Sstevel@tonic-gate }
4144813Sdp if ((error = zone_set_privset(zone, zone_privs, zone_privssz)) != 0) {
41450Sstevel@tonic-gate zone_free(zone);
41460Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
41470Sstevel@tonic-gate }
41480Sstevel@tonic-gate
41490Sstevel@tonic-gate /* initialize node name to be the same as zone name */
41500Sstevel@tonic-gate zone->zone_nodename = kmem_alloc(_SYS_NMLN, KM_SLEEP);
41510Sstevel@tonic-gate (void) strncpy(zone->zone_nodename, zone->zone_name, _SYS_NMLN);
41520Sstevel@tonic-gate zone->zone_nodename[_SYS_NMLN - 1] = '\0';
41530Sstevel@tonic-gate
41540Sstevel@tonic-gate zone->zone_domain = kmem_alloc(_SYS_NMLN, KM_SLEEP);
41550Sstevel@tonic-gate zone->zone_domain[0] = '\0';
41568662SJordan.Vaughan@Sun.com zone->zone_hostid = HW_INVALID_HOSTID;
41570Sstevel@tonic-gate zone->zone_shares = 1;
41582677Sml93401 zone->zone_shmmax = 0;
41592677Sml93401 zone->zone_ipc.ipcq_shmmni = 0;
41602677Sml93401 zone->zone_ipc.ipcq_semmni = 0;
41612677Sml93401 zone->zone_ipc.ipcq_msgmni = 0;
41620Sstevel@tonic-gate zone->zone_bootargs = NULL;
416312633Sjohn.levon@sun.com zone->zone_fs_allowed = NULL;
41642267Sdp zone->zone_initname =
41652267Sdp kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
41662267Sdp (void) strcpy(zone->zone_initname, zone_default_initname);
41673247Sgjelinek zone->zone_nlwps = 0;
41683247Sgjelinek zone->zone_nlwps_ctl = INT_MAX;
416912725SMenno.Lageman@Sun.COM zone->zone_nprocs = 0;
417012725SMenno.Lageman@Sun.COM zone->zone_nprocs_ctl = INT_MAX;
41712768Ssl108498 zone->zone_locked_mem = 0;
41722768Ssl108498 zone->zone_locked_mem_ctl = UINT64_MAX;
41733247Sgjelinek zone->zone_max_swap = 0;
41743247Sgjelinek zone->zone_max_swap_ctl = UINT64_MAX;
417512633Sjohn.levon@sun.com zone->zone_max_lofi = 0;
417612633Sjohn.levon@sun.com zone->zone_max_lofi_ctl = UINT64_MAX;
41773247Sgjelinek zone0.zone_lockedmem_kstat = NULL;
41783247Sgjelinek zone0.zone_swapresv_kstat = NULL;
41790Sstevel@tonic-gate
41800Sstevel@tonic-gate /*
41810Sstevel@tonic-gate * Zsched initializes the rctls.
41820Sstevel@tonic-gate */
41830Sstevel@tonic-gate zone->zone_rctls = NULL;
41840Sstevel@tonic-gate
41850Sstevel@tonic-gate if ((error = parse_rctls(rctlbuf, rctlbufsz, &rctls)) != 0) {
41860Sstevel@tonic-gate zone_free(zone);
41870Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
41880Sstevel@tonic-gate }
41890Sstevel@tonic-gate
4190789Sahrens if ((error = parse_zfs(zone, zfsbuf, zfsbufsz)) != 0) {
4191789Sahrens zone_free(zone);
4192789Sahrens return (set_errno(error));
4193789Sahrens }
4194789Sahrens
41950Sstevel@tonic-gate /*
41961676Sjpk * Read in the trusted system parameters:
41971676Sjpk * match flag and sensitivity label.
41981676Sjpk */
41991676Sjpk zone->zone_match = match;
42001769Scarlsonj if (is_system_labeled() && !(zone->zone_flags & ZF_IS_SCRATCH)) {
42014462Skp158701 /* Fail if requested to set doi to anything but system's doi */
42024462Skp158701 if (doi != 0 && doi != default_doi) {
42034462Skp158701 zone_free(zone);
42044462Skp158701 return (set_errno(EINVAL));
42054462Skp158701 }
42064462Skp158701 /* Always apply system's doi to the zone */
42074462Skp158701 error = zone_set_label(zone, label, default_doi);
42081676Sjpk if (error != 0) {
42091676Sjpk zone_free(zone);
42101676Sjpk return (set_errno(error));
42111676Sjpk }
42121769Scarlsonj insert_label_hash = B_TRUE;
42131676Sjpk } else {
42141676Sjpk /* all zones get an admin_low label if system is not labeled */
42151676Sjpk zone->zone_slabel = l_admin_low;
42161676Sjpk label_hold(l_admin_low);
42171769Scarlsonj insert_label_hash = B_FALSE;
42181676Sjpk }
42191676Sjpk
42201676Sjpk /*
42210Sstevel@tonic-gate * Stop all lwps since that's what normally happens as part of fork().
42220Sstevel@tonic-gate * This needs to happen before we grab any locks to avoid deadlock
42230Sstevel@tonic-gate * (another lwp in the process could be waiting for the held lock).
42240Sstevel@tonic-gate */
42250Sstevel@tonic-gate if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK)) {
42260Sstevel@tonic-gate zone_free(zone);
42270Sstevel@tonic-gate if (rctls)
42280Sstevel@tonic-gate nvlist_free(rctls);
42290Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
42300Sstevel@tonic-gate }
42310Sstevel@tonic-gate
42320Sstevel@tonic-gate if (block_mounts() == 0) {
42330Sstevel@tonic-gate mutex_enter(&pp->p_lock);
42340Sstevel@tonic-gate if (curthread != pp->p_agenttp)
42350Sstevel@tonic-gate continuelwps(pp);
42360Sstevel@tonic-gate mutex_exit(&pp->p_lock);
42370Sstevel@tonic-gate zone_free(zone);
42380Sstevel@tonic-gate if (rctls)
42390Sstevel@tonic-gate nvlist_free(rctls);
42400Sstevel@tonic-gate return (zone_create_error(error, 0, extended_error));
42410Sstevel@tonic-gate }
42420Sstevel@tonic-gate
42430Sstevel@tonic-gate /*
42440Sstevel@tonic-gate * Set up credential for kernel access. After this, any errors
42450Sstevel@tonic-gate * should go through the dance in errout rather than calling
42460Sstevel@tonic-gate * zone_free directly.
42470Sstevel@tonic-gate */
42480Sstevel@tonic-gate zone->zone_kcred = crdup(kcred);
42490Sstevel@tonic-gate crsetzone(zone->zone_kcred, zone);
42500Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_PPRIV(zone->zone_kcred));
42510Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_EPRIV(zone->zone_kcred));
42520Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_IPRIV(zone->zone_kcred));
42530Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_LPRIV(zone->zone_kcred));
42540Sstevel@tonic-gate
42550Sstevel@tonic-gate mutex_enter(&zonehash_lock);
42560Sstevel@tonic-gate /*
42570Sstevel@tonic-gate * Make sure zone doesn't already exist.
42581676Sjpk *
42591676Sjpk * If the system and zone are labeled,
42601676Sjpk * make sure no other zone exists that has the same label.
42610Sstevel@tonic-gate */
42621676Sjpk if ((ztmp = zone_find_all_by_name(zone->zone_name)) != NULL ||
42631769Scarlsonj (insert_label_hash &&
42641676Sjpk (ztmp = zone_find_all_by_label(zone->zone_slabel)) != NULL)) {
42650Sstevel@tonic-gate zone_status_t status;
42660Sstevel@tonic-gate
42670Sstevel@tonic-gate status = zone_status_get(ztmp);
42680Sstevel@tonic-gate if (status == ZONE_IS_READY || status == ZONE_IS_RUNNING)
42690Sstevel@tonic-gate error = EEXIST;
42700Sstevel@tonic-gate else
42710Sstevel@tonic-gate error = EBUSY;
42724791Ston
42734791Ston if (insert_label_hash)
42744791Ston error2 = ZE_LABELINUSE;
42754791Ston
42760Sstevel@tonic-gate goto errout;
42770Sstevel@tonic-gate }
42780Sstevel@tonic-gate
42790Sstevel@tonic-gate /*
42800Sstevel@tonic-gate * Don't allow zone creations which would cause one zone's rootpath to
42810Sstevel@tonic-gate * be accessible from that of another (non-global) zone.
42820Sstevel@tonic-gate */
42830Sstevel@tonic-gate if (zone_is_nested(zone->zone_rootpath)) {
42840Sstevel@tonic-gate error = EBUSY;
42850Sstevel@tonic-gate goto errout;
42860Sstevel@tonic-gate }
42870Sstevel@tonic-gate
42880Sstevel@tonic-gate ASSERT(zonecount != 0); /* check for leaks */
42890Sstevel@tonic-gate if (zonecount + 1 > maxzones) {
42900Sstevel@tonic-gate error = ENOMEM;
42910Sstevel@tonic-gate goto errout;
42920Sstevel@tonic-gate }
42930Sstevel@tonic-gate
42940Sstevel@tonic-gate if (zone_mount_count(zone->zone_rootpath) != 0) {
42950Sstevel@tonic-gate error = EBUSY;
42960Sstevel@tonic-gate error2 = ZE_AREMOUNTS;
42970Sstevel@tonic-gate goto errout;
42980Sstevel@tonic-gate }
42990Sstevel@tonic-gate
43000Sstevel@tonic-gate /*
43010Sstevel@tonic-gate * Zone is still incomplete, but we need to drop all locks while
43020Sstevel@tonic-gate * zsched() initializes this zone's kernel process. We
43030Sstevel@tonic-gate * optimistically add the zone to the hashtable and associated
43040Sstevel@tonic-gate * lists so a parallel zone_create() doesn't try to create the
43050Sstevel@tonic-gate * same zone.
43060Sstevel@tonic-gate */
43070Sstevel@tonic-gate zonecount++;
43080Sstevel@tonic-gate (void) mod_hash_insert(zonehashbyid,
43090Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)zone->zone_id,
43100Sstevel@tonic-gate (mod_hash_val_t)(uintptr_t)zone);
43110Sstevel@tonic-gate str = kmem_alloc(strlen(zone->zone_name) + 1, KM_SLEEP);
43120Sstevel@tonic-gate (void) strcpy(str, zone->zone_name);
43130Sstevel@tonic-gate (void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)str,
43140Sstevel@tonic-gate (mod_hash_val_t)(uintptr_t)zone);
43151769Scarlsonj if (insert_label_hash) {
43161676Sjpk (void) mod_hash_insert(zonehashbylabel,
43171676Sjpk (mod_hash_key_t)zone->zone_slabel, (mod_hash_val_t)zone);
43181769Scarlsonj zone->zone_flags |= ZF_HASHED_LABEL;
43191676Sjpk }
43201676Sjpk
43210Sstevel@tonic-gate /*
43220Sstevel@tonic-gate * Insert into active list. At this point there are no 'hold's
43230Sstevel@tonic-gate * on the zone, but everyone else knows not to use it, so we can
43240Sstevel@tonic-gate * continue to use it. zsched() will do a zone_hold() if the
43250Sstevel@tonic-gate * newproc() is successful.
43260Sstevel@tonic-gate */
43270Sstevel@tonic-gate list_insert_tail(&zone_active, zone);
43280Sstevel@tonic-gate mutex_exit(&zonehash_lock);
43290Sstevel@tonic-gate
43300Sstevel@tonic-gate zarg.zone = zone;
43310Sstevel@tonic-gate zarg.nvlist = rctls;
43320Sstevel@tonic-gate /*
43330Sstevel@tonic-gate * The process, task, and project rctls are probably wrong;
43340Sstevel@tonic-gate * we need an interface to get the default values of all rctls,
43350Sstevel@tonic-gate * and initialize zsched appropriately. I'm not sure that that
43360Sstevel@tonic-gate * makes much of a difference, though.
43370Sstevel@tonic-gate */
433811173SJonathan.Adams@Sun.COM error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0);
433911173SJonathan.Adams@Sun.COM if (error != 0) {
43400Sstevel@tonic-gate /*
43410Sstevel@tonic-gate * We need to undo all globally visible state.
43420Sstevel@tonic-gate */
43430Sstevel@tonic-gate mutex_enter(&zonehash_lock);
43440Sstevel@tonic-gate list_remove(&zone_active, zone);
43451769Scarlsonj if (zone->zone_flags & ZF_HASHED_LABEL) {
43461676Sjpk ASSERT(zone->zone_slabel != NULL);
43471676Sjpk (void) mod_hash_destroy(zonehashbylabel,
43481676Sjpk (mod_hash_key_t)zone->zone_slabel);
43491676Sjpk }
43500Sstevel@tonic-gate (void) mod_hash_destroy(zonehashbyname,
43510Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)zone->zone_name);
43520Sstevel@tonic-gate (void) mod_hash_destroy(zonehashbyid,
43530Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)zone->zone_id);
43540Sstevel@tonic-gate ASSERT(zonecount > 1);
43550Sstevel@tonic-gate zonecount--;
43560Sstevel@tonic-gate goto errout;
43570Sstevel@tonic-gate }
43580Sstevel@tonic-gate
43590Sstevel@tonic-gate /*
43600Sstevel@tonic-gate * Zone creation can't fail from now on.
43610Sstevel@tonic-gate */
43620Sstevel@tonic-gate
43630Sstevel@tonic-gate /*
43643247Sgjelinek * Create zone kstats
43653247Sgjelinek */
43663247Sgjelinek zone_kstat_create(zone);
43673247Sgjelinek
43683247Sgjelinek /*
43690Sstevel@tonic-gate * Let the other lwps continue.
43700Sstevel@tonic-gate */
43710Sstevel@tonic-gate mutex_enter(&pp->p_lock);
43720Sstevel@tonic-gate if (curthread != pp->p_agenttp)
43730Sstevel@tonic-gate continuelwps(pp);
43740Sstevel@tonic-gate mutex_exit(&pp->p_lock);
43750Sstevel@tonic-gate
43760Sstevel@tonic-gate /*
43770Sstevel@tonic-gate * Wait for zsched to finish initializing the zone.
43780Sstevel@tonic-gate */
43790Sstevel@tonic-gate zone_status_wait(zone, ZONE_IS_READY);
43800Sstevel@tonic-gate /*
43810Sstevel@tonic-gate * The zone is fully visible, so we can let mounts progress.
43820Sstevel@tonic-gate */
43830Sstevel@tonic-gate resume_mounts();
43840Sstevel@tonic-gate if (rctls)
43850Sstevel@tonic-gate nvlist_free(rctls);
43860Sstevel@tonic-gate
43870Sstevel@tonic-gate return (zoneid);
43880Sstevel@tonic-gate
43890Sstevel@tonic-gate errout:
43900Sstevel@tonic-gate mutex_exit(&zonehash_lock);
43910Sstevel@tonic-gate /*
43920Sstevel@tonic-gate * Let the other lwps continue.
43930Sstevel@tonic-gate */
43940Sstevel@tonic-gate mutex_enter(&pp->p_lock);
43950Sstevel@tonic-gate if (curthread != pp->p_agenttp)
43960Sstevel@tonic-gate continuelwps(pp);
43970Sstevel@tonic-gate mutex_exit(&pp->p_lock);
43980Sstevel@tonic-gate
43990Sstevel@tonic-gate resume_mounts();
44000Sstevel@tonic-gate if (rctls)
44010Sstevel@tonic-gate nvlist_free(rctls);
44020Sstevel@tonic-gate /*
44030Sstevel@tonic-gate * There is currently one reference to the zone, a cred_ref from
44040Sstevel@tonic-gate * zone_kcred. To free the zone, we call crfree, which will call
44050Sstevel@tonic-gate * zone_cred_rele, which will call zone_free.
44060Sstevel@tonic-gate */
4407*13096SJordan.Vaughan@Sun.com ASSERT(zone->zone_cred_ref == 1);
44080Sstevel@tonic-gate ASSERT(zone->zone_kcred->cr_ref == 1);
44090Sstevel@tonic-gate ASSERT(zone->zone_ref == 0);
44100Sstevel@tonic-gate zkcr = zone->zone_kcred;
44110Sstevel@tonic-gate zone->zone_kcred = NULL;
44120Sstevel@tonic-gate crfree(zkcr); /* triggers call to zone_free */
44130Sstevel@tonic-gate return (zone_create_error(error, error2, extended_error));
44140Sstevel@tonic-gate }
44150Sstevel@tonic-gate
44160Sstevel@tonic-gate /*
44170Sstevel@tonic-gate * Cause the zone to boot. This is pretty simple, since we let zoneadmd do
44182267Sdp * the heavy lifting. initname is the path to the program to launch
44192267Sdp * at the "top" of the zone; if this is NULL, we use the system default,
44202267Sdp * which is stored at zone_default_initname.
44210Sstevel@tonic-gate */
44220Sstevel@tonic-gate static int
zone_boot(zoneid_t zoneid)44232267Sdp zone_boot(zoneid_t zoneid)
44240Sstevel@tonic-gate {
44250Sstevel@tonic-gate int err;
44260Sstevel@tonic-gate zone_t *zone;
44270Sstevel@tonic-gate
44280Sstevel@tonic-gate if (secpolicy_zone_config(CRED()) != 0)
44290Sstevel@tonic-gate return (set_errno(EPERM));
44300Sstevel@tonic-gate if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
44310Sstevel@tonic-gate return (set_errno(EINVAL));
44320Sstevel@tonic-gate
44330Sstevel@tonic-gate mutex_enter(&zonehash_lock);
44340Sstevel@tonic-gate /*
44350Sstevel@tonic-gate * Look for zone under hash lock to prevent races with calls to
44360Sstevel@tonic-gate * zone_shutdown, zone_destroy, etc.
44370Sstevel@tonic-gate */
44380Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
44390Sstevel@tonic-gate mutex_exit(&zonehash_lock);
44400Sstevel@tonic-gate return (set_errno(EINVAL));
44410Sstevel@tonic-gate }
44420Sstevel@tonic-gate
44430Sstevel@tonic-gate mutex_enter(&zone_status_lock);
44440Sstevel@tonic-gate if (zone_status_get(zone) != ZONE_IS_READY) {
44450Sstevel@tonic-gate mutex_exit(&zone_status_lock);
44460Sstevel@tonic-gate mutex_exit(&zonehash_lock);
44470Sstevel@tonic-gate return (set_errno(EINVAL));
44480Sstevel@tonic-gate }
44490Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_BOOTING);
44500Sstevel@tonic-gate mutex_exit(&zone_status_lock);
44510Sstevel@tonic-gate
44520Sstevel@tonic-gate zone_hold(zone); /* so we can use the zone_t later */
44530Sstevel@tonic-gate mutex_exit(&zonehash_lock);
44540Sstevel@tonic-gate
44550Sstevel@tonic-gate if (zone_status_wait_sig(zone, ZONE_IS_RUNNING) == 0) {
44560Sstevel@tonic-gate zone_rele(zone);
44570Sstevel@tonic-gate return (set_errno(EINTR));
44580Sstevel@tonic-gate }
44590Sstevel@tonic-gate
44600Sstevel@tonic-gate /*
44610Sstevel@tonic-gate * Boot (starting init) might have failed, in which case the zone
44620Sstevel@tonic-gate * will go to the SHUTTING_DOWN state; an appropriate errno will
44630Sstevel@tonic-gate * be placed in zone->zone_boot_err, and so we return that.
44640Sstevel@tonic-gate */
44650Sstevel@tonic-gate err = zone->zone_boot_err;
44660Sstevel@tonic-gate zone_rele(zone);
44670Sstevel@tonic-gate return (err ? set_errno(err) : 0);
44680Sstevel@tonic-gate }
44690Sstevel@tonic-gate
44700Sstevel@tonic-gate /*
44710Sstevel@tonic-gate * Kills all user processes in the zone, waiting for them all to exit
44720Sstevel@tonic-gate * before returning.
44730Sstevel@tonic-gate */
44740Sstevel@tonic-gate static int
zone_empty(zone_t * zone)44750Sstevel@tonic-gate zone_empty(zone_t *zone)
44760Sstevel@tonic-gate {
44770Sstevel@tonic-gate int waitstatus;
44780Sstevel@tonic-gate
44790Sstevel@tonic-gate /*
44800Sstevel@tonic-gate * We need to drop zonehash_lock before killing all
44810Sstevel@tonic-gate * processes, otherwise we'll deadlock with zone_find_*
44820Sstevel@tonic-gate * which can be called from the exit path.
44830Sstevel@tonic-gate */
44840Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
448511066Srafael.vanoni@sun.com while ((waitstatus = zone_status_timedwait_sig(zone,
448611066Srafael.vanoni@sun.com ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) {
44870Sstevel@tonic-gate killall(zone->zone_id);
44880Sstevel@tonic-gate }
44890Sstevel@tonic-gate /*
44900Sstevel@tonic-gate * return EINTR if we were signaled
44910Sstevel@tonic-gate */
44920Sstevel@tonic-gate if (waitstatus == 0)
44930Sstevel@tonic-gate return (EINTR);
44940Sstevel@tonic-gate return (0);
44950Sstevel@tonic-gate }
44960Sstevel@tonic-gate
44970Sstevel@tonic-gate /*
44981676Sjpk * This function implements the policy for zone visibility.
44991676Sjpk *
45001676Sjpk * In standard Solaris, a non-global zone can only see itself.
45011676Sjpk *
45021676Sjpk * In Trusted Extensions, a labeled zone can lookup any zone whose label
45031676Sjpk * it dominates. For this test, the label of the global zone is treated as
45041676Sjpk * admin_high so it is special-cased instead of being checked for dominance.
45051676Sjpk *
45061676Sjpk * Returns true if zone attributes are viewable, false otherwise.
45071676Sjpk */
45081676Sjpk static boolean_t
zone_list_access(zone_t * zone)45091676Sjpk zone_list_access(zone_t *zone)
45101676Sjpk {
45111676Sjpk
45121676Sjpk if (curproc->p_zone == global_zone ||
45131676Sjpk curproc->p_zone == zone) {
45141676Sjpk return (B_TRUE);
45151769Scarlsonj } else if (is_system_labeled() && !(zone->zone_flags & ZF_IS_SCRATCH)) {
45161676Sjpk bslabel_t *curproc_label;
45171676Sjpk bslabel_t *zone_label;
45181676Sjpk
45191676Sjpk curproc_label = label2bslabel(curproc->p_zone->zone_slabel);
45201676Sjpk zone_label = label2bslabel(zone->zone_slabel);
45211676Sjpk
45221676Sjpk if (zone->zone_id != GLOBAL_ZONEID &&
45231676Sjpk bldominates(curproc_label, zone_label)) {
45241676Sjpk return (B_TRUE);
45251676Sjpk } else {
45261676Sjpk return (B_FALSE);
45271676Sjpk }
45281676Sjpk } else {
45291676Sjpk return (B_FALSE);
45301676Sjpk }
45311676Sjpk }
45321676Sjpk
45331676Sjpk /*
45340Sstevel@tonic-gate * Systemcall to start the zone's halt sequence. By the time this
45350Sstevel@tonic-gate * function successfully returns, all user processes and kernel threads
45360Sstevel@tonic-gate * executing in it will have exited, ZSD shutdown callbacks executed,
45370Sstevel@tonic-gate * and the zone status set to ZONE_IS_DOWN.
45380Sstevel@tonic-gate *
45390Sstevel@tonic-gate * It is possible that the call will interrupt itself if the caller is the
45400Sstevel@tonic-gate * parent of any process running in the zone, and doesn't have SIGCHLD blocked.
45410Sstevel@tonic-gate */
45420Sstevel@tonic-gate static int
zone_shutdown(zoneid_t zoneid)45430Sstevel@tonic-gate zone_shutdown(zoneid_t zoneid)
45440Sstevel@tonic-gate {
45450Sstevel@tonic-gate int error;
45460Sstevel@tonic-gate zone_t *zone;
45470Sstevel@tonic-gate zone_status_t status;
45480Sstevel@tonic-gate
45490Sstevel@tonic-gate if (secpolicy_zone_config(CRED()) != 0)
45500Sstevel@tonic-gate return (set_errno(EPERM));
45510Sstevel@tonic-gate if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
45520Sstevel@tonic-gate return (set_errno(EINVAL));
45530Sstevel@tonic-gate
45540Sstevel@tonic-gate /*
45550Sstevel@tonic-gate * Block mounts so that VFS_MOUNT() can get an accurate view of
45560Sstevel@tonic-gate * the zone's status with regards to ZONE_IS_SHUTTING down.
45570Sstevel@tonic-gate *
45580Sstevel@tonic-gate * e.g. NFS can fail the mount if it determines that the zone
45590Sstevel@tonic-gate * has already begun the shutdown sequence.
45600Sstevel@tonic-gate */
45610Sstevel@tonic-gate if (block_mounts() == 0)
45620Sstevel@tonic-gate return (set_errno(EINTR));
45630Sstevel@tonic-gate mutex_enter(&zonehash_lock);
45640Sstevel@tonic-gate /*
45650Sstevel@tonic-gate * Look for zone under hash lock to prevent races with other
45660Sstevel@tonic-gate * calls to zone_shutdown and zone_destroy.
45670Sstevel@tonic-gate */
45680Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
45690Sstevel@tonic-gate mutex_exit(&zonehash_lock);
45700Sstevel@tonic-gate resume_mounts();
45710Sstevel@tonic-gate return (set_errno(EINVAL));
45720Sstevel@tonic-gate }
45730Sstevel@tonic-gate mutex_enter(&zone_status_lock);
45740Sstevel@tonic-gate status = zone_status_get(zone);
45750Sstevel@tonic-gate /*
45760Sstevel@tonic-gate * Fail if the zone isn't fully initialized yet.
45770Sstevel@tonic-gate */
45780Sstevel@tonic-gate if (status < ZONE_IS_READY) {
45790Sstevel@tonic-gate mutex_exit(&zone_status_lock);
45800Sstevel@tonic-gate mutex_exit(&zonehash_lock);
45810Sstevel@tonic-gate resume_mounts();
45820Sstevel@tonic-gate return (set_errno(EINVAL));
45830Sstevel@tonic-gate }
45840Sstevel@tonic-gate /*
45850Sstevel@tonic-gate * If conditions required for zone_shutdown() to return have been met,
45860Sstevel@tonic-gate * return success.
45870Sstevel@tonic-gate */
45880Sstevel@tonic-gate if (status >= ZONE_IS_DOWN) {
45890Sstevel@tonic-gate mutex_exit(&zone_status_lock);
45900Sstevel@tonic-gate mutex_exit(&zonehash_lock);
45910Sstevel@tonic-gate resume_mounts();
45920Sstevel@tonic-gate return (0);
45930Sstevel@tonic-gate }
45940Sstevel@tonic-gate /*
45950Sstevel@tonic-gate * If zone_shutdown() hasn't been called before, go through the motions.
45960Sstevel@tonic-gate * If it has, there's nothing to do but wait for the kernel threads to
45970Sstevel@tonic-gate * drain.
45980Sstevel@tonic-gate */
45990Sstevel@tonic-gate if (status < ZONE_IS_EMPTY) {
46000Sstevel@tonic-gate uint_t ntasks;
46010Sstevel@tonic-gate
46020Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
46030Sstevel@tonic-gate if ((ntasks = zone->zone_ntasks) != 1) {
46040Sstevel@tonic-gate /*
46050Sstevel@tonic-gate * There's still stuff running.
46060Sstevel@tonic-gate */
46070Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
46080Sstevel@tonic-gate }
46090Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
46100Sstevel@tonic-gate if (ntasks == 1) {
46110Sstevel@tonic-gate /*
46120Sstevel@tonic-gate * The only way to create another task is through
46130Sstevel@tonic-gate * zone_enter(), which will block until we drop
46140Sstevel@tonic-gate * zonehash_lock. The zone is empty.
46150Sstevel@tonic-gate */
46160Sstevel@tonic-gate if (zone->zone_kthreads == NULL) {
46170Sstevel@tonic-gate /*
46180Sstevel@tonic-gate * Skip ahead to ZONE_IS_DOWN
46190Sstevel@tonic-gate */
46200Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_DOWN);
46210Sstevel@tonic-gate } else {
46220Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_EMPTY);
46230Sstevel@tonic-gate }
46240Sstevel@tonic-gate }
46250Sstevel@tonic-gate }
46260Sstevel@tonic-gate zone_hold(zone); /* so we can use the zone_t later */
46270Sstevel@tonic-gate mutex_exit(&zone_status_lock);
46280Sstevel@tonic-gate mutex_exit(&zonehash_lock);
46290Sstevel@tonic-gate resume_mounts();
46300Sstevel@tonic-gate
46310Sstevel@tonic-gate if (error = zone_empty(zone)) {
46320Sstevel@tonic-gate zone_rele(zone);
46330Sstevel@tonic-gate return (set_errno(error));
46340Sstevel@tonic-gate }
46350Sstevel@tonic-gate /*
46360Sstevel@tonic-gate * After the zone status goes to ZONE_IS_DOWN this zone will no
46370Sstevel@tonic-gate * longer be notified of changes to the pools configuration, so
46380Sstevel@tonic-gate * in order to not end up with a stale pool pointer, we point
46390Sstevel@tonic-gate * ourselves at the default pool and remove all resource
46400Sstevel@tonic-gate * visibility. This is especially important as the zone_t may
46410Sstevel@tonic-gate * languish on the deathrow for a very long time waiting for
46420Sstevel@tonic-gate * cred's to drain out.
46430Sstevel@tonic-gate *
46440Sstevel@tonic-gate * This rebinding of the zone can happen multiple times
46450Sstevel@tonic-gate * (presumably due to interrupted or parallel systemcalls)
46460Sstevel@tonic-gate * without any adverse effects.
46470Sstevel@tonic-gate */
46480Sstevel@tonic-gate if (pool_lock_intr() != 0) {
46490Sstevel@tonic-gate zone_rele(zone);
46500Sstevel@tonic-gate return (set_errno(EINTR));
46510Sstevel@tonic-gate }
46520Sstevel@tonic-gate if (pool_state == POOL_ENABLED) {
46530Sstevel@tonic-gate mutex_enter(&cpu_lock);
46540Sstevel@tonic-gate zone_pool_set(zone, pool_default);
46550Sstevel@tonic-gate /*
46560Sstevel@tonic-gate * The zone no longer needs to be able to see any cpus.
46570Sstevel@tonic-gate */
46580Sstevel@tonic-gate zone_pset_set(zone, ZONE_PS_INVAL);
46590Sstevel@tonic-gate mutex_exit(&cpu_lock);
46600Sstevel@tonic-gate }
46610Sstevel@tonic-gate pool_unlock();
46620Sstevel@tonic-gate
46630Sstevel@tonic-gate /*
46640Sstevel@tonic-gate * ZSD shutdown callbacks can be executed multiple times, hence
46650Sstevel@tonic-gate * it is safe to not be holding any locks across this call.
46660Sstevel@tonic-gate */
46670Sstevel@tonic-gate zone_zsd_callbacks(zone, ZSD_SHUTDOWN);
46680Sstevel@tonic-gate
46690Sstevel@tonic-gate mutex_enter(&zone_status_lock);
46700Sstevel@tonic-gate if (zone->zone_kthreads == NULL && zone_status_get(zone) < ZONE_IS_DOWN)
46710Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_DOWN);
46720Sstevel@tonic-gate mutex_exit(&zone_status_lock);
46730Sstevel@tonic-gate
46740Sstevel@tonic-gate /*
46750Sstevel@tonic-gate * Wait for kernel threads to drain.
46760Sstevel@tonic-gate */
46770Sstevel@tonic-gate if (!zone_status_wait_sig(zone, ZONE_IS_DOWN)) {
46780Sstevel@tonic-gate zone_rele(zone);
46790Sstevel@tonic-gate return (set_errno(EINTR));
46800Sstevel@tonic-gate }
46812712Snn35248
46823671Ssl108498 /*
46833671Ssl108498 * Zone can be become down/destroyable even if the above wait
46843671Ssl108498 * returns EINTR, so any code added here may never execute.
46853671Ssl108498 * (i.e. don't add code here)
46863671Ssl108498 */
46872712Snn35248
46880Sstevel@tonic-gate zone_rele(zone);
46890Sstevel@tonic-gate return (0);
46900Sstevel@tonic-gate }
46910Sstevel@tonic-gate
46920Sstevel@tonic-gate /*
4693*13096SJordan.Vaughan@Sun.com * Log the specified zone's reference counts. The caller should not be
4694*13096SJordan.Vaughan@Sun.com * holding the zone's zone_lock.
4695*13096SJordan.Vaughan@Sun.com */
4696*13096SJordan.Vaughan@Sun.com static void
zone_log_refcounts(zone_t * zone)4697*13096SJordan.Vaughan@Sun.com zone_log_refcounts(zone_t *zone)
4698*13096SJordan.Vaughan@Sun.com {
4699*13096SJordan.Vaughan@Sun.com char *buffer;
4700*13096SJordan.Vaughan@Sun.com char *buffer_position;
4701*13096SJordan.Vaughan@Sun.com uint32_t buffer_size;
4702*13096SJordan.Vaughan@Sun.com uint32_t index;
4703*13096SJordan.Vaughan@Sun.com uint_t ref;
4704*13096SJordan.Vaughan@Sun.com uint_t cred_ref;
4705*13096SJordan.Vaughan@Sun.com
4706*13096SJordan.Vaughan@Sun.com /*
4707*13096SJordan.Vaughan@Sun.com * Construct a string representing the subsystem-specific reference
4708*13096SJordan.Vaughan@Sun.com * counts. The counts are printed in ascending order by index into the
4709*13096SJordan.Vaughan@Sun.com * zone_t::zone_subsys_ref array. The list will be surrounded by
4710*13096SJordan.Vaughan@Sun.com * square brackets [] and will only contain nonzero reference counts.
4711*13096SJordan.Vaughan@Sun.com *
4712*13096SJordan.Vaughan@Sun.com * The buffer will hold two square bracket characters plus ten digits,
4713*13096SJordan.Vaughan@Sun.com * one colon, one space, one comma, and some characters for a
4714*13096SJordan.Vaughan@Sun.com * subsystem name per subsystem-specific reference count. (Unsigned 32-
4715*13096SJordan.Vaughan@Sun.com * bit integers have at most ten decimal digits.) The last
4716*13096SJordan.Vaughan@Sun.com * reference count's comma is replaced by the closing square
4717*13096SJordan.Vaughan@Sun.com * bracket and a NULL character to terminate the string.
4718*13096SJordan.Vaughan@Sun.com *
4719*13096SJordan.Vaughan@Sun.com * NOTE: We have to grab the zone's zone_lock to create a consistent
4720*13096SJordan.Vaughan@Sun.com * snapshot of the zone's reference counters.
4721*13096SJordan.Vaughan@Sun.com *
4722*13096SJordan.Vaughan@Sun.com * First, figure out how much space the string buffer will need.
4723*13096SJordan.Vaughan@Sun.com * The buffer's size is stored in buffer_size.
4724*13096SJordan.Vaughan@Sun.com */
4725*13096SJordan.Vaughan@Sun.com buffer_size = 2; /* for the square brackets */
4726*13096SJordan.Vaughan@Sun.com mutex_enter(&zone->zone_lock);
4727*13096SJordan.Vaughan@Sun.com zone->zone_flags |= ZF_REFCOUNTS_LOGGED;
4728*13096SJordan.Vaughan@Sun.com ref = zone->zone_ref;
4729*13096SJordan.Vaughan@Sun.com cred_ref = zone->zone_cred_ref;
4730*13096SJordan.Vaughan@Sun.com for (index = 0; index < ZONE_REF_NUM_SUBSYS; ++index)
4731*13096SJordan.Vaughan@Sun.com if (zone->zone_subsys_ref[index] != 0)
4732*13096SJordan.Vaughan@Sun.com buffer_size += strlen(zone_ref_subsys_names[index]) +
4733*13096SJordan.Vaughan@Sun.com 13;
4734*13096SJordan.Vaughan@Sun.com if (buffer_size == 2) {
4735*13096SJordan.Vaughan@Sun.com /*
4736*13096SJordan.Vaughan@Sun.com * No subsystems had nonzero reference counts. Don't bother
4737*13096SJordan.Vaughan@Sun.com * with allocating a buffer; just log the general-purpose and
4738*13096SJordan.Vaughan@Sun.com * credential reference counts.
4739*13096SJordan.Vaughan@Sun.com */
4740*13096SJordan.Vaughan@Sun.com mutex_exit(&zone->zone_lock);
4741*13096SJordan.Vaughan@Sun.com (void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE,
4742*13096SJordan.Vaughan@Sun.com "Zone '%s' (ID: %d) is shutting down, but %u zone "
4743*13096SJordan.Vaughan@Sun.com "references and %u credential references are still extant",
4744*13096SJordan.Vaughan@Sun.com zone->zone_name, zone->zone_id, ref, cred_ref);
4745*13096SJordan.Vaughan@Sun.com return;
4746*13096SJordan.Vaughan@Sun.com }
4747*13096SJordan.Vaughan@Sun.com
4748*13096SJordan.Vaughan@Sun.com /*
4749*13096SJordan.Vaughan@Sun.com * buffer_size contains the exact number of characters that the
4750*13096SJordan.Vaughan@Sun.com * buffer will need. Allocate the buffer and fill it with nonzero
4751*13096SJordan.Vaughan@Sun.com * subsystem-specific reference counts. Surround the results with
4752*13096SJordan.Vaughan@Sun.com * square brackets afterwards.
4753*13096SJordan.Vaughan@Sun.com */
4754*13096SJordan.Vaughan@Sun.com buffer = kmem_alloc(buffer_size, KM_SLEEP);
4755*13096SJordan.Vaughan@Sun.com buffer_position = &buffer[1];
4756*13096SJordan.Vaughan@Sun.com for (index = 0; index < ZONE_REF_NUM_SUBSYS; ++index) {
4757*13096SJordan.Vaughan@Sun.com /*
4758*13096SJordan.Vaughan@Sun.com * NOTE: The DDI's version of sprintf() returns a pointer to
4759*13096SJordan.Vaughan@Sun.com * the modified buffer rather than the number of bytes written
4760*13096SJordan.Vaughan@Sun.com * (as in snprintf(3C)). This is unfortunate and annoying.
4761*13096SJordan.Vaughan@Sun.com * Therefore, we'll use snprintf() with INT_MAX to get the
4762*13096SJordan.Vaughan@Sun.com * number of bytes written. Using INT_MAX is safe because
4763*13096SJordan.Vaughan@Sun.com * the buffer is perfectly sized for the data: we'll never
4764*13096SJordan.Vaughan@Sun.com * overrun the buffer.
4765*13096SJordan.Vaughan@Sun.com */
4766*13096SJordan.Vaughan@Sun.com if (zone->zone_subsys_ref[index] != 0)
4767*13096SJordan.Vaughan@Sun.com buffer_position += snprintf(buffer_position, INT_MAX,
4768*13096SJordan.Vaughan@Sun.com "%s: %u,", zone_ref_subsys_names[index],
4769*13096SJordan.Vaughan@Sun.com zone->zone_subsys_ref[index]);
4770*13096SJordan.Vaughan@Sun.com }
4771*13096SJordan.Vaughan@Sun.com mutex_exit(&zone->zone_lock);
4772*13096SJordan.Vaughan@Sun.com buffer[0] = '[';
4773*13096SJordan.Vaughan@Sun.com ASSERT((uintptr_t)(buffer_position - buffer) < buffer_size);
4774*13096SJordan.Vaughan@Sun.com ASSERT(buffer_position[0] == '\0' && buffer_position[-1] == ',');
4775*13096SJordan.Vaughan@Sun.com buffer_position[-1] = ']';
4776*13096SJordan.Vaughan@Sun.com
4777*13096SJordan.Vaughan@Sun.com /*
4778*13096SJordan.Vaughan@Sun.com * Log the reference counts and free the message buffer.
4779*13096SJordan.Vaughan@Sun.com */
4780*13096SJordan.Vaughan@Sun.com (void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE,
4781*13096SJordan.Vaughan@Sun.com "Zone '%s' (ID: %d) is shutting down, but %u zone references and "
4782*13096SJordan.Vaughan@Sun.com "%u credential references are still extant %s", zone->zone_name,
4783*13096SJordan.Vaughan@Sun.com zone->zone_id, ref, cred_ref, buffer);
4784*13096SJordan.Vaughan@Sun.com kmem_free(buffer, buffer_size);
4785*13096SJordan.Vaughan@Sun.com }
4786*13096SJordan.Vaughan@Sun.com
4787*13096SJordan.Vaughan@Sun.com /*
47880Sstevel@tonic-gate * Systemcall entry point to finalize the zone halt process. The caller
47892677Sml93401 * must have already successfully called zone_shutdown().
47900Sstevel@tonic-gate *
47910Sstevel@tonic-gate * Upon successful completion, the zone will have been fully destroyed:
47920Sstevel@tonic-gate * zsched will have exited, destructor callbacks executed, and the zone
47930Sstevel@tonic-gate * removed from the list of active zones.
47940Sstevel@tonic-gate */
47950Sstevel@tonic-gate static int
zone_destroy(zoneid_t zoneid)47960Sstevel@tonic-gate zone_destroy(zoneid_t zoneid)
47970Sstevel@tonic-gate {
47980Sstevel@tonic-gate uint64_t uniqid;
47990Sstevel@tonic-gate zone_t *zone;
48000Sstevel@tonic-gate zone_status_t status;
4801*13096SJordan.Vaughan@Sun.com clock_t wait_time;
4802*13096SJordan.Vaughan@Sun.com boolean_t log_refcounts;
48030Sstevel@tonic-gate
48040Sstevel@tonic-gate if (secpolicy_zone_config(CRED()) != 0)
48050Sstevel@tonic-gate return (set_errno(EPERM));
48060Sstevel@tonic-gate if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
48070Sstevel@tonic-gate return (set_errno(EINVAL));
48080Sstevel@tonic-gate
48090Sstevel@tonic-gate mutex_enter(&zonehash_lock);
48100Sstevel@tonic-gate /*
48110Sstevel@tonic-gate * Look for zone under hash lock to prevent races with other
48120Sstevel@tonic-gate * calls to zone_destroy.
48130Sstevel@tonic-gate */
48140Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
48150Sstevel@tonic-gate mutex_exit(&zonehash_lock);
48160Sstevel@tonic-gate return (set_errno(EINVAL));
48170Sstevel@tonic-gate }
48180Sstevel@tonic-gate
48190Sstevel@tonic-gate if (zone_mount_count(zone->zone_rootpath) != 0) {
48200Sstevel@tonic-gate mutex_exit(&zonehash_lock);
48210Sstevel@tonic-gate return (set_errno(EBUSY));
48220Sstevel@tonic-gate }
48230Sstevel@tonic-gate mutex_enter(&zone_status_lock);
48240Sstevel@tonic-gate status = zone_status_get(zone);
48250Sstevel@tonic-gate if (status < ZONE_IS_DOWN) {
48260Sstevel@tonic-gate mutex_exit(&zone_status_lock);
48270Sstevel@tonic-gate mutex_exit(&zonehash_lock);
48280Sstevel@tonic-gate return (set_errno(EBUSY));
48290Sstevel@tonic-gate } else if (status == ZONE_IS_DOWN) {
48300Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_DYING); /* Tell zsched to exit */
48310Sstevel@tonic-gate }
48320Sstevel@tonic-gate mutex_exit(&zone_status_lock);
48330Sstevel@tonic-gate zone_hold(zone);
48340Sstevel@tonic-gate mutex_exit(&zonehash_lock);
48350Sstevel@tonic-gate
48360Sstevel@tonic-gate /*
48370Sstevel@tonic-gate * wait for zsched to exit
48380Sstevel@tonic-gate */
48390Sstevel@tonic-gate zone_status_wait(zone, ZONE_IS_DEAD);
48400Sstevel@tonic-gate zone_zsd_callbacks(zone, ZSD_DESTROY);
48413448Sdh155122 zone->zone_netstack = NULL;
48420Sstevel@tonic-gate uniqid = zone->zone_uniqid;
48430Sstevel@tonic-gate zone_rele(zone);
48440Sstevel@tonic-gate zone = NULL; /* potentially free'd */
48450Sstevel@tonic-gate
4846*13096SJordan.Vaughan@Sun.com log_refcounts = B_FALSE;
4847*13096SJordan.Vaughan@Sun.com wait_time = SEC_TO_TICK(ZONE_DESTROY_TIMEOUT_SECS);
48480Sstevel@tonic-gate mutex_enter(&zonehash_lock);
48490Sstevel@tonic-gate for (; /* ever */; ) {
48500Sstevel@tonic-gate boolean_t unref;
4851*13096SJordan.Vaughan@Sun.com boolean_t refs_have_been_logged;
48520Sstevel@tonic-gate
48530Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL ||
48540Sstevel@tonic-gate zone->zone_uniqid != uniqid) {
48550Sstevel@tonic-gate /*
48560Sstevel@tonic-gate * The zone has gone away. Necessary conditions
48570Sstevel@tonic-gate * are met, so we return success.
48580Sstevel@tonic-gate */
48590Sstevel@tonic-gate mutex_exit(&zonehash_lock);
48600Sstevel@tonic-gate return (0);
48610Sstevel@tonic-gate }
48620Sstevel@tonic-gate mutex_enter(&zone->zone_lock);
48630Sstevel@tonic-gate unref = ZONE_IS_UNREF(zone);
4864*13096SJordan.Vaughan@Sun.com refs_have_been_logged = (zone->zone_flags &
4865*13096SJordan.Vaughan@Sun.com ZF_REFCOUNTS_LOGGED);
48660Sstevel@tonic-gate mutex_exit(&zone->zone_lock);
48670Sstevel@tonic-gate if (unref) {
48680Sstevel@tonic-gate /*
48690Sstevel@tonic-gate * There is only one reference to the zone -- that
48700Sstevel@tonic-gate * added when the zone was added to the hashtables --
48710Sstevel@tonic-gate * and things will remain this way until we drop
48720Sstevel@tonic-gate * zonehash_lock... we can go ahead and cleanup the
48730Sstevel@tonic-gate * zone.
48740Sstevel@tonic-gate */
48750Sstevel@tonic-gate break;
48760Sstevel@tonic-gate }
48770Sstevel@tonic-gate
4878*13096SJordan.Vaughan@Sun.com /*
4879*13096SJordan.Vaughan@Sun.com * Wait for zone_rele_common() or zone_cred_rele() to signal
4880*13096SJordan.Vaughan@Sun.com * zone_destroy_cv. zone_destroy_cv is signaled only when
4881*13096SJordan.Vaughan@Sun.com * some zone's general-purpose reference count reaches one.
4882*13096SJordan.Vaughan@Sun.com * If ZONE_DESTROY_TIMEOUT_SECS seconds elapse while waiting
4883*13096SJordan.Vaughan@Sun.com * on zone_destroy_cv, then log the zone's reference counts and
4884*13096SJordan.Vaughan@Sun.com * continue to wait for zone_rele() and zone_cred_rele().
4885*13096SJordan.Vaughan@Sun.com */
4886*13096SJordan.Vaughan@Sun.com if (!refs_have_been_logged) {
4887*13096SJordan.Vaughan@Sun.com if (!log_refcounts) {
4888*13096SJordan.Vaughan@Sun.com /*
4889*13096SJordan.Vaughan@Sun.com * This thread hasn't timed out waiting on
4890*13096SJordan.Vaughan@Sun.com * zone_destroy_cv yet. Wait wait_time clock
4891*13096SJordan.Vaughan@Sun.com * ticks (initially ZONE_DESTROY_TIMEOUT_SECS
4892*13096SJordan.Vaughan@Sun.com * seconds) for the zone's references to clear.
4893*13096SJordan.Vaughan@Sun.com */
4894*13096SJordan.Vaughan@Sun.com ASSERT(wait_time > 0);
4895*13096SJordan.Vaughan@Sun.com wait_time = cv_reltimedwait_sig(
4896*13096SJordan.Vaughan@Sun.com &zone_destroy_cv, &zonehash_lock, wait_time,
4897*13096SJordan.Vaughan@Sun.com TR_SEC);
4898*13096SJordan.Vaughan@Sun.com if (wait_time > 0) {
4899*13096SJordan.Vaughan@Sun.com /*
4900*13096SJordan.Vaughan@Sun.com * A thread in zone_rele() or
4901*13096SJordan.Vaughan@Sun.com * zone_cred_rele() signaled
4902*13096SJordan.Vaughan@Sun.com * zone_destroy_cv before this thread's
4903*13096SJordan.Vaughan@Sun.com * wait timed out. The zone might have
4904*13096SJordan.Vaughan@Sun.com * only one reference left; find out!
4905*13096SJordan.Vaughan@Sun.com */
4906*13096SJordan.Vaughan@Sun.com continue;
4907*13096SJordan.Vaughan@Sun.com } else if (wait_time == 0) {
4908*13096SJordan.Vaughan@Sun.com /* The thread's process was signaled. */
4909*13096SJordan.Vaughan@Sun.com mutex_exit(&zonehash_lock);
4910*13096SJordan.Vaughan@Sun.com return (set_errno(EINTR));
4911*13096SJordan.Vaughan@Sun.com }
4912*13096SJordan.Vaughan@Sun.com
4913*13096SJordan.Vaughan@Sun.com /*
4914*13096SJordan.Vaughan@Sun.com * The thread timed out while waiting on
4915*13096SJordan.Vaughan@Sun.com * zone_destroy_cv. Even though the thread
4916*13096SJordan.Vaughan@Sun.com * timed out, it has to check whether another
4917*13096SJordan.Vaughan@Sun.com * thread woke up from zone_destroy_cv and
4918*13096SJordan.Vaughan@Sun.com * destroyed the zone.
4919*13096SJordan.Vaughan@Sun.com *
4920*13096SJordan.Vaughan@Sun.com * If the zone still exists and has more than
4921*13096SJordan.Vaughan@Sun.com * one unreleased general-purpose reference,
4922*13096SJordan.Vaughan@Sun.com * then log the zone's reference counts.
4923*13096SJordan.Vaughan@Sun.com */
4924*13096SJordan.Vaughan@Sun.com log_refcounts = B_TRUE;
4925*13096SJordan.Vaughan@Sun.com continue;
4926*13096SJordan.Vaughan@Sun.com }
4927*13096SJordan.Vaughan@Sun.com
4928*13096SJordan.Vaughan@Sun.com /*
4929*13096SJordan.Vaughan@Sun.com * The thread already timed out on zone_destroy_cv while
4930*13096SJordan.Vaughan@Sun.com * waiting for subsystems to release the zone's last
4931*13096SJordan.Vaughan@Sun.com * general-purpose references. Log the zone's reference
4932*13096SJordan.Vaughan@Sun.com * counts and wait indefinitely on zone_destroy_cv.
4933*13096SJordan.Vaughan@Sun.com */
4934*13096SJordan.Vaughan@Sun.com zone_log_refcounts(zone);
4935*13096SJordan.Vaughan@Sun.com }
49360Sstevel@tonic-gate if (cv_wait_sig(&zone_destroy_cv, &zonehash_lock) == 0) {
4937*13096SJordan.Vaughan@Sun.com /* The thread's process was signaled. */
49380Sstevel@tonic-gate mutex_exit(&zonehash_lock);
49390Sstevel@tonic-gate return (set_errno(EINTR));
49400Sstevel@tonic-gate }
49410Sstevel@tonic-gate }
49420Sstevel@tonic-gate
49433792Sakolb /*
49443792Sakolb * Remove CPU cap for this zone now since we're not going to
49453792Sakolb * fail below this point.
49463792Sakolb */
49473792Sakolb cpucaps_zone_remove(zone);
49483792Sakolb
49493792Sakolb /* Get rid of the zone's kstats */
49503247Sgjelinek zone_kstat_delete(zone);
49513247Sgjelinek
495212273SCasper.Dik@Sun.COM /* remove the pfexecd doors */
495312273SCasper.Dik@Sun.COM if (zone->zone_pfexecd != NULL) {
495412273SCasper.Dik@Sun.COM klpd_freelist(&zone->zone_pfexecd);
495512273SCasper.Dik@Sun.COM zone->zone_pfexecd = NULL;
495612273SCasper.Dik@Sun.COM }
495712273SCasper.Dik@Sun.COM
49584888Seh208807 /* free brand specific data */
49594888Seh208807 if (ZONE_IS_BRANDED(zone))
49604888Seh208807 ZBROP(zone)->b_free_brand_data(zone);
49614888Seh208807
49623671Ssl108498 /* Say goodbye to brand framework. */
49633671Ssl108498 brand_unregister_zone(zone->zone_brand);
49643671Ssl108498
49650Sstevel@tonic-gate /*
49660Sstevel@tonic-gate * It is now safe to let the zone be recreated; remove it from the
49670Sstevel@tonic-gate * lists. The memory will not be freed until the last cred
49680Sstevel@tonic-gate * reference goes away.
49690Sstevel@tonic-gate */
49700Sstevel@tonic-gate ASSERT(zonecount > 1); /* must be > 1; can't destroy global zone */
49710Sstevel@tonic-gate zonecount--;
49720Sstevel@tonic-gate /* remove from active list and hash tables */
49730Sstevel@tonic-gate list_remove(&zone_active, zone);
49740Sstevel@tonic-gate (void) mod_hash_destroy(zonehashbyname,
49750Sstevel@tonic-gate (mod_hash_key_t)zone->zone_name);
49760Sstevel@tonic-gate (void) mod_hash_destroy(zonehashbyid,
49770Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)zone->zone_id);
49781769Scarlsonj if (zone->zone_flags & ZF_HASHED_LABEL)
49791676Sjpk (void) mod_hash_destroy(zonehashbylabel,
49801676Sjpk (mod_hash_key_t)zone->zone_slabel);
49810Sstevel@tonic-gate mutex_exit(&zonehash_lock);
49820Sstevel@tonic-gate
4983766Scarlsonj /*
4984766Scarlsonj * Release the root vnode; we're not using it anymore. Nor should any
4985766Scarlsonj * other thread that might access it exist.
4986766Scarlsonj */
4987766Scarlsonj if (zone->zone_rootvp != NULL) {
4988766Scarlsonj VN_RELE(zone->zone_rootvp);
4989766Scarlsonj zone->zone_rootvp = NULL;
4990766Scarlsonj }
4991766Scarlsonj
49920Sstevel@tonic-gate /* add to deathrow list */
49930Sstevel@tonic-gate mutex_enter(&zone_deathrow_lock);
49940Sstevel@tonic-gate list_insert_tail(&zone_deathrow, zone);
49950Sstevel@tonic-gate mutex_exit(&zone_deathrow_lock);
49960Sstevel@tonic-gate
49970Sstevel@tonic-gate /*
49980Sstevel@tonic-gate * Drop last reference (which was added by zsched()), this will
49990Sstevel@tonic-gate * free the zone unless there are outstanding cred references.
50000Sstevel@tonic-gate */
50010Sstevel@tonic-gate zone_rele(zone);
50020Sstevel@tonic-gate return (0);
50030Sstevel@tonic-gate }
50040Sstevel@tonic-gate
50050Sstevel@tonic-gate /*
50060Sstevel@tonic-gate * Systemcall entry point for zone_getattr(2).
50070Sstevel@tonic-gate */
50080Sstevel@tonic-gate static ssize_t
zone_getattr(zoneid_t zoneid,int attr,void * buf,size_t bufsize)50090Sstevel@tonic-gate zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
50100Sstevel@tonic-gate {
50110Sstevel@tonic-gate size_t size;
50120Sstevel@tonic-gate int error = 0, err;
50130Sstevel@tonic-gate zone_t *zone;
50140Sstevel@tonic-gate char *zonepath;
50152267Sdp char *outstr;
50160Sstevel@tonic-gate zone_status_t zone_status;
50170Sstevel@tonic-gate pid_t initpid;
50183792Sakolb boolean_t global = (curzone == global_zone);
50193792Sakolb boolean_t inzone = (curzone->zone_id == zoneid);
50203448Sdh155122 ushort_t flags;
502112748SSowmini.Varadhan@oracle.COM zone_net_data_t *zbuf;
50220Sstevel@tonic-gate
50230Sstevel@tonic-gate mutex_enter(&zonehash_lock);
50240Sstevel@tonic-gate if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
50250Sstevel@tonic-gate mutex_exit(&zonehash_lock);
50260Sstevel@tonic-gate return (set_errno(EINVAL));
50270Sstevel@tonic-gate }
50280Sstevel@tonic-gate zone_status = zone_status_get(zone);
50295880Snordmark if (zone_status < ZONE_IS_INITIALIZED) {
50300Sstevel@tonic-gate mutex_exit(&zonehash_lock);
50310Sstevel@tonic-gate return (set_errno(EINVAL));
50320Sstevel@tonic-gate }
50330Sstevel@tonic-gate zone_hold(zone);
50340Sstevel@tonic-gate mutex_exit(&zonehash_lock);
50350Sstevel@tonic-gate
50360Sstevel@tonic-gate /*
50371676Sjpk * If not in the global zone, don't show information about other zones,
50381676Sjpk * unless the system is labeled and the local zone's label dominates
50391676Sjpk * the other zone.
50400Sstevel@tonic-gate */
50411676Sjpk if (!zone_list_access(zone)) {
50420Sstevel@tonic-gate zone_rele(zone);
50430Sstevel@tonic-gate return (set_errno(EINVAL));
50440Sstevel@tonic-gate }
50450Sstevel@tonic-gate
50460Sstevel@tonic-gate switch (attr) {
50470Sstevel@tonic-gate case ZONE_ATTR_ROOT:
50480Sstevel@tonic-gate if (global) {
50490Sstevel@tonic-gate /*
50500Sstevel@tonic-gate * Copy the path to trim the trailing "/" (except for
50510Sstevel@tonic-gate * the global zone).
50520Sstevel@tonic-gate */
50530Sstevel@tonic-gate if (zone != global_zone)
50540Sstevel@tonic-gate size = zone->zone_rootpathlen - 1;
50550Sstevel@tonic-gate else
50560Sstevel@tonic-gate size = zone->zone_rootpathlen;
50570Sstevel@tonic-gate zonepath = kmem_alloc(size, KM_SLEEP);
50580Sstevel@tonic-gate bcopy(zone->zone_rootpath, zonepath, size);
50590Sstevel@tonic-gate zonepath[size - 1] = '\0';
50600Sstevel@tonic-gate } else {
50613792Sakolb if (inzone || !is_system_labeled()) {
50621676Sjpk /*
50631676Sjpk * Caller is not in the global zone.
50641676Sjpk * if the query is on the current zone
50651676Sjpk * or the system is not labeled,
50661676Sjpk * just return faked-up path for current zone.
50671676Sjpk */
50681676Sjpk zonepath = "/";
50691676Sjpk size = 2;
50701676Sjpk } else {
50711676Sjpk /*
50721676Sjpk * Return related path for current zone.
50731676Sjpk */
50741676Sjpk int prefix_len = strlen(zone_prefix);
50751676Sjpk int zname_len = strlen(zone->zone_name);
50761676Sjpk
50771676Sjpk size = prefix_len + zname_len + 1;
50781676Sjpk zonepath = kmem_alloc(size, KM_SLEEP);
50791676Sjpk bcopy(zone_prefix, zonepath, prefix_len);
50801676Sjpk bcopy(zone->zone_name, zonepath +
50812267Sdp prefix_len, zname_len);
50821676Sjpk zonepath[size - 1] = '\0';
50831676Sjpk }
50840Sstevel@tonic-gate }
50850Sstevel@tonic-gate if (bufsize > size)
50860Sstevel@tonic-gate bufsize = size;
50870Sstevel@tonic-gate if (buf != NULL) {
50880Sstevel@tonic-gate err = copyoutstr(zonepath, buf, bufsize, NULL);
50890Sstevel@tonic-gate if (err != 0 && err != ENAMETOOLONG)
50900Sstevel@tonic-gate error = EFAULT;
50910Sstevel@tonic-gate }
50923792Sakolb if (global || (is_system_labeled() && !inzone))
50930Sstevel@tonic-gate kmem_free(zonepath, size);
50940Sstevel@tonic-gate break;
50950Sstevel@tonic-gate
50960Sstevel@tonic-gate case ZONE_ATTR_NAME:
50970Sstevel@tonic-gate size = strlen(zone->zone_name) + 1;
50980Sstevel@tonic-gate if (bufsize > size)
50990Sstevel@tonic-gate bufsize = size;
51000Sstevel@tonic-gate if (buf != NULL) {
51010Sstevel@tonic-gate err = copyoutstr(zone->zone_name, buf, bufsize, NULL);
51020Sstevel@tonic-gate if (err != 0 && err != ENAMETOOLONG)
51030Sstevel@tonic-gate error = EFAULT;
51040Sstevel@tonic-gate }
51050Sstevel@tonic-gate break;
51060Sstevel@tonic-gate
51070Sstevel@tonic-gate case ZONE_ATTR_STATUS:
51080Sstevel@tonic-gate /*
51090Sstevel@tonic-gate * Since we're not holding zonehash_lock, the zone status
51100Sstevel@tonic-gate * may be anything; leave it up to userland to sort it out.
51110Sstevel@tonic-gate */
51120Sstevel@tonic-gate size = sizeof (zone_status);
51130Sstevel@tonic-gate if (bufsize > size)
51140Sstevel@tonic-gate bufsize = size;
51150Sstevel@tonic-gate zone_status = zone_status_get(zone);
51160Sstevel@tonic-gate if (buf != NULL &&
51170Sstevel@tonic-gate copyout(&zone_status, buf, bufsize) != 0)
51180Sstevel@tonic-gate error = EFAULT;
51190Sstevel@tonic-gate break;
51203448Sdh155122 case ZONE_ATTR_FLAGS:
51213448Sdh155122 size = sizeof (zone->zone_flags);
51223448Sdh155122 if (bufsize > size)
51233448Sdh155122 bufsize = size;
51243448Sdh155122 flags = zone->zone_flags;
51253448Sdh155122 if (buf != NULL &&
51263448Sdh155122 copyout(&flags, buf, bufsize) != 0)
51273448Sdh155122 error = EFAULT;
51283448Sdh155122 break;
51290Sstevel@tonic-gate case ZONE_ATTR_PRIVSET:
51300Sstevel@tonic-gate size = sizeof (priv_set_t);
51310Sstevel@tonic-gate if (bufsize > size)
51320Sstevel@tonic-gate bufsize = size;
51330Sstevel@tonic-gate if (buf != NULL &&
51340Sstevel@tonic-gate copyout(zone->zone_privset, buf, bufsize) != 0)
51350Sstevel@tonic-gate error = EFAULT;
51360Sstevel@tonic-gate break;
51370Sstevel@tonic-gate case ZONE_ATTR_UNIQID:
51380Sstevel@tonic-gate size = sizeof (zone->zone_uniqid);
51390Sstevel@tonic-gate if (bufsize > size)
51400Sstevel@tonic-gate bufsize = size;
51410Sstevel@tonic-gate if (buf != NULL &&
51420Sstevel@tonic-gate copyout(&zone->zone_uniqid, buf, bufsize) != 0)
51430Sstevel@tonic-gate error = EFAULT;
51440Sstevel@tonic-gate break;
51450Sstevel@tonic-gate case ZONE_ATTR_POOLID:
51460Sstevel@tonic-gate {
51470Sstevel@tonic-gate pool_t *pool;
51480Sstevel@tonic-gate poolid_t poolid;
51490Sstevel@tonic-gate
51500Sstevel@tonic-gate if (pool_lock_intr() != 0) {
51510Sstevel@tonic-gate error = EINTR;
51520Sstevel@tonic-gate break;
51530Sstevel@tonic-gate }
51540Sstevel@tonic-gate pool = zone_pool_get(zone);
51550Sstevel@tonic-gate poolid = pool->pool_id;
51560Sstevel@tonic-gate pool_unlock();
51570Sstevel@tonic-gate size = sizeof (poolid);
51580Sstevel@tonic-gate if (bufsize > size)
51590Sstevel@tonic-gate bufsize = size;
51600Sstevel@tonic-gate if (buf != NULL && copyout(&poolid, buf, size) != 0)
51610Sstevel@tonic-gate error = EFAULT;
51620Sstevel@tonic-gate }
51630Sstevel@tonic-gate break;
51641676Sjpk case ZONE_ATTR_SLBL:
51651676Sjpk size = sizeof (bslabel_t);
51661676Sjpk if (bufsize > size)
51671676Sjpk bufsize = size;
51681676Sjpk if (zone->zone_slabel == NULL)
51691676Sjpk error = EINVAL;
51701676Sjpk else if (buf != NULL &&
51711676Sjpk copyout(label2bslabel(zone->zone_slabel), buf,
51721676Sjpk bufsize) != 0)
51731676Sjpk error = EFAULT;
51741676Sjpk break;
51750Sstevel@tonic-gate case ZONE_ATTR_INITPID:
51760Sstevel@tonic-gate size = sizeof (initpid);
51770Sstevel@tonic-gate if (bufsize > size)
51780Sstevel@tonic-gate bufsize = size;
51790Sstevel@tonic-gate initpid = zone->zone_proc_initpid;
51800Sstevel@tonic-gate if (initpid == -1) {
51810Sstevel@tonic-gate error = ESRCH;
51820Sstevel@tonic-gate break;
51830Sstevel@tonic-gate }
51840Sstevel@tonic-gate if (buf != NULL &&
51850Sstevel@tonic-gate copyout(&initpid, buf, bufsize) != 0)
51860Sstevel@tonic-gate error = EFAULT;
51870Sstevel@tonic-gate break;
51882712Snn35248 case ZONE_ATTR_BRAND:
51892712Snn35248 size = strlen(zone->zone_brand->b_name) + 1;
51902712Snn35248
51912712Snn35248 if (bufsize > size)
51922712Snn35248 bufsize = size;
51932712Snn35248 if (buf != NULL) {
51942712Snn35248 err = copyoutstr(zone->zone_brand->b_name, buf,
51952712Snn35248 bufsize, NULL);
51962712Snn35248 if (err != 0 && err != ENAMETOOLONG)
51972712Snn35248 error = EFAULT;
51982712Snn35248 }
51992712Snn35248 break;
52002267Sdp case ZONE_ATTR_INITNAME:
52012267Sdp size = strlen(zone->zone_initname) + 1;
52022267Sdp if (bufsize > size)
52032267Sdp bufsize = size;
52042267Sdp if (buf != NULL) {
52052267Sdp err = copyoutstr(zone->zone_initname, buf, bufsize,
52062267Sdp NULL);
52072267Sdp if (err != 0 && err != ENAMETOOLONG)
52082267Sdp error = EFAULT;
52092267Sdp }
52102267Sdp break;
52112267Sdp case ZONE_ATTR_BOOTARGS:
52122267Sdp if (zone->zone_bootargs == NULL)
52132267Sdp outstr = "";
52142267Sdp else
52152267Sdp outstr = zone->zone_bootargs;
52162267Sdp size = strlen(outstr) + 1;
52172267Sdp if (bufsize > size)
52182267Sdp bufsize = size;
52192267Sdp if (buf != NULL) {
52202267Sdp err = copyoutstr(outstr, buf, bufsize, NULL);
52212267Sdp if (err != 0 && err != ENAMETOOLONG)
52222267Sdp error = EFAULT;
52232267Sdp }
52242267Sdp break;
52253247Sgjelinek case ZONE_ATTR_PHYS_MCAP:
52263247Sgjelinek size = sizeof (zone->zone_phys_mcap);
52273247Sgjelinek if (bufsize > size)
52283247Sgjelinek bufsize = size;
52293247Sgjelinek if (buf != NULL &&
52303247Sgjelinek copyout(&zone->zone_phys_mcap, buf, bufsize) != 0)
52313247Sgjelinek error = EFAULT;
52323247Sgjelinek break;
52333247Sgjelinek case ZONE_ATTR_SCHED_CLASS:
52343247Sgjelinek mutex_enter(&class_lock);
52353247Sgjelinek
52363247Sgjelinek if (zone->zone_defaultcid >= loaded_classes)
52373247Sgjelinek outstr = "";
52383247Sgjelinek else
52393247Sgjelinek outstr = sclass[zone->zone_defaultcid].cl_name;
52403247Sgjelinek size = strlen(outstr) + 1;
52413247Sgjelinek if (bufsize > size)
52423247Sgjelinek bufsize = size;
52433247Sgjelinek if (buf != NULL) {
52443247Sgjelinek err = copyoutstr(outstr, buf, bufsize, NULL);
52453247Sgjelinek if (err != 0 && err != ENAMETOOLONG)
52463247Sgjelinek error = EFAULT;
52473247Sgjelinek }
52483247Sgjelinek
52493247Sgjelinek mutex_exit(&class_lock);
52503247Sgjelinek break;
52518662SJordan.Vaughan@Sun.com case ZONE_ATTR_HOSTID:
52528662SJordan.Vaughan@Sun.com if (zone->zone_hostid != HW_INVALID_HOSTID &&
52538662SJordan.Vaughan@Sun.com bufsize == sizeof (zone->zone_hostid)) {
52548662SJordan.Vaughan@Sun.com size = sizeof (zone->zone_hostid);
52558662SJordan.Vaughan@Sun.com if (buf != NULL && copyout(&zone->zone_hostid, buf,
52568662SJordan.Vaughan@Sun.com bufsize) != 0)
52578662SJordan.Vaughan@Sun.com error = EFAULT;
52588662SJordan.Vaughan@Sun.com } else {
52598662SJordan.Vaughan@Sun.com error = EINVAL;
52608662SJordan.Vaughan@Sun.com }
52618662SJordan.Vaughan@Sun.com break;
526212633Sjohn.levon@sun.com case ZONE_ATTR_FS_ALLOWED:
526312633Sjohn.levon@sun.com if (zone->zone_fs_allowed == NULL)
526412633Sjohn.levon@sun.com outstr = "";
526512633Sjohn.levon@sun.com else
526612633Sjohn.levon@sun.com outstr = zone->zone_fs_allowed;
526712633Sjohn.levon@sun.com size = strlen(outstr) + 1;
526812633Sjohn.levon@sun.com if (bufsize > size)
526912633Sjohn.levon@sun.com bufsize = size;
527012633Sjohn.levon@sun.com if (buf != NULL) {
527112633Sjohn.levon@sun.com err = copyoutstr(outstr, buf, bufsize, NULL);
527212633Sjohn.levon@sun.com if (err != 0 && err != ENAMETOOLONG)
527312633Sjohn.levon@sun.com error = EFAULT;
527412633Sjohn.levon@sun.com }
527512633Sjohn.levon@sun.com break;
527612748SSowmini.Varadhan@oracle.COM case ZONE_ATTR_NETWORK:
527712748SSowmini.Varadhan@oracle.COM zbuf = kmem_alloc(bufsize, KM_SLEEP);
527812748SSowmini.Varadhan@oracle.COM if (copyin(buf, zbuf, bufsize) != 0) {
527912748SSowmini.Varadhan@oracle.COM error = EFAULT;
528012748SSowmini.Varadhan@oracle.COM } else {
528112748SSowmini.Varadhan@oracle.COM error = zone_get_network(zoneid, zbuf);
528212748SSowmini.Varadhan@oracle.COM if (error == 0 && copyout(zbuf, buf, bufsize) != 0)
528312748SSowmini.Varadhan@oracle.COM error = EFAULT;
528412748SSowmini.Varadhan@oracle.COM }
528512748SSowmini.Varadhan@oracle.COM kmem_free(zbuf, bufsize);
528612748SSowmini.Varadhan@oracle.COM break;
52870Sstevel@tonic-gate default:
52882712Snn35248 if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
52892712Snn35248 size = bufsize;
52902712Snn35248 error = ZBROP(zone)->b_getattr(zone, attr, buf, &size);
52912712Snn35248 } else {
52922712Snn35248 error = EINVAL;
52932712Snn35248 }
52940Sstevel@tonic-gate }
52950Sstevel@tonic-gate zone_rele(zone);
52960Sstevel@tonic-gate
52970Sstevel@tonic-gate if (error)
52980Sstevel@tonic-gate return (set_errno(error));
52990Sstevel@tonic-gate return ((ssize_t)size);
53000Sstevel@tonic-gate }
53010Sstevel@tonic-gate
53020Sstevel@tonic-gate /*
53032267Sdp * Systemcall entry point for zone_setattr(2).
53042267Sdp */
53052267Sdp /*ARGSUSED*/
53062267Sdp static int
zone_setattr(zoneid_t zoneid,int attr,void * buf,size_t bufsize)53072267Sdp zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
53082267Sdp {
53092267Sdp zone_t *zone;
53102267Sdp zone_status_t zone_status;
531112820Sdp@eng.sun.com int err = -1;
531212748SSowmini.Varadhan@oracle.COM zone_net_data_t *zbuf;
53132267Sdp
53142267Sdp if (secpolicy_zone_config(CRED()) != 0)
53152267Sdp return (set_errno(EPERM));
53162267Sdp
53172267Sdp /*
53183247Sgjelinek * Only the ZONE_ATTR_PHYS_MCAP attribute can be set on the
53193247Sgjelinek * global zone.
53202267Sdp */
53213247Sgjelinek if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP) {
53222267Sdp return (set_errno(EINVAL));
53232267Sdp }
53242267Sdp
53252267Sdp mutex_enter(&zonehash_lock);
53262267Sdp if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
53272267Sdp mutex_exit(&zonehash_lock);
53282267Sdp return (set_errno(EINVAL));
53292267Sdp }
53302267Sdp zone_hold(zone);
53312267Sdp mutex_exit(&zonehash_lock);
53322267Sdp
53333247Sgjelinek /*
53343247Sgjelinek * At present most attributes can only be set on non-running,
53353247Sgjelinek * non-global zones.
53363247Sgjelinek */
53372267Sdp zone_status = zone_status_get(zone);
533812820Sdp@eng.sun.com if (attr != ZONE_ATTR_PHYS_MCAP && zone_status > ZONE_IS_READY) {
533912820Sdp@eng.sun.com err = EINVAL;
53402267Sdp goto done;
534112820Sdp@eng.sun.com }
53422267Sdp
53432267Sdp switch (attr) {
53442267Sdp case ZONE_ATTR_INITNAME:
53452267Sdp err = zone_set_initname(zone, (const char *)buf);
53462267Sdp break;
53472267Sdp case ZONE_ATTR_BOOTARGS:
53482267Sdp err = zone_set_bootargs(zone, (const char *)buf);
53492267Sdp break;
53502712Snn35248 case ZONE_ATTR_BRAND:
53514141Sedp err = zone_set_brand(zone, (const char *)buf);
53522712Snn35248 break;
535312633Sjohn.levon@sun.com case ZONE_ATTR_FS_ALLOWED:
535412633Sjohn.levon@sun.com err = zone_set_fs_allowed(zone, (const char *)buf);
535512633Sjohn.levon@sun.com break;
53563247Sgjelinek case ZONE_ATTR_PHYS_MCAP:
53573247Sgjelinek err = zone_set_phys_mcap(zone, (const uint64_t *)buf);
53583247Sgjelinek break;
53593247Sgjelinek case ZONE_ATTR_SCHED_CLASS:
53603247Sgjelinek err = zone_set_sched_class(zone, (const char *)buf);
53613247Sgjelinek break;
53628662SJordan.Vaughan@Sun.com case ZONE_ATTR_HOSTID:
53638662SJordan.Vaughan@Sun.com if (bufsize == sizeof (zone->zone_hostid)) {
53648662SJordan.Vaughan@Sun.com if (copyin(buf, &zone->zone_hostid, bufsize) == 0)
53658662SJordan.Vaughan@Sun.com err = 0;
53668662SJordan.Vaughan@Sun.com else
53678662SJordan.Vaughan@Sun.com err = EFAULT;
53688662SJordan.Vaughan@Sun.com } else {
53698662SJordan.Vaughan@Sun.com err = EINVAL;
53708662SJordan.Vaughan@Sun.com }
53718662SJordan.Vaughan@Sun.com break;
537212748SSowmini.Varadhan@oracle.COM case ZONE_ATTR_NETWORK:
537312748SSowmini.Varadhan@oracle.COM if (bufsize > (PIPE_BUF + sizeof (zone_net_data_t))) {
537412748SSowmini.Varadhan@oracle.COM err = EINVAL;
537512820Sdp@eng.sun.com break;
537612748SSowmini.Varadhan@oracle.COM }
537712748SSowmini.Varadhan@oracle.COM zbuf = kmem_alloc(bufsize, KM_SLEEP);
537812748SSowmini.Varadhan@oracle.COM if (copyin(buf, zbuf, bufsize) != 0) {
537912820Sdp@eng.sun.com kmem_free(zbuf, bufsize);
538012748SSowmini.Varadhan@oracle.COM err = EFAULT;
538112820Sdp@eng.sun.com break;
538212748SSowmini.Varadhan@oracle.COM }
538312748SSowmini.Varadhan@oracle.COM err = zone_set_network(zoneid, zbuf);
538412748SSowmini.Varadhan@oracle.COM kmem_free(zbuf, bufsize);
538512748SSowmini.Varadhan@oracle.COM break;
53862267Sdp default:
53872712Snn35248 if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone))
53882712Snn35248 err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize);
53892712Snn35248 else
53902712Snn35248 err = EINVAL;
53912267Sdp }
53922267Sdp
53932267Sdp done:
53942267Sdp zone_rele(zone);
539512820Sdp@eng.sun.com ASSERT(err != -1);
53962267Sdp return (err != 0 ? set_errno(err) : 0);
53972267Sdp }
53982267Sdp
53992267Sdp /*
54000Sstevel@tonic-gate * Return zero if the process has at least one vnode mapped in to its
54010Sstevel@tonic-gate * address space which shouldn't be allowed to change zones.
54023247Sgjelinek *
54033247Sgjelinek * Also return zero if the process has any shared mappings which reserve
54043247Sgjelinek * swap. This is because the counting for zone.max-swap does not allow swap
54055331Samw * reservation to be shared between zones. zone swap reservation is counted
54063247Sgjelinek * on zone->zone_max_swap.
54070Sstevel@tonic-gate */
54080Sstevel@tonic-gate static int
as_can_change_zones(void)54090Sstevel@tonic-gate as_can_change_zones(void)
54100Sstevel@tonic-gate {
54110Sstevel@tonic-gate proc_t *pp = curproc;
54120Sstevel@tonic-gate struct seg *seg;
54130Sstevel@tonic-gate struct as *as = pp->p_as;
54140Sstevel@tonic-gate vnode_t *vp;
54150Sstevel@tonic-gate int allow = 1;
54160Sstevel@tonic-gate
54170Sstevel@tonic-gate ASSERT(pp->p_as != &kas);
54183247Sgjelinek AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
54190Sstevel@tonic-gate for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
54203247Sgjelinek
54213247Sgjelinek /*
54223247Sgjelinek * Cannot enter zone with shared anon memory which
54233247Sgjelinek * reserves swap. See comment above.
54243247Sgjelinek */
54253247Sgjelinek if (seg_can_change_zones(seg) == B_FALSE) {
54263247Sgjelinek allow = 0;
54273247Sgjelinek break;
54283247Sgjelinek }
54290Sstevel@tonic-gate /*
54300Sstevel@tonic-gate * if we can't get a backing vnode for this segment then skip
54310Sstevel@tonic-gate * it.
54320Sstevel@tonic-gate */
54330Sstevel@tonic-gate vp = NULL;
54340Sstevel@tonic-gate if (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)
54350Sstevel@tonic-gate continue;
54360Sstevel@tonic-gate if (!vn_can_change_zones(vp)) { /* bail on first match */
54370Sstevel@tonic-gate allow = 0;
54380Sstevel@tonic-gate break;
54390Sstevel@tonic-gate }
54400Sstevel@tonic-gate }
54413247Sgjelinek AS_LOCK_EXIT(as, &as->a_lock);
54420Sstevel@tonic-gate return (allow);
54430Sstevel@tonic-gate }
54440Sstevel@tonic-gate
54450Sstevel@tonic-gate /*
54463247Sgjelinek * Count swap reserved by curproc's address space
54473247Sgjelinek */
54483247Sgjelinek static size_t
as_swresv(void)54493247Sgjelinek as_swresv(void)
54503247Sgjelinek {
54513247Sgjelinek proc_t *pp = curproc;
54523247Sgjelinek struct seg *seg;
54533247Sgjelinek struct as *as = pp->p_as;
54543247Sgjelinek size_t swap = 0;
54553247Sgjelinek
54563247Sgjelinek ASSERT(pp->p_as != &kas);
54573247Sgjelinek ASSERT(AS_WRITE_HELD(as, &as->a_lock));
54583247Sgjelinek for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg))
54593247Sgjelinek swap += seg_swresv(seg);
54603247Sgjelinek
54613247Sgjelinek return (swap);
54623247Sgjelinek }
54633247Sgjelinek
54643247Sgjelinek /*
54650Sstevel@tonic-gate * Systemcall entry point for zone_enter().
54660Sstevel@tonic-gate *
54670Sstevel@tonic-gate * The current process is injected into said zone. In the process
54680Sstevel@tonic-gate * it will change its project membership, privileges, rootdir/cwd,
54690Sstevel@tonic-gate * zone-wide rctls, and pool association to match those of the zone.
54700Sstevel@tonic-gate *
54710Sstevel@tonic-gate * The first zone_enter() called while the zone is in the ZONE_IS_READY
54720Sstevel@tonic-gate * state will transition it to ZONE_IS_RUNNING. Processes may only
54730Sstevel@tonic-gate * enter a zone that is "ready" or "running".
54740Sstevel@tonic-gate */
54750Sstevel@tonic-gate static int
zone_enter(zoneid_t zoneid)54760Sstevel@tonic-gate zone_enter(zoneid_t zoneid)
54770Sstevel@tonic-gate {
54780Sstevel@tonic-gate zone_t *zone;
54790Sstevel@tonic-gate vnode_t *vp;
54800Sstevel@tonic-gate proc_t *pp = curproc;
54810Sstevel@tonic-gate contract_t *ct;
54820Sstevel@tonic-gate cont_process_t *ctp;
54830Sstevel@tonic-gate task_t *tk, *oldtk;
54840Sstevel@tonic-gate kproject_t *zone_proj0;
54850Sstevel@tonic-gate cred_t *cr, *newcr;
54860Sstevel@tonic-gate pool_t *oldpool, *newpool;
54870Sstevel@tonic-gate sess_t *sp;
54880Sstevel@tonic-gate uid_t uid;
54890Sstevel@tonic-gate zone_status_t status;
54900Sstevel@tonic-gate int err = 0;
54910Sstevel@tonic-gate rctl_entity_p_t e;
54923247Sgjelinek size_t swap;
54933792Sakolb kthread_id_t t;
54940Sstevel@tonic-gate
54950Sstevel@tonic-gate if (secpolicy_zone_config(CRED()) != 0)
54960Sstevel@tonic-gate return (set_errno(EPERM));
54970Sstevel@tonic-gate if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
54980Sstevel@tonic-gate return (set_errno(EINVAL));
54990Sstevel@tonic-gate
55000Sstevel@tonic-gate /*
55010Sstevel@tonic-gate * Stop all lwps so we don't need to hold a lock to look at
55020Sstevel@tonic-gate * curproc->p_zone. This needs to happen before we grab any
55030Sstevel@tonic-gate * locks to avoid deadlock (another lwp in the process could
55040Sstevel@tonic-gate * be waiting for the held lock).
55050Sstevel@tonic-gate */
55060Sstevel@tonic-gate if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK))
55070Sstevel@tonic-gate return (set_errno(EINTR));
55080Sstevel@tonic-gate
55090Sstevel@tonic-gate /*
55100Sstevel@tonic-gate * Make sure we're not changing zones with files open or mapped in
55110Sstevel@tonic-gate * to our address space which shouldn't be changing zones.
55120Sstevel@tonic-gate */
55130Sstevel@tonic-gate if (!files_can_change_zones()) {
55140Sstevel@tonic-gate err = EBADF;
55150Sstevel@tonic-gate goto out;
55160Sstevel@tonic-gate }
55170Sstevel@tonic-gate if (!as_can_change_zones()) {
55180Sstevel@tonic-gate err = EFAULT;
55190Sstevel@tonic-gate goto out;
55200Sstevel@tonic-gate }
55210Sstevel@tonic-gate
55220Sstevel@tonic-gate mutex_enter(&zonehash_lock);
55230Sstevel@tonic-gate if (pp->p_zone != global_zone) {
55240Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55250Sstevel@tonic-gate err = EINVAL;
55260Sstevel@tonic-gate goto out;
55270Sstevel@tonic-gate }
55280Sstevel@tonic-gate
55290Sstevel@tonic-gate zone = zone_find_all_by_id(zoneid);
55300Sstevel@tonic-gate if (zone == NULL) {
55310Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55320Sstevel@tonic-gate err = EINVAL;
55330Sstevel@tonic-gate goto out;
55340Sstevel@tonic-gate }
55350Sstevel@tonic-gate
55360Sstevel@tonic-gate /*
55370Sstevel@tonic-gate * To prevent processes in a zone from holding contracts on
55380Sstevel@tonic-gate * extrazonal resources, and to avoid process contract
55390Sstevel@tonic-gate * memberships which span zones, contract holders and processes
55400Sstevel@tonic-gate * which aren't the sole members of their encapsulating process
55410Sstevel@tonic-gate * contracts are not allowed to zone_enter.
55420Sstevel@tonic-gate */
55430Sstevel@tonic-gate ctp = pp->p_ct_process;
55440Sstevel@tonic-gate ct = &ctp->conp_contract;
55450Sstevel@tonic-gate mutex_enter(&ct->ct_lock);
55460Sstevel@tonic-gate mutex_enter(&pp->p_lock);
55470Sstevel@tonic-gate if ((avl_numnodes(&pp->p_ct_held) != 0) || (ctp->conp_nmembers != 1)) {
55480Sstevel@tonic-gate mutex_exit(&pp->p_lock);
55490Sstevel@tonic-gate mutex_exit(&ct->ct_lock);
55500Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55510Sstevel@tonic-gate err = EINVAL;
55520Sstevel@tonic-gate goto out;
55530Sstevel@tonic-gate }
55540Sstevel@tonic-gate
55550Sstevel@tonic-gate /*
55560Sstevel@tonic-gate * Moreover, we don't allow processes whose encapsulating
55570Sstevel@tonic-gate * process contracts have inherited extrazonal contracts.
55580Sstevel@tonic-gate * While it would be easier to eliminate all process contracts
55590Sstevel@tonic-gate * with inherited contracts, we need to be able to give a
55600Sstevel@tonic-gate * restarted init (or other zone-penetrating process) its
55610Sstevel@tonic-gate * predecessor's contracts.
55620Sstevel@tonic-gate */
55630Sstevel@tonic-gate if (ctp->conp_ninherited != 0) {
55640Sstevel@tonic-gate contract_t *next;
55650Sstevel@tonic-gate for (next = list_head(&ctp->conp_inherited); next;
55660Sstevel@tonic-gate next = list_next(&ctp->conp_inherited, next)) {
55670Sstevel@tonic-gate if (contract_getzuniqid(next) != zone->zone_uniqid) {
55680Sstevel@tonic-gate mutex_exit(&pp->p_lock);
55690Sstevel@tonic-gate mutex_exit(&ct->ct_lock);
55700Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55710Sstevel@tonic-gate err = EINVAL;
55720Sstevel@tonic-gate goto out;
55730Sstevel@tonic-gate }
55740Sstevel@tonic-gate }
55750Sstevel@tonic-gate }
55766073Sacruz
55770Sstevel@tonic-gate mutex_exit(&pp->p_lock);
55780Sstevel@tonic-gate mutex_exit(&ct->ct_lock);
55790Sstevel@tonic-gate
55800Sstevel@tonic-gate status = zone_status_get(zone);
55810Sstevel@tonic-gate if (status < ZONE_IS_READY || status >= ZONE_IS_SHUTTING_DOWN) {
55820Sstevel@tonic-gate /*
55830Sstevel@tonic-gate * Can't join
55840Sstevel@tonic-gate */
55850Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55860Sstevel@tonic-gate err = EINVAL;
55870Sstevel@tonic-gate goto out;
55880Sstevel@tonic-gate }
55890Sstevel@tonic-gate
55900Sstevel@tonic-gate /*
55910Sstevel@tonic-gate * Make sure new priv set is within the permitted set for caller
55920Sstevel@tonic-gate */
55930Sstevel@tonic-gate if (!priv_issubset(zone->zone_privset, &CR_OPPRIV(CRED()))) {
55940Sstevel@tonic-gate mutex_exit(&zonehash_lock);
55950Sstevel@tonic-gate err = EPERM;
55960Sstevel@tonic-gate goto out;
55970Sstevel@tonic-gate }
55980Sstevel@tonic-gate /*
55990Sstevel@tonic-gate * We want to momentarily drop zonehash_lock while we optimistically
56000Sstevel@tonic-gate * bind curproc to the pool it should be running in. This is safe
56010Sstevel@tonic-gate * since the zone can't disappear (we have a hold on it).
56020Sstevel@tonic-gate */
56030Sstevel@tonic-gate zone_hold(zone);
56040Sstevel@tonic-gate mutex_exit(&zonehash_lock);
56050Sstevel@tonic-gate
56060Sstevel@tonic-gate /*
56070Sstevel@tonic-gate * Grab pool_lock to keep the pools configuration from changing
56080Sstevel@tonic-gate * and to stop ourselves from getting rebound to another pool
56090Sstevel@tonic-gate * until we join the zone.
56100Sstevel@tonic-gate */
56110Sstevel@tonic-gate if (pool_lock_intr() != 0) {
56120Sstevel@tonic-gate zone_rele(zone);
56130Sstevel@tonic-gate err = EINTR;
56140Sstevel@tonic-gate goto out;
56150Sstevel@tonic-gate }
56160Sstevel@tonic-gate ASSERT(secpolicy_pool(CRED()) == 0);
56170Sstevel@tonic-gate /*
56180Sstevel@tonic-gate * Bind ourselves to the pool currently associated with the zone.
56190Sstevel@tonic-gate */
56200Sstevel@tonic-gate oldpool = curproc->p_pool;
56210Sstevel@tonic-gate newpool = zone_pool_get(zone);
56220Sstevel@tonic-gate if (pool_state == POOL_ENABLED && newpool != oldpool &&
56230Sstevel@tonic-gate (err = pool_do_bind(newpool, P_PID, P_MYID,
56240Sstevel@tonic-gate POOL_BIND_ALL)) != 0) {
56250Sstevel@tonic-gate pool_unlock();
56260Sstevel@tonic-gate zone_rele(zone);
56270Sstevel@tonic-gate goto out;
56280Sstevel@tonic-gate }
56290Sstevel@tonic-gate
56300Sstevel@tonic-gate /*
56310Sstevel@tonic-gate * Grab cpu_lock now; we'll need it later when we call
56320Sstevel@tonic-gate * task_join().
56330Sstevel@tonic-gate */
56340Sstevel@tonic-gate mutex_enter(&cpu_lock);
56350Sstevel@tonic-gate mutex_enter(&zonehash_lock);
56360Sstevel@tonic-gate /*
56370Sstevel@tonic-gate * Make sure the zone hasn't moved on since we dropped zonehash_lock.
56380Sstevel@tonic-gate */
56390Sstevel@tonic-gate if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
56400Sstevel@tonic-gate /*
56410Sstevel@tonic-gate * Can't join anymore.
56420Sstevel@tonic-gate */
56430Sstevel@tonic-gate mutex_exit(&zonehash_lock);
56440Sstevel@tonic-gate mutex_exit(&cpu_lock);
56450Sstevel@tonic-gate if (pool_state == POOL_ENABLED &&
56460Sstevel@tonic-gate newpool != oldpool)
56470Sstevel@tonic-gate (void) pool_do_bind(oldpool, P_PID, P_MYID,
56480Sstevel@tonic-gate POOL_BIND_ALL);
56490Sstevel@tonic-gate pool_unlock();
56500Sstevel@tonic-gate zone_rele(zone);
56510Sstevel@tonic-gate err = EINVAL;
56520Sstevel@tonic-gate goto out;
56530Sstevel@tonic-gate }
56540Sstevel@tonic-gate
56553247Sgjelinek /*
56563247Sgjelinek * a_lock must be held while transfering locked memory and swap
56573247Sgjelinek * reservation from the global zone to the non global zone because
56583247Sgjelinek * asynchronous faults on the processes' address space can lock
56593247Sgjelinek * memory and reserve swap via MCL_FUTURE and MAP_NORESERVE
56603247Sgjelinek * segments respectively.
56613247Sgjelinek */
56623247Sgjelinek AS_LOCK_ENTER(pp->as, &pp->p_as->a_lock, RW_WRITER);
56633247Sgjelinek swap = as_swresv();
56640Sstevel@tonic-gate mutex_enter(&pp->p_lock);
56650Sstevel@tonic-gate zone_proj0 = zone->zone_zsched->p_task->tk_proj;
56660Sstevel@tonic-gate /* verify that we do not exceed and task or lwp limits */
56670Sstevel@tonic-gate mutex_enter(&zone->zone_nlwps_lock);
56680Sstevel@tonic-gate /* add new lwps to zone and zone's proj0 */
56690Sstevel@tonic-gate zone_proj0->kpj_nlwps += pp->p_lwpcnt;
56700Sstevel@tonic-gate zone->zone_nlwps += pp->p_lwpcnt;
56710Sstevel@tonic-gate /* add 1 task to zone's proj0 */
56720Sstevel@tonic-gate zone_proj0->kpj_ntasks += 1;
567312725SMenno.Lageman@Sun.COM
567412725SMenno.Lageman@Sun.COM zone_proj0->kpj_nprocs++;
567512725SMenno.Lageman@Sun.COM zone->zone_nprocs++;
56760Sstevel@tonic-gate mutex_exit(&zone->zone_nlwps_lock);
56770Sstevel@tonic-gate
56783247Sgjelinek mutex_enter(&zone->zone_mem_lock);
56792768Ssl108498 zone->zone_locked_mem += pp->p_locked_mem;
56802768Ssl108498 zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
56813247Sgjelinek zone->zone_max_swap += swap;
56823247Sgjelinek mutex_exit(&zone->zone_mem_lock);
56832768Ssl108498
56843916Skrishna mutex_enter(&(zone_proj0->kpj_data.kpd_crypto_lock));
56853916Skrishna zone_proj0->kpj_data.kpd_crypto_mem += pp->p_crypto_mem;
56863916Skrishna mutex_exit(&(zone_proj0->kpj_data.kpd_crypto_lock));
56873916Skrishna
568812725SMenno.Lageman@Sun.COM /* remove lwps and process from proc's old zone and old project */
56890Sstevel@tonic-gate mutex_enter(&pp->p_zone->zone_nlwps_lock);
56900Sstevel@tonic-gate pp->p_zone->zone_nlwps -= pp->p_lwpcnt;
56910Sstevel@tonic-gate pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
569212725SMenno.Lageman@Sun.COM pp->p_task->tk_proj->kpj_nprocs--;
569312725SMenno.Lageman@Sun.COM pp->p_zone->zone_nprocs--;
56940Sstevel@tonic-gate mutex_exit(&pp->p_zone->zone_nlwps_lock);
56950Sstevel@tonic-gate
56963247Sgjelinek mutex_enter(&pp->p_zone->zone_mem_lock);
56972768Ssl108498 pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
56982768Ssl108498 pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
56993247Sgjelinek pp->p_zone->zone_max_swap -= swap;
57003247Sgjelinek mutex_exit(&pp->p_zone->zone_mem_lock);
57012768Ssl108498
57023916Skrishna mutex_enter(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
57033916Skrishna pp->p_task->tk_proj->kpj_data.kpd_crypto_mem -= pp->p_crypto_mem;
57043916Skrishna mutex_exit(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
57053916Skrishna
57069121SVamsi.Krishna@Sun.COM pp->p_flag |= SZONETOP;
57079121SVamsi.Krishna@Sun.COM pp->p_zone = zone;
57082768Ssl108498 mutex_exit(&pp->p_lock);
57093247Sgjelinek AS_LOCK_EXIT(pp->p_as, &pp->p_as->a_lock);
57102768Ssl108498
57110Sstevel@tonic-gate /*
57120Sstevel@tonic-gate * Joining the zone cannot fail from now on.
57130Sstevel@tonic-gate *
57140Sstevel@tonic-gate * This means that a lot of the following code can be commonized and
57150Sstevel@tonic-gate * shared with zsched().
57160Sstevel@tonic-gate */
57170Sstevel@tonic-gate
57180Sstevel@tonic-gate /*
57196073Sacruz * If the process contract fmri was inherited, we need to
57206073Sacruz * flag this so that any contract status will not leak
57216073Sacruz * extra zone information, svc_fmri in this case
57226073Sacruz */
57236073Sacruz if (ctp->conp_svc_ctid != ct->ct_id) {
57246073Sacruz mutex_enter(&ct->ct_lock);
57256073Sacruz ctp->conp_svc_zone_enter = ct->ct_id;
57266073Sacruz mutex_exit(&ct->ct_lock);
57276073Sacruz }
57286073Sacruz
57296073Sacruz /*
57300Sstevel@tonic-gate * Reset the encapsulating process contract's zone.
57310Sstevel@tonic-gate */
57320Sstevel@tonic-gate ASSERT(ct->ct_mzuniqid == GLOBAL_ZONEUNIQID);
57330Sstevel@tonic-gate contract_setzuniqid(ct, zone->zone_uniqid);
57340Sstevel@tonic-gate
57350Sstevel@tonic-gate /*
57360Sstevel@tonic-gate * Create a new task and associate the process with the project keyed
57370Sstevel@tonic-gate * by (projid,zoneid).
57380Sstevel@tonic-gate *
57390Sstevel@tonic-gate * We might as well be in project 0; the global zone's projid doesn't
57400Sstevel@tonic-gate * make much sense in a zone anyhow.
57410Sstevel@tonic-gate *
57420Sstevel@tonic-gate * This also increments zone_ntasks, and returns with p_lock held.
57430Sstevel@tonic-gate */
57440Sstevel@tonic-gate tk = task_create(0, zone);
57450Sstevel@tonic-gate oldtk = task_join(tk, 0);
57460Sstevel@tonic-gate mutex_exit(&cpu_lock);
57470Sstevel@tonic-gate
57480Sstevel@tonic-gate /*
57490Sstevel@tonic-gate * call RCTLOP_SET functions on this proc
57500Sstevel@tonic-gate */
57510Sstevel@tonic-gate e.rcep_p.zone = zone;
57520Sstevel@tonic-gate e.rcep_t = RCENTITY_ZONE;
57530Sstevel@tonic-gate (void) rctl_set_dup(NULL, NULL, pp, &e, zone->zone_rctls, NULL,
57540Sstevel@tonic-gate RCD_CALLBACK);
57550Sstevel@tonic-gate mutex_exit(&pp->p_lock);
57560Sstevel@tonic-gate
57570Sstevel@tonic-gate /*
57580Sstevel@tonic-gate * We don't need to hold any of zsched's locks here; not only do we know
57590Sstevel@tonic-gate * the process and zone aren't going away, we know its session isn't
57600Sstevel@tonic-gate * changing either.
57610Sstevel@tonic-gate *
57620Sstevel@tonic-gate * By joining zsched's session here, we mimic the behavior in the
57630Sstevel@tonic-gate * global zone of init's sid being the pid of sched. We extend this
57640Sstevel@tonic-gate * to all zlogin-like zone_enter()'ing processes as well.
57650Sstevel@tonic-gate */
57660Sstevel@tonic-gate mutex_enter(&pidlock);
57670Sstevel@tonic-gate sp = zone->zone_zsched->p_sessp;
57682712Snn35248 sess_hold(zone->zone_zsched);
57690Sstevel@tonic-gate mutex_enter(&pp->p_lock);
57700Sstevel@tonic-gate pgexit(pp);
57712712Snn35248 sess_rele(pp->p_sessp, B_TRUE);
57720Sstevel@tonic-gate pp->p_sessp = sp;
57730Sstevel@tonic-gate pgjoin(pp, zone->zone_zsched->p_pidp);
57743247Sgjelinek
57753247Sgjelinek /*
57763792Sakolb * If any threads are scheduled to be placed on zone wait queue they
57773792Sakolb * should abandon the idea since the wait queue is changing.
57783792Sakolb * We need to be holding pidlock & p_lock to do this.
57793792Sakolb */
57803792Sakolb if ((t = pp->p_tlist) != NULL) {
57813792Sakolb do {
57823792Sakolb thread_lock(t);
57833792Sakolb /*
57843792Sakolb * Kick this thread so that he doesn't sit
57853792Sakolb * on a wrong wait queue.
57863792Sakolb */
57873792Sakolb if (ISWAITING(t))
57883792Sakolb setrun_locked(t);
57893792Sakolb
57903792Sakolb if (t->t_schedflag & TS_ANYWAITQ)
57913792Sakolb t->t_schedflag &= ~ TS_ANYWAITQ;
57923792Sakolb
57933792Sakolb thread_unlock(t);
57943792Sakolb } while ((t = t->t_forw) != pp->p_tlist);
57953792Sakolb }
57963792Sakolb
57973792Sakolb /*
57983247Sgjelinek * If there is a default scheduling class for the zone and it is not
57993247Sgjelinek * the class we are currently in, change all of the threads in the
58003247Sgjelinek * process to the new class. We need to be holding pidlock & p_lock
58013247Sgjelinek * when we call parmsset so this is a good place to do it.
58023247Sgjelinek */
58033247Sgjelinek if (zone->zone_defaultcid > 0 &&
58043247Sgjelinek zone->zone_defaultcid != curthread->t_cid) {
58053247Sgjelinek pcparms_t pcparms;
58063247Sgjelinek
58073247Sgjelinek pcparms.pc_cid = zone->zone_defaultcid;
58083247Sgjelinek pcparms.pc_clparms[0] = 0;
58093247Sgjelinek
58103247Sgjelinek /*
58113247Sgjelinek * If setting the class fails, we still want to enter the zone.
58123247Sgjelinek */
58133247Sgjelinek if ((t = pp->p_tlist) != NULL) {
58143247Sgjelinek do {
58153247Sgjelinek (void) parmsset(&pcparms, t);
58163247Sgjelinek } while ((t = t->t_forw) != pp->p_tlist);
58173247Sgjelinek }
58183247Sgjelinek }
58193247Sgjelinek
58200Sstevel@tonic-gate mutex_exit(&pp->p_lock);
58210Sstevel@tonic-gate mutex_exit(&pidlock);
58220Sstevel@tonic-gate
58230Sstevel@tonic-gate mutex_exit(&zonehash_lock);
58240Sstevel@tonic-gate /*
58250Sstevel@tonic-gate * We're firmly in the zone; let pools progress.
58260Sstevel@tonic-gate */
58270Sstevel@tonic-gate pool_unlock();
58280Sstevel@tonic-gate task_rele(oldtk);
58290Sstevel@tonic-gate /*
58300Sstevel@tonic-gate * We don't need to retain a hold on the zone since we already
58310Sstevel@tonic-gate * incremented zone_ntasks, so the zone isn't going anywhere.
58320Sstevel@tonic-gate */
58330Sstevel@tonic-gate zone_rele(zone);
58340Sstevel@tonic-gate
58350Sstevel@tonic-gate /*
58360Sstevel@tonic-gate * Chroot
58370Sstevel@tonic-gate */
58380Sstevel@tonic-gate vp = zone->zone_rootvp;
58390Sstevel@tonic-gate zone_chdir(vp, &PTOU(pp)->u_cdir, pp);
58400Sstevel@tonic-gate zone_chdir(vp, &PTOU(pp)->u_rdir, pp);
58410Sstevel@tonic-gate
58420Sstevel@tonic-gate /*
58430Sstevel@tonic-gate * Change process credentials
58440Sstevel@tonic-gate */
58450Sstevel@tonic-gate newcr = cralloc();
58460Sstevel@tonic-gate mutex_enter(&pp->p_crlock);
58470Sstevel@tonic-gate cr = pp->p_cred;
58480Sstevel@tonic-gate crcopy_to(cr, newcr);
58490Sstevel@tonic-gate crsetzone(newcr, zone);
58500Sstevel@tonic-gate pp->p_cred = newcr;
58510Sstevel@tonic-gate
58520Sstevel@tonic-gate /*
58530Sstevel@tonic-gate * Restrict all process privilege sets to zone limit
58540Sstevel@tonic-gate */
58550Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_PPRIV(newcr));
58560Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_EPRIV(newcr));
58570Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_IPRIV(newcr));
58580Sstevel@tonic-gate priv_intersect(zone->zone_privset, &CR_LPRIV(newcr));
58590Sstevel@tonic-gate mutex_exit(&pp->p_crlock);
58600Sstevel@tonic-gate crset(pp, newcr);
58610Sstevel@tonic-gate
58620Sstevel@tonic-gate /*
58630Sstevel@tonic-gate * Adjust upcount to reflect zone entry.
58640Sstevel@tonic-gate */
58650Sstevel@tonic-gate uid = crgetruid(newcr);
58660Sstevel@tonic-gate mutex_enter(&pidlock);
58670Sstevel@tonic-gate upcount_dec(uid, GLOBAL_ZONEID);
58680Sstevel@tonic-gate upcount_inc(uid, zoneid);
58690Sstevel@tonic-gate mutex_exit(&pidlock);
58700Sstevel@tonic-gate
58710Sstevel@tonic-gate /*
58720Sstevel@tonic-gate * Set up core file path and content.
58730Sstevel@tonic-gate */
58740Sstevel@tonic-gate set_core_defaults();
58750Sstevel@tonic-gate
58760Sstevel@tonic-gate out:
58770Sstevel@tonic-gate /*
58780Sstevel@tonic-gate * Let the other lwps continue.
58790Sstevel@tonic-gate */
58800Sstevel@tonic-gate mutex_enter(&pp->p_lock);
58810Sstevel@tonic-gate if (curthread != pp->p_agenttp)
58820Sstevel@tonic-gate continuelwps(pp);
58830Sstevel@tonic-gate mutex_exit(&pp->p_lock);
58840Sstevel@tonic-gate
58850Sstevel@tonic-gate return (err != 0 ? set_errno(err) : 0);
58860Sstevel@tonic-gate }
58870Sstevel@tonic-gate
58880Sstevel@tonic-gate /*
58890Sstevel@tonic-gate * Systemcall entry point for zone_list(2).
58900Sstevel@tonic-gate *
58910Sstevel@tonic-gate * Processes running in a (non-global) zone only see themselves.
58921676Sjpk * On labeled systems, they see all zones whose label they dominate.
58930Sstevel@tonic-gate */
58940Sstevel@tonic-gate static int
zone_list(zoneid_t * zoneidlist,uint_t * numzones)58950Sstevel@tonic-gate zone_list(zoneid_t *zoneidlist, uint_t *numzones)
58960Sstevel@tonic-gate {
58970Sstevel@tonic-gate zoneid_t *zoneids;
58981769Scarlsonj zone_t *zone, *myzone;
58990Sstevel@tonic-gate uint_t user_nzones, real_nzones;
59001676Sjpk uint_t domi_nzones;
59011676Sjpk int error;
59020Sstevel@tonic-gate
59030Sstevel@tonic-gate if (copyin(numzones, &user_nzones, sizeof (uint_t)) != 0)
59040Sstevel@tonic-gate return (set_errno(EFAULT));
59050Sstevel@tonic-gate
59061769Scarlsonj myzone = curproc->p_zone;
59071769Scarlsonj if (myzone != global_zone) {
59081676Sjpk bslabel_t *mybslab;
59091676Sjpk
59101676Sjpk if (!is_system_labeled()) {
59111676Sjpk /* just return current zone */
59121676Sjpk real_nzones = domi_nzones = 1;
59131676Sjpk zoneids = kmem_alloc(sizeof (zoneid_t), KM_SLEEP);
59141769Scarlsonj zoneids[0] = myzone->zone_id;
59151676Sjpk } else {
59161676Sjpk /* return all zones that are dominated */
59171676Sjpk mutex_enter(&zonehash_lock);
59181676Sjpk real_nzones = zonecount;
59191676Sjpk domi_nzones = 0;
59201676Sjpk if (real_nzones > 0) {
59211676Sjpk zoneids = kmem_alloc(real_nzones *
59221676Sjpk sizeof (zoneid_t), KM_SLEEP);
59231769Scarlsonj mybslab = label2bslabel(myzone->zone_slabel);
59241676Sjpk for (zone = list_head(&zone_active);
59251676Sjpk zone != NULL;
59261676Sjpk zone = list_next(&zone_active, zone)) {
59271676Sjpk if (zone->zone_id == GLOBAL_ZONEID)
59281676Sjpk continue;
59291769Scarlsonj if (zone != myzone &&
59301769Scarlsonj (zone->zone_flags & ZF_IS_SCRATCH))
59311769Scarlsonj continue;
59321769Scarlsonj /*
59331769Scarlsonj * Note that a label always dominates
59341769Scarlsonj * itself, so myzone is always included
59351769Scarlsonj * in the list.
59361769Scarlsonj */
59371676Sjpk if (bldominates(mybslab,
59381676Sjpk label2bslabel(zone->zone_slabel))) {
59391676Sjpk zoneids[domi_nzones++] =
59401676Sjpk zone->zone_id;
59411676Sjpk }
59421676Sjpk }
59431676Sjpk }
59441676Sjpk mutex_exit(&zonehash_lock);
59451676Sjpk }
59460Sstevel@tonic-gate } else {
59470Sstevel@tonic-gate mutex_enter(&zonehash_lock);
59480Sstevel@tonic-gate real_nzones = zonecount;
59491676Sjpk domi_nzones = 0;
59501676Sjpk if (real_nzones > 0) {
59510Sstevel@tonic-gate zoneids = kmem_alloc(real_nzones * sizeof (zoneid_t),
59520Sstevel@tonic-gate KM_SLEEP);
59530Sstevel@tonic-gate for (zone = list_head(&zone_active); zone != NULL;
59540Sstevel@tonic-gate zone = list_next(&zone_active, zone))
59551676Sjpk zoneids[domi_nzones++] = zone->zone_id;
59561676Sjpk ASSERT(domi_nzones == real_nzones);
59570Sstevel@tonic-gate }
59580Sstevel@tonic-gate mutex_exit(&zonehash_lock);
59590Sstevel@tonic-gate }
59600Sstevel@tonic-gate
59611676Sjpk /*
59621676Sjpk * If user has allocated space for fewer entries than we found, then
59631676Sjpk * return only up to his limit. Either way, tell him exactly how many
59641676Sjpk * we found.
59651676Sjpk */
59661676Sjpk if (domi_nzones < user_nzones)
59671676Sjpk user_nzones = domi_nzones;
59681676Sjpk error = 0;
59691676Sjpk if (copyout(&domi_nzones, numzones, sizeof (uint_t)) != 0) {
59700Sstevel@tonic-gate error = EFAULT;
59711676Sjpk } else if (zoneidlist != NULL && user_nzones != 0) {
59720Sstevel@tonic-gate if (copyout(zoneids, zoneidlist,
59730Sstevel@tonic-gate user_nzones * sizeof (zoneid_t)) != 0)
59740Sstevel@tonic-gate error = EFAULT;
59750Sstevel@tonic-gate }
59760Sstevel@tonic-gate
59771676Sjpk if (real_nzones > 0)
59780Sstevel@tonic-gate kmem_free(zoneids, real_nzones * sizeof (zoneid_t));
59790Sstevel@tonic-gate
59801676Sjpk if (error != 0)
59810Sstevel@tonic-gate return (set_errno(error));
59820Sstevel@tonic-gate else
59830Sstevel@tonic-gate return (0);
59840Sstevel@tonic-gate }
59850Sstevel@tonic-gate
59860Sstevel@tonic-gate /*
59870Sstevel@tonic-gate * Systemcall entry point for zone_lookup(2).
59880Sstevel@tonic-gate *
59891676Sjpk * Non-global zones are only able to see themselves and (on labeled systems)
59901676Sjpk * the zones they dominate.
59910Sstevel@tonic-gate */
59920Sstevel@tonic-gate static zoneid_t
zone_lookup(const char * zone_name)59930Sstevel@tonic-gate zone_lookup(const char *zone_name)
59940Sstevel@tonic-gate {
59950Sstevel@tonic-gate char *kname;
59960Sstevel@tonic-gate zone_t *zone;
59970Sstevel@tonic-gate zoneid_t zoneid;
59980Sstevel@tonic-gate int err;
59990Sstevel@tonic-gate
60000Sstevel@tonic-gate if (zone_name == NULL) {
60010Sstevel@tonic-gate /* return caller's zone id */
60020Sstevel@tonic-gate return (getzoneid());
60030Sstevel@tonic-gate }
60040Sstevel@tonic-gate
60050Sstevel@tonic-gate kname = kmem_zalloc(ZONENAME_MAX, KM_SLEEP);
60060Sstevel@tonic-gate if ((err = copyinstr(zone_name, kname, ZONENAME_MAX, NULL)) != 0) {
60070Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
60080Sstevel@tonic-gate return (set_errno(err));
60090Sstevel@tonic-gate }
60100Sstevel@tonic-gate
60110Sstevel@tonic-gate mutex_enter(&zonehash_lock);
60120Sstevel@tonic-gate zone = zone_find_all_by_name(kname);
60130Sstevel@tonic-gate kmem_free(kname, ZONENAME_MAX);
60141676Sjpk /*
60151676Sjpk * In a non-global zone, can only lookup global and own name.
60161676Sjpk * In Trusted Extensions zone label dominance rules apply.
60171676Sjpk */
60181676Sjpk if (zone == NULL ||
60191676Sjpk zone_status_get(zone) < ZONE_IS_READY ||
60201676Sjpk !zone_list_access(zone)) {
60210Sstevel@tonic-gate mutex_exit(&zonehash_lock);
60220Sstevel@tonic-gate return (set_errno(EINVAL));
60231676Sjpk } else {
60241676Sjpk zoneid = zone->zone_id;
60251676Sjpk mutex_exit(&zonehash_lock);
60261676Sjpk return (zoneid);
60270Sstevel@tonic-gate }
60280Sstevel@tonic-gate }
60290Sstevel@tonic-gate
6030813Sdp static int
zone_version(int * version_arg)6031813Sdp zone_version(int *version_arg)
6032813Sdp {
6033813Sdp int version = ZONE_SYSCALL_API_VERSION;
6034813Sdp
6035813Sdp if (copyout(&version, version_arg, sizeof (int)) != 0)
6036813Sdp return (set_errno(EFAULT));
6037813Sdp return (0);
6038813Sdp }
6039813Sdp
60400Sstevel@tonic-gate /* ARGSUSED */
60410Sstevel@tonic-gate long
zone(int cmd,void * arg1,void * arg2,void * arg3,void * arg4)6042789Sahrens zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
60430Sstevel@tonic-gate {
60440Sstevel@tonic-gate zone_def zs;
604510616SSebastien.Roy@Sun.COM int err;
60460Sstevel@tonic-gate
60470Sstevel@tonic-gate switch (cmd) {
60480Sstevel@tonic-gate case ZONE_CREATE:
60490Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
60500Sstevel@tonic-gate if (copyin(arg1, &zs, sizeof (zone_def))) {
60510Sstevel@tonic-gate return (set_errno(EFAULT));
60520Sstevel@tonic-gate }
60530Sstevel@tonic-gate } else {
60540Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
60550Sstevel@tonic-gate zone_def32 zs32;
60560Sstevel@tonic-gate
60570Sstevel@tonic-gate if (copyin(arg1, &zs32, sizeof (zone_def32))) {
60580Sstevel@tonic-gate return (set_errno(EFAULT));
60590Sstevel@tonic-gate }
60600Sstevel@tonic-gate zs.zone_name =
60610Sstevel@tonic-gate (const char *)(unsigned long)zs32.zone_name;
60620Sstevel@tonic-gate zs.zone_root =
60630Sstevel@tonic-gate (const char *)(unsigned long)zs32.zone_root;
60640Sstevel@tonic-gate zs.zone_privs =
60650Sstevel@tonic-gate (const struct priv_set *)
60660Sstevel@tonic-gate (unsigned long)zs32.zone_privs;
60671409Sdp zs.zone_privssz = zs32.zone_privssz;
60680Sstevel@tonic-gate zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf;
60690Sstevel@tonic-gate zs.rctlbufsz = zs32.rctlbufsz;
6070789Sahrens zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf;
6071789Sahrens zs.zfsbufsz = zs32.zfsbufsz;
60720Sstevel@tonic-gate zs.extended_error =
60730Sstevel@tonic-gate (int *)(unsigned long)zs32.extended_error;
60741676Sjpk zs.match = zs32.match;
60751676Sjpk zs.doi = zs32.doi;
60761676Sjpk zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
60773448Sdh155122 zs.flags = zs32.flags;
60780Sstevel@tonic-gate #else
60790Sstevel@tonic-gate panic("get_udatamodel() returned bogus result\n");
60800Sstevel@tonic-gate #endif
60810Sstevel@tonic-gate }
60820Sstevel@tonic-gate
60830Sstevel@tonic-gate return (zone_create(zs.zone_name, zs.zone_root,
6084813Sdp zs.zone_privs, zs.zone_privssz,
6085813Sdp (caddr_t)zs.rctlbuf, zs.rctlbufsz,
6086813Sdp (caddr_t)zs.zfsbuf, zs.zfsbufsz,
60871676Sjpk zs.extended_error, zs.match, zs.doi,
60883448Sdh155122 zs.label, zs.flags));
60890Sstevel@tonic-gate case ZONE_BOOT:
60902267Sdp return (zone_boot((zoneid_t)(uintptr_t)arg1));
60910Sstevel@tonic-gate case ZONE_DESTROY:
60920Sstevel@tonic-gate return (zone_destroy((zoneid_t)(uintptr_t)arg1));
60930Sstevel@tonic-gate case ZONE_GETATTR:
60940Sstevel@tonic-gate return (zone_getattr((zoneid_t)(uintptr_t)arg1,
60950Sstevel@tonic-gate (int)(uintptr_t)arg2, arg3, (size_t)arg4));
60962267Sdp case ZONE_SETATTR:
60972267Sdp return (zone_setattr((zoneid_t)(uintptr_t)arg1,
60982267Sdp (int)(uintptr_t)arg2, arg3, (size_t)arg4));
60990Sstevel@tonic-gate case ZONE_ENTER:
61000Sstevel@tonic-gate return (zone_enter((zoneid_t)(uintptr_t)arg1));
61010Sstevel@tonic-gate case ZONE_LIST:
61020Sstevel@tonic-gate return (zone_list((zoneid_t *)arg1, (uint_t *)arg2));
61030Sstevel@tonic-gate case ZONE_SHUTDOWN:
61040Sstevel@tonic-gate return (zone_shutdown((zoneid_t)(uintptr_t)arg1));
61050Sstevel@tonic-gate case ZONE_LOOKUP:
61060Sstevel@tonic-gate return (zone_lookup((const char *)arg1));
6107813Sdp case ZONE_VERSION:
6108813Sdp return (zone_version((int *)arg1));
61093448Sdh155122 case ZONE_ADD_DATALINK:
61103448Sdh155122 return (zone_add_datalink((zoneid_t)(uintptr_t)arg1,
611110616SSebastien.Roy@Sun.COM (datalink_id_t)(uintptr_t)arg2));
61123448Sdh155122 case ZONE_DEL_DATALINK:
61133448Sdh155122 return (zone_remove_datalink((zoneid_t)(uintptr_t)arg1,
611410616SSebastien.Roy@Sun.COM (datalink_id_t)(uintptr_t)arg2));
611510616SSebastien.Roy@Sun.COM case ZONE_CHECK_DATALINK: {
611610616SSebastien.Roy@Sun.COM zoneid_t zoneid;
611710616SSebastien.Roy@Sun.COM boolean_t need_copyout;
611810616SSebastien.Roy@Sun.COM
611910616SSebastien.Roy@Sun.COM if (copyin(arg1, &zoneid, sizeof (zoneid)) != 0)
612010616SSebastien.Roy@Sun.COM return (EFAULT);
612110616SSebastien.Roy@Sun.COM need_copyout = (zoneid == ALL_ZONES);
612210616SSebastien.Roy@Sun.COM err = zone_check_datalink(&zoneid,
612310616SSebastien.Roy@Sun.COM (datalink_id_t)(uintptr_t)arg2);
612410616SSebastien.Roy@Sun.COM if (err == 0 && need_copyout) {
612510616SSebastien.Roy@Sun.COM if (copyout(&zoneid, arg1, sizeof (zoneid)) != 0)
612610616SSebastien.Roy@Sun.COM err = EFAULT;
612710616SSebastien.Roy@Sun.COM }
612810616SSebastien.Roy@Sun.COM return (err == 0 ? 0 : set_errno(err));
612910616SSebastien.Roy@Sun.COM }
61303448Sdh155122 case ZONE_LIST_DATALINK:
61313448Sdh155122 return (zone_list_datalink((zoneid_t)(uintptr_t)arg1,
613210616SSebastien.Roy@Sun.COM (int *)arg2, (datalink_id_t *)(uintptr_t)arg3));
61330Sstevel@tonic-gate default:
61340Sstevel@tonic-gate return (set_errno(EINVAL));
61350Sstevel@tonic-gate }
61360Sstevel@tonic-gate }
61370Sstevel@tonic-gate
61380Sstevel@tonic-gate struct zarg {
61390Sstevel@tonic-gate zone_t *zone;
61400Sstevel@tonic-gate zone_cmd_arg_t arg;
61410Sstevel@tonic-gate };
61420Sstevel@tonic-gate
61430Sstevel@tonic-gate static int
zone_lookup_door(const char * zone_name,door_handle_t * doorp)61440Sstevel@tonic-gate zone_lookup_door(const char *zone_name, door_handle_t *doorp)
61450Sstevel@tonic-gate {
61460Sstevel@tonic-gate char *buf;
61470Sstevel@tonic-gate size_t buflen;
61480Sstevel@tonic-gate int error;
61490Sstevel@tonic-gate
61500Sstevel@tonic-gate buflen = sizeof (ZONE_DOOR_PATH) + strlen(zone_name);
61510Sstevel@tonic-gate buf = kmem_alloc(buflen, KM_SLEEP);
61520Sstevel@tonic-gate (void) snprintf(buf, buflen, ZONE_DOOR_PATH, zone_name);
61530Sstevel@tonic-gate error = door_ki_open(buf, doorp);
61540Sstevel@tonic-gate kmem_free(buf, buflen);
61550Sstevel@tonic-gate return (error);
61560Sstevel@tonic-gate }
61570Sstevel@tonic-gate
61580Sstevel@tonic-gate static void
zone_release_door(door_handle_t * doorp)61590Sstevel@tonic-gate zone_release_door(door_handle_t *doorp)
61600Sstevel@tonic-gate {
61610Sstevel@tonic-gate door_ki_rele(*doorp);
61620Sstevel@tonic-gate *doorp = NULL;
61630Sstevel@tonic-gate }
61640Sstevel@tonic-gate
61650Sstevel@tonic-gate static void
zone_ki_call_zoneadmd(struct zarg * zargp)61660Sstevel@tonic-gate zone_ki_call_zoneadmd(struct zarg *zargp)
61670Sstevel@tonic-gate {
61680Sstevel@tonic-gate door_handle_t door = NULL;
61690Sstevel@tonic-gate door_arg_t darg, save_arg;
61700Sstevel@tonic-gate char *zone_name;
61710Sstevel@tonic-gate size_t zone_namelen;
61720Sstevel@tonic-gate zoneid_t zoneid;
61730Sstevel@tonic-gate zone_t *zone;
61740Sstevel@tonic-gate zone_cmd_arg_t arg;
61750Sstevel@tonic-gate uint64_t uniqid;
61760Sstevel@tonic-gate size_t size;
61770Sstevel@tonic-gate int error;
61780Sstevel@tonic-gate int retry;
61790Sstevel@tonic-gate
61800Sstevel@tonic-gate zone = zargp->zone;
61810Sstevel@tonic-gate arg = zargp->arg;
61820Sstevel@tonic-gate kmem_free(zargp, sizeof (*zargp));
61830Sstevel@tonic-gate
61840Sstevel@tonic-gate zone_namelen = strlen(zone->zone_name) + 1;
61850Sstevel@tonic-gate zone_name = kmem_alloc(zone_namelen, KM_SLEEP);
61860Sstevel@tonic-gate bcopy(zone->zone_name, zone_name, zone_namelen);
61870Sstevel@tonic-gate zoneid = zone->zone_id;
61880Sstevel@tonic-gate uniqid = zone->zone_uniqid;
61890Sstevel@tonic-gate /*
61900Sstevel@tonic-gate * zoneadmd may be down, but at least we can empty out the zone.
61910Sstevel@tonic-gate * We can ignore the return value of zone_empty() since we're called
61920Sstevel@tonic-gate * from a kernel thread and know we won't be delivered any signals.
61930Sstevel@tonic-gate */
61940Sstevel@tonic-gate ASSERT(curproc == &p0);
61950Sstevel@tonic-gate (void) zone_empty(zone);
61960Sstevel@tonic-gate ASSERT(zone_status_get(zone) >= ZONE_IS_EMPTY);
61970Sstevel@tonic-gate zone_rele(zone);
61980Sstevel@tonic-gate
61990Sstevel@tonic-gate size = sizeof (arg);
62000Sstevel@tonic-gate darg.rbuf = (char *)&arg;
62010Sstevel@tonic-gate darg.data_ptr = (char *)&arg;
62020Sstevel@tonic-gate darg.rsize = size;
62030Sstevel@tonic-gate darg.data_size = size;
62040Sstevel@tonic-gate darg.desc_ptr = NULL;
62050Sstevel@tonic-gate darg.desc_num = 0;
62060Sstevel@tonic-gate
62070Sstevel@tonic-gate save_arg = darg;
62080Sstevel@tonic-gate /*
62090Sstevel@tonic-gate * Since we're not holding a reference to the zone, any number of
62100Sstevel@tonic-gate * things can go wrong, including the zone disappearing before we get a
62110Sstevel@tonic-gate * chance to talk to zoneadmd.
62120Sstevel@tonic-gate */
62130Sstevel@tonic-gate for (retry = 0; /* forever */; retry++) {
62140Sstevel@tonic-gate if (door == NULL &&
62150Sstevel@tonic-gate (error = zone_lookup_door(zone_name, &door)) != 0) {
62160Sstevel@tonic-gate goto next;
62170Sstevel@tonic-gate }
62180Sstevel@tonic-gate ASSERT(door != NULL);
62190Sstevel@tonic-gate
62206997Sjwadams if ((error = door_ki_upcall_limited(door, &darg, NULL,
62216997Sjwadams SIZE_MAX, 0)) == 0) {
62220Sstevel@tonic-gate break;
62230Sstevel@tonic-gate }
62240Sstevel@tonic-gate switch (error) {
62250Sstevel@tonic-gate case EINTR:
62260Sstevel@tonic-gate /* FALLTHROUGH */
62270Sstevel@tonic-gate case EAGAIN: /* process may be forking */
62280Sstevel@tonic-gate /*
62290Sstevel@tonic-gate * Back off for a bit
62300Sstevel@tonic-gate */
62310Sstevel@tonic-gate break;
62320Sstevel@tonic-gate case EBADF:
62330Sstevel@tonic-gate zone_release_door(&door);
62340Sstevel@tonic-gate if (zone_lookup_door(zone_name, &door) != 0) {
62350Sstevel@tonic-gate /*
62360Sstevel@tonic-gate * zoneadmd may be dead, but it may come back to
62370Sstevel@tonic-gate * life later.
62380Sstevel@tonic-gate */
62390Sstevel@tonic-gate break;
62400Sstevel@tonic-gate }
62410Sstevel@tonic-gate break;
62420Sstevel@tonic-gate default:
62430Sstevel@tonic-gate cmn_err(CE_WARN,
62440Sstevel@tonic-gate "zone_ki_call_zoneadmd: door_ki_upcall error %d\n",
62450Sstevel@tonic-gate error);
62460Sstevel@tonic-gate goto out;
62470Sstevel@tonic-gate }
62480Sstevel@tonic-gate next:
62490Sstevel@tonic-gate /*
62500Sstevel@tonic-gate * If this isn't the same zone_t that we originally had in mind,
62510Sstevel@tonic-gate * then this is the same as if two kadmin requests come in at
62520Sstevel@tonic-gate * the same time: the first one wins. This means we lose, so we
62530Sstevel@tonic-gate * bail.
62540Sstevel@tonic-gate */
62550Sstevel@tonic-gate if ((zone = zone_find_by_id(zoneid)) == NULL) {
62560Sstevel@tonic-gate /*
62570Sstevel@tonic-gate * Problem is solved.
62580Sstevel@tonic-gate */
62590Sstevel@tonic-gate break;
62600Sstevel@tonic-gate }
62610Sstevel@tonic-gate if (zone->zone_uniqid != uniqid) {
62620Sstevel@tonic-gate /*
62630Sstevel@tonic-gate * zoneid recycled
62640Sstevel@tonic-gate */
62650Sstevel@tonic-gate zone_rele(zone);
62660Sstevel@tonic-gate break;
62670Sstevel@tonic-gate }
62680Sstevel@tonic-gate /*
62690Sstevel@tonic-gate * We could zone_status_timedwait(), but there doesn't seem to
62700Sstevel@tonic-gate * be much point in doing that (plus, it would mean that
62710Sstevel@tonic-gate * zone_free() isn't called until this thread exits).
62720Sstevel@tonic-gate */
62730Sstevel@tonic-gate zone_rele(zone);
62740Sstevel@tonic-gate delay(hz);
62750Sstevel@tonic-gate darg = save_arg;
62760Sstevel@tonic-gate }
62770Sstevel@tonic-gate out:
62780Sstevel@tonic-gate if (door != NULL) {
62790Sstevel@tonic-gate zone_release_door(&door);
62800Sstevel@tonic-gate }
62810Sstevel@tonic-gate kmem_free(zone_name, zone_namelen);
62820Sstevel@tonic-gate thread_exit();
62830Sstevel@tonic-gate }
62840Sstevel@tonic-gate
62850Sstevel@tonic-gate /*
62862267Sdp * Entry point for uadmin() to tell the zone to go away or reboot. Analog to
62872267Sdp * kadmin(). The caller is a process in the zone.
62880Sstevel@tonic-gate *
62890Sstevel@tonic-gate * In order to shutdown the zone, we will hand off control to zoneadmd
62900Sstevel@tonic-gate * (running in the global zone) via a door. We do a half-hearted job at
62910Sstevel@tonic-gate * killing all processes in the zone, create a kernel thread to contact
62920Sstevel@tonic-gate * zoneadmd, and make note of the "uniqid" of the zone. The uniqid is
62930Sstevel@tonic-gate * a form of generation number used to let zoneadmd (as well as
62940Sstevel@tonic-gate * zone_destroy()) know exactly which zone they're re talking about.
62950Sstevel@tonic-gate */
62960Sstevel@tonic-gate int
zone_kadmin(int cmd,int fcn,const char * mdep,cred_t * credp)62972267Sdp zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp)
62980Sstevel@tonic-gate {
62990Sstevel@tonic-gate struct zarg *zargp;
63000Sstevel@tonic-gate zone_cmd_t zcmd;
63010Sstevel@tonic-gate zone_t *zone;
63020Sstevel@tonic-gate
63030Sstevel@tonic-gate zone = curproc->p_zone;
63040Sstevel@tonic-gate ASSERT(getzoneid() != GLOBAL_ZONEID);
63050Sstevel@tonic-gate
63060Sstevel@tonic-gate switch (cmd) {
63070Sstevel@tonic-gate case A_SHUTDOWN:
63080Sstevel@tonic-gate switch (fcn) {
63090Sstevel@tonic-gate case AD_HALT:
63100Sstevel@tonic-gate case AD_POWEROFF:
63110Sstevel@tonic-gate zcmd = Z_HALT;
63120Sstevel@tonic-gate break;
63130Sstevel@tonic-gate case AD_BOOT:
63140Sstevel@tonic-gate zcmd = Z_REBOOT;
63150Sstevel@tonic-gate break;
63160Sstevel@tonic-gate case AD_IBOOT:
63170Sstevel@tonic-gate case AD_SBOOT:
63180Sstevel@tonic-gate case AD_SIBOOT:
63190Sstevel@tonic-gate case AD_NOSYNC:
63200Sstevel@tonic-gate return (ENOTSUP);
63210Sstevel@tonic-gate default:
63220Sstevel@tonic-gate return (EINVAL);
63230Sstevel@tonic-gate }
63240Sstevel@tonic-gate break;
63250Sstevel@tonic-gate case A_REBOOT:
63260Sstevel@tonic-gate zcmd = Z_REBOOT;
63270Sstevel@tonic-gate break;
63280Sstevel@tonic-gate case A_FTRACE:
63290Sstevel@tonic-gate case A_REMOUNT:
63300Sstevel@tonic-gate case A_FREEZE:
63310Sstevel@tonic-gate case A_DUMP:
63329160SSherry.Moore@Sun.COM case A_CONFIG:
63330Sstevel@tonic-gate return (ENOTSUP);
63340Sstevel@tonic-gate default:
63350Sstevel@tonic-gate ASSERT(cmd != A_SWAPCTL); /* handled by uadmin() */
63360Sstevel@tonic-gate return (EINVAL);
63370Sstevel@tonic-gate }
63380Sstevel@tonic-gate
63390Sstevel@tonic-gate if (secpolicy_zone_admin(credp, B_FALSE))
63400Sstevel@tonic-gate return (EPERM);
63410Sstevel@tonic-gate mutex_enter(&zone_status_lock);
63422267Sdp
63430Sstevel@tonic-gate /*
63440Sstevel@tonic-gate * zone_status can't be ZONE_IS_EMPTY or higher since curproc
63450Sstevel@tonic-gate * is in the zone.
63460Sstevel@tonic-gate */
63470Sstevel@tonic-gate ASSERT(zone_status_get(zone) < ZONE_IS_EMPTY);
63480Sstevel@tonic-gate if (zone_status_get(zone) > ZONE_IS_RUNNING) {
63490Sstevel@tonic-gate /*
63500Sstevel@tonic-gate * This zone is already on its way down.
63510Sstevel@tonic-gate */
63520Sstevel@tonic-gate mutex_exit(&zone_status_lock);
63530Sstevel@tonic-gate return (0);
63540Sstevel@tonic-gate }
63550Sstevel@tonic-gate /*
63560Sstevel@tonic-gate * Prevent future zone_enter()s
63570Sstevel@tonic-gate */
63580Sstevel@tonic-gate zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
63590Sstevel@tonic-gate mutex_exit(&zone_status_lock);
63600Sstevel@tonic-gate
63610Sstevel@tonic-gate /*
63620Sstevel@tonic-gate * Kill everyone now and call zoneadmd later.
63630Sstevel@tonic-gate * zone_ki_call_zoneadmd() will do a more thorough job of this
63640Sstevel@tonic-gate * later.
63650Sstevel@tonic-gate */
63660Sstevel@tonic-gate killall(zone->zone_id);
63670Sstevel@tonic-gate /*
63680Sstevel@tonic-gate * Now, create the thread to contact zoneadmd and do the rest of the
63690Sstevel@tonic-gate * work. This thread can't be created in our zone otherwise
63700Sstevel@tonic-gate * zone_destroy() would deadlock.
63710Sstevel@tonic-gate */
63722267Sdp zargp = kmem_zalloc(sizeof (*zargp), KM_SLEEP);
63730Sstevel@tonic-gate zargp->arg.cmd = zcmd;
63740Sstevel@tonic-gate zargp->arg.uniqid = zone->zone_uniqid;
63752267Sdp zargp->zone = zone;
63760Sstevel@tonic-gate (void) strcpy(zargp->arg.locale, "C");
63772267Sdp /* mdep was already copied in for us by uadmin */
63782267Sdp if (mdep != NULL)
63792267Sdp (void) strlcpy(zargp->arg.bootbuf, mdep,
63802267Sdp sizeof (zargp->arg.bootbuf));
63812267Sdp zone_hold(zone);
63820Sstevel@tonic-gate
63830Sstevel@tonic-gate (void) thread_create(NULL, 0, zone_ki_call_zoneadmd, zargp, 0, &p0,
63840Sstevel@tonic-gate TS_RUN, minclsyspri);
63850Sstevel@tonic-gate exit(CLD_EXITED, 0);
63860Sstevel@tonic-gate
63870Sstevel@tonic-gate return (EINVAL);
63880Sstevel@tonic-gate }
63890Sstevel@tonic-gate
63900Sstevel@tonic-gate /*
63910Sstevel@tonic-gate * Entry point so kadmin(A_SHUTDOWN, ...) can set the global zone's
63920Sstevel@tonic-gate * status to ZONE_IS_SHUTTING_DOWN.
63938364SJordan.Vaughan@Sun.com *
63948364SJordan.Vaughan@Sun.com * This function also shuts down all running zones to ensure that they won't
63958364SJordan.Vaughan@Sun.com * fork new processes.
63960Sstevel@tonic-gate */
63970Sstevel@tonic-gate void
zone_shutdown_global(void)63980Sstevel@tonic-gate zone_shutdown_global(void)
63990Sstevel@tonic-gate {
64008364SJordan.Vaughan@Sun.com zone_t *current_zonep;
64018364SJordan.Vaughan@Sun.com
64028364SJordan.Vaughan@Sun.com ASSERT(INGLOBALZONE(curproc));
64038364SJordan.Vaughan@Sun.com mutex_enter(&zonehash_lock);
64040Sstevel@tonic-gate mutex_enter(&zone_status_lock);
64058364SJordan.Vaughan@Sun.com
64068364SJordan.Vaughan@Sun.com /* Modify the global zone's status first. */
64070Sstevel@tonic-gate ASSERT(zone_status_get(global_zone) == ZONE_IS_RUNNING);
64080Sstevel@tonic-gate zone_status_set(global_zone, ZONE_IS_SHUTTING_DOWN);
64098364SJordan.Vaughan@Sun.com
64108364SJordan.Vaughan@Sun.com /*
64118364SJordan.Vaughan@Sun.com * Now change the states of all running zones to ZONE_IS_SHUTTING_DOWN.
64128364SJordan.Vaughan@Sun.com * We don't mark all zones with ZONE_IS_SHUTTING_DOWN because doing so
64138364SJordan.Vaughan@Sun.com * could cause assertions to fail (e.g., assertions about a zone's
64148364SJordan.Vaughan@Sun.com * state during initialization, readying, or booting) or produce races.
64158364SJordan.Vaughan@Sun.com * We'll let threads continue to initialize and ready new zones: they'll
64168364SJordan.Vaughan@Sun.com * fail to boot the new zones when they see that the global zone is
64178364SJordan.Vaughan@Sun.com * shutting down.
64188364SJordan.Vaughan@Sun.com */
64198364SJordan.Vaughan@Sun.com for (current_zonep = list_head(&zone_active); current_zonep != NULL;
64208364SJordan.Vaughan@Sun.com current_zonep = list_next(&zone_active, current_zonep)) {
64218364SJordan.Vaughan@Sun.com if (zone_status_get(current_zonep) == ZONE_IS_RUNNING)
64228364SJordan.Vaughan@Sun.com zone_status_set(current_zonep, ZONE_IS_SHUTTING_DOWN);
64238364SJordan.Vaughan@Sun.com }
64240Sstevel@tonic-gate mutex_exit(&zone_status_lock);
64258364SJordan.Vaughan@Sun.com mutex_exit(&zonehash_lock);
64260Sstevel@tonic-gate }
6427789Sahrens
6428789Sahrens /*
6429789Sahrens * Returns true if the named dataset is visible in the current zone.
6430789Sahrens * The 'write' parameter is set to 1 if the dataset is also writable.
6431789Sahrens */
6432789Sahrens int
zone_dataset_visible(const char * dataset,int * write)6433789Sahrens zone_dataset_visible(const char *dataset, int *write)
6434789Sahrens {
643511850SSanjeev.Bagewadi@Sun.COM static int zfstype = -1;
6436789Sahrens zone_dataset_t *zd;
6437789Sahrens size_t len;
6438789Sahrens zone_t *zone = curproc->p_zone;
643911850SSanjeev.Bagewadi@Sun.COM const char *name = NULL;
644011850SSanjeev.Bagewadi@Sun.COM vfs_t *vfsp = NULL;
6441789Sahrens
6442789Sahrens if (dataset[0] == '\0')
6443789Sahrens return (0);
6444789Sahrens
6445789Sahrens /*
6446789Sahrens * Walk the list once, looking for datasets which match exactly, or
6447789Sahrens * specify a dataset underneath an exported dataset. If found, return
6448789Sahrens * true and note that it is writable.
6449789Sahrens */
6450789Sahrens for (zd = list_head(&zone->zone_datasets); zd != NULL;
6451789Sahrens zd = list_next(&zone->zone_datasets, zd)) {
6452789Sahrens
6453789Sahrens len = strlen(zd->zd_dataset);
6454789Sahrens if (strlen(dataset) >= len &&
6455789Sahrens bcmp(dataset, zd->zd_dataset, len) == 0 &&
6456816Smaybee (dataset[len] == '\0' || dataset[len] == '/' ||
6457816Smaybee dataset[len] == '@')) {
6458789Sahrens if (write)
6459789Sahrens *write = 1;
6460789Sahrens return (1);
6461789Sahrens }
6462789Sahrens }
6463789Sahrens
6464789Sahrens /*
6465789Sahrens * Walk the list a second time, searching for datasets which are parents
6466789Sahrens * of exported datasets. These should be visible, but read-only.
6467789Sahrens *
6468789Sahrens * Note that we also have to support forms such as 'pool/dataset/', with
6469789Sahrens * a trailing slash.
6470789Sahrens */
6471789Sahrens for (zd = list_head(&zone->zone_datasets); zd != NULL;
6472789Sahrens zd = list_next(&zone->zone_datasets, zd)) {
6473789Sahrens
6474789Sahrens len = strlen(dataset);
6475789Sahrens if (dataset[len - 1] == '/')
6476789Sahrens len--; /* Ignore trailing slash */
6477789Sahrens if (len < strlen(zd->zd_dataset) &&
6478789Sahrens bcmp(dataset, zd->zd_dataset, len) == 0 &&
6479789Sahrens zd->zd_dataset[len] == '/') {
6480789Sahrens if (write)
6481789Sahrens *write = 0;
6482789Sahrens return (1);
6483789Sahrens }
6484789Sahrens }
6485789Sahrens
648611850SSanjeev.Bagewadi@Sun.COM /*
648711850SSanjeev.Bagewadi@Sun.COM * We reach here if the given dataset is not found in the zone_dataset
648811850SSanjeev.Bagewadi@Sun.COM * list. Check if this dataset was added as a filesystem (ie. "add fs")
648911850SSanjeev.Bagewadi@Sun.COM * instead of delegation. For this we search for the dataset in the
649011850SSanjeev.Bagewadi@Sun.COM * zone_vfslist of this zone. If found, return true and note that it is
649111850SSanjeev.Bagewadi@Sun.COM * not writable.
649211850SSanjeev.Bagewadi@Sun.COM */
649311850SSanjeev.Bagewadi@Sun.COM
649411850SSanjeev.Bagewadi@Sun.COM /*
649511850SSanjeev.Bagewadi@Sun.COM * Initialize zfstype if it is not initialized yet.
649611850SSanjeev.Bagewadi@Sun.COM */
649711850SSanjeev.Bagewadi@Sun.COM if (zfstype == -1) {
649811850SSanjeev.Bagewadi@Sun.COM struct vfssw *vswp = vfs_getvfssw("zfs");
649911850SSanjeev.Bagewadi@Sun.COM zfstype = vswp - vfssw;
650011850SSanjeev.Bagewadi@Sun.COM vfs_unrefvfssw(vswp);
650111850SSanjeev.Bagewadi@Sun.COM }
650211850SSanjeev.Bagewadi@Sun.COM
650311850SSanjeev.Bagewadi@Sun.COM vfs_list_read_lock();
650411850SSanjeev.Bagewadi@Sun.COM vfsp = zone->zone_vfslist;
650511850SSanjeev.Bagewadi@Sun.COM do {
650611850SSanjeev.Bagewadi@Sun.COM ASSERT(vfsp);
650711850SSanjeev.Bagewadi@Sun.COM if (vfsp->vfs_fstype == zfstype) {
650811850SSanjeev.Bagewadi@Sun.COM name = refstr_value(vfsp->vfs_resource);
650911850SSanjeev.Bagewadi@Sun.COM
651011850SSanjeev.Bagewadi@Sun.COM /*
651111850SSanjeev.Bagewadi@Sun.COM * Check if we have an exact match.
651211850SSanjeev.Bagewadi@Sun.COM */
651311850SSanjeev.Bagewadi@Sun.COM if (strcmp(dataset, name) == 0) {
651411850SSanjeev.Bagewadi@Sun.COM vfs_list_unlock();
651511850SSanjeev.Bagewadi@Sun.COM if (write)
651611850SSanjeev.Bagewadi@Sun.COM *write = 0;
651711850SSanjeev.Bagewadi@Sun.COM return (1);
651811850SSanjeev.Bagewadi@Sun.COM }
651911850SSanjeev.Bagewadi@Sun.COM /*
652011850SSanjeev.Bagewadi@Sun.COM * We need to check if we are looking for parents of
652111850SSanjeev.Bagewadi@Sun.COM * a dataset. These should be visible, but read-only.
652211850SSanjeev.Bagewadi@Sun.COM */
652311850SSanjeev.Bagewadi@Sun.COM len = strlen(dataset);
652411850SSanjeev.Bagewadi@Sun.COM if (dataset[len - 1] == '/')
652511850SSanjeev.Bagewadi@Sun.COM len--;
652611850SSanjeev.Bagewadi@Sun.COM
652711850SSanjeev.Bagewadi@Sun.COM if (len < strlen(name) &&
652811850SSanjeev.Bagewadi@Sun.COM bcmp(dataset, name, len) == 0 && name[len] == '/') {
652911850SSanjeev.Bagewadi@Sun.COM vfs_list_unlock();
653011850SSanjeev.Bagewadi@Sun.COM if (write)
653111850SSanjeev.Bagewadi@Sun.COM *write = 0;
653211850SSanjeev.Bagewadi@Sun.COM return (1);
653311850SSanjeev.Bagewadi@Sun.COM }
653411850SSanjeev.Bagewadi@Sun.COM }
653511850SSanjeev.Bagewadi@Sun.COM vfsp = vfsp->vfs_zone_next;
653611850SSanjeev.Bagewadi@Sun.COM } while (vfsp != zone->zone_vfslist);
653711850SSanjeev.Bagewadi@Sun.COM
653811850SSanjeev.Bagewadi@Sun.COM vfs_list_unlock();
6539789Sahrens return (0);
6540789Sahrens }
65411676Sjpk
65421676Sjpk /*
65431676Sjpk * zone_find_by_any_path() -
65441676Sjpk *
65451676Sjpk * kernel-private routine similar to zone_find_by_path(), but which
65461676Sjpk * effectively compares against zone paths rather than zonerootpath
65471676Sjpk * (i.e., the last component of zonerootpaths, which should be "root/",
65481676Sjpk * are not compared.) This is done in order to accurately identify all
65491676Sjpk * paths, whether zone-visible or not, including those which are parallel
65501676Sjpk * to /root/, such as /dev/, /home/, etc...
65511676Sjpk *
65521676Sjpk * If the specified path does not fall under any zone path then global
65531676Sjpk * zone is returned.
65541676Sjpk *
65551676Sjpk * The treat_abs parameter indicates whether the path should be treated as
65561676Sjpk * an absolute path although it does not begin with "/". (This supports
65571676Sjpk * nfs mount syntax such as host:any/path.)
65581676Sjpk *
65591676Sjpk * The caller is responsible for zone_rele of the returned zone.
65601676Sjpk */
65611676Sjpk zone_t *
zone_find_by_any_path(const char * path,boolean_t treat_abs)65621676Sjpk zone_find_by_any_path(const char *path, boolean_t treat_abs)
65631676Sjpk {
65641676Sjpk zone_t *zone;
65651676Sjpk int path_offset = 0;
65661676Sjpk
65671676Sjpk if (path == NULL) {
65681676Sjpk zone_hold(global_zone);
65691676Sjpk return (global_zone);
65701676Sjpk }
65711676Sjpk
65721676Sjpk if (*path != '/') {
65731676Sjpk ASSERT(treat_abs);
65741676Sjpk path_offset = 1;
65751676Sjpk }
65761676Sjpk
65771676Sjpk mutex_enter(&zonehash_lock);
65781676Sjpk for (zone = list_head(&zone_active); zone != NULL;
65791676Sjpk zone = list_next(&zone_active, zone)) {
65801676Sjpk char *c;
65811676Sjpk size_t pathlen;
65821876Smp46848 char *rootpath_start;
65831676Sjpk
65841676Sjpk if (zone == global_zone) /* skip global zone */
65851676Sjpk continue;
65861676Sjpk
65871676Sjpk /* scan backwards to find start of last component */
65881676Sjpk c = zone->zone_rootpath + zone->zone_rootpathlen - 2;
65891676Sjpk do {
65901676Sjpk c--;
65911676Sjpk } while (*c != '/');
65921676Sjpk
65931876Smp46848 pathlen = c - zone->zone_rootpath + 1 - path_offset;
65941876Smp46848 rootpath_start = (zone->zone_rootpath + path_offset);
65951876Smp46848 if (strncmp(path, rootpath_start, pathlen) == 0)
65961676Sjpk break;
65971676Sjpk }
65981676Sjpk if (zone == NULL)
65991676Sjpk zone = global_zone;
66001676Sjpk zone_hold(zone);
66011676Sjpk mutex_exit(&zonehash_lock);
66021676Sjpk return (zone);
66031676Sjpk }
66043448Sdh155122
66053448Sdh155122 /*
660610616SSebastien.Roy@Sun.COM * Finds a zone_dl_t with the given linkid in the given zone. Returns the
660710616SSebastien.Roy@Sun.COM * zone_dl_t pointer if found, and NULL otherwise.
66083448Sdh155122 */
660910616SSebastien.Roy@Sun.COM static zone_dl_t *
zone_find_dl(zone_t * zone,datalink_id_t linkid)661010616SSebastien.Roy@Sun.COM zone_find_dl(zone_t *zone, datalink_id_t linkid)
661110616SSebastien.Roy@Sun.COM {
661210616SSebastien.Roy@Sun.COM zone_dl_t *zdl;
661310616SSebastien.Roy@Sun.COM
661410616SSebastien.Roy@Sun.COM ASSERT(mutex_owned(&zone->zone_lock));
661510616SSebastien.Roy@Sun.COM for (zdl = list_head(&zone->zone_dl_list); zdl != NULL;
661610616SSebastien.Roy@Sun.COM zdl = list_next(&zone->zone_dl_list, zdl)) {
661710616SSebastien.Roy@Sun.COM if (zdl->zdl_id == linkid)
661810616SSebastien.Roy@Sun.COM break;
661910616SSebastien.Roy@Sun.COM }
662010616SSebastien.Roy@Sun.COM return (zdl);
662110616SSebastien.Roy@Sun.COM }
662210616SSebastien.Roy@Sun.COM
66233448Sdh155122 static boolean_t
zone_dl_exists(zone_t * zone,datalink_id_t linkid)662410616SSebastien.Roy@Sun.COM zone_dl_exists(zone_t *zone, datalink_id_t linkid)
66253448Sdh155122 {
662610616SSebastien.Roy@Sun.COM boolean_t exists;
66273448Sdh155122
66283448Sdh155122 mutex_enter(&zone->zone_lock);
662910616SSebastien.Roy@Sun.COM exists = (zone_find_dl(zone, linkid) != NULL);
66303448Sdh155122 mutex_exit(&zone->zone_lock);
663110616SSebastien.Roy@Sun.COM return (exists);
66323448Sdh155122 }
66333448Sdh155122
66343448Sdh155122 /*
663510616SSebastien.Roy@Sun.COM * Add an data link name for the zone.
66363448Sdh155122 */
66373448Sdh155122 static int
zone_add_datalink(zoneid_t zoneid,datalink_id_t linkid)663810616SSebastien.Roy@Sun.COM zone_add_datalink(zoneid_t zoneid, datalink_id_t linkid)
66393448Sdh155122 {
664010616SSebastien.Roy@Sun.COM zone_dl_t *zdl;
66413448Sdh155122 zone_t *zone;
66423448Sdh155122 zone_t *thiszone;
664310616SSebastien.Roy@Sun.COM
664410616SSebastien.Roy@Sun.COM if ((thiszone = zone_find_by_id(zoneid)) == NULL)
66453448Sdh155122 return (set_errno(ENXIO));
664610616SSebastien.Roy@Sun.COM
664710616SSebastien.Roy@Sun.COM /* Verify that the datalink ID doesn't already belong to a zone. */
66483448Sdh155122 mutex_enter(&zonehash_lock);
66493448Sdh155122 for (zone = list_head(&zone_active); zone != NULL;
66503448Sdh155122 zone = list_next(&zone_active, zone)) {
665110616SSebastien.Roy@Sun.COM if (zone_dl_exists(zone, linkid)) {
66523448Sdh155122 mutex_exit(&zonehash_lock);
66533448Sdh155122 zone_rele(thiszone);
665410616SSebastien.Roy@Sun.COM return (set_errno((zone == thiszone) ? EEXIST : EPERM));
66553448Sdh155122 }
66563448Sdh155122 }
665710616SSebastien.Roy@Sun.COM
665810616SSebastien.Roy@Sun.COM zdl = kmem_zalloc(sizeof (*zdl), KM_SLEEP);
665910616SSebastien.Roy@Sun.COM zdl->zdl_id = linkid;
666012748SSowmini.Varadhan@oracle.COM zdl->zdl_net = NULL;
66613448Sdh155122 mutex_enter(&thiszone->zone_lock);
666210616SSebastien.Roy@Sun.COM list_insert_head(&thiszone->zone_dl_list, zdl);
66633448Sdh155122 mutex_exit(&thiszone->zone_lock);
66643448Sdh155122 mutex_exit(&zonehash_lock);
66653448Sdh155122 zone_rele(thiszone);
66663448Sdh155122 return (0);
66673448Sdh155122 }
66683448Sdh155122
66693448Sdh155122 static int
zone_remove_datalink(zoneid_t zoneid,datalink_id_t linkid)667010616SSebastien.Roy@Sun.COM zone_remove_datalink(zoneid_t zoneid, datalink_id_t linkid)
66713448Sdh155122 {
667210616SSebastien.Roy@Sun.COM zone_dl_t *zdl;
66733448Sdh155122 zone_t *zone;
667410616SSebastien.Roy@Sun.COM int err = 0;
667510616SSebastien.Roy@Sun.COM
667610616SSebastien.Roy@Sun.COM if ((zone = zone_find_by_id(zoneid)) == NULL)
66773448Sdh155122 return (set_errno(EINVAL));
66783448Sdh155122
66793448Sdh155122 mutex_enter(&zone->zone_lock);
668010616SSebastien.Roy@Sun.COM if ((zdl = zone_find_dl(zone, linkid)) == NULL) {
668110616SSebastien.Roy@Sun.COM err = ENXIO;
668210616SSebastien.Roy@Sun.COM } else {
668310616SSebastien.Roy@Sun.COM list_remove(&zone->zone_dl_list, zdl);
668412748SSowmini.Varadhan@oracle.COM if (zdl->zdl_net != NULL)
668512748SSowmini.Varadhan@oracle.COM nvlist_free(zdl->zdl_net);
668610616SSebastien.Roy@Sun.COM kmem_free(zdl, sizeof (zone_dl_t));
66873448Sdh155122 }
66883448Sdh155122 mutex_exit(&zone->zone_lock);
66893448Sdh155122 zone_rele(zone);
669010616SSebastien.Roy@Sun.COM return (err == 0 ? 0 : set_errno(err));
66913448Sdh155122 }
66923448Sdh155122
66933448Sdh155122 /*
669410616SSebastien.Roy@Sun.COM * Using the zoneidp as ALL_ZONES, we can lookup which zone has been assigned
669510616SSebastien.Roy@Sun.COM * the linkid. Otherwise we just check if the specified zoneidp has been
669610616SSebastien.Roy@Sun.COM * assigned the supplied linkid.
66973448Sdh155122 */
669810616SSebastien.Roy@Sun.COM int
zone_check_datalink(zoneid_t * zoneidp,datalink_id_t linkid)669910616SSebastien.Roy@Sun.COM zone_check_datalink(zoneid_t *zoneidp, datalink_id_t linkid)
67003448Sdh155122 {
67013448Sdh155122 zone_t *zone;
670210616SSebastien.Roy@Sun.COM int err = ENXIO;
670310616SSebastien.Roy@Sun.COM
670410616SSebastien.Roy@Sun.COM if (*zoneidp != ALL_ZONES) {
670510616SSebastien.Roy@Sun.COM if ((zone = zone_find_by_id(*zoneidp)) != NULL) {
670610616SSebastien.Roy@Sun.COM if (zone_dl_exists(zone, linkid))
670710616SSebastien.Roy@Sun.COM err = 0;
670810616SSebastien.Roy@Sun.COM zone_rele(zone);
670910616SSebastien.Roy@Sun.COM }
671010616SSebastien.Roy@Sun.COM return (err);
671110616SSebastien.Roy@Sun.COM }
671210616SSebastien.Roy@Sun.COM
67133448Sdh155122 mutex_enter(&zonehash_lock);
67143448Sdh155122 for (zone = list_head(&zone_active); zone != NULL;
67153448Sdh155122 zone = list_next(&zone_active, zone)) {
671610616SSebastien.Roy@Sun.COM if (zone_dl_exists(zone, linkid)) {
671710616SSebastien.Roy@Sun.COM *zoneidp = zone->zone_id;
671810616SSebastien.Roy@Sun.COM err = 0;
671910616SSebastien.Roy@Sun.COM break;
67203448Sdh155122 }
67213448Sdh155122 }
67223448Sdh155122 mutex_exit(&zonehash_lock);
672310616SSebastien.Roy@Sun.COM return (err);
67243448Sdh155122 }
67253448Sdh155122
67263448Sdh155122 /*
672710616SSebastien.Roy@Sun.COM * Get the list of datalink IDs assigned to a zone.
672810616SSebastien.Roy@Sun.COM *
672910616SSebastien.Roy@Sun.COM * On input, *nump is the number of datalink IDs that can fit in the supplied
673010616SSebastien.Roy@Sun.COM * idarray. Upon return, *nump is either set to the number of datalink IDs
673110616SSebastien.Roy@Sun.COM * that were placed in the array if the array was large enough, or to the
673210616SSebastien.Roy@Sun.COM * number of datalink IDs that the function needs to place in the array if the
673310616SSebastien.Roy@Sun.COM * array is too small.
67343448Sdh155122 */
67353448Sdh155122 static int
zone_list_datalink(zoneid_t zoneid,int * nump,datalink_id_t * idarray)673610616SSebastien.Roy@Sun.COM zone_list_datalink(zoneid_t zoneid, int *nump, datalink_id_t *idarray)
67373448Sdh155122 {
673810616SSebastien.Roy@Sun.COM uint_t num, dlcount;
67393448Sdh155122 zone_t *zone;
674010616SSebastien.Roy@Sun.COM zone_dl_t *zdl;
674110616SSebastien.Roy@Sun.COM datalink_id_t *idptr = idarray;
67423448Sdh155122
67433448Sdh155122 if (copyin(nump, &dlcount, sizeof (dlcount)) != 0)
67443448Sdh155122 return (set_errno(EFAULT));
674510616SSebastien.Roy@Sun.COM if ((zone = zone_find_by_id(zoneid)) == NULL)
67463448Sdh155122 return (set_errno(ENXIO));
67473448Sdh155122
67483448Sdh155122 num = 0;
67493448Sdh155122 mutex_enter(&zone->zone_lock);
675010616SSebastien.Roy@Sun.COM for (zdl = list_head(&zone->zone_dl_list); zdl != NULL;
675110616SSebastien.Roy@Sun.COM zdl = list_next(&zone->zone_dl_list, zdl)) {
67523448Sdh155122 /*
675310616SSebastien.Roy@Sun.COM * If the list is bigger than what the caller supplied, just
675410616SSebastien.Roy@Sun.COM * count, don't do copyout.
67553448Sdh155122 */
67563448Sdh155122 if (++num > dlcount)
67573448Sdh155122 continue;
675810616SSebastien.Roy@Sun.COM if (copyout(&zdl->zdl_id, idptr, sizeof (*idptr)) != 0) {
67593448Sdh155122 mutex_exit(&zone->zone_lock);
67603448Sdh155122 zone_rele(zone);
67613448Sdh155122 return (set_errno(EFAULT));
67623448Sdh155122 }
676310616SSebastien.Roy@Sun.COM idptr++;
67643448Sdh155122 }
67653448Sdh155122 mutex_exit(&zone->zone_lock);
67663448Sdh155122 zone_rele(zone);
67673448Sdh155122
67683448Sdh155122 /* Increased or decreased, caller should be notified. */
67693448Sdh155122 if (num != dlcount) {
677010616SSebastien.Roy@Sun.COM if (copyout(&num, nump, sizeof (num)) != 0)
67713448Sdh155122 return (set_errno(EFAULT));
67723448Sdh155122 }
67733448Sdh155122 return (0);
67743448Sdh155122 }
67753448Sdh155122
67763448Sdh155122 /*
67773448Sdh155122 * Public interface for looking up a zone by zoneid. It's a customized version
67785880Snordmark * for netstack_zone_create(). It can only be called from the zsd create
67795880Snordmark * callbacks, since it doesn't have reference on the zone structure hence if
67805880Snordmark * it is called elsewhere the zone could disappear after the zonehash_lock
67815880Snordmark * is dropped.
67825880Snordmark *
67835880Snordmark * Furthermore it
67845880Snordmark * 1. Doesn't check the status of the zone.
67855880Snordmark * 2. It will be called even before zone_init is called, in that case the
67863448Sdh155122 * address of zone0 is returned directly, and netstack_zone_create()
67873448Sdh155122 * will only assign a value to zone0.zone_netstack, won't break anything.
67885880Snordmark * 3. Returns without the zone being held.
67893448Sdh155122 */
67903448Sdh155122 zone_t *
zone_find_by_id_nolock(zoneid_t zoneid)67913448Sdh155122 zone_find_by_id_nolock(zoneid_t zoneid)
67923448Sdh155122 {
67935880Snordmark zone_t *zone;
67945880Snordmark
67955880Snordmark mutex_enter(&zonehash_lock);
67963448Sdh155122 if (zonehashbyid == NULL)
67975880Snordmark zone = &zone0;
67983448Sdh155122 else
67995880Snordmark zone = zone_find_all_by_id(zoneid);
68005880Snordmark mutex_exit(&zonehash_lock);
68015880Snordmark return (zone);
68023448Sdh155122 }
68035895Syz147064
68045895Syz147064 /*
68055895Syz147064 * Walk the datalinks for a given zone
68065895Syz147064 */
68075895Syz147064 int
zone_datalink_walk(zoneid_t zoneid,int (* cb)(datalink_id_t,void *),void * data)680810616SSebastien.Roy@Sun.COM zone_datalink_walk(zoneid_t zoneid, int (*cb)(datalink_id_t, void *),
680910616SSebastien.Roy@Sun.COM void *data)
68105895Syz147064 {
681110616SSebastien.Roy@Sun.COM zone_t *zone;
681210616SSebastien.Roy@Sun.COM zone_dl_t *zdl;
681310616SSebastien.Roy@Sun.COM datalink_id_t *idarray;
681410616SSebastien.Roy@Sun.COM uint_t idcount = 0;
681510616SSebastien.Roy@Sun.COM int i, ret = 0;
68165895Syz147064
68175895Syz147064 if ((zone = zone_find_by_id(zoneid)) == NULL)
68185895Syz147064 return (ENOENT);
68195895Syz147064
682010616SSebastien.Roy@Sun.COM /*
682110616SSebastien.Roy@Sun.COM * We first build an array of linkid's so that we can walk these and
682210616SSebastien.Roy@Sun.COM * execute the callback with the zone_lock dropped.
682310616SSebastien.Roy@Sun.COM */
68245895Syz147064 mutex_enter(&zone->zone_lock);
682510616SSebastien.Roy@Sun.COM for (zdl = list_head(&zone->zone_dl_list); zdl != NULL;
682610616SSebastien.Roy@Sun.COM zdl = list_next(&zone->zone_dl_list, zdl)) {
682710616SSebastien.Roy@Sun.COM idcount++;
682810616SSebastien.Roy@Sun.COM }
682910616SSebastien.Roy@Sun.COM
683010616SSebastien.Roy@Sun.COM if (idcount == 0) {
683110616SSebastien.Roy@Sun.COM mutex_exit(&zone->zone_lock);
683210616SSebastien.Roy@Sun.COM zone_rele(zone);
683310616SSebastien.Roy@Sun.COM return (0);
683410616SSebastien.Roy@Sun.COM }
683510616SSebastien.Roy@Sun.COM
683610616SSebastien.Roy@Sun.COM idarray = kmem_alloc(sizeof (datalink_id_t) * idcount, KM_NOSLEEP);
683710616SSebastien.Roy@Sun.COM if (idarray == NULL) {
683810616SSebastien.Roy@Sun.COM mutex_exit(&zone->zone_lock);
683910616SSebastien.Roy@Sun.COM zone_rele(zone);
684010616SSebastien.Roy@Sun.COM return (ENOMEM);
684110616SSebastien.Roy@Sun.COM }
684210616SSebastien.Roy@Sun.COM
684310616SSebastien.Roy@Sun.COM for (i = 0, zdl = list_head(&zone->zone_dl_list); zdl != NULL;
684410616SSebastien.Roy@Sun.COM i++, zdl = list_next(&zone->zone_dl_list, zdl)) {
684510616SSebastien.Roy@Sun.COM idarray[i] = zdl->zdl_id;
684610616SSebastien.Roy@Sun.COM }
684710616SSebastien.Roy@Sun.COM
684810616SSebastien.Roy@Sun.COM mutex_exit(&zone->zone_lock);
684910616SSebastien.Roy@Sun.COM
685010616SSebastien.Roy@Sun.COM for (i = 0; i < idcount && ret == 0; i++) {
685110616SSebastien.Roy@Sun.COM if ((ret = (*cb)(idarray[i], data)) != 0)
68525895Syz147064 break;
68535895Syz147064 }
685410616SSebastien.Roy@Sun.COM
68555895Syz147064 zone_rele(zone);
685610616SSebastien.Roy@Sun.COM kmem_free(idarray, sizeof (datalink_id_t) * idcount);
68575895Syz147064 return (ret);
68585895Syz147064 }
685912748SSowmini.Varadhan@oracle.COM
686012748SSowmini.Varadhan@oracle.COM static char *
zone_net_type2name(int type)686112748SSowmini.Varadhan@oracle.COM zone_net_type2name(int type)
686212748SSowmini.Varadhan@oracle.COM {
686312748SSowmini.Varadhan@oracle.COM switch (type) {
686412748SSowmini.Varadhan@oracle.COM case ZONE_NETWORK_ADDRESS:
686512748SSowmini.Varadhan@oracle.COM return (ZONE_NET_ADDRNAME);
686612748SSowmini.Varadhan@oracle.COM case ZONE_NETWORK_DEFROUTER:
686712748SSowmini.Varadhan@oracle.COM return (ZONE_NET_RTRNAME);
686812748SSowmini.Varadhan@oracle.COM default:
686912748SSowmini.Varadhan@oracle.COM return (NULL);
687012748SSowmini.Varadhan@oracle.COM }
687112748SSowmini.Varadhan@oracle.COM }
687212748SSowmini.Varadhan@oracle.COM
687312748SSowmini.Varadhan@oracle.COM static int
zone_set_network(zoneid_t zoneid,zone_net_data_t * znbuf)687412748SSowmini.Varadhan@oracle.COM zone_set_network(zoneid_t zoneid, zone_net_data_t *znbuf)
687512748SSowmini.Varadhan@oracle.COM {
687612748SSowmini.Varadhan@oracle.COM zone_t *zone;
687712748SSowmini.Varadhan@oracle.COM zone_dl_t *zdl;
687812748SSowmini.Varadhan@oracle.COM nvlist_t *nvl;
687912748SSowmini.Varadhan@oracle.COM int err = 0;
688012748SSowmini.Varadhan@oracle.COM uint8_t *new = NULL;
688112748SSowmini.Varadhan@oracle.COM char *nvname;
688212748SSowmini.Varadhan@oracle.COM int bufsize;
688312748SSowmini.Varadhan@oracle.COM datalink_id_t linkid = znbuf->zn_linkid;
688412748SSowmini.Varadhan@oracle.COM
688512748SSowmini.Varadhan@oracle.COM if (secpolicy_zone_config(CRED()) != 0)
688612748SSowmini.Varadhan@oracle.COM return (set_errno(EPERM));
688712748SSowmini.Varadhan@oracle.COM
688812748SSowmini.Varadhan@oracle.COM if (zoneid == GLOBAL_ZONEID)
688912748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
689012748SSowmini.Varadhan@oracle.COM
689112748SSowmini.Varadhan@oracle.COM nvname = zone_net_type2name(znbuf->zn_type);
689212748SSowmini.Varadhan@oracle.COM bufsize = znbuf->zn_len;
689312748SSowmini.Varadhan@oracle.COM new = znbuf->zn_val;
689412748SSowmini.Varadhan@oracle.COM if (nvname == NULL)
689512748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
689612748SSowmini.Varadhan@oracle.COM
689712748SSowmini.Varadhan@oracle.COM if ((zone = zone_find_by_id(zoneid)) == NULL) {
689812748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
689912748SSowmini.Varadhan@oracle.COM }
690012748SSowmini.Varadhan@oracle.COM
690112748SSowmini.Varadhan@oracle.COM mutex_enter(&zone->zone_lock);
690212748SSowmini.Varadhan@oracle.COM if ((zdl = zone_find_dl(zone, linkid)) == NULL) {
690312748SSowmini.Varadhan@oracle.COM err = ENXIO;
690412748SSowmini.Varadhan@oracle.COM goto done;
690512748SSowmini.Varadhan@oracle.COM }
690612748SSowmini.Varadhan@oracle.COM if ((nvl = zdl->zdl_net) == NULL) {
690712748SSowmini.Varadhan@oracle.COM if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
690812748SSowmini.Varadhan@oracle.COM err = ENOMEM;
690912748SSowmini.Varadhan@oracle.COM goto done;
691012748SSowmini.Varadhan@oracle.COM } else {
691112748SSowmini.Varadhan@oracle.COM zdl->zdl_net = nvl;
691212748SSowmini.Varadhan@oracle.COM }
691312748SSowmini.Varadhan@oracle.COM }
691412748SSowmini.Varadhan@oracle.COM if (nvlist_exists(nvl, nvname)) {
691512748SSowmini.Varadhan@oracle.COM err = EINVAL;
691612748SSowmini.Varadhan@oracle.COM goto done;
691712748SSowmini.Varadhan@oracle.COM }
691812748SSowmini.Varadhan@oracle.COM err = nvlist_add_uint8_array(nvl, nvname, new, bufsize);
691912748SSowmini.Varadhan@oracle.COM ASSERT(err == 0);
692012748SSowmini.Varadhan@oracle.COM done:
692112748SSowmini.Varadhan@oracle.COM mutex_exit(&zone->zone_lock);
692212748SSowmini.Varadhan@oracle.COM zone_rele(zone);
692312748SSowmini.Varadhan@oracle.COM if (err != 0)
692412748SSowmini.Varadhan@oracle.COM return (set_errno(err));
692512748SSowmini.Varadhan@oracle.COM else
692612748SSowmini.Varadhan@oracle.COM return (0);
692712748SSowmini.Varadhan@oracle.COM }
692812748SSowmini.Varadhan@oracle.COM
692912748SSowmini.Varadhan@oracle.COM static int
zone_get_network(zoneid_t zoneid,zone_net_data_t * znbuf)693012748SSowmini.Varadhan@oracle.COM zone_get_network(zoneid_t zoneid, zone_net_data_t *znbuf)
693112748SSowmini.Varadhan@oracle.COM {
693212748SSowmini.Varadhan@oracle.COM zone_t *zone;
693312748SSowmini.Varadhan@oracle.COM zone_dl_t *zdl;
693412748SSowmini.Varadhan@oracle.COM nvlist_t *nvl;
693512748SSowmini.Varadhan@oracle.COM uint8_t *ptr;
693612748SSowmini.Varadhan@oracle.COM uint_t psize;
693712748SSowmini.Varadhan@oracle.COM int err = 0;
693812748SSowmini.Varadhan@oracle.COM char *nvname;
693912748SSowmini.Varadhan@oracle.COM int bufsize;
694012748SSowmini.Varadhan@oracle.COM void *buf;
694112748SSowmini.Varadhan@oracle.COM datalink_id_t linkid = znbuf->zn_linkid;
694212748SSowmini.Varadhan@oracle.COM
694312748SSowmini.Varadhan@oracle.COM if (zoneid == GLOBAL_ZONEID)
694412748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
694512748SSowmini.Varadhan@oracle.COM
694612748SSowmini.Varadhan@oracle.COM nvname = zone_net_type2name(znbuf->zn_type);
694712748SSowmini.Varadhan@oracle.COM bufsize = znbuf->zn_len;
694812748SSowmini.Varadhan@oracle.COM buf = znbuf->zn_val;
694912748SSowmini.Varadhan@oracle.COM
695012748SSowmini.Varadhan@oracle.COM if (nvname == NULL)
695112748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
695212748SSowmini.Varadhan@oracle.COM if ((zone = zone_find_by_id(zoneid)) == NULL)
695312748SSowmini.Varadhan@oracle.COM return (set_errno(EINVAL));
695412748SSowmini.Varadhan@oracle.COM
695512748SSowmini.Varadhan@oracle.COM mutex_enter(&zone->zone_lock);
695612748SSowmini.Varadhan@oracle.COM if ((zdl = zone_find_dl(zone, linkid)) == NULL) {
695712748SSowmini.Varadhan@oracle.COM err = ENXIO;
695812748SSowmini.Varadhan@oracle.COM goto done;
695912748SSowmini.Varadhan@oracle.COM }
696012748SSowmini.Varadhan@oracle.COM if ((nvl = zdl->zdl_net) == NULL || !nvlist_exists(nvl, nvname)) {
696112748SSowmini.Varadhan@oracle.COM err = ENOENT;
696212748SSowmini.Varadhan@oracle.COM goto done;
696312748SSowmini.Varadhan@oracle.COM }
696412748SSowmini.Varadhan@oracle.COM err = nvlist_lookup_uint8_array(nvl, nvname, &ptr, &psize);
696512748SSowmini.Varadhan@oracle.COM ASSERT(err == 0);
696612748SSowmini.Varadhan@oracle.COM
696712748SSowmini.Varadhan@oracle.COM if (psize > bufsize) {
696812748SSowmini.Varadhan@oracle.COM err = ENOBUFS;
696912748SSowmini.Varadhan@oracle.COM goto done;
697012748SSowmini.Varadhan@oracle.COM }
697112748SSowmini.Varadhan@oracle.COM znbuf->zn_len = psize;
697212748SSowmini.Varadhan@oracle.COM bcopy(ptr, buf, psize);
697312748SSowmini.Varadhan@oracle.COM done:
697412748SSowmini.Varadhan@oracle.COM mutex_exit(&zone->zone_lock);
697512748SSowmini.Varadhan@oracle.COM zone_rele(zone);
697612748SSowmini.Varadhan@oracle.COM if (err != 0)
697712748SSowmini.Varadhan@oracle.COM return (set_errno(err));
697812748SSowmini.Varadhan@oracle.COM else
697912748SSowmini.Varadhan@oracle.COM return (0);
698012748SSowmini.Varadhan@oracle.COM }
6981