xref: /onnv-gate/usr/src/uts/common/os/netstack.c (revision 4136)
13448Sdh155122 /*
23448Sdh155122  * CDDL HEADER START
33448Sdh155122  *
43448Sdh155122  * The contents of this file are subject to the terms of the
53448Sdh155122  * Common Development and Distribution License (the "License").
63448Sdh155122  * You may not use this file except in compliance with the License.
73448Sdh155122  *
83448Sdh155122  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93448Sdh155122  * or http://www.opensolaris.org/os/licensing.
103448Sdh155122  * See the License for the specific language governing permissions
113448Sdh155122  * and limitations under the License.
123448Sdh155122  *
133448Sdh155122  * When distributing Covered Code, include this CDDL HEADER in each
143448Sdh155122  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153448Sdh155122  * If applicable, add the following below this CDDL HEADER, with the
163448Sdh155122  * fields enclosed by brackets "[]" replaced with your own identifying
173448Sdh155122  * information: Portions Copyright [yyyy] [name of copyright owner]
183448Sdh155122  *
193448Sdh155122  * CDDL HEADER END
203448Sdh155122  */
213448Sdh155122 
223448Sdh155122 /*
233448Sdh155122  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
243448Sdh155122  * Use is subject to license terms.
253448Sdh155122  */
263448Sdh155122 
273448Sdh155122 #pragma ident	"%Z%%M%	%I%	%E% SMI"
283448Sdh155122 
293448Sdh155122 #include <sys/param.h>
303448Sdh155122 #include <sys/sysmacros.h>
313448Sdh155122 #include <sys/vm.h>
323448Sdh155122 #include <sys/proc.h>
333448Sdh155122 #include <sys/tuneable.h>
343448Sdh155122 #include <sys/systm.h>
353448Sdh155122 #include <sys/cmn_err.h>
363448Sdh155122 #include <sys/debug.h>
373448Sdh155122 #include <sys/sdt.h>
383448Sdh155122 #include <sys/mutex.h>
393448Sdh155122 #include <sys/bitmap.h>
403448Sdh155122 #include <sys/atomic.h>
413448Sdh155122 #include <sys/kobj.h>
423448Sdh155122 #include <sys/disp.h>
433448Sdh155122 #include <vm/seg_kmem.h>
443448Sdh155122 #include <sys/zone.h>
453448Sdh155122 #include <sys/netstack.h>
463448Sdh155122 
473448Sdh155122 /*
483448Sdh155122  * What we use so that the zones framework can tell us about new zones,
493448Sdh155122  * which we use to create new stacks.
503448Sdh155122  */
513448Sdh155122 static zone_key_t netstack_zone_key;
523448Sdh155122 
/*
 * Set to 1 by netstack_init(); netstack_register() and
 * netstack_zone_create() assert that it is non-zero.
 */
533448Sdh155122 static int	netstack_initialized = 0;
543448Sdh155122 
553448Sdh155122 /*
563448Sdh155122  * Track the registered netstacks.
573448Sdh155122  * The global lock protects
583448Sdh155122  * - ns_reg
593448Sdh155122  * - the list starting at netstack_head and following the netstack_next
603448Sdh155122  *   pointers.
613448Sdh155122  */
623448Sdh155122 static kmutex_t netstack_g_lock;
633448Sdh155122 
643448Sdh155122 /*
653448Sdh155122  * Registry of netstacks with their create/shutdown/destroy functions.
 * Indexed by module id; a slot is in use when NRF_REGISTERED is set in
 * its nr_flags.
663448Sdh155122  */
673448Sdh155122 static struct netstack_registry	ns_reg[NS_MAX];
683448Sdh155122 
693448Sdh155122 /*
703448Sdh155122  * Global list of existing stacks.  We use this when a new zone with
713448Sdh155122  * an exclusive IP instance is created.
723448Sdh155122  *
733448Sdh155122  * Note that in some cases a netstack_t needs to stay around after the zone
743448Sdh155122  * has gone away. This is because there might be outstanding references
753448Sdh155122  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
763448Sdh155122  * structure and all the foo_stack_t's hanging off of it will be cleaned up
773448Sdh155122  * when the last reference to it is dropped.
783448Sdh155122  * However, the same zone might be rebooted. That is handled using the
793448Sdh155122  * assumption that the zones framework picks a new zoneid each time a zone
803448Sdh155122  * is (re)booted. We assert for that condition in netstack_zone_create().
813448Sdh155122  * Thus the old netstack_t can take its time for things to time out.
823448Sdh155122  */
833448Sdh155122 static netstack_t *netstack_head;
843448Sdh155122 
853448Sdh155122 /*
863448Sdh155122  * To support kstat_create_netstack() using kstat_zone_add we need
873448Sdh155122  * to track both
883448Sdh155122  *  - all zoneids that use the global/shared stack
893448Sdh155122  *  - all kstats that have been added for the shared stack
903448Sdh155122  */
/* One list entry per zoneid that uses the global/shared stack. */
913448Sdh155122 struct shared_zone_list {
923448Sdh155122 	struct shared_zone_list *sz_next;	/* next entry in the list */
933448Sdh155122 	zoneid_t		sz_zoneid;	/* zone using shared stack */
943448Sdh155122 };
953448Sdh155122 
/* One list entry per kstat that has been added for the shared stack. */
963448Sdh155122 struct shared_kstat_list {
973448Sdh155122 	struct shared_kstat_list *sk_next;	/* next entry in the list */
983448Sdh155122 	kstat_t			 *sk_kstat;	/* kstat for shared stack */
993448Sdh155122 };
1003448Sdh155122 
1013448Sdh155122 static kmutex_t netstack_shared_lock;	/* protects the following two */
1023448Sdh155122 static struct shared_zone_list	*netstack_shared_zones;
1033448Sdh155122 static struct shared_kstat_list	*netstack_shared_kstats;
1043448Sdh155122 
/* Zone key callbacks; handed to zone_key_create() by netstack_init(). */
1053448Sdh155122 static void	*netstack_zone_create(zoneid_t zoneid);
1063448Sdh155122 static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
1073448Sdh155122 static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);
1083448Sdh155122 
/*
 * Called after module/stack pairs have been flagged as needing create,
 * shutdown or destroy.
 */
1093448Sdh155122 static void	netstack_do_create(void);
1103448Sdh155122 static void	netstack_do_shutdown(void);
1113448Sdh155122 static void	netstack_do_destroy(void);
1123448Sdh155122 
/*
 * Record/forget zones and kstats associated with the shared stack;
 * presumably these maintain the two lists declared above.
 */
1133448Sdh155122 static void	netstack_shared_zone_add(zoneid_t zoneid);
1143448Sdh155122 static void	netstack_shared_zone_remove(zoneid_t zoneid);
1153448Sdh155122 static void	netstack_shared_kstat_add(kstat_t *ks);
1163448Sdh155122 static void	netstack_shared_kstat_remove(kstat_t *ks);
1173448Sdh155122 
1183448Sdh155122 
1193448Sdh155122 void
1203448Sdh155122 netstack_init(void)
1213448Sdh155122 {
1223448Sdh155122 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
1233448Sdh155122 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
1243448Sdh155122 
1253448Sdh155122 	netstack_initialized = 1;
1263448Sdh155122 
1273448Sdh155122 	/*
1283448Sdh155122 	 * We want to be informed each time a zone is created or
1293448Sdh155122 	 * destroyed in the kernel, so we can maintain the
1303448Sdh155122 	 * stack instance information.
1313448Sdh155122 	 */
1323448Sdh155122 	zone_key_create(&netstack_zone_key, netstack_zone_create,
1333448Sdh155122 	    netstack_zone_shutdown, netstack_zone_destroy);
1343448Sdh155122 }
1353448Sdh155122 
1363448Sdh155122 /*
1373448Sdh155122  * Register a new module with the framework.
1383448Sdh155122  * This registers interest in changes to the set of netstacks.
1393448Sdh155122  * The createfn and destroyfn are required, but the shutdownfn can be
1403448Sdh155122  * NULL.
1413448Sdh155122  * Note that due to the current zsd implementation, when the create
1423448Sdh155122  * function is called the zone isn't fully present, thus functions
1433448Sdh155122  * like zone_find_by_* will fail, hence the create function can not
1443448Sdh155122  * use many zones kernel functions including zcmn_err().
1453448Sdh155122  */
1463448Sdh155122 void
1473448Sdh155122 netstack_register(int moduleid,
1483448Sdh155122     void *(*module_create)(netstackid_t, netstack_t *),
1493448Sdh155122     void (*module_shutdown)(netstackid_t, void *),
1503448Sdh155122     void (*module_destroy)(netstackid_t, void *))
1513448Sdh155122 {
1523448Sdh155122 	netstack_t *ns;
1533448Sdh155122 
1543448Sdh155122 	ASSERT(netstack_initialized);
1553448Sdh155122 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
1563448Sdh155122 	ASSERT(module_create != NULL);
1573448Sdh155122 
1583448Sdh155122 	mutex_enter(&netstack_g_lock);
1593448Sdh155122 	ASSERT(ns_reg[moduleid].nr_create == NULL);
1603448Sdh155122 	ASSERT(ns_reg[moduleid].nr_flags == 0);
1613448Sdh155122 	ns_reg[moduleid].nr_create = module_create;
1623448Sdh155122 	ns_reg[moduleid].nr_shutdown = module_shutdown;
1633448Sdh155122 	ns_reg[moduleid].nr_destroy = module_destroy;
1643448Sdh155122 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
1653448Sdh155122 
1663448Sdh155122 	/*
1673448Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
1683448Sdh155122 	 * Set CREATE_NEEDED for each of those.
1693448Sdh155122 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
1703448Sdh155122 	 * set, but check NSF_CLOSING to be sure.
1713448Sdh155122 	 */
1723448Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1733448Sdh155122 		mutex_enter(&ns->netstack_lock);
1743448Sdh155122 		if (!(ns->netstack_flags & NSF_CLOSING) &&
1753448Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
1763448Sdh155122 			ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
1773448Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
1783448Sdh155122 			    netstack_t *, ns, int, moduleid);
1793448Sdh155122 		}
1803448Sdh155122 		mutex_exit(&ns->netstack_lock);
1813448Sdh155122 	}
1823448Sdh155122 	mutex_exit(&netstack_g_lock);
1833448Sdh155122 
1843448Sdh155122 	/*
1853448Sdh155122 	 * Call the create function for each stack that has CREATE_NEEDED.
1863448Sdh155122 	 * Set CREATE_INPROGRESS, drop lock, and after done,
1873448Sdh155122 	 * set CREATE_COMPLETE
1883448Sdh155122 	 */
1893448Sdh155122 	netstack_do_create();
1903448Sdh155122 }
1913448Sdh155122 
1923448Sdh155122 void
1933448Sdh155122 netstack_unregister(int moduleid)
1943448Sdh155122 {
1953448Sdh155122 	netstack_t *ns;
1963448Sdh155122 
1973448Sdh155122 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
1983448Sdh155122 
1993448Sdh155122 	ASSERT(ns_reg[moduleid].nr_create != NULL);
2003448Sdh155122 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
2013448Sdh155122 
2023448Sdh155122 	mutex_enter(&netstack_g_lock);
2033448Sdh155122 	/*
2043448Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
2053448Sdh155122 	 * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
2063448Sdh155122 	 */
2073448Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
2083448Sdh155122 		mutex_enter(&ns->netstack_lock);
2093448Sdh155122 		if (ns_reg[moduleid].nr_shutdown != NULL &&
2103448Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
2113448Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
2123448Sdh155122 			ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
2133448Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
2143448Sdh155122 			    netstack_t *, ns, int, moduleid);
2153448Sdh155122 		}
2163448Sdh155122 		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
2173448Sdh155122 		    ns_reg[moduleid].nr_destroy != NULL &&
2183448Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
2193448Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
2203448Sdh155122 			ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
2213448Sdh155122 			DTRACE_PROBE2(netstack__destroy__needed,
2223448Sdh155122 			    netstack_t *, ns, int, moduleid);
2233448Sdh155122 		}
2243448Sdh155122 		mutex_exit(&ns->netstack_lock);
2253448Sdh155122 	}
2263448Sdh155122 	mutex_exit(&netstack_g_lock);
2273448Sdh155122 
2283448Sdh155122 	netstack_do_shutdown();
2293448Sdh155122 	netstack_do_destroy();
2303448Sdh155122 
2313448Sdh155122 	/*
2323448Sdh155122 	 * Clear the netstack_m_state so that we can handle this module
2333448Sdh155122 	 * being loaded again.
2343448Sdh155122 	 */
2353448Sdh155122 	mutex_enter(&netstack_g_lock);
2363448Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
2373448Sdh155122 		mutex_enter(&ns->netstack_lock);
2383448Sdh155122 		if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
2393448Sdh155122 			ns->netstack_m_state[moduleid] = 0;
2403448Sdh155122 			DTRACE_PROBE2(netstack__destroy__done,
2413448Sdh155122 			    netstack_t *, ns, int, moduleid);
2423448Sdh155122 		}
2433448Sdh155122 		mutex_exit(&ns->netstack_lock);
2443448Sdh155122 	}
2453448Sdh155122 
2463448Sdh155122 	ns_reg[moduleid].nr_create = NULL;
2473448Sdh155122 	ns_reg[moduleid].nr_shutdown = NULL;
2483448Sdh155122 	ns_reg[moduleid].nr_destroy = NULL;
2493448Sdh155122 	ns_reg[moduleid].nr_flags = 0;
2503448Sdh155122 	mutex_exit(&netstack_g_lock);
2513448Sdh155122 }
2523448Sdh155122 
2533448Sdh155122 /*
2543448Sdh155122  * Lookup and/or allocate a netstack for this zone.
2553448Sdh155122  */
2563448Sdh155122 static void *
2573448Sdh155122 netstack_zone_create(zoneid_t zoneid)
2583448Sdh155122 {
2593448Sdh155122 	netstackid_t stackid;
2603448Sdh155122 	netstack_t *ns;
2613448Sdh155122 	netstack_t **nsp;
2623448Sdh155122 	zone_t	*zone;
2633448Sdh155122 	int i;
2643448Sdh155122 
2653448Sdh155122 	ASSERT(netstack_initialized);
2663448Sdh155122 
2673448Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
2683448Sdh155122 	ASSERT(zone != NULL);
2693448Sdh155122 
2703448Sdh155122 	if (zone->zone_flags & ZF_NET_EXCL) {
2713448Sdh155122 		stackid = zoneid;
2723448Sdh155122 	} else {
2733448Sdh155122 		/* Look for the stack instance for the global */
2743448Sdh155122 		stackid = GLOBAL_NETSTACKID;
2753448Sdh155122 	}
2763448Sdh155122 
2773448Sdh155122 	/* Allocate even if it isn't needed; simplifies locking */
2783448Sdh155122 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
2793448Sdh155122 
2803448Sdh155122 	/* Look if there is a matching stack instance */
2813448Sdh155122 	mutex_enter(&netstack_g_lock);
2823448Sdh155122 	for (nsp = &netstack_head; *nsp != NULL;
2833448Sdh155122 	    nsp = &((*nsp)->netstack_next)) {
2843448Sdh155122 		if ((*nsp)->netstack_stackid == stackid) {
2853448Sdh155122 			/*
2863448Sdh155122 			 * Should never find a pre-existing exclusive stack
2873448Sdh155122 			 */
2883448Sdh155122 			ASSERT(stackid == GLOBAL_NETSTACKID);
2893448Sdh155122 			kmem_free(ns, sizeof (netstack_t));
2903448Sdh155122 			ns = *nsp;
2913448Sdh155122 			mutex_enter(&ns->netstack_lock);
2923448Sdh155122 			ns->netstack_numzones++;
2933448Sdh155122 			mutex_exit(&ns->netstack_lock);
2943448Sdh155122 			mutex_exit(&netstack_g_lock);
2953448Sdh155122 			DTRACE_PROBE1(netstack__inc__numzones,
2963448Sdh155122 			    netstack_t *, ns);
2973448Sdh155122 			/* Record that we have a new shared stack zone */
2983448Sdh155122 			netstack_shared_zone_add(zoneid);
2993448Sdh155122 			zone->zone_netstack = ns;
3003448Sdh155122 			return (ns);
3013448Sdh155122 		}
3023448Sdh155122 	}
3033448Sdh155122 	/* Not found */
3043448Sdh155122 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
3053448Sdh155122 	ns->netstack_stackid = zoneid;
3063448Sdh155122 	ns->netstack_numzones = 1;
3073448Sdh155122 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
3083448Sdh155122 	ns->netstack_flags = NSF_UNINIT;
3093448Sdh155122 	*nsp = ns;
3103448Sdh155122 	zone->zone_netstack = ns;
3113448Sdh155122 
3123448Sdh155122 	/*
3133448Sdh155122 	 * Determine the set of module create functions that need to be
3143448Sdh155122 	 * called before we drop the lock.
3153448Sdh155122 	 */
3163448Sdh155122 	for (i = 0; i < NS_MAX; i++) {
3173448Sdh155122 		mutex_enter(&ns->netstack_lock);
3183448Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
3193448Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
3203448Sdh155122 			ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
3213448Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
3223448Sdh155122 			    netstack_t *, ns, int, i);
3233448Sdh155122 		}
3243448Sdh155122 		mutex_exit(&ns->netstack_lock);
3253448Sdh155122 	}
3263448Sdh155122 	mutex_exit(&netstack_g_lock);
3273448Sdh155122 
3283448Sdh155122 	netstack_do_create();
3293448Sdh155122 
3303448Sdh155122 	mutex_enter(&ns->netstack_lock);
3313448Sdh155122 	ns->netstack_flags &= ~NSF_UNINIT;
3323448Sdh155122 	mutex_exit(&ns->netstack_lock);
3333448Sdh155122 
3343448Sdh155122 	return (ns);
3353448Sdh155122 }
3363448Sdh155122 
3373448Sdh155122 /* ARGSUSED */
3383448Sdh155122 static void
3393448Sdh155122 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
3403448Sdh155122 {
3413448Sdh155122 	netstack_t *ns = (netstack_t *)arg;
3423448Sdh155122 	int i;
3433448Sdh155122 
3443448Sdh155122 	ASSERT(arg != NULL);
3453448Sdh155122 
3463448Sdh155122 	mutex_enter(&ns->netstack_lock);
3473448Sdh155122 	ASSERT(ns->netstack_numzones > 0);
3483448Sdh155122 	if (ns->netstack_numzones != 1) {
3493448Sdh155122 		/* Stack instance being used by other zone */
3503448Sdh155122 		mutex_exit(&ns->netstack_lock);
3513448Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
3523448Sdh155122 		return;
3533448Sdh155122 	}
3543448Sdh155122 	mutex_exit(&ns->netstack_lock);
3553448Sdh155122 
3563448Sdh155122 	mutex_enter(&netstack_g_lock);
3573448Sdh155122 	/*
3583448Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
3593448Sdh155122 	 * Set SHUTDOWN_NEEDED for each of those.
3603448Sdh155122 	 */
3613448Sdh155122 	for (i = 0; i < NS_MAX; i++) {
3623448Sdh155122 		mutex_enter(&ns->netstack_lock);
3633448Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
3643448Sdh155122 		    ns_reg[i].nr_shutdown != NULL &&
3653448Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
3663448Sdh155122 		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
3673448Sdh155122 			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
3683448Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
3693448Sdh155122 			    netstack_t *, ns, int, i);
3703448Sdh155122 		}
3713448Sdh155122 		mutex_exit(&ns->netstack_lock);
3723448Sdh155122 	}
3733448Sdh155122 	mutex_exit(&netstack_g_lock);
3743448Sdh155122 
3753448Sdh155122 	/* Call the shutdown function for all registered modules */
3763448Sdh155122 	netstack_do_shutdown();
3773448Sdh155122 }
3783448Sdh155122 
3793448Sdh155122 /*
3803448Sdh155122  * Common routine to release a zone.
3813448Sdh155122  * If this was the last zone using the stack instance then prepare to
3823448Sdh155122  * have the refcnt dropping to zero free the zone.
3833448Sdh155122  */
3843448Sdh155122 /* ARGSUSED */
3853448Sdh155122 static void
3863448Sdh155122 netstack_zone_destroy(zoneid_t zoneid, void *arg)
3873448Sdh155122 {
3883448Sdh155122 	netstack_t *ns = (netstack_t *)arg;
3893448Sdh155122 
3903448Sdh155122 	ASSERT(arg != NULL);
3913448Sdh155122 
3923448Sdh155122 	mutex_enter(&ns->netstack_lock);
3933448Sdh155122 	ASSERT(ns->netstack_numzones > 0);
3943448Sdh155122 	ns->netstack_numzones--;
3953448Sdh155122 	if (ns->netstack_numzones != 0) {
3963448Sdh155122 		/* Stack instance being used by other zone */
3973448Sdh155122 		mutex_exit(&ns->netstack_lock);
3983448Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
3993448Sdh155122 		/* Record that we a shared stack zone has gone away */
4003448Sdh155122 		netstack_shared_zone_remove(zoneid);
4013448Sdh155122 		return;
4023448Sdh155122 	}
4033448Sdh155122 	/*
4043448Sdh155122 	 * Set CLOSING so that netstack_find_by will not find it
4053448Sdh155122 	 * and decrement the reference count.
4063448Sdh155122 	 */
4073448Sdh155122 	ns->netstack_flags |= NSF_CLOSING;
4083448Sdh155122 	mutex_exit(&ns->netstack_lock);
4093448Sdh155122 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
4103448Sdh155122 	/* No other thread can call zone_destroy for this stack */
4113448Sdh155122 
4123448Sdh155122 	/*
4133448Sdh155122 	 * Decrease refcnt to account for the one in netstack_zone_init()
4143448Sdh155122 	 */
4153448Sdh155122 	netstack_rele(ns);
4163448Sdh155122 }
4173448Sdh155122 
4183448Sdh155122 /*
4193448Sdh155122  * Called when the reference count drops to zero.
4203448Sdh155122  * Call the destroy functions for each registered module.
4213448Sdh155122  */
4223448Sdh155122 static void
4233448Sdh155122 netstack_stack_inactive(netstack_t *ns)
4243448Sdh155122 {
4253448Sdh155122 	int i;
4263448Sdh155122 
4273448Sdh155122 	mutex_enter(&netstack_g_lock);
4283448Sdh155122 	/*
4293448Sdh155122 	 * If the shutdown callback wasn't called earlier (e.g., if this is
4303448Sdh155122 	 * a netstack shared between multiple zones), then we call it now.
4313448Sdh155122 	 */
4323448Sdh155122 	for (i = 0; i < NS_MAX; i++) {
4333448Sdh155122 		mutex_enter(&ns->netstack_lock);
4343448Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
4353448Sdh155122 		    ns_reg[i].nr_shutdown != NULL &&
4363448Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
4373448Sdh155122 		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
4383448Sdh155122 			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
4393448Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
4403448Sdh155122 			    netstack_t *, ns, int, i);
4413448Sdh155122 		}
4423448Sdh155122 		mutex_exit(&ns->netstack_lock);
4433448Sdh155122 	}
4443448Sdh155122 	/*
4453448Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
4463448Sdh155122 	 * Set DESTROY_NEEDED for each of those.
4473448Sdh155122 	 */
4483448Sdh155122 	for (i = 0; i < NS_MAX; i++) {
4493448Sdh155122 		mutex_enter(&ns->netstack_lock);
4503448Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
4513448Sdh155122 		    ns_reg[i].nr_destroy != NULL &&
4523448Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
4533448Sdh155122 		    (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
4543448Sdh155122 			ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
4553448Sdh155122 			DTRACE_PROBE2(netstack__destroy__needed,
4563448Sdh155122 			    netstack_t *, ns, int, i);
4573448Sdh155122 		}
4583448Sdh155122 		mutex_exit(&ns->netstack_lock);
4593448Sdh155122 	}
4603448Sdh155122 	mutex_exit(&netstack_g_lock);
4613448Sdh155122 
4623448Sdh155122 	netstack_do_shutdown();
4633448Sdh155122 	netstack_do_destroy();
4643448Sdh155122 }
4653448Sdh155122 
4663448Sdh155122 /*
4673448Sdh155122  * Call the create function for the ns and moduleid if CREATE_NEEDED
4683448Sdh155122  * is set.
4693448Sdh155122  * When it calls it, it drops the netstack_lock held by the caller,
4703448Sdh155122  * and returns true to tell the caller it needs to re-evalute the
4713448Sdh155122  * state..
4723448Sdh155122  */
4733448Sdh155122 static boolean_t
4743448Sdh155122 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
4753448Sdh155122 {
4763448Sdh155122 	void *result;
4773448Sdh155122 	netstackid_t stackid;
4783448Sdh155122 
4793448Sdh155122 	ASSERT(MUTEX_HELD(lockp));
4803448Sdh155122 	mutex_enter(&ns->netstack_lock);
4813448Sdh155122 	if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
4823448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
4833448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
4843448Sdh155122 		DTRACE_PROBE2(netstack__create__inprogress,
4853448Sdh155122 		    netstack_t *, ns, int, moduleid);
4863448Sdh155122 		mutex_exit(&ns->netstack_lock);
4873448Sdh155122 		mutex_exit(lockp);
4883448Sdh155122 
4893448Sdh155122 		ASSERT(ns_reg[moduleid].nr_create != NULL);
4903448Sdh155122 		stackid = ns->netstack_stackid;
4913448Sdh155122 		DTRACE_PROBE2(netstack__create__start,
4923448Sdh155122 		    netstackid_t, stackid,
4933448Sdh155122 		    netstack_t *, ns);
4943448Sdh155122 		result = (ns_reg[moduleid].nr_create)(stackid, ns);
4953448Sdh155122 		DTRACE_PROBE2(netstack__create__end,
4963448Sdh155122 		    void *, result, netstack_t *, ns);
4973448Sdh155122 
4983448Sdh155122 		ASSERT(result != NULL);
4993448Sdh155122 		mutex_enter(&ns->netstack_lock);
5003448Sdh155122 		ns->netstack_modules[moduleid] = result;
5013448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
5023448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
5033448Sdh155122 		DTRACE_PROBE2(netstack__create__completed,
5043448Sdh155122 		    netstack_t *, ns, int, moduleid);
5053448Sdh155122 		mutex_exit(&ns->netstack_lock);
5063448Sdh155122 		return (B_TRUE);
5073448Sdh155122 	} else {
5083448Sdh155122 		mutex_exit(&ns->netstack_lock);
5093448Sdh155122 		return (B_FALSE);
5103448Sdh155122 	}
5113448Sdh155122 }
5123448Sdh155122 
5133448Sdh155122 /*
5143448Sdh155122  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
5153448Sdh155122  * is set.
5163448Sdh155122  * When it calls it, it drops the netstack_lock held by the caller,
5173448Sdh155122  * and returns true to tell the caller it needs to re-evalute the
5183448Sdh155122  * state..
5193448Sdh155122  */
5203448Sdh155122 static boolean_t
5213448Sdh155122 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
5223448Sdh155122 {
5233448Sdh155122 	netstackid_t stackid;
5243448Sdh155122 	void * netstack_module;
5253448Sdh155122 
5263448Sdh155122 	ASSERT(MUTEX_HELD(lockp));
5273448Sdh155122 	mutex_enter(&ns->netstack_lock);
5283448Sdh155122 	if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
5293448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
5303448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
5313448Sdh155122 		DTRACE_PROBE2(netstack__shutdown__inprogress,
5323448Sdh155122 		    netstack_t *, ns, int, moduleid);
5333448Sdh155122 		mutex_exit(&ns->netstack_lock);
5343448Sdh155122 		mutex_exit(lockp);
5353448Sdh155122 
5363448Sdh155122 		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
5373448Sdh155122 		stackid = ns->netstack_stackid;
5383448Sdh155122 		netstack_module = ns->netstack_modules[moduleid];
5393448Sdh155122 		DTRACE_PROBE2(netstack__shutdown__start,
5403448Sdh155122 		    netstackid_t, stackid,
5413448Sdh155122 		    void *, netstack_module);
5423448Sdh155122 		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
5433448Sdh155122 		DTRACE_PROBE1(netstack__shutdown__end,
5443448Sdh155122 		    netstack_t *, ns);
5453448Sdh155122 
5463448Sdh155122 		mutex_enter(&ns->netstack_lock);
5473448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
5483448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
5493448Sdh155122 		DTRACE_PROBE2(netstack__shutdown__completed,
5503448Sdh155122 		    netstack_t *, ns, int, moduleid);
5513448Sdh155122 		mutex_exit(&ns->netstack_lock);
5523448Sdh155122 		return (B_TRUE);
5533448Sdh155122 	} else {
5543448Sdh155122 		mutex_exit(&ns->netstack_lock);
5553448Sdh155122 		return (B_FALSE);
5563448Sdh155122 	}
5573448Sdh155122 }
5583448Sdh155122 
5593448Sdh155122 /*
5603448Sdh155122  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
5613448Sdh155122  * is set.
5623448Sdh155122  * When it calls it, it drops the netstack_lock held by the caller,
5633448Sdh155122  * and returns true to tell the caller it needs to re-evalute the
5643448Sdh155122  * state..
5653448Sdh155122  */
5663448Sdh155122 static boolean_t
5673448Sdh155122 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
5683448Sdh155122 {
5693448Sdh155122 	netstackid_t stackid;
5703448Sdh155122 	void * netstack_module;
5713448Sdh155122 
5723448Sdh155122 	ASSERT(MUTEX_HELD(lockp));
5733448Sdh155122 	mutex_enter(&ns->netstack_lock);
5743448Sdh155122 	if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
5753448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
5763448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
5773448Sdh155122 		DTRACE_PROBE2(netstack__destroy__inprogress,
5783448Sdh155122 		    netstack_t *, ns, int, moduleid);
5793448Sdh155122 		mutex_exit(&ns->netstack_lock);
5803448Sdh155122 		mutex_exit(lockp);
5813448Sdh155122 
5823448Sdh155122 		/* XXX race against unregister? */
5833448Sdh155122 		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
5843448Sdh155122 		stackid = ns->netstack_stackid;
5853448Sdh155122 		netstack_module = ns->netstack_modules[moduleid];
5863448Sdh155122 		DTRACE_PROBE2(netstack__destroy__start,
5873448Sdh155122 		    netstackid_t, stackid,
5883448Sdh155122 		    void *, netstack_module);
5893448Sdh155122 		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
5903448Sdh155122 		DTRACE_PROBE1(netstack__destroy__end,
5913448Sdh155122 		    netstack_t *, ns);
5923448Sdh155122 
5933448Sdh155122 		mutex_enter(&ns->netstack_lock);
5943448Sdh155122 		ns->netstack_modules[moduleid] = NULL;
5953448Sdh155122 		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
5963448Sdh155122 		ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
5973448Sdh155122 		DTRACE_PROBE2(netstack__destroy__completed,
5983448Sdh155122 		    netstack_t *, ns, int, moduleid);
5993448Sdh155122 		mutex_exit(&ns->netstack_lock);
6003448Sdh155122 		return (B_TRUE);
6013448Sdh155122 	} else {
6023448Sdh155122 		mutex_exit(&ns->netstack_lock);
6033448Sdh155122 		return (B_FALSE);
6043448Sdh155122 	}
6053448Sdh155122 }
6063448Sdh155122 
6073448Sdh155122 static void
6083448Sdh155122 apply_loop(netstack_t **headp, kmutex_t *lockp,
6093448Sdh155122     boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
6103448Sdh155122 {
6113448Sdh155122 	netstack_t *ns;
6123448Sdh155122 	int i;
6133448Sdh155122 	boolean_t lock_dropped, result;
6143448Sdh155122 
6153448Sdh155122 	lock_dropped = B_FALSE;
6163448Sdh155122 	ns = *headp;
6173448Sdh155122 	while (ns != NULL) {
6183448Sdh155122 		for (i = 0; i < NS_MAX; i++) {
6193448Sdh155122 			result = (applyfn)(lockp, ns, i);
6203448Sdh155122 			if (result) {
6213448Sdh155122 #ifdef NS_DEBUG
6223448Sdh155122 				(void) printf("netstack_do_apply: "
6233448Sdh155122 				    "LD for %p/%d, %d\n",
6243448Sdh155122 				    (void *)ns, ns->netstack_stackid, i);
6253448Sdh155122 #endif
6263448Sdh155122 				lock_dropped = B_TRUE;
6273448Sdh155122 				mutex_enter(lockp);
6283448Sdh155122 			}
6293448Sdh155122 		}
6303448Sdh155122 		/*
6313448Sdh155122 		 * If at least one applyfn call caused lockp to be dropped,
6323448Sdh155122 		 * then we don't follow netstack_next after reacquiring the
6333448Sdh155122 		 * lock, even if it is possible to do so without any hazards.
6343448Sdh155122 		 * This is because we want the design to allow for the list of
6353448Sdh155122 		 * netstacks threaded by netstack_next to change in any
6363448Sdh155122 		 * arbitrary way during the time the 'lockp' was dropped.
6373448Sdh155122 		 *
6383448Sdh155122 		 * It is safe to restart the loop at *headp since
6393448Sdh155122 		 * the applyfn changes netstack_m_state as it processes
6403448Sdh155122 		 * things, so a subsequent pass through will have no
6413448Sdh155122 		 * effect in applyfn, hence the loop will terminate
6423448Sdh155122 		 * in at worst O(N^2).
6433448Sdh155122 		 */
6443448Sdh155122 		if (lock_dropped) {
6453448Sdh155122 #ifdef NS_DEBUG
6463448Sdh155122 			(void) printf("netstack_do_apply: "
6473448Sdh155122 			    "Lock Dropped for %p/%d, %d\n",
6483448Sdh155122 			    (void *)ns, ns->netstack_stackid, i);
6493448Sdh155122 #endif
6503448Sdh155122 			lock_dropped = B_FALSE;
6513448Sdh155122 			ns = *headp;
6523448Sdh155122 		} else {
6533448Sdh155122 			ns = ns->netstack_next;
6543448Sdh155122 		}
6553448Sdh155122 	}
6563448Sdh155122 }
6573448Sdh155122 
6583448Sdh155122 /* Like above, but in the reverse order of moduleids */
6593448Sdh155122 static void
6603448Sdh155122 apply_loop_reverse(netstack_t **headp, kmutex_t *lockp,
6613448Sdh155122     boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
6623448Sdh155122 {
6633448Sdh155122 	netstack_t *ns;
6643448Sdh155122 	int i;
6653448Sdh155122 	boolean_t lock_dropped, result;
6663448Sdh155122 
6673448Sdh155122 	lock_dropped = B_FALSE;
6683448Sdh155122 	ns = *headp;
6693448Sdh155122 	while (ns != NULL) {
6703448Sdh155122 		for (i = NS_MAX-1; i >= 0; i--) {
6713448Sdh155122 			result = (applyfn)(lockp, ns, i);
6723448Sdh155122 			if (result) {
6733448Sdh155122 #ifdef NS_DEBUG
6743448Sdh155122 				(void) printf("netstack_do_apply: "
6753448Sdh155122 				    "LD for %p/%d, %d\n",
6763448Sdh155122 				    (void *)ns, ns->netstack_stackid, i);
6773448Sdh155122 #endif
6783448Sdh155122 				lock_dropped = B_TRUE;
6793448Sdh155122 				mutex_enter(lockp);
6803448Sdh155122 			}
6813448Sdh155122 		}
6823448Sdh155122 		/*
6833448Sdh155122 		 * If at least one applyfn call caused lockp to be dropped,
6843448Sdh155122 		 * then we don't follow netstack_next after reacquiring the
6853448Sdh155122 		 * lock, even if it is possible to do so without any hazards.
6863448Sdh155122 		 * This is because we want the design to allow for the list of
6873448Sdh155122 		 * netstacks threaded by netstack_next to change in any
6883448Sdh155122 		 * arbitrary way during the time the 'lockp' was dropped.
6893448Sdh155122 		 *
6903448Sdh155122 		 * It is safe to restart the loop at *headp since
6913448Sdh155122 		 * the applyfn changes netstack_m_state as it processes
6923448Sdh155122 		 * things, so a subsequent pass through will have no
6933448Sdh155122 		 * effect in applyfn, hence the loop will terminate
6943448Sdh155122 		 * in at worst O(N^2).
6953448Sdh155122 		 */
6963448Sdh155122 		if (lock_dropped) {
6973448Sdh155122 #ifdef NS_DEBUG
6983448Sdh155122 			(void) printf("netstack_do_apply: "
6993448Sdh155122 			    "Lock Dropped for %p/%d, %d\n",
7003448Sdh155122 			    (void *)ns, ns->netstack_stackid, i);
7013448Sdh155122 #endif
7023448Sdh155122 			lock_dropped = B_FALSE;
7033448Sdh155122 			ns = *headp;
7043448Sdh155122 		} else {
7053448Sdh155122 			ns = ns->netstack_next;
7063448Sdh155122 		}
7073448Sdh155122 	}
7083448Sdh155122 }
7093448Sdh155122 
7103448Sdh155122 /*
7113448Sdh155122  * Apply a function to all module/netstack combinations.
7123448Sdh155122  * The applyfn returns true if it had dropped the locks.
7133448Sdh155122  */
7143448Sdh155122 static void
7153448Sdh155122 netstack_do_apply(int reverse,
7163448Sdh155122     boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
7173448Sdh155122 {
7183448Sdh155122 	mutex_enter(&netstack_g_lock);
7193448Sdh155122 	if (reverse)
7203448Sdh155122 		apply_loop_reverse(&netstack_head, &netstack_g_lock, applyfn);
7213448Sdh155122 	else
7223448Sdh155122 		apply_loop(&netstack_head, &netstack_g_lock, applyfn);
7233448Sdh155122 	mutex_exit(&netstack_g_lock);
7243448Sdh155122 }
7253448Sdh155122 
/*
 * Run the create function for all modules x stack combinations
 * that have NSS_CREATE_NEEDED set.
 *
 * Call the create function for each stack that has CREATE_NEEDED.
 * Set CREATE_INPROGRESS, drop lock, and after done,
 * set CREATE_COMPLETE
 *
 * The per-combination work is done by netstack_apply_create; this wrapper
 * only asks netstack_do_apply() for a non-reversed walk.
 */
static void
netstack_do_create(void)
{
	netstack_do_apply(B_FALSE, netstack_apply_create);
}
7393448Sdh155122 
/*
 * Run the shutdown function for all modules x stack combinations
 * that have NSS_SHUTDOWN_NEEDED set.
 *
 * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
 * Set SHUTDOWN_INPROGRESS, drop lock, and after done,
 * set SHUTDOWN_COMPLETE
 *
 * The per-combination work is done by netstack_apply_shutdown; this wrapper
 * only asks netstack_do_apply() for a non-reversed walk.
 */
static void
netstack_do_shutdown(void)
{
	netstack_do_apply(B_FALSE, netstack_apply_shutdown);
}
7533448Sdh155122 
/*
 * Run the destroy function for all modules x stack combinations
 * that have NSS_DESTROY_NEEDED set.
 *
 * Call the destroy function for each stack that has DESTROY_NEEDED.
 * Set DESTROY_INPROGRESS, drop lock, and after done,
 * set DESTROY_COMPLETE
 *
 * Since a netstack_t is never reused (when a zone is rebooted it gets
 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave
 * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set.
 *
 * The per-combination work is done by netstack_apply_destroy.
 */
static void
netstack_do_destroy(void)
{
	/*
	 * Have to walk the moduleids in reverse order since some
	 * modules make implicit assumptions about the order.
	 * Passing B_TRUE makes netstack_do_apply() use the reverse walker.
	 */
	netstack_do_apply(B_TRUE, netstack_apply_destroy);
}
7753448Sdh155122 
7763448Sdh155122 /*
7773448Sdh155122  * Get the stack instance used in caller's zone.
7783448Sdh155122  * Increases the reference count, caller must do a netstack_rele.
7793448Sdh155122  * It can't be called after zone_destroy() has started.
7803448Sdh155122  */
781*4136Snordmark netstack_t *
7823448Sdh155122 netstack_get_current(void)
7833448Sdh155122 {
7843448Sdh155122 	netstack_t *ns;
7853448Sdh155122 
7863448Sdh155122 	ns = curproc->p_zone->zone_netstack;
7873448Sdh155122 	ASSERT(ns != NULL);
7883448Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
7893448Sdh155122 		return (NULL);
7903448Sdh155122 
7913448Sdh155122 	netstack_hold(ns);
7923448Sdh155122 
7933448Sdh155122 	return (ns);
7943448Sdh155122 }
7953448Sdh155122 
7963448Sdh155122 /*
7973448Sdh155122  * Find a stack instance given the cred.
7983448Sdh155122  * This is used by the modules to potentially allow for a future when
7993448Sdh155122  * something other than the zoneid is used to determine the stack.
8003448Sdh155122  */
8013448Sdh155122 netstack_t *
8023448Sdh155122 netstack_find_by_cred(const cred_t *cr)
8033448Sdh155122 {
8043448Sdh155122 	zoneid_t zoneid = crgetzoneid(cr);
8053448Sdh155122 
8063448Sdh155122 	/* Handle the case when cr_zone is NULL */
8073448Sdh155122 	if (zoneid == (zoneid_t)-1)
8083448Sdh155122 		zoneid = GLOBAL_ZONEID;
8093448Sdh155122 
8103448Sdh155122 	/* For performance ... */
8113448Sdh155122 	if (curproc->p_zone->zone_id == zoneid)
8123448Sdh155122 		return (netstack_get_current());
8133448Sdh155122 	else
8143448Sdh155122 		return (netstack_find_by_zoneid(zoneid));
8153448Sdh155122 }
8163448Sdh155122 
8173448Sdh155122 /*
8183448Sdh155122  * Find a stack instance given the zoneid.
8193448Sdh155122  * Increases the reference count if found; caller must do a
8203448Sdh155122  * netstack_rele().
8213448Sdh155122  *
8223448Sdh155122  * If there is no exact match then assume the shared stack instance
8233448Sdh155122  * matches.
8243448Sdh155122  *
8253448Sdh155122  * Skip the unitialized ones.
8263448Sdh155122  */
8273448Sdh155122 netstack_t *
8283448Sdh155122 netstack_find_by_zoneid(zoneid_t zoneid)
8293448Sdh155122 {
8303448Sdh155122 	netstack_t *ns;
8313448Sdh155122 	zone_t *zone;
8323448Sdh155122 
8333448Sdh155122 	zone = zone_find_by_id(zoneid);
8343448Sdh155122 
8353448Sdh155122 	if (zone == NULL)
8363448Sdh155122 		return (NULL);
8373448Sdh155122 
8383448Sdh155122 	ns = zone->zone_netstack;
8393448Sdh155122 	ASSERT(ns != NULL);
8403448Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
8413448Sdh155122 		ns = NULL;
8423448Sdh155122 	else
8433448Sdh155122 		netstack_hold(ns);
8443448Sdh155122 
8453448Sdh155122 	zone_rele(zone);
8463448Sdh155122 	return (ns);
8473448Sdh155122 }
8483448Sdh155122 
8493448Sdh155122 /*
8503448Sdh155122  * Find a stack instance given the zoneid.
8513448Sdh155122  * Increases the reference count if found; caller must do a
8523448Sdh155122  * netstack_rele().
8533448Sdh155122  *
8543448Sdh155122  * If there is no exact match then assume the shared stack instance
8553448Sdh155122  * matches.
8563448Sdh155122  *
8573448Sdh155122  * Skip the unitialized ones.
8583448Sdh155122  *
8593448Sdh155122  * NOTE: The caller must hold zonehash_lock.
8603448Sdh155122  */
8613448Sdh155122 netstack_t *
8623448Sdh155122 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
8633448Sdh155122 {
8643448Sdh155122 	netstack_t *ns;
8653448Sdh155122 	zone_t *zone;
8663448Sdh155122 
8673448Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
8683448Sdh155122 
8693448Sdh155122 	if (zone == NULL)
8703448Sdh155122 		return (NULL);
8713448Sdh155122 
8723448Sdh155122 	ns = zone->zone_netstack;
8733448Sdh155122 	ASSERT(ns != NULL);
8743448Sdh155122 
8753448Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
8763448Sdh155122 		ns = NULL;
8773448Sdh155122 	else
8783448Sdh155122 		netstack_hold(ns);
8793448Sdh155122 
8803448Sdh155122 	zone_rele(zone);
8813448Sdh155122 	return (ns);
8823448Sdh155122 }
8833448Sdh155122 
8843448Sdh155122 /*
8853448Sdh155122  * Find a stack instance given the stackid with exact match?
8863448Sdh155122  * Increases the reference count if found; caller must do a
8873448Sdh155122  * netstack_rele().
8883448Sdh155122  *
8893448Sdh155122  * Skip the unitialized ones.
8903448Sdh155122  */
8913448Sdh155122 netstack_t *
8923448Sdh155122 netstack_find_by_stackid(netstackid_t stackid)
8933448Sdh155122 {
8943448Sdh155122 	netstack_t *ns;
8953448Sdh155122 
8963448Sdh155122 	mutex_enter(&netstack_g_lock);
8973448Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
8983448Sdh155122 		mutex_enter(&ns->netstack_lock);
8993448Sdh155122 		if (ns->netstack_stackid == stackid &&
9003448Sdh155122 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
9013448Sdh155122 			mutex_exit(&ns->netstack_lock);
9023448Sdh155122 			netstack_hold(ns);
9033448Sdh155122 			mutex_exit(&netstack_g_lock);
9043448Sdh155122 			return (ns);
9053448Sdh155122 		}
9063448Sdh155122 		mutex_exit(&ns->netstack_lock);
9073448Sdh155122 	}
9083448Sdh155122 	mutex_exit(&netstack_g_lock);
9093448Sdh155122 	return (NULL);
9103448Sdh155122 }
9113448Sdh155122 
9123448Sdh155122 void
9133448Sdh155122 netstack_rele(netstack_t *ns)
9143448Sdh155122 {
9153448Sdh155122 	netstack_t **nsp;
9163448Sdh155122 	boolean_t found;
9173448Sdh155122 	int refcnt, numzones;
9183448Sdh155122 
9193448Sdh155122 	mutex_enter(&ns->netstack_lock);
9203448Sdh155122 	ASSERT(ns->netstack_refcnt > 0);
9213448Sdh155122 	ns->netstack_refcnt--;
9223448Sdh155122 	/*
9233448Sdh155122 	 * As we drop the lock additional netstack_rele()s can come in
9243448Sdh155122 	 * and decrement the refcnt to zero and free the netstack_t.
9253448Sdh155122 	 * Store pointers in local variables and if we were not the last
9263448Sdh155122 	 * then don't reference the netstack_t after that.
9273448Sdh155122 	 */
9283448Sdh155122 	refcnt = ns->netstack_refcnt;
9293448Sdh155122 	numzones = ns->netstack_numzones;
9303448Sdh155122 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
9313448Sdh155122 	mutex_exit(&ns->netstack_lock);
9323448Sdh155122 
9333448Sdh155122 	if (refcnt == 0 && numzones == 0) {
9343448Sdh155122 		/*
9353448Sdh155122 		 * Time to call the destroy functions and free up
9363448Sdh155122 		 * the structure
9373448Sdh155122 		 */
9383448Sdh155122 		netstack_stack_inactive(ns);
9393448Sdh155122 
9403448Sdh155122 		/* Finally remove from list of netstacks */
9413448Sdh155122 		mutex_enter(&netstack_g_lock);
9423448Sdh155122 		found = B_FALSE;
9433448Sdh155122 		for (nsp = &netstack_head; *nsp != NULL;
9443448Sdh155122 		    nsp = &(*nsp)->netstack_next) {
9453448Sdh155122 			if (*nsp == ns) {
9463448Sdh155122 				*nsp = ns->netstack_next;
9473448Sdh155122 				ns->netstack_next = NULL;
9483448Sdh155122 				found = B_TRUE;
9493448Sdh155122 				break;
9503448Sdh155122 			}
9513448Sdh155122 		}
9523448Sdh155122 		ASSERT(found);
9533448Sdh155122 		mutex_exit(&netstack_g_lock);
9543448Sdh155122 
9553448Sdh155122 		ASSERT(ns->netstack_flags & NSF_CLOSING);
9563448Sdh155122 		kmem_free(ns, sizeof (*ns));
9573448Sdh155122 	}
9583448Sdh155122 }
9593448Sdh155122 
/*
 * Take a reference on a netstack.
 *
 * netstack_refcnt is incremented while holding the per-stack
 * netstack_lock; the netstack__inc__ref DTrace probe fires after the lock
 * has been dropped.  netstack_rele() is the counterpart that decrements
 * the count.
 */
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	/* Sanity check: the count must be positive after the increment. */
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
9693448Sdh155122 
9703448Sdh155122 /*
9713448Sdh155122  * To support kstat_create_netstack() using kstat_zone_add we need
9723448Sdh155122  * to track both
9733448Sdh155122  *  - all zoneids that use the global/shared stack
9743448Sdh155122  *  - all kstats that have been added for the shared stack
9753448Sdh155122  */
9763448Sdh155122 kstat_t *
9773448Sdh155122 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
9783448Sdh155122     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
9793448Sdh155122     netstackid_t ks_netstackid)
9803448Sdh155122 {
9813448Sdh155122 	kstat_t *ks;
9823448Sdh155122 
9833448Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
9843448Sdh155122 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
9853448Sdh155122 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
9863448Sdh155122 		if (ks != NULL)
9873448Sdh155122 			netstack_shared_kstat_add(ks);
9883448Sdh155122 		return (ks);
9893448Sdh155122 	} else {
9903448Sdh155122 		zoneid_t zoneid = ks_netstackid;
9913448Sdh155122 
9923448Sdh155122 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
9933448Sdh155122 			ks_class, ks_type, ks_ndata, ks_flags, zoneid));
9943448Sdh155122 	}
9953448Sdh155122 }
9963448Sdh155122 
9973448Sdh155122 void
9983448Sdh155122 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
9993448Sdh155122 {
10003448Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
10013448Sdh155122 		netstack_shared_kstat_remove(ks);
10023448Sdh155122 	}
10033448Sdh155122 	kstat_delete(ks);
10043448Sdh155122 }
10053448Sdh155122 
10063448Sdh155122 static void
10073448Sdh155122 netstack_shared_zone_add(zoneid_t zoneid)
10083448Sdh155122 {
10093448Sdh155122 	struct shared_zone_list *sz;
10103448Sdh155122 	struct shared_kstat_list *sk;
10113448Sdh155122 
10123448Sdh155122 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
10133448Sdh155122 	sz->sz_zoneid = zoneid;
10143448Sdh155122 
10153448Sdh155122 	/* Insert in list */
10163448Sdh155122 	mutex_enter(&netstack_shared_lock);
10173448Sdh155122 	sz->sz_next = netstack_shared_zones;
10183448Sdh155122 	netstack_shared_zones = sz;
10193448Sdh155122 
10203448Sdh155122 	/*
10213448Sdh155122 	 * Perform kstat_zone_add for each existing shared stack kstat.
10223448Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
10233448Sdh155122 	 */
10243448Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
10253448Sdh155122 		kstat_zone_add(sk->sk_kstat, zoneid);
10263448Sdh155122 	}
10273448Sdh155122 	mutex_exit(&netstack_shared_lock);
10283448Sdh155122 }
10293448Sdh155122 
10303448Sdh155122 static void
10313448Sdh155122 netstack_shared_zone_remove(zoneid_t zoneid)
10323448Sdh155122 {
10333448Sdh155122 	struct shared_zone_list **szp, *sz;
10343448Sdh155122 	struct shared_kstat_list *sk;
10353448Sdh155122 
10363448Sdh155122 	/* Find in list */
10373448Sdh155122 	mutex_enter(&netstack_shared_lock);
10383448Sdh155122 	sz = NULL;
10393448Sdh155122 	for (szp = &netstack_shared_zones; *szp != NULL;
10403448Sdh155122 	    szp = &((*szp)->sz_next)) {
10413448Sdh155122 		if ((*szp)->sz_zoneid == zoneid) {
10423448Sdh155122 			sz = *szp;
10433448Sdh155122 			break;
10443448Sdh155122 		}
10453448Sdh155122 	}
10463448Sdh155122 	/* We must find it */
10473448Sdh155122 	ASSERT(sz != NULL);
10483448Sdh155122 	*szp = sz->sz_next;
10493448Sdh155122 	sz->sz_next = NULL;
10503448Sdh155122 
10513448Sdh155122 	/*
10523448Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
10533448Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
10543448Sdh155122 	 */
10553448Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
10563448Sdh155122 		kstat_zone_remove(sk->sk_kstat, zoneid);
10573448Sdh155122 	}
10583448Sdh155122 	mutex_exit(&netstack_shared_lock);
10593448Sdh155122 
10603448Sdh155122 	kmem_free(sz, sizeof (*sz));
10613448Sdh155122 }
10623448Sdh155122 
10633448Sdh155122 static void
10643448Sdh155122 netstack_shared_kstat_add(kstat_t *ks)
10653448Sdh155122 {
10663448Sdh155122 	struct shared_zone_list *sz;
10673448Sdh155122 	struct shared_kstat_list *sk;
10683448Sdh155122 
10693448Sdh155122 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
10703448Sdh155122 	sk->sk_kstat = ks;
10713448Sdh155122 
10723448Sdh155122 	/* Insert in list */
10733448Sdh155122 	mutex_enter(&netstack_shared_lock);
10743448Sdh155122 	sk->sk_next = netstack_shared_kstats;
10753448Sdh155122 	netstack_shared_kstats = sk;
10763448Sdh155122 
10773448Sdh155122 	/*
10783448Sdh155122 	 * Perform kstat_zone_add for each existing shared stack zone.
10793448Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
10803448Sdh155122 	 */
10813448Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
10823448Sdh155122 		kstat_zone_add(ks, sz->sz_zoneid);
10833448Sdh155122 	}
10843448Sdh155122 	mutex_exit(&netstack_shared_lock);
10853448Sdh155122 }
10863448Sdh155122 
10873448Sdh155122 static void
10883448Sdh155122 netstack_shared_kstat_remove(kstat_t *ks)
10893448Sdh155122 {
10903448Sdh155122 	struct shared_zone_list *sz;
10913448Sdh155122 	struct shared_kstat_list **skp, *sk;
10923448Sdh155122 
10933448Sdh155122 	/* Find in list */
10943448Sdh155122 	mutex_enter(&netstack_shared_lock);
10953448Sdh155122 	sk = NULL;
10963448Sdh155122 	for (skp = &netstack_shared_kstats; *skp != NULL;
10973448Sdh155122 	    skp = &((*skp)->sk_next)) {
10983448Sdh155122 		if ((*skp)->sk_kstat == ks) {
10993448Sdh155122 			sk = *skp;
11003448Sdh155122 			break;
11013448Sdh155122 		}
11023448Sdh155122 	}
11033448Sdh155122 	/* Must find it */
11043448Sdh155122 	ASSERT(sk != NULL);
11053448Sdh155122 	*skp = sk->sk_next;
11063448Sdh155122 	sk->sk_next = NULL;
11073448Sdh155122 
11083448Sdh155122 	/*
11093448Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
11103448Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
11113448Sdh155122 	 */
11123448Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
11133448Sdh155122 		kstat_zone_remove(ks, sz->sz_zoneid);
11143448Sdh155122 	}
11153448Sdh155122 	mutex_exit(&netstack_shared_lock);
11163448Sdh155122 	kmem_free(sk, sizeof (*sk));
11173448Sdh155122 }
11183448Sdh155122 
11193448Sdh155122 /*
11203448Sdh155122  * If a zoneid is part of the shared zone, return true
11213448Sdh155122  */
11223448Sdh155122 static boolean_t
11233448Sdh155122 netstack_find_shared_zoneid(zoneid_t zoneid)
11243448Sdh155122 {
11253448Sdh155122 	struct shared_zone_list *sz;
11263448Sdh155122 
11273448Sdh155122 	mutex_enter(&netstack_shared_lock);
11283448Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
11293448Sdh155122 		if (sz->sz_zoneid == zoneid) {
11303448Sdh155122 			mutex_exit(&netstack_shared_lock);
11313448Sdh155122 			return (B_TRUE);
11323448Sdh155122 		}
11333448Sdh155122 	}
11343448Sdh155122 	mutex_exit(&netstack_shared_lock);
11353448Sdh155122 	return (B_FALSE);
11363448Sdh155122 }
11373448Sdh155122 
/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * XXX could add checks that the stackid/zoneids are valid...
 *
 * Convert a netstackid to the corresponding zoneid.  The value is
 * currently returned unchanged.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}
11483448Sdh155122 
11493448Sdh155122 netstackid_t
11503448Sdh155122 zoneid_to_netstackid(zoneid_t zoneid)
11513448Sdh155122 {
11523448Sdh155122 	if (netstack_find_shared_zoneid(zoneid))
11533448Sdh155122 		return (GLOBAL_ZONEID);
11543448Sdh155122 	else
11553448Sdh155122 		return (zoneid);
11563448Sdh155122 }
11573448Sdh155122 
/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 *
 * netstack_next_init() resets the handle to zero so that the walk starts
 * at the beginning.
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}
11763448Sdh155122 
/*
 * Finish a walk started with netstack_next_init().
 * Currently a no-op; the handle is not touched.
 */
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
11823448Sdh155122 
11833448Sdh155122 netstack_t *
11843448Sdh155122 netstack_next(netstack_handle_t *handle)
11853448Sdh155122 {
11863448Sdh155122 	netstack_t *ns;
11873448Sdh155122 	int i, end;
11883448Sdh155122 
11893448Sdh155122 	end = *handle;
11903448Sdh155122 	/* Walk skipping *handle number of instances */
11913448Sdh155122 
11923448Sdh155122 	/* Look if there is a matching stack instance */
11933448Sdh155122 	mutex_enter(&netstack_g_lock);
11943448Sdh155122 	ns = netstack_head;
11953448Sdh155122 	for (i = 0; i < end; i++) {
11963448Sdh155122 		if (ns == NULL)
11973448Sdh155122 			break;
11983448Sdh155122 		ns = ns->netstack_next;
11993448Sdh155122 	}
12003448Sdh155122 	/* skip those with that aren't really here */
12013448Sdh155122 	while (ns != NULL) {
12023448Sdh155122 		mutex_enter(&ns->netstack_lock);
12033448Sdh155122 		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
12043448Sdh155122 			mutex_exit(&ns->netstack_lock);
12053448Sdh155122 			break;
12063448Sdh155122 		}
12073448Sdh155122 		mutex_exit(&ns->netstack_lock);
12083448Sdh155122 		end++;
12093448Sdh155122 		ns = ns->netstack_next;
12103448Sdh155122 	}
12113448Sdh155122 	if (ns != NULL) {
12123448Sdh155122 		*handle = end + 1;
12133448Sdh155122 		netstack_hold(ns);
12143448Sdh155122 	}
12153448Sdh155122 	mutex_exit(&netstack_g_lock);
12163448Sdh155122 	return (ns);
12173448Sdh155122 }
1218