10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211735Skcpoon 
220Sstevel@tonic-gate /*
23*11849SErik.Nordmark@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/systm.h>
290Sstevel@tonic-gate #include <sys/stream.h>
300Sstevel@tonic-gate #include <sys/cmn_err.h>
310Sstevel@tonic-gate #include <sys/kmem.h>
320Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
330Sstevel@tonic-gate #include <sys/tihdr.h>
340Sstevel@tonic-gate #include <sys/stropts.h>
350Sstevel@tonic-gate #include <sys/socket.h>
360Sstevel@tonic-gate #include <sys/random.h>
370Sstevel@tonic-gate #include <sys/policy.h>
381676Sjpk #include <sys/tsol/tndb.h>
391676Sjpk #include <sys/tsol/tnet.h>
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #include <netinet/in.h>
420Sstevel@tonic-gate #include <netinet/ip6.h>
430Sstevel@tonic-gate 
440Sstevel@tonic-gate #include <inet/common.h>
450Sstevel@tonic-gate #include <inet/ip.h>
460Sstevel@tonic-gate #include <inet/ip6.h>
470Sstevel@tonic-gate #include <inet/ipclassifier.h>
480Sstevel@tonic-gate #include "sctp_impl.h"
490Sstevel@tonic-gate #include "sctp_asconf.h"
500Sstevel@tonic-gate #include "sctp_addr.h"
510Sstevel@tonic-gate 
520Sstevel@tonic-gate /*
530Sstevel@tonic-gate  * Returns 0 on success, EACCES on permission failure.
540Sstevel@tonic-gate  */
550Sstevel@tonic-gate static int
560Sstevel@tonic-gate sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified)
570Sstevel@tonic-gate {
583448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
5911042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
603448Sdh155122 
610Sstevel@tonic-gate 	/*
620Sstevel@tonic-gate 	 * Get a valid port (within the anonymous range and should not
630Sstevel@tonic-gate 	 * be a privileged one) to use if the user has not given a port.
640Sstevel@tonic-gate 	 * If multiple threads are here, they may all start with
650Sstevel@tonic-gate 	 * with the same initial port. But, it should be fine as long as
660Sstevel@tonic-gate 	 * sctp_bindi will ensure that no two threads will be assigned
670Sstevel@tonic-gate 	 * the same port.
680Sstevel@tonic-gate 	 */
690Sstevel@tonic-gate 	if (*requested_port == 0) {
703448Sdh155122 		*requested_port = sctp_update_next_port(
713448Sdh155122 		    sctps->sctps_next_port_to_try,
7211042SErik.Nordmark@Sun.COM 		    crgetzone(connp->conn_cred), sctps);
731676Sjpk 		if (*requested_port == 0)
741676Sjpk 			return (EACCES);
750Sstevel@tonic-gate 		*user_specified = 0;
760Sstevel@tonic-gate 	} else {
770Sstevel@tonic-gate 		int i;
780Sstevel@tonic-gate 		boolean_t priv = B_FALSE;
790Sstevel@tonic-gate 
800Sstevel@tonic-gate 		/*
810Sstevel@tonic-gate 		 * If the requested_port is in the well-known privileged range,
820Sstevel@tonic-gate 		 * verify that the stream was opened by a privileged user.
830Sstevel@tonic-gate 		 * Note: No locks are held when inspecting sctp_g_*epriv_ports
840Sstevel@tonic-gate 		 * but instead the code relies on:
850Sstevel@tonic-gate 		 * - the fact that the address of the array and its size never
860Sstevel@tonic-gate 		 *   changes
870Sstevel@tonic-gate 		 * - the atomic assignment of the elements of the array
880Sstevel@tonic-gate 		 */
893448Sdh155122 		if (*requested_port < sctps->sctps_smallest_nonpriv_port) {
900Sstevel@tonic-gate 			priv = B_TRUE;
910Sstevel@tonic-gate 		} else {
923448Sdh155122 			for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) {
933448Sdh155122 				if (*requested_port ==
943448Sdh155122 				    sctps->sctps_g_epriv_ports[i]) {
950Sstevel@tonic-gate 					priv = B_TRUE;
960Sstevel@tonic-gate 					break;
970Sstevel@tonic-gate 				}
980Sstevel@tonic-gate 			}
990Sstevel@tonic-gate 		}
1000Sstevel@tonic-gate 		if (priv) {
1010Sstevel@tonic-gate 			/*
1020Sstevel@tonic-gate 			 * sctp_bind() should take a cred_t argument so that
1030Sstevel@tonic-gate 			 * we can use it here.
1040Sstevel@tonic-gate 			 */
10511042SErik.Nordmark@Sun.COM 			if (secpolicy_net_privaddr(connp->conn_cred,
1066134Scasper 			    *requested_port, IPPROTO_SCTP) != 0) {
1070Sstevel@tonic-gate 				dprint(1,
1080Sstevel@tonic-gate 				    ("sctp_bind(x): no prive for port %d",
1090Sstevel@tonic-gate 				    *requested_port));
1101676Sjpk 				return (EACCES);
1110Sstevel@tonic-gate 			}
1120Sstevel@tonic-gate 		}
1130Sstevel@tonic-gate 		*user_specified = 1;
1140Sstevel@tonic-gate 	}
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 	return (0);
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate int
1200Sstevel@tonic-gate sctp_listen(sctp_t *sctp)
1210Sstevel@tonic-gate {
1220Sstevel@tonic-gate 	sctp_tf_t	*tf;
1233448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
12411042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate 	RUN_SCTP(sctp);
1270Sstevel@tonic-gate 	/*
1280Sstevel@tonic-gate 	 * TCP handles listen() increasing the backlog, need to check
129852Svi117747 	 * if it should be handled here too
1300Sstevel@tonic-gate 	 */
1314505Skcpoon 	if (sctp->sctp_state > SCTPS_BOUND ||
1324505Skcpoon 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
1330Sstevel@tonic-gate 		WAKE_SCTP(sctp);
1340Sstevel@tonic-gate 		return (EINVAL);
1350Sstevel@tonic-gate 	}
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate 	/* Do an anonymous bind for unbound socket doing listen(). */
1380Sstevel@tonic-gate 	if (sctp->sctp_nsaddrs == 0) {
1390Sstevel@tonic-gate 		struct sockaddr_storage ss;
1400Sstevel@tonic-gate 		int ret;
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 		bzero(&ss, sizeof (ss));
14311042SErik.Nordmark@Sun.COM 		ss.ss_family = connp->conn_family;
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate 		WAKE_SCTP(sctp);
1460Sstevel@tonic-gate 		if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
1474505Skcpoon 		    sizeof (ss))) != 0)
1480Sstevel@tonic-gate 			return (ret);
1490Sstevel@tonic-gate 		RUN_SCTP(sctp)
1500Sstevel@tonic-gate 	}
1510Sstevel@tonic-gate 
15211042SErik.Nordmark@Sun.COM 	/* Cache things in the ixa without any refhold */
153*11849SErik.Nordmark@Sun.COM 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
15411042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_cred = connp->conn_cred;
15511042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
15611042SErik.Nordmark@Sun.COM 	if (is_system_labeled())
15711042SErik.Nordmark@Sun.COM 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
15811042SErik.Nordmark@Sun.COM 
1590Sstevel@tonic-gate 	sctp->sctp_state = SCTPS_LISTEN;
1600Sstevel@tonic-gate 	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
16111066Srafael.vanoni@sun.com 	sctp->sctp_last_secret_update = ddi_get_lbolt64();
1620Sstevel@tonic-gate 	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);
1633448Sdh155122 	tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(
16411042SErik.Nordmark@Sun.COM 	    ntohs(connp->conn_lport))];
1650Sstevel@tonic-gate 	sctp_listen_hash_insert(tf, sctp);
1660Sstevel@tonic-gate 	WAKE_SCTP(sctp);
1670Sstevel@tonic-gate 	return (0);
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate /*
1710Sstevel@tonic-gate  * Bind the sctp_t to a sockaddr, which includes an address and other
1720Sstevel@tonic-gate  * information, such as port or flowinfo.
1730Sstevel@tonic-gate  */
1740Sstevel@tonic-gate int
1750Sstevel@tonic-gate sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len)
1760Sstevel@tonic-gate {
1770Sstevel@tonic-gate 	int		user_specified;
1780Sstevel@tonic-gate 	boolean_t	bind_to_req_port_only;
1790Sstevel@tonic-gate 	in_port_t	requested_port;
1800Sstevel@tonic-gate 	in_port_t	allocated_port;
1810Sstevel@tonic-gate 	int		err = 0;
18211042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
18311042SErik.Nordmark@Sun.COM 	uint_t		scope_id;
18411042SErik.Nordmark@Sun.COM 	sin_t		*sin;
18511042SErik.Nordmark@Sun.COM 	sin6_t		*sin6;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate 	ASSERT(sctp != NULL);
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate 	RUN_SCTP(sctp);
1900Sstevel@tonic-gate 
1918348SEric.Yu@Sun.COM 	if ((sctp->sctp_state >= SCTPS_BOUND) ||
1928348SEric.Yu@Sun.COM 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING) ||
1938348SEric.Yu@Sun.COM 	    (sa == NULL || len == 0)) {
1948348SEric.Yu@Sun.COM 		/*
1958348SEric.Yu@Sun.COM 		 * Multiple binds not allowed for any SCTP socket
1968348SEric.Yu@Sun.COM 		 * Also binding with null address is not supported.
1978348SEric.Yu@Sun.COM 		 */
1980Sstevel@tonic-gate 		err = EINVAL;
1990Sstevel@tonic-gate 		goto done;
2000Sstevel@tonic-gate 	}
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	switch (sa->sa_family) {
2030Sstevel@tonic-gate 	case AF_INET:
20411042SErik.Nordmark@Sun.COM 		sin = (sin_t *)sa;
2050Sstevel@tonic-gate 		if (len < sizeof (struct sockaddr_in) ||
20611042SErik.Nordmark@Sun.COM 		    connp->conn_family == AF_INET6) {
20711042SErik.Nordmark@Sun.COM 			err = EINVAL;
20811042SErik.Nordmark@Sun.COM 			goto done;
20911042SErik.Nordmark@Sun.COM 		}
21011042SErik.Nordmark@Sun.COM 		requested_port = ntohs(sin->sin_port);
21111042SErik.Nordmark@Sun.COM 		break;
21211042SErik.Nordmark@Sun.COM 	case AF_INET6:
21311042SErik.Nordmark@Sun.COM 		sin6 = (sin6_t *)sa;
21411042SErik.Nordmark@Sun.COM 		if (len < sizeof (struct sockaddr_in6) ||
21511042SErik.Nordmark@Sun.COM 		    connp->conn_family == AF_INET) {
2160Sstevel@tonic-gate 			err = EINVAL;
2170Sstevel@tonic-gate 			goto done;
2180Sstevel@tonic-gate 		}
21911042SErik.Nordmark@Sun.COM 		requested_port = ntohs(sin6->sin6_port);
22011042SErik.Nordmark@Sun.COM 		/* Set the flowinfo. */
22111042SErik.Nordmark@Sun.COM 		connp->conn_flowinfo =
22211042SErik.Nordmark@Sun.COM 		    sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK;
22311042SErik.Nordmark@Sun.COM 
22411042SErik.Nordmark@Sun.COM 		scope_id = sin6->sin6_scope_id;
22511042SErik.Nordmark@Sun.COM 		if (scope_id != 0 && IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
22611042SErik.Nordmark@Sun.COM 			connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
22711042SErik.Nordmark@Sun.COM 			connp->conn_ixa->ixa_scopeid = scope_id;
22811042SErik.Nordmark@Sun.COM 			connp->conn_incoming_ifindex = scope_id;
22911042SErik.Nordmark@Sun.COM 		} else {
23011042SErik.Nordmark@Sun.COM 			connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
23111042SErik.Nordmark@Sun.COM 			connp->conn_incoming_ifindex = connp->conn_bound_if;
2320Sstevel@tonic-gate 		}
2330Sstevel@tonic-gate 		break;
2340Sstevel@tonic-gate 	default:
2350Sstevel@tonic-gate 		err = EAFNOSUPPORT;
2360Sstevel@tonic-gate 		goto done;
2370Sstevel@tonic-gate 	}
2380Sstevel@tonic-gate 	bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE;
2390Sstevel@tonic-gate 
2401676Sjpk 	err = sctp_select_port(sctp, &requested_port, &user_specified);
2411676Sjpk 	if (err != 0)
2420Sstevel@tonic-gate 		goto done;
2430Sstevel@tonic-gate 
244852Svi117747 	if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE,
245852Svi117747 	    user_specified == 1 ? htons(requested_port) : 0)) != 0) {
2460Sstevel@tonic-gate 		goto done;
247852Svi117747 	}
2481676Sjpk 	err = sctp_bindi(sctp, requested_port, bind_to_req_port_only,
2491676Sjpk 	    user_specified, &allocated_port);
2501676Sjpk 	if (err != 0) {
2510Sstevel@tonic-gate 		sctp_free_saddrs(sctp);
2521676Sjpk 	} else {
2531676Sjpk 		ASSERT(sctp->sctp_state == SCTPS_BOUND);
2540Sstevel@tonic-gate 	}
2550Sstevel@tonic-gate done:
2560Sstevel@tonic-gate 	WAKE_SCTP(sctp);
2570Sstevel@tonic-gate 	return (err);
2580Sstevel@tonic-gate }
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate /*
2610Sstevel@tonic-gate  * Perform bind/unbind operation of a list of addresses on a sctp_t
2620Sstevel@tonic-gate  */
2630Sstevel@tonic-gate int
2640Sstevel@tonic-gate sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop)
2650Sstevel@tonic-gate {
2660Sstevel@tonic-gate 	ASSERT(sctp != NULL);
2670Sstevel@tonic-gate 	ASSERT(addrs != NULL);
2680Sstevel@tonic-gate 	ASSERT(addrcnt > 0);
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 	switch (bindop) {
2710Sstevel@tonic-gate 	case SCTP_BINDX_ADD_ADDR:
272852Svi117747 		return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE,
27311042SErik.Nordmark@Sun.COM 		    sctp->sctp_connp->conn_lport));
2740Sstevel@tonic-gate 	case SCTP_BINDX_REM_ADDR:
2750Sstevel@tonic-gate 		return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE));
2760Sstevel@tonic-gate 	default:
2770Sstevel@tonic-gate 		return (EINVAL);
2780Sstevel@tonic-gate 	}
2790Sstevel@tonic-gate }
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate /*
2820Sstevel@tonic-gate  * Add a list of addresses to a sctp_t.
2830Sstevel@tonic-gate  */
2840Sstevel@tonic-gate int
2850Sstevel@tonic-gate sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
286852Svi117747     boolean_t caller_hold_lock, in_port_t port)
2870Sstevel@tonic-gate {
2880Sstevel@tonic-gate 	int		err = 0;
2890Sstevel@tonic-gate 	boolean_t	do_asconf = B_FALSE;
2903448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
29111042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	if (!caller_hold_lock)
2940Sstevel@tonic-gate 		RUN_SCTP(sctp);
2950Sstevel@tonic-gate 
2964505Skcpoon 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
2974505Skcpoon 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
2980Sstevel@tonic-gate 		if (!caller_hold_lock)
2990Sstevel@tonic-gate 			WAKE_SCTP(sctp);
3000Sstevel@tonic-gate 		return (EINVAL);
3010Sstevel@tonic-gate 	}
302252Svi117747 
303252Svi117747 	if (sctp->sctp_state > SCTPS_LISTEN) {
304252Svi117747 		/*
305252Svi117747 		 * Let's do some checking here rather than undoing the
306252Svi117747 		 * add later (for these reasons).
307252Svi117747 		 */
3083448Sdh155122 		if (!sctps->sctps_addip_enabled ||
3093448Sdh155122 		    !sctp->sctp_understands_asconf ||
310252Svi117747 		    !sctp->sctp_understands_addip) {
311252Svi117747 			if (!caller_hold_lock)
312252Svi117747 				WAKE_SCTP(sctp);
313252Svi117747 			return (EINVAL);
314252Svi117747 		}
3150Sstevel@tonic-gate 		do_asconf = B_TRUE;
316252Svi117747 	}
317852Svi117747 	/*
318852Svi117747 	 * On a clustered node, for an inaddr_any bind, we will pass the list
319852Svi117747 	 * of all the addresses in the global list, minus any address on the
320852Svi117747 	 * loopback interface, and expect the clustering susbsystem to give us
321852Svi117747 	 * the correct list for the 'port'. For explicit binds we give the
322852Svi117747 	 * list of addresses  and the clustering module validates it for the
323852Svi117747 	 * 'port'.
324852Svi117747 	 *
325852Svi117747 	 * On a non-clustered node, cl_sctp_check_addrs will be NULL and
326852Svi117747 	 * we proceed as usual.
327852Svi117747 	 */
328852Svi117747 	if (cl_sctp_check_addrs != NULL) {
329852Svi117747 		uchar_t		*addrlist = NULL;
330852Svi117747 		size_t		size = 0;
331852Svi117747 		int		unspec = 0;
332852Svi117747 		boolean_t	do_listen;
333852Svi117747 		uchar_t		*llist = NULL;
334852Svi117747 		size_t		lsize = 0;
335852Svi117747 
336852Svi117747 		/*
337852Svi117747 		 * If we are adding addresses after listening, but before
338852Svi117747 		 * an association is established, we need to update the
339852Svi117747 		 * clustering module with this info.
340852Svi117747 		 */
341852Svi117747 		do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND &&
342852Svi117747 		    cl_sctp_listen != NULL;
343852Svi117747 
344852Svi117747 		err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist,
345852Svi117747 		    &unspec, &size);
346852Svi117747 		if (err != 0) {
347852Svi117747 			ASSERT(addrlist == NULL);
348852Svi117747 			ASSERT(addrcnt == 0);
349852Svi117747 			ASSERT(size == 0);
350852Svi117747 			if (!caller_hold_lock)
351852Svi117747 				WAKE_SCTP(sctp);
3523448Sdh155122 			SCTP_KSTAT(sctps, sctp_cl_check_addrs);
353852Svi117747 			return (err);
354852Svi117747 		}
355852Svi117747 		ASSERT(addrlist != NULL);
35611042SErik.Nordmark@Sun.COM 		(*cl_sctp_check_addrs)(connp->conn_family, port, &addrlist,
357852Svi117747 		    size, &addrcnt, unspec == 1);
358852Svi117747 		if (addrcnt == 0) {
359852Svi117747 			/* We free the list */
360852Svi117747 			kmem_free(addrlist, size);
361852Svi117747 			if (!caller_hold_lock)
362852Svi117747 				WAKE_SCTP(sctp);
363852Svi117747 			return (EINVAL);
364852Svi117747 		}
365852Svi117747 		if (do_listen) {
366852Svi117747 			lsize = sizeof (in6_addr_t) * addrcnt;
367852Svi117747 			llist = kmem_alloc(lsize, KM_SLEEP);
368852Svi117747 		}
369852Svi117747 		err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist,
370852Svi117747 		    lsize);
371852Svi117747 		if (err == 0 && do_listen) {
37211042SErik.Nordmark@Sun.COM 			(*cl_sctp_listen)(connp->conn_family, llist,
37311042SErik.Nordmark@Sun.COM 			    addrcnt, connp->conn_lport);
374852Svi117747 			/* list will be freed by the clustering module */
375852Svi117747 		} else if (err != 0 && llist != NULL) {
376852Svi117747 			kmem_free(llist, lsize);
377852Svi117747 		}
378852Svi117747 		/* free the list we allocated */
379852Svi117747 		kmem_free(addrlist, size);
380852Svi117747 	} else {
381852Svi117747 		err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0);
382852Svi117747 	}
3830Sstevel@tonic-gate 	if (err != 0) {
3840Sstevel@tonic-gate 		if (!caller_hold_lock)
3850Sstevel@tonic-gate 			WAKE_SCTP(sctp);
3860Sstevel@tonic-gate 		return (err);
3870Sstevel@tonic-gate 	}
3880Sstevel@tonic-gate 	/* Need to send  ASCONF messages */
3890Sstevel@tonic-gate 	if (do_asconf) {
3900Sstevel@tonic-gate 		err = sctp_add_ip(sctp, addrs, addrcnt);
3910Sstevel@tonic-gate 		if (err != 0) {
3920Sstevel@tonic-gate 			sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE);
3930Sstevel@tonic-gate 			if (!caller_hold_lock)
3940Sstevel@tonic-gate 				WAKE_SCTP(sctp);
3950Sstevel@tonic-gate 			return (err);
3960Sstevel@tonic-gate 		}
3970Sstevel@tonic-gate 	}
3980Sstevel@tonic-gate 	if (!caller_hold_lock)
3990Sstevel@tonic-gate 		WAKE_SCTP(sctp);
4000Sstevel@tonic-gate 	return (0);
4010Sstevel@tonic-gate }
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate /*
4040Sstevel@tonic-gate  * Remove one or more addresses bound to the sctp_t.
4050Sstevel@tonic-gate  */
4060Sstevel@tonic-gate int
4070Sstevel@tonic-gate sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
4080Sstevel@tonic-gate     boolean_t caller_hold_lock)
4090Sstevel@tonic-gate {
4100Sstevel@tonic-gate 	int		error = 0;
4110Sstevel@tonic-gate 	boolean_t	do_asconf = B_FALSE;
412852Svi117747 	uchar_t		*ulist = NULL;
413852Svi117747 	size_t		usize = 0;
4143448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
41511042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
4160Sstevel@tonic-gate 
4170Sstevel@tonic-gate 	if (!caller_hold_lock)
4180Sstevel@tonic-gate 		RUN_SCTP(sctp);
4190Sstevel@tonic-gate 
4204505Skcpoon 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
4214505Skcpoon 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
4220Sstevel@tonic-gate 		if (!caller_hold_lock)
4230Sstevel@tonic-gate 			WAKE_SCTP(sctp);
4240Sstevel@tonic-gate 		return (EINVAL);
4250Sstevel@tonic-gate 	}
426252Svi117747 	/*
427252Svi117747 	 * Fail the remove if we are beyond listen, but can't send this
428252Svi117747 	 * to the peer.
429252Svi117747 	 */
430252Svi117747 	if (sctp->sctp_state > SCTPS_LISTEN) {
4313448Sdh155122 		if (!sctps->sctps_addip_enabled ||
4323448Sdh155122 		    !sctp->sctp_understands_asconf ||
433252Svi117747 		    !sctp->sctp_understands_addip) {
434252Svi117747 			if (!caller_hold_lock)
435252Svi117747 				WAKE_SCTP(sctp);
436252Svi117747 			return (EINVAL);
437252Svi117747 		}
4380Sstevel@tonic-gate 		do_asconf = B_TRUE;
439252Svi117747 	}
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	/* Can't delete the last address nor all of the addresses */
4420Sstevel@tonic-gate 	if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) {
4430Sstevel@tonic-gate 		if (!caller_hold_lock)
4440Sstevel@tonic-gate 			WAKE_SCTP(sctp);
4450Sstevel@tonic-gate 		return (EINVAL);
4460Sstevel@tonic-gate 	}
4470Sstevel@tonic-gate 
448852Svi117747 	if (cl_sctp_unlisten != NULL && !do_asconf &&
449852Svi117747 	    sctp->sctp_state > SCTPS_BOUND) {
450852Svi117747 		usize = sizeof (in6_addr_t) * addrcnt;
451852Svi117747 		ulist = kmem_alloc(usize, KM_SLEEP);
452852Svi117747 	}
453852Svi117747 
454852Svi117747 	error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize);
455852Svi117747 	if (error != 0) {
456852Svi117747 		if (ulist != NULL)
457852Svi117747 			kmem_free(ulist, usize);
458852Svi117747 		if (!caller_hold_lock)
459852Svi117747 			WAKE_SCTP(sctp);
460852Svi117747 		return (error);
461852Svi117747 	}
462852Svi117747 	/* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */
463852Svi117747 	if (ulist != NULL) {
464852Svi117747 		ASSERT(cl_sctp_unlisten != NULL);
46511042SErik.Nordmark@Sun.COM 		(*cl_sctp_unlisten)(connp->conn_family, ulist, addrcnt,
46611042SErik.Nordmark@Sun.COM 		    connp->conn_lport);
467852Svi117747 		/* ulist will be freed by the clustering module */
468852Svi117747 	}
4690Sstevel@tonic-gate 	if (!caller_hold_lock)
4700Sstevel@tonic-gate 		WAKE_SCTP(sctp);
4710Sstevel@tonic-gate 	return (error);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate /*
4751676Sjpk  * Returns 0 for success, errno value otherwise.
4761676Sjpk  *
4771676Sjpk  * If the "bind_to_req_port_only" parameter is set and the requested port
4781676Sjpk  * number is available, then set allocated_port to it.  If not available,
4791676Sjpk  * return an error.
4800Sstevel@tonic-gate  *
4811676Sjpk  * If the "bind_to_req_port_only" parameter is not set and the requested port
4821676Sjpk  * number is available, then set allocated_port to it.  If not available,
4831676Sjpk  * find the first anonymous port we can and set allocated_port to that.  If no
4841676Sjpk  * anonymous ports are available, return an error.
4850Sstevel@tonic-gate  *
4861676Sjpk  * In either case, when succeeding, update the sctp_t to record the port number
4870Sstevel@tonic-gate  * and insert it in the bind hash table.
4880Sstevel@tonic-gate  */
4891676Sjpk int
4901676Sjpk sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only,
4911676Sjpk     int user_specified, in_port_t *allocated_port)
4920Sstevel@tonic-gate {
4930Sstevel@tonic-gate 	/* number of times we have run around the loop */
4940Sstevel@tonic-gate 	int count = 0;
4950Sstevel@tonic-gate 	/* maximum number of times to run around the loop */
4960Sstevel@tonic-gate 	int loopmax;
4973448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
49811042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
49911042SErik.Nordmark@Sun.COM 	zone_t *zone = crgetzone(connp->conn_cred);
50011042SErik.Nordmark@Sun.COM 	zoneid_t zoneid = connp->conn_zoneid;
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate 	/*
5030Sstevel@tonic-gate 	 * Lookup for free addresses is done in a loop and "loopmax"
5040Sstevel@tonic-gate 	 * influences how long we spin in the loop
5050Sstevel@tonic-gate 	 */
5060Sstevel@tonic-gate 	if (bind_to_req_port_only) {
5070Sstevel@tonic-gate 		/*
5080Sstevel@tonic-gate 		 * If the requested port is busy, don't bother to look
5090Sstevel@tonic-gate 		 * for a new one. Setting loop maximum count to 1 has
5100Sstevel@tonic-gate 		 * that effect.
5110Sstevel@tonic-gate 		 */
5120Sstevel@tonic-gate 		loopmax = 1;
5130Sstevel@tonic-gate 	} else {
5140Sstevel@tonic-gate 		/*
5150Sstevel@tonic-gate 		 * If the requested port is busy, look for a free one
5160Sstevel@tonic-gate 		 * in the anonymous port range.
5170Sstevel@tonic-gate 		 * Set loopmax appropriately so that one does not look
5180Sstevel@tonic-gate 		 * forever in the case all of the anonymous ports are in use.
5190Sstevel@tonic-gate 		 */
5203448Sdh155122 		loopmax = (sctps->sctps_largest_anon_port -
5213448Sdh155122 		    sctps->sctps_smallest_anon_port + 1);
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 	do {
5240Sstevel@tonic-gate 		uint16_t	lport;
5250Sstevel@tonic-gate 		sctp_tf_t	*tbf;
5260Sstevel@tonic-gate 		sctp_t		*lsctp;
5270Sstevel@tonic-gate 		int		addrcmp;
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 		lport = htons(port);
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 		/*
5320Sstevel@tonic-gate 		 * Ensure that the sctp_t is not currently in the bind hash.
5330Sstevel@tonic-gate 		 * Hold the lock on the hash bucket to ensure that
5340Sstevel@tonic-gate 		 * the duplicate check plus the insertion is an atomic
5350Sstevel@tonic-gate 		 * operation.
5360Sstevel@tonic-gate 		 *
5370Sstevel@tonic-gate 		 * This function does an inline lookup on the bind hash list
5380Sstevel@tonic-gate 		 * Make sure that we access only members of sctp_t
5390Sstevel@tonic-gate 		 * and that we don't look at sctp_sctp, since we are not
5400Sstevel@tonic-gate 		 * doing a SCTPB_REFHOLD. For more details please see the notes
5410Sstevel@tonic-gate 		 * in sctp_compress()
5420Sstevel@tonic-gate 		 */
5430Sstevel@tonic-gate 		sctp_bind_hash_remove(sctp);
5443448Sdh155122 		tbf = &sctps->sctps_bind_fanout[SCTP_BIND_HASH(port)];
5450Sstevel@tonic-gate 		mutex_enter(&tbf->tf_lock);
5460Sstevel@tonic-gate 		for (lsctp = tbf->tf_sctp; lsctp != NULL;
5470Sstevel@tonic-gate 		    lsctp = lsctp->sctp_bind_hash) {
54811042SErik.Nordmark@Sun.COM 			conn_t *lconnp = lsctp->sctp_connp;
5490Sstevel@tonic-gate 
55011042SErik.Nordmark@Sun.COM 			if (lport != lconnp->conn_lport ||
5510Sstevel@tonic-gate 			    lsctp->sctp_state < SCTPS_BOUND)
5520Sstevel@tonic-gate 				continue;
5530Sstevel@tonic-gate 
5541676Sjpk 			/*
5551676Sjpk 			 * On a labeled system, we must treat bindings to ports
5561676Sjpk 			 * on shared IP addresses by sockets with MAC exemption
5571676Sjpk 			 * privilege as being in all zones, as there's
5581676Sjpk 			 * otherwise no way to identify the right receiver.
5591676Sjpk 			 */
56011042SErik.Nordmark@Sun.COM 			if (lconnp->conn_zoneid != zoneid &&
56111042SErik.Nordmark@Sun.COM 			    lconnp->conn_mac_mode == CONN_MAC_DEFAULT &&
56211042SErik.Nordmark@Sun.COM 			    connp->conn_mac_mode == CONN_MAC_DEFAULT)
5631676Sjpk 				continue;
5641676Sjpk 
5650Sstevel@tonic-gate 			addrcmp = sctp_compare_saddrs(sctp, lsctp);
5660Sstevel@tonic-gate 			if (addrcmp != SCTP_ADDR_DISJOINT) {
56711042SErik.Nordmark@Sun.COM 				if (!connp->conn_reuseaddr) {
5680Sstevel@tonic-gate 					/* in use */
5690Sstevel@tonic-gate 					break;
5700Sstevel@tonic-gate 				} else if (lsctp->sctp_state == SCTPS_BOUND ||
5710Sstevel@tonic-gate 				    lsctp->sctp_state == SCTPS_LISTEN) {
5720Sstevel@tonic-gate 					/*
5730Sstevel@tonic-gate 					 * socket option SO_REUSEADDR is set
5740Sstevel@tonic-gate 					 * on the binding sctp_t.
5750Sstevel@tonic-gate 					 *
5760Sstevel@tonic-gate 					 * We have found a match of IP source
5770Sstevel@tonic-gate 					 * address and source port, which is
5780Sstevel@tonic-gate 					 * refused regardless of the
5790Sstevel@tonic-gate 					 * SO_REUSEADDR setting, so we break.
5800Sstevel@tonic-gate 					 */
5810Sstevel@tonic-gate 					break;
5820Sstevel@tonic-gate 				}
5830Sstevel@tonic-gate 			}
5840Sstevel@tonic-gate 		}
5850Sstevel@tonic-gate 		if (lsctp != NULL) {
5860Sstevel@tonic-gate 			/* The port number is busy */
5870Sstevel@tonic-gate 			mutex_exit(&tbf->tf_lock);
5880Sstevel@tonic-gate 		} else {
5891676Sjpk 			if (is_system_labeled()) {
5901676Sjpk 				mlp_type_t addrtype, mlptype;
59111042SErik.Nordmark@Sun.COM 				uint_t ipversion;
5921676Sjpk 
5931676Sjpk 				/*
5941676Sjpk 				 * On a labeled system we must check the type
5951676Sjpk 				 * of the binding requested by the user (either
5961676Sjpk 				 * MLP or SLP on shared and private addresses),
5971676Sjpk 				 * and that the user's requested binding
5981676Sjpk 				 * is permitted.
5991676Sjpk 				 */
60011042SErik.Nordmark@Sun.COM 				if (connp->conn_family == AF_INET)
60111042SErik.Nordmark@Sun.COM 					ipversion = IPV4_VERSION;
60211042SErik.Nordmark@Sun.COM 				else
60311042SErik.Nordmark@Sun.COM 					ipversion = IPV6_VERSION;
60411042SErik.Nordmark@Sun.COM 
60510352Sdanmcd@sun.com 				addrtype = tsol_mlp_addr_type(
60610352Sdanmcd@sun.com 				    connp->conn_allzones ? ALL_ZONES :
60710352Sdanmcd@sun.com 				    zone->zone_id,
60811042SErik.Nordmark@Sun.COM 				    ipversion,
60911042SErik.Nordmark@Sun.COM 				    connp->conn_family == AF_INET ?
6101676Sjpk 				    (void *)&sctp->sctp_ipha->ipha_src :
6113448Sdh155122 				    (void *)&sctp->sctp_ip6h->ip6_src,
6123448Sdh155122 				    sctps->sctps_netstack->netstack_ip);
6131676Sjpk 
6141676Sjpk 				/*
6151676Sjpk 				 * tsol_mlp_addr_type returns the possibilities
6161676Sjpk 				 * for the selected address.  Since all local
6171676Sjpk 				 * addresses are either private or shared, the
6181676Sjpk 				 * return value mlptSingle means "local address
6191676Sjpk 				 * not valid (interface not present)."
6201676Sjpk 				 */
6211676Sjpk 				if (addrtype == mlptSingle) {
6221676Sjpk 					mutex_exit(&tbf->tf_lock);
6231676Sjpk 					return (EADDRNOTAVAIL);
6241676Sjpk 				}
6251676Sjpk 				mlptype = tsol_mlp_port_type(zone, IPPROTO_SCTP,
6261676Sjpk 				    port, addrtype);
6271676Sjpk 				if (mlptype != mlptSingle) {
6281676Sjpk 					if (secpolicy_net_bindmlp(connp->
6291676Sjpk 					    conn_cred) != 0) {
6301676Sjpk 						mutex_exit(&tbf->tf_lock);
6311676Sjpk 						return (EACCES);
6321676Sjpk 					}
6331676Sjpk 					/*
6341676Sjpk 					 * If we're binding a shared MLP, then
6351676Sjpk 					 * make sure that this zone is the one
6361676Sjpk 					 * that owns that MLP.  Shared MLPs can
6371676Sjpk 					 * be owned by at most one zone.
6383448Sdh155122 					 *
6393448Sdh155122 					 * No need to handle exclusive-stack
6403448Sdh155122 					 * zones since ALL_ZONES only applies
6413448Sdh155122 					 * to the shared stack.
6421676Sjpk 					 */
6431676Sjpk 
6441676Sjpk 					if (mlptype == mlptShared &&
6451676Sjpk 					    addrtype == mlptShared &&
6461676Sjpk 					    connp->conn_zoneid !=
6471676Sjpk 					    tsol_mlp_findzone(IPPROTO_SCTP,
6481676Sjpk 					    lport)) {
6491676Sjpk 						mutex_exit(&tbf->tf_lock);
6501676Sjpk 						return (EACCES);
6511676Sjpk 					}
6521676Sjpk 					connp->conn_mlp_type = mlptype;
6531676Sjpk 				}
6541676Sjpk 			}
6550Sstevel@tonic-gate 			/*
6560Sstevel@tonic-gate 			 * This port is ours. Insert in fanout and mark as
6570Sstevel@tonic-gate 			 * bound to prevent others from getting the port
6580Sstevel@tonic-gate 			 * number.
6590Sstevel@tonic-gate 			 */
6600Sstevel@tonic-gate 			sctp->sctp_state = SCTPS_BOUND;
66111042SErik.Nordmark@Sun.COM 			connp->conn_lport = lport;
6620Sstevel@tonic-gate 
6633448Sdh155122 			ASSERT(&sctps->sctps_bind_fanout[
6644505Skcpoon 			    SCTP_BIND_HASH(port)] == tbf);
6650Sstevel@tonic-gate 			sctp_bind_hash_insert(tbf, sctp, 1);
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate 			mutex_exit(&tbf->tf_lock);
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 			/*
6700Sstevel@tonic-gate 			 * We don't want sctp_next_port_to_try to "inherit"
6710Sstevel@tonic-gate 			 * a port number supplied by the user in a bind.
6721676Sjpk 			 *
6730Sstevel@tonic-gate 			 * This is the only place where sctp_next_port_to_try
6740Sstevel@tonic-gate 			 * is updated. After the update, it may or may not
6750Sstevel@tonic-gate 			 * be in the valid range.
6760Sstevel@tonic-gate 			 */
6771676Sjpk 			if (user_specified == 0)
6783448Sdh155122 				sctps->sctps_next_port_to_try = port + 1;
6791676Sjpk 
6801676Sjpk 			*allocated_port = port;
6811676Sjpk 
6821676Sjpk 			return (0);
6830Sstevel@tonic-gate 		}
6840Sstevel@tonic-gate 
6850Sstevel@tonic-gate 		if ((count == 0) && (user_specified)) {
6860Sstevel@tonic-gate 			/*
6870Sstevel@tonic-gate 			 * We may have to return an anonymous port. So
6880Sstevel@tonic-gate 			 * get one to start with.
6890Sstevel@tonic-gate 			 */
6903448Sdh155122 			port = sctp_update_next_port(
6913448Sdh155122 			    sctps->sctps_next_port_to_try,
6923448Sdh155122 			    zone, sctps);
6930Sstevel@tonic-gate 			user_specified = 0;
6940Sstevel@tonic-gate 		} else {
6953448Sdh155122 			port = sctp_update_next_port(port + 1, zone, sctps);
6960Sstevel@tonic-gate 		}
6971676Sjpk 		if (port == 0)
6981676Sjpk 			break;
6990Sstevel@tonic-gate 
7000Sstevel@tonic-gate 		/*
7010Sstevel@tonic-gate 		 * Don't let this loop run forever in the case where
7020Sstevel@tonic-gate 		 * all of the anonymous ports are in use.
7030Sstevel@tonic-gate 		 */
7040Sstevel@tonic-gate 	} while (++count < loopmax);
7051676Sjpk 
7061676Sjpk 	return (bind_to_req_port_only ? EADDRINUSE : EADDRNOTAVAIL);
7070Sstevel@tonic-gate }
7080Sstevel@tonic-gate 
7090Sstevel@tonic-gate /*
7100Sstevel@tonic-gate  * Don't let port fall into the privileged range.
7110Sstevel@tonic-gate  * Since the extra privileged ports can be arbitrary we also
7120Sstevel@tonic-gate  * ensure that we exclude those from consideration.
7130Sstevel@tonic-gate  * sctp_g_epriv_ports is not sorted thus we loop over it until
7140Sstevel@tonic-gate  * there are no changes.
7150Sstevel@tonic-gate  *
7160Sstevel@tonic-gate  * Note: No locks are held when inspecting sctp_g_*epriv_ports
7170Sstevel@tonic-gate  * but instead the code relies on:
7180Sstevel@tonic-gate  * - the fact that the address of the array and its size never changes
7190Sstevel@tonic-gate  * - the atomic assignment of the elements of the array
7200Sstevel@tonic-gate  */
7210Sstevel@tonic-gate in_port_t
7223448Sdh155122 sctp_update_next_port(in_port_t port, zone_t *zone, sctp_stack_t *sctps)
7230Sstevel@tonic-gate {
7240Sstevel@tonic-gate 	int i;
7251676Sjpk 	boolean_t restart = B_FALSE;
7260Sstevel@tonic-gate 
7270Sstevel@tonic-gate retry:
7283448Sdh155122 	if (port < sctps->sctps_smallest_anon_port)
7293448Sdh155122 		port = sctps->sctps_smallest_anon_port;
7300Sstevel@tonic-gate 
7313448Sdh155122 	if (port > sctps->sctps_largest_anon_port) {
7321676Sjpk 		if (restart)
7331676Sjpk 			return (0);
7341676Sjpk 		restart = B_TRUE;
7353448Sdh155122 		port = sctps->sctps_smallest_anon_port;
7361676Sjpk 	}
7371676Sjpk 
7383448Sdh155122 	if (port < sctps->sctps_smallest_nonpriv_port)
7393448Sdh155122 		port = sctps->sctps_smallest_nonpriv_port;
7400Sstevel@tonic-gate 
7413448Sdh155122 	for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) {
7423448Sdh155122 		if (port == sctps->sctps_g_epriv_ports[i]) {
7430Sstevel@tonic-gate 			port++;
7440Sstevel@tonic-gate 			/*
7450Sstevel@tonic-gate 			 * Make sure whether the port is in the
7460Sstevel@tonic-gate 			 * valid range.
7470Sstevel@tonic-gate 			 *
7480Sstevel@tonic-gate 			 * XXX Note that if sctp_g_epriv_ports contains
7490Sstevel@tonic-gate 			 * all the anonymous ports this will be an
7500Sstevel@tonic-gate 			 * infinite loop.
7510Sstevel@tonic-gate 			 */
7520Sstevel@tonic-gate 			goto retry;
7530Sstevel@tonic-gate 		}
7540Sstevel@tonic-gate 	}
7551676Sjpk 
7561676Sjpk 	if (is_system_labeled() &&
7571676Sjpk 	    (i = tsol_next_port(zone, port, IPPROTO_SCTP, B_TRUE)) != 0) {
7581676Sjpk 		port = i;
7591676Sjpk 		goto retry;
7601676Sjpk 	}
7611676Sjpk 
7620Sstevel@tonic-gate 	return (port);
7630Sstevel@tonic-gate }
764