10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211735Skcpoon 
220Sstevel@tonic-gate /*
238778SErik.Nordmark@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/systm.h>
290Sstevel@tonic-gate #include <sys/stream.h>
300Sstevel@tonic-gate #include <sys/cmn_err.h>
310Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
320Sstevel@tonic-gate #include <sys/tihdr.h>
330Sstevel@tonic-gate #include <sys/socket.h>
340Sstevel@tonic-gate #include <sys/stropts.h>
350Sstevel@tonic-gate #include <sys/strsun.h>
360Sstevel@tonic-gate #include <sys/strsubr.h>
370Sstevel@tonic-gate #include <sys/socketvar.h>
380Sstevel@tonic-gate #include <inet/common.h>
390Sstevel@tonic-gate #include <inet/mi.h>
400Sstevel@tonic-gate #include <inet/ip.h>
4111042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h>
420Sstevel@tonic-gate #include <inet/ip6.h>
430Sstevel@tonic-gate #include <inet/sctp_ip.h>
440Sstevel@tonic-gate #include <inet/ipclassifier.h>
450Sstevel@tonic-gate 
/*
 * PR-SCTP comments.
 *
 * A message can expire before it gets to the transmit list (i.e. it is still
 * in the unsent list - unchunked), after it gets to the transmit list, but
 * before transmission has actually started, or after transmission has begun.
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list, we need
 * to send a Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
 * decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the queued message.
 */
790Sstevel@tonic-gate 
800Sstevel@tonic-gate #include "sctp_impl.h"
810Sstevel@tonic-gate 
820Sstevel@tonic-gate static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
8310037SNick.Street@Sun.COM static mblk_t			*sctp_chunkify(sctp_t *, int, int, int);
840Sstevel@tonic-gate 
850Sstevel@tonic-gate #ifdef	DEBUG
860Sstevel@tonic-gate static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
870Sstevel@tonic-gate #endif
880Sstevel@tonic-gate 
890Sstevel@tonic-gate /*
900Sstevel@tonic-gate  * Called to allocate a header mblk when sending data to SCTP.
910Sstevel@tonic-gate  * Data will follow in b_cont of this mblk.
920Sstevel@tonic-gate  */
930Sstevel@tonic-gate mblk_t *
940Sstevel@tonic-gate sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
950Sstevel@tonic-gate     int flags)
960Sstevel@tonic-gate {
970Sstevel@tonic-gate 	mblk_t *mp;
980Sstevel@tonic-gate 	struct T_unitdata_req *tudr;
990Sstevel@tonic-gate 	size_t size;
1000Sstevel@tonic-gate 	int error;
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
1030Sstevel@tonic-gate 	size = MAX(size, sizeof (sctp_msg_hdr_t));
1040Sstevel@tonic-gate 	if (flags & SCTP_CAN_BLOCK) {
1050Sstevel@tonic-gate 		mp = allocb_wait(size, BPRI_MED, 0, &error);
1060Sstevel@tonic-gate 	} else {
1070Sstevel@tonic-gate 		mp = allocb(size, BPRI_MED);
1080Sstevel@tonic-gate 	}
1090Sstevel@tonic-gate 	if (mp) {
1100Sstevel@tonic-gate 		tudr = (struct T_unitdata_req *)mp->b_rptr;
1110Sstevel@tonic-gate 		tudr->PRIM_type = T_UNITDATA_REQ;
1120Sstevel@tonic-gate 		tudr->DEST_length = nlen;
1130Sstevel@tonic-gate 		tudr->DEST_offset = sizeof (*tudr);
1140Sstevel@tonic-gate 		tudr->OPT_length = clen;
1150Sstevel@tonic-gate 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
1160Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(nlen));
1170Sstevel@tonic-gate 		if (nlen > 0)
1180Sstevel@tonic-gate 			bcopy(name, tudr + 1, nlen);
1190Sstevel@tonic-gate 		if (clen > 0)
1200Sstevel@tonic-gate 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
1210Sstevel@tonic-gate 		mp->b_wptr += (tudr ->OPT_offset + clen);
1220Sstevel@tonic-gate 		mp->b_datap->db_type = M_PROTO;
1230Sstevel@tonic-gate 	}
1240Sstevel@tonic-gate 	return (mp);
1250Sstevel@tonic-gate }
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate /*ARGSUSED2*/
1280Sstevel@tonic-gate int
1290Sstevel@tonic-gate sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
1300Sstevel@tonic-gate {
1310Sstevel@tonic-gate 	sctp_faddr_t	*fp = NULL;
1320Sstevel@tonic-gate 	struct T_unitdata_req	*tudr;
1330Sstevel@tonic-gate 	int		error = 0;
1340Sstevel@tonic-gate 	mblk_t		*mproto = mp;
1350Sstevel@tonic-gate 	in6_addr_t	*addr;
1360Sstevel@tonic-gate 	in6_addr_t	tmpaddr;
1370Sstevel@tonic-gate 	uint16_t	sid = sctp->sctp_def_stream;
1380Sstevel@tonic-gate 	uint32_t	ppid = sctp->sctp_def_ppid;
1390Sstevel@tonic-gate 	uint32_t	context = sctp->sctp_def_context;
1400Sstevel@tonic-gate 	uint16_t	msg_flags = sctp->sctp_def_flags;
1410Sstevel@tonic-gate 	sctp_msg_hdr_t	*sctp_msg_hdr;
1420Sstevel@tonic-gate 	uint32_t	msg_len = 0;
1430Sstevel@tonic-gate 	uint32_t	timetolive = sctp->sctp_def_timetolive;
14411042SErik.Nordmark@Sun.COM 	conn_t		*connp = sctp->sctp_connp;
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate 	ASSERT(DB_TYPE(mproto) == M_PROTO);
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	mp = mp->b_cont;
1490Sstevel@tonic-gate 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
1520Sstevel@tonic-gate 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	/* Get destination address, if specified */
1550Sstevel@tonic-gate 	if (tudr->DEST_length > 0) {
1560Sstevel@tonic-gate 		sin_t *sin;
1570Sstevel@tonic-gate 		sin6_t *sin6;
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 		sin = (struct sockaddr_in *)
1600Sstevel@tonic-gate 		    (mproto->b_rptr + tudr->DEST_offset);
1610Sstevel@tonic-gate 		switch (sin->sin_family) {
1620Sstevel@tonic-gate 		case AF_INET:
1630Sstevel@tonic-gate 			if (tudr->DEST_length < sizeof (*sin)) {
1640Sstevel@tonic-gate 				return (EINVAL);
1650Sstevel@tonic-gate 			}
1660Sstevel@tonic-gate 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
1670Sstevel@tonic-gate 			addr = &tmpaddr;
1680Sstevel@tonic-gate 			break;
1690Sstevel@tonic-gate 		case AF_INET6:
1700Sstevel@tonic-gate 			if (tudr->DEST_length < sizeof (*sin6)) {
1710Sstevel@tonic-gate 				return (EINVAL);
1720Sstevel@tonic-gate 			}
1730Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)
1740Sstevel@tonic-gate 			    (mproto->b_rptr + tudr->DEST_offset);
1750Sstevel@tonic-gate 			addr = &sin6->sin6_addr;
1760Sstevel@tonic-gate 			break;
1770Sstevel@tonic-gate 		default:
1780Sstevel@tonic-gate 			return (EAFNOSUPPORT);
1790Sstevel@tonic-gate 		}
1800Sstevel@tonic-gate 		fp = sctp_lookup_faddr(sctp, addr);
1810Sstevel@tonic-gate 		if (fp == NULL) {
1820Sstevel@tonic-gate 			return (EINVAL);
1830Sstevel@tonic-gate 		}
1840Sstevel@tonic-gate 	}
1850Sstevel@tonic-gate 	/* Ancillary Data? */
1860Sstevel@tonic-gate 	if (tudr->OPT_length > 0) {
1870Sstevel@tonic-gate 		struct cmsghdr		*cmsg;
1880Sstevel@tonic-gate 		char			*cend;
1890Sstevel@tonic-gate 		struct sctp_sndrcvinfo	*sndrcv;
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
1920Sstevel@tonic-gate 		cend = ((char *)cmsg + tudr->OPT_length);
1930Sstevel@tonic-gate 		ASSERT(cend <= (char *)mproto->b_wptr);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 		for (;;) {
1960Sstevel@tonic-gate 			if ((char *)(cmsg + 1) > cend ||
1970Sstevel@tonic-gate 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
1980Sstevel@tonic-gate 				break;
1990Sstevel@tonic-gate 			}
2000Sstevel@tonic-gate 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
2010Sstevel@tonic-gate 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
2020Sstevel@tonic-gate 				if (cmsg->cmsg_len <
2030Sstevel@tonic-gate 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
2040Sstevel@tonic-gate 					return (EINVAL);
2050Sstevel@tonic-gate 				}
2060Sstevel@tonic-gate 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
2070Sstevel@tonic-gate 				sid = sndrcv->sinfo_stream;
2080Sstevel@tonic-gate 				msg_flags = sndrcv->sinfo_flags;
2090Sstevel@tonic-gate 				ppid = sndrcv->sinfo_ppid;
2100Sstevel@tonic-gate 				context = sndrcv->sinfo_context;
2110Sstevel@tonic-gate 				timetolive = sndrcv->sinfo_timetolive;
2120Sstevel@tonic-gate 				break;
2130Sstevel@tonic-gate 			}
2140Sstevel@tonic-gate 			if (cmsg->cmsg_len > 0)
2150Sstevel@tonic-gate 				cmsg = CMSG_NEXT(cmsg);
2160Sstevel@tonic-gate 			else
2170Sstevel@tonic-gate 				break;
2180Sstevel@tonic-gate 		}
2190Sstevel@tonic-gate 	}
2200Sstevel@tonic-gate 	if (msg_flags & MSG_ABORT) {
2210Sstevel@tonic-gate 		if (mp && mp->b_cont) {
2220Sstevel@tonic-gate 			mblk_t *pump = msgpullup(mp, -1);
2230Sstevel@tonic-gate 			if (!pump) {
2240Sstevel@tonic-gate 				return (ENOMEM);
2250Sstevel@tonic-gate 			}
2260Sstevel@tonic-gate 			freemsg(mp);
2270Sstevel@tonic-gate 			mp = pump;
2280Sstevel@tonic-gate 			mproto->b_cont = mp;
2290Sstevel@tonic-gate 		}
2300Sstevel@tonic-gate 		RUN_SCTP(sctp);
2317480SKacheong.Poon@Sun.COM 		sctp_user_abort(sctp, mp);
2320Sstevel@tonic-gate 		freemsg(mproto);
23311042SErik.Nordmark@Sun.COM 		goto done2;
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 	if (mp == NULL)
2360Sstevel@tonic-gate 		goto done;
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 	RUN_SCTP(sctp);
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	/* Reject any new data requests if we are shutting down */
2414505Skcpoon 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
2424505Skcpoon 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
2430Sstevel@tonic-gate 		error = EPIPE;
2440Sstevel@tonic-gate 		goto unlock_done;
2450Sstevel@tonic-gate 	}
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	/* Re-use the mproto to store relevant info. */
2480Sstevel@tonic-gate 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	mproto->b_rptr = mproto->b_datap->db_base;
2510Sstevel@tonic-gate 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
2540Sstevel@tonic-gate 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
2550Sstevel@tonic-gate 	sctp_msg_hdr->smh_context = context;
2560Sstevel@tonic-gate 	sctp_msg_hdr->smh_sid = sid;
2570Sstevel@tonic-gate 	sctp_msg_hdr->smh_ppid = ppid;
2580Sstevel@tonic-gate 	sctp_msg_hdr->smh_flags = msg_flags;
2590Sstevel@tonic-gate 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
260*11066Srafael.vanoni@sun.com 	sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
2610Sstevel@tonic-gate 	for (; mp != NULL; mp = mp->b_cont)
2620Sstevel@tonic-gate 		msg_len += MBLKL(mp);
2630Sstevel@tonic-gate 	sctp_msg_hdr->smh_msglen = msg_len;
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate 	/* User requested specific destination */
2660Sstevel@tonic-gate 	SCTP_SET_CHUNK_DEST(mproto, fp);
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
2690Sstevel@tonic-gate 	    sid >= sctp->sctp_num_ostr) {
2700Sstevel@tonic-gate 		/* Send sendfail event */
2710Sstevel@tonic-gate 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
2720Sstevel@tonic-gate 		    B_FALSE);
2730Sstevel@tonic-gate 		error = EINVAL;
2740Sstevel@tonic-gate 		goto unlock_done;
2750Sstevel@tonic-gate 	}
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	/* no data */
2780Sstevel@tonic-gate 	if (msg_len == 0) {
2790Sstevel@tonic-gate 		sctp_sendfail_event(sctp, dupmsg(mproto),
2800Sstevel@tonic-gate 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
2810Sstevel@tonic-gate 		error = EINVAL;
2820Sstevel@tonic-gate 		goto unlock_done;
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	/* Add it to the unsent list */
2860Sstevel@tonic-gate 	if (sctp->sctp_xmit_unsent == NULL) {
2870Sstevel@tonic-gate 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
2880Sstevel@tonic-gate 	} else {
2890Sstevel@tonic-gate 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
2900Sstevel@tonic-gate 		sctp->sctp_xmit_unsent_tail = mproto;
2910Sstevel@tonic-gate 	}
2920Sstevel@tonic-gate 	sctp->sctp_unsent += msg_len;
2930Sstevel@tonic-gate 	BUMP_LOCAL(sctp->sctp_msgcount);
2948348SEric.Yu@Sun.COM 	/*
2958348SEric.Yu@Sun.COM 	 * Notify sockfs if the tx queue is full.
2968348SEric.Yu@Sun.COM 	 */
29711042SErik.Nordmark@Sun.COM 	if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
2988348SEric.Yu@Sun.COM 		sctp->sctp_txq_full = 1;
2998348SEric.Yu@Sun.COM 		sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
3008348SEric.Yu@Sun.COM 	}
3010Sstevel@tonic-gate 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
3023795Skcpoon 		sctp_output(sctp, UINT_MAX);
30311042SErik.Nordmark@Sun.COM done2:
3040Sstevel@tonic-gate 	WAKE_SCTP(sctp);
3050Sstevel@tonic-gate 	return (0);
3060Sstevel@tonic-gate unlock_done:
3070Sstevel@tonic-gate 	WAKE_SCTP(sctp);
3080Sstevel@tonic-gate done:
3090Sstevel@tonic-gate 	return (error);
3100Sstevel@tonic-gate }
3110Sstevel@tonic-gate 
31210037SNick.Street@Sun.COM /*
31310037SNick.Street@Sun.COM  * While there are messages on sctp_xmit_unsent, detach each one. For each:
31410037SNick.Street@Sun.COM  * allocate space for the chunk header, fill in the data chunk, and fill in
31510037SNick.Street@Sun.COM  * the chunk header. Then append it to sctp_xmit_tail.
31610037SNick.Street@Sun.COM  * Return after appending as many bytes as required (bytes_to_send).
31710037SNick.Street@Sun.COM  * We also return if we've appended one or more chunks, and find a subsequent
31810037SNick.Street@Sun.COM  * unsent message is too big to fit in the segment.
31910037SNick.Street@Sun.COM  */
32010037SNick.Street@Sun.COM mblk_t *
32110037SNick.Street@Sun.COM sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
3220Sstevel@tonic-gate {
3230Sstevel@tonic-gate 	mblk_t			*mp;
3240Sstevel@tonic-gate 	mblk_t			*chunk_mp;
3250Sstevel@tonic-gate 	mblk_t			*chunk_head;
3260Sstevel@tonic-gate 	mblk_t			*chunk_hdr;
3270Sstevel@tonic-gate 	mblk_t			*chunk_tail = NULL;
3280Sstevel@tonic-gate 	int			count;
3290Sstevel@tonic-gate 	int			chunksize;
3300Sstevel@tonic-gate 	sctp_data_hdr_t		*sdc;
3310Sstevel@tonic-gate 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
3320Sstevel@tonic-gate 	sctp_faddr_t		*fp;
3330Sstevel@tonic-gate 	sctp_faddr_t		*fp1;
3340Sstevel@tonic-gate 	size_t			xtralen;
3350Sstevel@tonic-gate 	sctp_msg_hdr_t		*msg_hdr;
33610037SNick.Street@Sun.COM 	sctp_stack_t		*sctps = sctp->sctp_sctps;
33710037SNick.Street@Sun.COM 	sctp_msg_hdr_t		*next_msg_hdr;
33810037SNick.Street@Sun.COM 	size_t			nextlen;
33910037SNick.Street@Sun.COM 	int			remaining_len = mss - firstseg_len;
34010037SNick.Street@Sun.COM 
34110037SNick.Street@Sun.COM 	ASSERT(remaining_len >= 0);
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	fp = SCTP_CHUNK_DEST(mdblk);
3440Sstevel@tonic-gate 	if (fp == NULL)
3450Sstevel@tonic-gate 		fp = sctp->sctp_current;
3460Sstevel@tonic-gate 	if (fp->isv4)
3473448Sdh155122 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
3483448Sdh155122 		    sizeof (*sdc);
3490Sstevel@tonic-gate 	else
3503448Sdh155122 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
3513448Sdh155122 		    sizeof (*sdc);
35210037SNick.Street@Sun.COM 	count = chunksize = remaining_len - sizeof (*sdc);
3530Sstevel@tonic-gate nextmsg:
35410037SNick.Street@Sun.COM 	next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
35510037SNick.Street@Sun.COM 	nextlen = next_msg_hdr->smh_msglen;
35610037SNick.Street@Sun.COM 	/*
35710037SNick.Street@Sun.COM 	 * Will the entire next message fit in the current packet ?
35810037SNick.Street@Sun.COM 	 * if not, leave it on the unsent list.
35910037SNick.Street@Sun.COM 	 */
36010037SNick.Street@Sun.COM 	if ((firstseg_len != 0) && (nextlen > remaining_len))
36110037SNick.Street@Sun.COM 		return (NULL);
36210037SNick.Street@Sun.COM 
3630Sstevel@tonic-gate 	chunk_mp = mdblk->b_cont;
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	/*
36610037SNick.Street@Sun.COM 	 * If this partially chunked, we ignore the next one for now and
36710037SNick.Street@Sun.COM 	 * use the one already present. For the unchunked bits, we use the
36810037SNick.Street@Sun.COM 	 * length of the last chunk.
3690Sstevel@tonic-gate 	 */
3700Sstevel@tonic-gate 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
3710Sstevel@tonic-gate 		int	chunk_len;
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 		ASSERT(chunk_mp->b_next != NULL);
3740Sstevel@tonic-gate 		mdblk->b_cont = chunk_mp->b_next;
3750Sstevel@tonic-gate 		chunk_mp->b_next = NULL;
3760Sstevel@tonic-gate 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
3770Sstevel@tonic-gate 		mp = mdblk->b_cont;
3780Sstevel@tonic-gate 		while (mp->b_next != NULL)
3790Sstevel@tonic-gate 			mp = mp->b_next;
3800Sstevel@tonic-gate 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
3810Sstevel@tonic-gate 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
3820Sstevel@tonic-gate 			count = chunksize = fp->sfa_pmss - chunk_len;
3830Sstevel@tonic-gate 		else
3840Sstevel@tonic-gate 			count = chunksize = fp->sfa_pmss;
3850Sstevel@tonic-gate 		count = chunksize = count - sizeof (*sdc);
3860Sstevel@tonic-gate 	} else {
3870Sstevel@tonic-gate 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
3880Sstevel@tonic-gate 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
3890Sstevel@tonic-gate 			sctp->sctp_xmit_unsent = mdblk->b_next;
3900Sstevel@tonic-gate 			if (sctp->sctp_xmit_unsent == NULL)
3910Sstevel@tonic-gate 				sctp->sctp_xmit_unsent_tail = NULL;
3920Sstevel@tonic-gate 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
3930Sstevel@tonic-gate 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
3940Sstevel@tonic-gate 			mdblk->b_next = NULL;
3950Sstevel@tonic-gate 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
3960Sstevel@tonic-gate 			/*
3970Sstevel@tonic-gate 			 * Update ULP the amount of queued data, which is
3980Sstevel@tonic-gate 			 * sent-unack'ed + unsent.
3990Sstevel@tonic-gate 			 */
4008348SEric.Yu@Sun.COM 			if (!SCTP_IS_DETACHED(sctp))
4018348SEric.Yu@Sun.COM 				SCTP_TXQ_UPDATE(sctp);
4020Sstevel@tonic-gate 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
4030Sstevel@tonic-gate 			goto try_next;
4040Sstevel@tonic-gate 		}
4050Sstevel@tonic-gate 		mdblk->b_cont = NULL;
4060Sstevel@tonic-gate 	}
4070Sstevel@tonic-gate 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
4080Sstevel@tonic-gate nextchunk:
4090Sstevel@tonic-gate 	chunk_head = chunk_mp;
4100Sstevel@tonic-gate 	chunk_tail = NULL;
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	/* Skip as many mblk's as we need */
4130Sstevel@tonic-gate 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
4140Sstevel@tonic-gate 		count -= MBLKL(chunk_mp);
4150Sstevel@tonic-gate 		chunk_tail = chunk_mp;
4160Sstevel@tonic-gate 		chunk_mp = chunk_mp->b_cont;
4170Sstevel@tonic-gate 	}
4180Sstevel@tonic-gate 	/* Split the chain, if needed */
4190Sstevel@tonic-gate 	if (chunk_mp != NULL) {
4200Sstevel@tonic-gate 		if (count > 0) {
4210Sstevel@tonic-gate 			mblk_t	*split_mp = dupb(chunk_mp);
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 			if (split_mp == NULL) {
4240Sstevel@tonic-gate 				if (mdblk->b_cont == NULL) {
4250Sstevel@tonic-gate 					mdblk->b_cont = chunk_head;
4260Sstevel@tonic-gate 				} else  {
4270Sstevel@tonic-gate 					SCTP_MSG_SET_CHUNKED(mdblk);
4280Sstevel@tonic-gate 					ASSERT(chunk_head->b_next == NULL);
4290Sstevel@tonic-gate 					chunk_head->b_next = mdblk->b_cont;
4300Sstevel@tonic-gate 					mdblk->b_cont = chunk_head;
4310Sstevel@tonic-gate 				}
43210037SNick.Street@Sun.COM 				return (sctp->sctp_xmit_tail);
4330Sstevel@tonic-gate 			}
4340Sstevel@tonic-gate 			if (chunk_tail != NULL) {
4350Sstevel@tonic-gate 				chunk_tail->b_cont = split_mp;
4360Sstevel@tonic-gate 				chunk_tail = chunk_tail->b_cont;
4370Sstevel@tonic-gate 			} else {
4380Sstevel@tonic-gate 				chunk_head = chunk_tail = split_mp;
4390Sstevel@tonic-gate 			}
4400Sstevel@tonic-gate 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
4410Sstevel@tonic-gate 			chunk_mp->b_rptr = chunk_tail->b_wptr;
4420Sstevel@tonic-gate 			count = 0;
4430Sstevel@tonic-gate 		} else if (chunk_tail == NULL) {
4440Sstevel@tonic-gate 			goto next;
4450Sstevel@tonic-gate 		} else {
4460Sstevel@tonic-gate 			chunk_tail->b_cont = NULL;
4470Sstevel@tonic-gate 		}
4480Sstevel@tonic-gate 	}
4490Sstevel@tonic-gate 	/* Alloc chunk hdr, if needed */
4500Sstevel@tonic-gate 	if (DB_REF(chunk_head) > 1 ||
4510Sstevel@tonic-gate 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
4520Sstevel@tonic-gate 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
4530Sstevel@tonic-gate 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
4540Sstevel@tonic-gate 			if (mdblk->b_cont == NULL) {
4550Sstevel@tonic-gate 				if (chunk_mp != NULL)
4560Sstevel@tonic-gate 					linkb(chunk_head, chunk_mp);
4570Sstevel@tonic-gate 				mdblk->b_cont = chunk_head;
4580Sstevel@tonic-gate 			} else {
4590Sstevel@tonic-gate 				SCTP_MSG_SET_CHUNKED(mdblk);
4600Sstevel@tonic-gate 				if (chunk_mp != NULL)
4610Sstevel@tonic-gate 					linkb(chunk_head, chunk_mp);
4620Sstevel@tonic-gate 				ASSERT(chunk_head->b_next == NULL);
4630Sstevel@tonic-gate 				chunk_head->b_next = mdblk->b_cont;
4640Sstevel@tonic-gate 				mdblk->b_cont = chunk_head;
4650Sstevel@tonic-gate 			}
46610037SNick.Street@Sun.COM 			return (sctp->sctp_xmit_tail);
4670Sstevel@tonic-gate 		}
4680Sstevel@tonic-gate 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
4690Sstevel@tonic-gate 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
4700Sstevel@tonic-gate 		chunk_hdr->b_cont = chunk_head;
4710Sstevel@tonic-gate 	} else {
4720Sstevel@tonic-gate 		chunk_hdr = chunk_head;
4730Sstevel@tonic-gate 		chunk_hdr->b_rptr -= sizeof (*sdc);
4740Sstevel@tonic-gate 	}
4750Sstevel@tonic-gate 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
4760Sstevel@tonic-gate 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
4770Sstevel@tonic-gate 	sdc->sdh_id = CHUNK_DATA;
4780Sstevel@tonic-gate 	sdc->sdh_flags = 0;
4790Sstevel@tonic-gate 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
4800Sstevel@tonic-gate 	ASSERT(sdc->sdh_len);
4810Sstevel@tonic-gate 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
4820Sstevel@tonic-gate 	/*
4830Sstevel@tonic-gate 	 * We defer assigning the SSN just before sending the chunk, else
4840Sstevel@tonic-gate 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
4850Sstevel@tonic-gate 	 * to send a Forward TSN to let the peer know. Some more comments
4860Sstevel@tonic-gate 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
4870Sstevel@tonic-gate 	 */
4880Sstevel@tonic-gate 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 	if (mdblk->b_cont == NULL) {
4910Sstevel@tonic-gate 		mdblk->b_cont = chunk_hdr;
4920Sstevel@tonic-gate 		SCTP_DATA_SET_BBIT(sdc);
4930Sstevel@tonic-gate 	} else {
4940Sstevel@tonic-gate 		mp = mdblk->b_cont;
4950Sstevel@tonic-gate 		while (mp->b_next != NULL)
4960Sstevel@tonic-gate 			mp = mp->b_next;
4970Sstevel@tonic-gate 		mp->b_next = chunk_hdr;
4980Sstevel@tonic-gate 	}
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate 	bytes_to_send -= (chunksize - count);
5010Sstevel@tonic-gate 	if (chunk_mp != NULL) {
5020Sstevel@tonic-gate next:
5030Sstevel@tonic-gate 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
5040Sstevel@tonic-gate 		goto nextchunk;
5050Sstevel@tonic-gate 	}
5060Sstevel@tonic-gate 	SCTP_DATA_SET_EBIT(sdc);
5070Sstevel@tonic-gate 	sctp->sctp_xmit_unsent = mdblk->b_next;
5080Sstevel@tonic-gate 	if (mdblk->b_next == NULL) {
5090Sstevel@tonic-gate 		sctp->sctp_xmit_unsent_tail = NULL;
5100Sstevel@tonic-gate 	}
5110Sstevel@tonic-gate 	mdblk->b_next = NULL;
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate 	if (sctp->sctp_xmit_tail == NULL) {
5140Sstevel@tonic-gate 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
5150Sstevel@tonic-gate 	} else {
5160Sstevel@tonic-gate 		mp = sctp->sctp_xmit_tail;
5170Sstevel@tonic-gate 		while (mp->b_next != NULL)
5180Sstevel@tonic-gate 			mp = mp->b_next;
5190Sstevel@tonic-gate 		mp->b_next = mdblk;
5200Sstevel@tonic-gate 		mdblk->b_prev = mp;
5210Sstevel@tonic-gate 	}
5220Sstevel@tonic-gate try_next:
5230Sstevel@tonic-gate 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
5240Sstevel@tonic-gate 		mdblk = sctp->sctp_xmit_unsent;
5250Sstevel@tonic-gate 		fp1 = SCTP_CHUNK_DEST(mdblk);
5260Sstevel@tonic-gate 		if (fp1 == NULL)
5270Sstevel@tonic-gate 			fp1 = sctp->sctp_current;
5280Sstevel@tonic-gate 		if (fp == fp1) {
5290Sstevel@tonic-gate 			size_t len = MBLKL(mdblk->b_cont);
5300Sstevel@tonic-gate 			if ((count > 0) &&
5310Sstevel@tonic-gate 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
5324505Skcpoon 			    (len <= count))) {
5330Sstevel@tonic-gate 				count -= sizeof (*sdc);
5340Sstevel@tonic-gate 				count = chunksize = count - (count & 0x3);
5350Sstevel@tonic-gate 			} else {
5360Sstevel@tonic-gate 				count = chunksize = fp->sfa_pmss -
5370Sstevel@tonic-gate 				    sizeof (*sdc);
5380Sstevel@tonic-gate 			}
5390Sstevel@tonic-gate 		} else {
5400Sstevel@tonic-gate 			if (fp1->isv4)
5410Sstevel@tonic-gate 				xtralen = sctp->sctp_hdr_len;
5420Sstevel@tonic-gate 			else
5430Sstevel@tonic-gate 				xtralen = sctp->sctp_hdr6_len;
5443448Sdh155122 			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
5450Sstevel@tonic-gate 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
5460Sstevel@tonic-gate 			fp = fp1;
5470Sstevel@tonic-gate 		}
5480Sstevel@tonic-gate 		goto nextmsg;
5490Sstevel@tonic-gate 	}
55010037SNick.Street@Sun.COM 	return (sctp->sctp_xmit_tail);
5510Sstevel@tonic-gate }
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate void
5540Sstevel@tonic-gate sctp_free_msg(mblk_t *ump)
5550Sstevel@tonic-gate {
5560Sstevel@tonic-gate 	mblk_t *mp, *nmp;
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate 	for (mp = ump->b_cont; mp; mp = nmp) {
5590Sstevel@tonic-gate 		nmp = mp->b_next;
5600Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
5610Sstevel@tonic-gate 		freemsg(mp);
5620Sstevel@tonic-gate 	}
5630Sstevel@tonic-gate 	ASSERT(!ump->b_prev);
5640Sstevel@tonic-gate 	ump->b_next = NULL;
5650Sstevel@tonic-gate 	freeb(ump);
5660Sstevel@tonic-gate }
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate mblk_t *
569252Svi117747 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
570252Svi117747     int *error)
5710Sstevel@tonic-gate {
5720Sstevel@tonic-gate 	int hdrlen;
57311042SErik.Nordmark@Sun.COM 	uchar_t *hdr;
5740Sstevel@tonic-gate 	int isv4 = fp->isv4;
5753448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
5760Sstevel@tonic-gate 
577252Svi117747 	if (error != NULL)
578252Svi117747 		*error = 0;
579252Svi117747 
5800Sstevel@tonic-gate 	if (isv4) {
5810Sstevel@tonic-gate 		hdrlen = sctp->sctp_hdr_len;
5820Sstevel@tonic-gate 		hdr = sctp->sctp_iphc;
5830Sstevel@tonic-gate 	} else {
5840Sstevel@tonic-gate 		hdrlen = sctp->sctp_hdr6_len;
5850Sstevel@tonic-gate 		hdr = sctp->sctp_iphc6;
5860Sstevel@tonic-gate 	}
587252Svi117747 	/*
58811042SErik.Nordmark@Sun.COM 	 * A reject|blackhole could mean that the address is 'down'. Similarly,
589252Svi117747 	 * it is possible that the address went down, we tried to send an
590252Svi117747 	 * heartbeat and ended up setting fp->saddr as unspec because we
5911735Skcpoon 	 * didn't have any usable source address.  In either case
59211042SErik.Nordmark@Sun.COM 	 * sctp_get_dest() will try find an IRE, if available, and set
5931735Skcpoon 	 * the source address, if needed.  If we still don't have any
594252Svi117747 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
595252Svi117747 	 * we return EHOSTUNREACH.
596252Svi117747 	 */
59711042SErik.Nordmark@Sun.COM 	ASSERT(fp->ixa->ixa_ire != NULL);
59811042SErik.Nordmark@Sun.COM 	if ((fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
59911042SErik.Nordmark@Sun.COM 	    SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
60011042SErik.Nordmark@Sun.COM 		sctp_get_dest(sctp, fp);
601252Svi117747 		if (fp->state == SCTP_FADDRS_UNREACH) {
602252Svi117747 			if (error != NULL)
603252Svi117747 				*error = EHOSTUNREACH;
604252Svi117747 			return (NULL);
6050Sstevel@tonic-gate 		}
6060Sstevel@tonic-gate 	}
6070Sstevel@tonic-gate 	/* Copy in IP header. */
6080Sstevel@tonic-gate 	if ((mp->b_rptr - mp->b_datap->db_base) <
60911042SErik.Nordmark@Sun.COM 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
6100Sstevel@tonic-gate 		mblk_t *nmp;
6111735Skcpoon 
6120Sstevel@tonic-gate 		/*
6130Sstevel@tonic-gate 		 * This can happen if IP headers are adjusted after
6140Sstevel@tonic-gate 		 * data was moved into chunks, or during retransmission,
6150Sstevel@tonic-gate 		 * or things like snoop is running.
6160Sstevel@tonic-gate 		 */
61711042SErik.Nordmark@Sun.COM 		nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
61811042SErik.Nordmark@Sun.COM 		    BPRI_MED);
6190Sstevel@tonic-gate 		if (nmp == NULL) {
620252Svi117747 			if (error !=  NULL)
621252Svi117747 				*error = ENOMEM;
6220Sstevel@tonic-gate 			return (NULL);
6230Sstevel@tonic-gate 		}
6243448Sdh155122 		nmp->b_rptr += sctps->sctps_wroff_xtra;
6250Sstevel@tonic-gate 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
6260Sstevel@tonic-gate 		nmp->b_cont = mp;
6270Sstevel@tonic-gate 		mp = nmp;
6280Sstevel@tonic-gate 	} else {
6290Sstevel@tonic-gate 		mp->b_rptr -= (hdrlen + sacklen);
6300Sstevel@tonic-gate 	}
6310Sstevel@tonic-gate 	bcopy(hdr, mp->b_rptr, hdrlen);
6320Sstevel@tonic-gate 	if (sacklen) {
6330Sstevel@tonic-gate 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
6340Sstevel@tonic-gate 	}
6350Sstevel@tonic-gate 	if (fp != sctp->sctp_current) {
6360Sstevel@tonic-gate 		/* change addresses in header */
6370Sstevel@tonic-gate 		if (isv4) {
6380Sstevel@tonic-gate 			ipha_t *iph = (ipha_t *)mp->b_rptr;
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
6410Sstevel@tonic-gate 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
6420Sstevel@tonic-gate 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
6430Sstevel@tonic-gate 				    iph->ipha_src);
6440Sstevel@tonic-gate 			} else if (sctp->sctp_bound_to_all) {
6450Sstevel@tonic-gate 				iph->ipha_src = INADDR_ANY;
6460Sstevel@tonic-gate 			}
6470Sstevel@tonic-gate 		} else {
64811042SErik.Nordmark@Sun.COM 			ip6_t *ip6h = (ip6_t *)mp->b_rptr;
64911042SErik.Nordmark@Sun.COM 
65011042SErik.Nordmark@Sun.COM 			ip6h->ip6_dst = fp->faddr;
6510Sstevel@tonic-gate 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
65211042SErik.Nordmark@Sun.COM 				ip6h->ip6_src = fp->saddr;
6530Sstevel@tonic-gate 			} else if (sctp->sctp_bound_to_all) {
65411042SErik.Nordmark@Sun.COM 				ip6h->ip6_src = ipv6_all_zeros;
6550Sstevel@tonic-gate 			}
6560Sstevel@tonic-gate 		}
6570Sstevel@tonic-gate 	}
6580Sstevel@tonic-gate 	return (mp);
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate 
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate  * SCTP requires every chunk to be padded so that the total length
6630Sstevel@tonic-gate  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
6640Sstevel@tonic-gate  * the specified pad length.
6650Sstevel@tonic-gate  */
6660Sstevel@tonic-gate static mblk_t *
6674691Skcpoon sctp_get_padding(sctp_t *sctp, int pad)
6680Sstevel@tonic-gate {
6690Sstevel@tonic-gate 	mblk_t *fill;
6700Sstevel@tonic-gate 
6710Sstevel@tonic-gate 	ASSERT(pad < SCTP_ALIGN);
6724691Skcpoon 	ASSERT(sctp->sctp_pad_mp != NULL);
6734691Skcpoon 	if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
6740Sstevel@tonic-gate 		fill->b_wptr += pad;
6750Sstevel@tonic-gate 		return (fill);
6760Sstevel@tonic-gate 	}
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 	/*
6790Sstevel@tonic-gate 	 * The memory saving path of reusing the sctp_pad_mp
6800Sstevel@tonic-gate 	 * fails may be because it has been dupb() too
6810Sstevel@tonic-gate 	 * many times (DBLK_REFMAX).  Use the memory consuming
6820Sstevel@tonic-gate 	 * path of allocating the pad mblk.
6830Sstevel@tonic-gate 	 */
6840Sstevel@tonic-gate 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
6850Sstevel@tonic-gate 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
6860Sstevel@tonic-gate 		*(int32_t *)fill->b_rptr = 0;
6870Sstevel@tonic-gate 		fill->b_wptr += pad;
6880Sstevel@tonic-gate 	}
6890Sstevel@tonic-gate 	return (fill);
6900Sstevel@tonic-gate }
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate static mblk_t *
6930Sstevel@tonic-gate sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
6940Sstevel@tonic-gate {
6950Sstevel@tonic-gate 	mblk_t		*meta;
6960Sstevel@tonic-gate 	mblk_t		*start_mp = NULL;
6970Sstevel@tonic-gate 	mblk_t		*end_mp = NULL;
6980Sstevel@tonic-gate 	mblk_t		*mp, *nmp;
6990Sstevel@tonic-gate 	mblk_t		*fill;
7000Sstevel@tonic-gate 	sctp_data_hdr_t	*sdh;
7010Sstevel@tonic-gate 	int		msglen;
7020Sstevel@tonic-gate 	int		extra;
7030Sstevel@tonic-gate 	sctp_msg_hdr_t	*msg_hdr;
7041735Skcpoon 	sctp_faddr_t	*old_fp = NULL;
7051735Skcpoon 	sctp_faddr_t	*chunk_fp;
7063448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
7090Sstevel@tonic-gate 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
7100Sstevel@tonic-gate 		if (SCTP_IS_MSG_ABANDONED(meta) ||
7110Sstevel@tonic-gate 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
7120Sstevel@tonic-gate 			continue;
7130Sstevel@tonic-gate 		}
7140Sstevel@tonic-gate 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
7150Sstevel@tonic-gate 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
7160Sstevel@tonic-gate 				/*
7170Sstevel@tonic-gate 				 * Use the same peer address to do fast
7181735Skcpoon 				 * retransmission.  If the original peer
7191735Skcpoon 				 * address is dead, switch to the current
7201735Skcpoon 				 * one.  Record the old one so that we
7211735Skcpoon 				 * will pick the chunks sent to the old
7221735Skcpoon 				 * one for fast retransmission.
7230Sstevel@tonic-gate 				 */
7241735Skcpoon 				chunk_fp = SCTP_CHUNK_DEST(mp);
7250Sstevel@tonic-gate 				if (*fp == NULL) {
7261735Skcpoon 					*fp = chunk_fp;
7271735Skcpoon 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
7281735Skcpoon 						old_fp = *fp;
7290Sstevel@tonic-gate 						*fp = sctp->sctp_current;
7301735Skcpoon 					}
7311735Skcpoon 				} else if (old_fp == NULL && *fp != chunk_fp) {
7321735Skcpoon 					continue;
7331735Skcpoon 				} else if (old_fp != NULL &&
7341735Skcpoon 				    old_fp != chunk_fp) {
7350Sstevel@tonic-gate 					continue;
7360Sstevel@tonic-gate 				}
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
7390Sstevel@tonic-gate 				msglen = ntohs(sdh->sdh_len);
7400Sstevel@tonic-gate 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
7410Sstevel@tonic-gate 					extra = SCTP_ALIGN - extra;
7420Sstevel@tonic-gate 				}
7430Sstevel@tonic-gate 
7440Sstevel@tonic-gate 				/*
7450Sstevel@tonic-gate 				 * We still return at least the first message
7460Sstevel@tonic-gate 				 * even if that message cannot fit in as
7470Sstevel@tonic-gate 				 * PMTU may have changed.
7480Sstevel@tonic-gate 				 */
7490Sstevel@tonic-gate 				if (*total + msglen + extra >
7500Sstevel@tonic-gate 				    (*fp)->sfa_pmss && start_mp != NULL) {
7510Sstevel@tonic-gate 					return (start_mp);
7520Sstevel@tonic-gate 				}
7530Sstevel@tonic-gate 				if ((nmp = dupmsg(mp)) == NULL)
7540Sstevel@tonic-gate 					return (start_mp);
7550Sstevel@tonic-gate 				if (extra > 0) {
7564691Skcpoon 					fill = sctp_get_padding(sctp, extra);
7570Sstevel@tonic-gate 					if (fill != NULL) {
7580Sstevel@tonic-gate 						linkb(nmp, fill);
7590Sstevel@tonic-gate 					} else {
7600Sstevel@tonic-gate 						return (start_mp);
7610Sstevel@tonic-gate 					}
7620Sstevel@tonic-gate 				}
7633448Sdh155122 				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
7641735Skcpoon 				BUMP_LOCAL(sctp->sctp_rxtchunks);
7650Sstevel@tonic-gate 				SCTP_CHUNK_CLEAR_REXMIT(mp);
7660Sstevel@tonic-gate 				if (start_mp == NULL) {
7670Sstevel@tonic-gate 					start_mp = nmp;
7680Sstevel@tonic-gate 				} else {
7690Sstevel@tonic-gate 					linkb(end_mp, nmp);
7700Sstevel@tonic-gate 				}
7710Sstevel@tonic-gate 				end_mp = nmp;
7720Sstevel@tonic-gate 				*total += msglen + extra;
7730Sstevel@tonic-gate 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
7740Sstevel@tonic-gate 				    "tsn %x\n", sdh->sdh_tsn));
7750Sstevel@tonic-gate 			}
7760Sstevel@tonic-gate 		}
7770Sstevel@tonic-gate 	}
7780Sstevel@tonic-gate 	/* Clear the flag as there is no more message to be fast rexmitted. */
7790Sstevel@tonic-gate 	sctp->sctp_chk_fast_rexmit = B_FALSE;
7800Sstevel@tonic-gate 	return (start_mp);
7810Sstevel@tonic-gate }
7820Sstevel@tonic-gate 
#ifdef	DEBUG
/*
 * Debug-only sanity check that a mblk chain is not broken: starting at
 * 'head' and following b_next, 'tail' must be reachable.
 *
 * Returns B_TRUE if 'tail' is found, or if either argument is NULL
 * (nothing to verify); B_FALSE if the walk runs off the end of the chain
 * without meeting 'tail'.
 */
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif	/* DEBUG */
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate /*
8020Sstevel@tonic-gate  * Gets the next unsent chunk to transmit. Messages that are abandoned are
8030Sstevel@tonic-gate  * skipped. A message can be abandoned if it has a non-zero timetolive and
8040Sstevel@tonic-gate  * transmission has not yet started or if it is a partially reliable
8050Sstevel@tonic-gate  * message and its time is up (assuming we are PR-SCTP aware).
80610037SNick.Street@Sun.COM  * We only return a chunk if it will fit entirely in the current packet.
8070Sstevel@tonic-gate  * 'cansend' is used to determine if need to try and chunkify messages from
8080Sstevel@tonic-gate  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
8098154SGeorge.Shepherd@Sun.COM  *
81010037SNick.Street@Sun.COM  * firstseg_len indicates the space already used, cansend represents remaining
81110037SNick.Street@Sun.COM  * space in the window, ((sfa_pmss - firstseg_len) can therefore reasonably
8128154SGeorge.Shepherd@Sun.COM  * be used to compute the cansend arg).
8130Sstevel@tonic-gate  */
/*
 * Arguments and result, as used by the code below:
 *
 *   mp		 - out (must not be NULL): set to the first sendable chunk of
 *		   the message that is returned.
 *   meta	 - message at which the scan of the transmit list starts;
 *		   may be NULL, in which case only chunkifying is attempted.
 *   error	 - out (must not be NULL): zeroed on entry; receives the
 *		   non-zero result of sctp_check_abandoned_msg(), in which
 *		   case NULL is returned.
 *   firstseg_len, cansend - handed to sctp_chunkify(); nothing is
 *		   chunkified when cansend is 0.
 *   fp		 - destination to chunkify for.  If NULL, the destination of
 *		   the head of the unsent list is used when it is alive,
 *		   otherwise sctp->sctp_current.
 *
 * Returns the message (meta mblk) that owns *mp, or NULL when there is
 * nothing to send or an error occurred.
 *
 * Side effect: a to-be-abandoned message of which no chunk has been sent
 * is unlinked from the transmit list, its length is subtracted from
 * sctp_unsent and sctp_sendfail_event() is called for it.
 */
8140Sstevel@tonic-gate mblk_t *
8150Sstevel@tonic-gate sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
81610037SNick.Street@Sun.COM     int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
8170Sstevel@tonic-gate {
8180Sstevel@tonic-gate 	mblk_t		*mp1;
8190Sstevel@tonic-gate 	sctp_msg_hdr_t	*msg_hdr;
8200Sstevel@tonic-gate 	mblk_t		*tmp_meta;
8210Sstevel@tonic-gate 	sctp_faddr_t	*fp1;
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 	ASSERT(error != NULL && mp != NULL);
8240Sstevel@tonic-gate 	*error = 0;
8250Sstevel@tonic-gate 
8260Sstevel@tonic-gate 	ASSERT(sctp->sctp_current != NULL);
8270Sstevel@tonic-gate 
	/* Re-entered from below once sctp_chunkify() has produced messages. */
8280Sstevel@tonic-gate chunkified:
8290Sstevel@tonic-gate 	while (meta != NULL) {
		/* Save the successor now: meta may be unlinked further down. */
8300Sstevel@tonic-gate 		tmp_meta = meta->b_next;
8310Sstevel@tonic-gate 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
8320Sstevel@tonic-gate 		mp1 = meta->b_cont;
8330Sstevel@tonic-gate 		if (SCTP_IS_MSG_ABANDONED(meta))
8340Sstevel@tonic-gate 			goto next_msg;
8350Sstevel@tonic-gate 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
8360Sstevel@tonic-gate 			while (mp1 != NULL) {
8370Sstevel@tonic-gate 				if (SCTP_CHUNK_CANSEND(mp1)) {
8380Sstevel@tonic-gate 					*mp = mp1;
8390Sstevel@tonic-gate #ifdef	DEBUG
8400Sstevel@tonic-gate 					ASSERT(sctp_verify_chain(
8410Sstevel@tonic-gate 					    sctp->sctp_xmit_head, meta));
8420Sstevel@tonic-gate #endif
8430Sstevel@tonic-gate 					return (meta);
8440Sstevel@tonic-gate 				}
8450Sstevel@tonic-gate 				mp1 = mp1->b_next;
8460Sstevel@tonic-gate 			}
8470Sstevel@tonic-gate 			goto next_msg;
8480Sstevel@tonic-gate 		}
8490Sstevel@tonic-gate 		/*
8500Sstevel@tonic-gate 		 * If we come here and the first chunk is sent, then we
8510Sstevel@tonic-gate 		 * are PR-SCTP aware, in which case if the cumulative
8520Sstevel@tonic-gate 		 * TSN has moved up to or beyond the first chunk (which
8530Sstevel@tonic-gate 		 * means all the previous messages have been cumulative
8540Sstevel@tonic-gate 		 * SACK'd), then we send a Forward TSN with the last
8550Sstevel@tonic-gate 		 * chunk that was sent in this message. If we can't send
8560Sstevel@tonic-gate 		 * a Forward TSN because previous non-abandoned messages
8570Sstevel@tonic-gate 		 * have not been acked then we will defer the Forward TSN
8580Sstevel@tonic-gate 		 * to sctp_rexmit() or sctp_cumack().
8590Sstevel@tonic-gate 		 */
8600Sstevel@tonic-gate 		if (SCTP_CHUNK_ISSENT(mp1)) {
8610Sstevel@tonic-gate 			*error = sctp_check_abandoned_msg(sctp, meta);
8620Sstevel@tonic-gate 			if (*error != 0) {
8630Sstevel@tonic-gate #ifdef	DEBUG
8640Sstevel@tonic-gate 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
8650Sstevel@tonic-gate 				    sctp->sctp_xmit_tail));
8660Sstevel@tonic-gate #endif
8670Sstevel@tonic-gate 				return (NULL);
8680Sstevel@tonic-gate 			}
8690Sstevel@tonic-gate 			goto next_msg;
8700Sstevel@tonic-gate 		}
		/*
		 * The message is to be abandoned and its first chunk was
		 * never sent: unlink it from the doubly linked transmit list
		 * (three cases: it is the head, it is the last element, or
		 * it sits in the middle), moving sctp_xmit_tail off it if
		 * necessary.
		 */
8710Sstevel@tonic-gate 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
8720Sstevel@tonic-gate 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
8730Sstevel@tonic-gate 		if (meta->b_prev == NULL) {
8740Sstevel@tonic-gate 			ASSERT(sctp->sctp_xmit_head == meta);
8750Sstevel@tonic-gate 			sctp->sctp_xmit_head = tmp_meta;
8760Sstevel@tonic-gate 			if (sctp->sctp_xmit_tail == meta)
8770Sstevel@tonic-gate 				sctp->sctp_xmit_tail = tmp_meta;
8780Sstevel@tonic-gate 			meta->b_next = NULL;
8790Sstevel@tonic-gate 			if (tmp_meta != NULL)
8800Sstevel@tonic-gate 				tmp_meta->b_prev = NULL;
8810Sstevel@tonic-gate 		} else if (meta->b_next == NULL) {
8820Sstevel@tonic-gate 			if (sctp->sctp_xmit_tail == meta)
8830Sstevel@tonic-gate 				sctp->sctp_xmit_tail = meta->b_prev;
8840Sstevel@tonic-gate 			meta->b_prev->b_next = NULL;
8850Sstevel@tonic-gate 			meta->b_prev = NULL;
8860Sstevel@tonic-gate 		} else {
8870Sstevel@tonic-gate 			meta->b_prev->b_next = tmp_meta;
8880Sstevel@tonic-gate 			tmp_meta->b_prev = meta->b_prev;
8890Sstevel@tonic-gate 			if (sctp->sctp_xmit_tail == meta)
8900Sstevel@tonic-gate 				sctp->sctp_xmit_tail = tmp_meta;
8910Sstevel@tonic-gate 			meta->b_prev = NULL;
8920Sstevel@tonic-gate 			meta->b_next = NULL;
8930Sstevel@tonic-gate 		}
8940Sstevel@tonic-gate 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
8950Sstevel@tonic-gate 		/*
8960Sstevel@tonic-gate 		 * Update ULP the amount of queued data, which is
8970Sstevel@tonic-gate 		 * sent-unack'ed + unsent.
8980Sstevel@tonic-gate 		 */
8998348SEric.Yu@Sun.COM 		if (!SCTP_IS_DETACHED(sctp))
9008348SEric.Yu@Sun.COM 			SCTP_TXQ_UPDATE(sctp);
9010Sstevel@tonic-gate 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
9020Sstevel@tonic-gate next_msg:
9030Sstevel@tonic-gate 		meta = tmp_meta;
9040Sstevel@tonic-gate 	}
9050Sstevel@tonic-gate 	/* chunkify, if needed */
9060Sstevel@tonic-gate 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
9070Sstevel@tonic-gate 		ASSERT(sctp->sctp_unsent > 0);
9080Sstevel@tonic-gate 		if (fp == NULL) {
9090Sstevel@tonic-gate 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
9100Sstevel@tonic-gate 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
9110Sstevel@tonic-gate 				fp = sctp->sctp_current;
9120Sstevel@tonic-gate 		} else {
9130Sstevel@tonic-gate 			/*
9140Sstevel@tonic-gate 			 * If user specified destination, try to honor that.
9150Sstevel@tonic-gate 			 */
			/*
			 * I.e. the head of the unsent list names a different,
			 * alive destination than the caller's fp, so it is
			 * left alone and nothing is chunkified.
			 */
9160Sstevel@tonic-gate 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
9170Sstevel@tonic-gate 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
9180Sstevel@tonic-gate 			    fp1 != fp) {
9190Sstevel@tonic-gate 				goto chunk_done;
9200Sstevel@tonic-gate 			}
9210Sstevel@tonic-gate 		}
92210037SNick.Street@Sun.COM 		meta = sctp_chunkify(sctp, fp->sfa_pmss, firstseg_len, cansend);
92310037SNick.Street@Sun.COM 		if (meta == NULL)
9240Sstevel@tonic-gate 			goto chunk_done;
9250Sstevel@tonic-gate 		/*
9260Sstevel@tonic-gate 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
9270Sstevel@tonic-gate 		 * new chunk(s) to the tail, so we need to skip the
9280Sstevel@tonic-gate 		 * sctp_xmit_tail, which would have already been processed.
9290Sstevel@tonic-gate 		 * This could happen when there is unacked chunks, but
9300Sstevel@tonic-gate 		 * nothing new to send.
9310Sstevel@tonic-gate 		 * When sctp_chunkify() is called when the transmit queue
9320Sstevel@tonic-gate 		 * is empty then we need to start from sctp_xmit_tail.
9330Sstevel@tonic-gate 		 */
9340Sstevel@tonic-gate 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
9350Sstevel@tonic-gate #ifdef	DEBUG
9360Sstevel@tonic-gate 			mp1 = sctp->sctp_xmit_tail->b_cont;
9370Sstevel@tonic-gate 			while (mp1 != NULL) {
9380Sstevel@tonic-gate 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
9390Sstevel@tonic-gate 				mp1 = mp1->b_next;
9400Sstevel@tonic-gate 			}
9410Sstevel@tonic-gate #endif
9420Sstevel@tonic-gate 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
9430Sstevel@tonic-gate 				goto chunk_done;
9440Sstevel@tonic-gate 		}
		/* Rescan, starting at the message selected above. */
9450Sstevel@tonic-gate 		goto chunkified;
9460Sstevel@tonic-gate 	}
9470Sstevel@tonic-gate chunk_done:
9480Sstevel@tonic-gate #ifdef	DEBUG
9490Sstevel@tonic-gate 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
9500Sstevel@tonic-gate #endif
9510Sstevel@tonic-gate 	return (NULL);
9520Sstevel@tonic-gate }
9530Sstevel@tonic-gate 
9540Sstevel@tonic-gate void
9550Sstevel@tonic-gate sctp_fast_rexmit(sctp_t *sctp)
9560Sstevel@tonic-gate {
9570Sstevel@tonic-gate 	mblk_t		*mp, *head;
9580Sstevel@tonic-gate 	int		pktlen = 0;
9590Sstevel@tonic-gate 	sctp_faddr_t	*fp = NULL;
9603448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
9610Sstevel@tonic-gate 
9620Sstevel@tonic-gate 	ASSERT(sctp->sctp_xmit_head != NULL);
9630Sstevel@tonic-gate 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
9641735Skcpoon 	if (mp == NULL) {
9653448Sdh155122 		SCTP_KSTAT(sctps, sctp_fr_not_found);
9660Sstevel@tonic-gate 		return;
9671735Skcpoon 	}
968252Svi117747 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
9690Sstevel@tonic-gate 		freemsg(mp);
9703448Sdh155122 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
9710Sstevel@tonic-gate 		return;
9720Sstevel@tonic-gate 	}
9730Sstevel@tonic-gate 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
9740Sstevel@tonic-gate 		ipha_t *iph = (ipha_t *)head->b_rptr;
9750Sstevel@tonic-gate 
9760Sstevel@tonic-gate 		iph->ipha_fragment_offset_and_flags = 0;
9770Sstevel@tonic-gate 	}
9780Sstevel@tonic-gate 
97911042SErik.Nordmark@Sun.COM 	sctp_set_iplen(sctp, head, fp->ixa);
98011042SErik.Nordmark@Sun.COM 	(void) conn_ip_output(head, fp->ixa);
98111042SErik.Nordmark@Sun.COM 	BUMP_LOCAL(sctp->sctp_opkts);
982*11066Srafael.vanoni@sun.com 	sctp->sctp_active = fp->lastactive = ddi_get_lbolt64();
9830Sstevel@tonic-gate }
9840Sstevel@tonic-gate 
/*
 * Transmit queued data chunks, building at most 'num_pkt' packets.
 *
 * The amount of payload that may be sent ('cansend') starts as the
 * smaller of sctp_frwnd and sctp_unsent and is reduced by the payload of
 * every packet handed to conn_ip_output().  Each packet is built from one
 * sendable chunk (found on sctp_xmit_tail or obtained through
 * sctp_get_msg_to_send()), optionally preceded by a SACK chunk, and then
 * further chunks are bundled behind it while they fit within 'pathmax'
 * (cwnd - suna of the chosen destination, capped at its sfa_pmss) and
 * within 'cansend'.
 *
 * Whenever sending has to stop early the code jumps to 'unsent_data',
 * which arms the timer of the destination in 'fp' (unless it is already
 * running) and frees a partially built packet, if any.
 */
9850Sstevel@tonic-gate void
9863795Skcpoon sctp_output(sctp_t *sctp, uint_t num_pkt)
9870Sstevel@tonic-gate {
9880Sstevel@tonic-gate 	mblk_t			*mp = NULL;
9890Sstevel@tonic-gate 	mblk_t			*nmp;
9900Sstevel@tonic-gate 	mblk_t			*head;
9910Sstevel@tonic-gate 	mblk_t			*meta = sctp->sctp_xmit_tail;
9920Sstevel@tonic-gate 	mblk_t			*fill = NULL;
9930Sstevel@tonic-gate 	uint16_t 		chunklen;
9940Sstevel@tonic-gate 	uint32_t 		cansend;
9950Sstevel@tonic-gate 	int32_t			seglen;
9960Sstevel@tonic-gate 	int32_t			xtralen;
9970Sstevel@tonic-gate 	int32_t			sacklen;
9980Sstevel@tonic-gate 	int32_t			pad = 0;
9990Sstevel@tonic-gate 	int32_t			pathmax;
10000Sstevel@tonic-gate 	int			extra;
1001*11066Srafael.vanoni@sun.com 	int64_t			now = ddi_get_lbolt64();
10020Sstevel@tonic-gate 	sctp_faddr_t		*fp;
10030Sstevel@tonic-gate 	sctp_faddr_t		*lfp;
10040Sstevel@tonic-gate 	sctp_data_hdr_t		*sdc;
10050Sstevel@tonic-gate 	int			error;
1006252Svi117747 	boolean_t		notsent = B_TRUE;
10073795Skcpoon 	sctp_stack_t		*sctps = sctp->sctp_sctps;
10080Sstevel@tonic-gate 
	/*
	 * Note: lfp is only assigned in the else branch below; every later
	 * use of it is guarded by sacklen > 0.
	 */
10090Sstevel@tonic-gate 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
10100Sstevel@tonic-gate 		sacklen = 0;
10110Sstevel@tonic-gate 	} else {
10120Sstevel@tonic-gate 		/* send a SACK chunk */
10130Sstevel@tonic-gate 		sacklen = sizeof (sctp_chunk_hdr_t) +
10140Sstevel@tonic-gate 		    sizeof (sctp_sack_chunk_t) +
10150Sstevel@tonic-gate 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
10160Sstevel@tonic-gate 		lfp = sctp->sctp_lastdata;
10170Sstevel@tonic-gate 		ASSERT(lfp != NULL);
10180Sstevel@tonic-gate 		if (lfp->state != SCTP_FADDRS_ALIVE)
10190Sstevel@tonic-gate 			lfp = sctp->sctp_current;
10200Sstevel@tonic-gate 	}
10210Sstevel@tonic-gate 
	/* Payload budget: the smaller of sctp_frwnd and sctp_unsent. */
10220Sstevel@tonic-gate 	cansend = sctp->sctp_frwnd;
10230Sstevel@tonic-gate 	if (sctp->sctp_unsent < cansend)
10240Sstevel@tonic-gate 		cansend = sctp->sctp_unsent;
10258222SGeorge.Shepherd@Sun.COM 
10268222SGeorge.Shepherd@Sun.COM 	/*
10278222SGeorge.Shepherd@Sun.COM 	 * Start persist timer if unable to send or when
10288222SGeorge.Shepherd@Sun.COM 	 * trying to send into a zero window. This timer
10298222SGeorge.Shepherd@Sun.COM 	 * ensures the blocked send attempt is retried.
10308222SGeorge.Shepherd@Sun.COM 	 */
	/*
	 * Precedence note: && binds tighter than ||, so this reads as
	 * (first three tests && !sctp_ndelay) || (the parenthesized
	 * zero-window test).
	 */
10310Sstevel@tonic-gate 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
10328222SGeorge.Shepherd@Sun.COM 	    (sctp->sctp_unacked != 0) &&
10330Sstevel@tonic-gate 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
10348222SGeorge.Shepherd@Sun.COM 	    !sctp->sctp_ndelay ||
10358222SGeorge.Shepherd@Sun.COM 	    (cansend == 0 && sctp->sctp_unacked == 0 &&
10368222SGeorge.Shepherd@Sun.COM 	    sctp->sctp_unsent != 0)) {
10370Sstevel@tonic-gate 		head = NULL;
10380Sstevel@tonic-gate 		fp = sctp->sctp_current;
10390Sstevel@tonic-gate 		goto unsent_data;
10400Sstevel@tonic-gate 	}
10410Sstevel@tonic-gate 	if (meta != NULL)
10420Sstevel@tonic-gate 		mp = meta->b_cont;
	/* One iteration per packet sent. */
10433795Skcpoon 	while (cansend > 0 && num_pkt-- != 0) {
10440Sstevel@tonic-gate 		pad = 0;
10450Sstevel@tonic-gate 
10460Sstevel@tonic-gate 		/*
10470Sstevel@tonic-gate 		 * Find first segment eligible for transmit.
10480Sstevel@tonic-gate 		 */
10490Sstevel@tonic-gate 		while (mp != NULL) {
10500Sstevel@tonic-gate 			if (SCTP_CHUNK_CANSEND(mp))
10510Sstevel@tonic-gate 				break;
10520Sstevel@tonic-gate 			mp = mp->b_next;
10530Sstevel@tonic-gate 		}
10540Sstevel@tonic-gate 		if (mp == NULL) {
10550Sstevel@tonic-gate 			meta = sctp_get_msg_to_send(sctp, &mp,
10560Sstevel@tonic-gate 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
10570Sstevel@tonic-gate 			    cansend, NULL);
10580Sstevel@tonic-gate 			if (error != 0 || meta == NULL) {
10590Sstevel@tonic-gate 				head = NULL;
10600Sstevel@tonic-gate 				fp = sctp->sctp_current;
10610Sstevel@tonic-gate 				goto unsent_data;
10620Sstevel@tonic-gate 			}
10630Sstevel@tonic-gate 			sctp->sctp_xmit_tail =  meta;
10640Sstevel@tonic-gate 		}
10650Sstevel@tonic-gate 
		/*
		 * seglen:   length of the whole chunk, taken from its header.
		 * xtralen:  non-payload bytes counted in seglen so far.
		 * chunklen: payload bytes of this chunk only.
		 */
10660Sstevel@tonic-gate 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
10670Sstevel@tonic-gate 		seglen = ntohs(sdc->sdh_len);
10680Sstevel@tonic-gate 		xtralen = sizeof (*sdc);
10690Sstevel@tonic-gate 		chunklen = seglen - xtralen;
10700Sstevel@tonic-gate 
10710Sstevel@tonic-gate 		/*
10720Sstevel@tonic-gate 		 * Check rwnd.
10730Sstevel@tonic-gate 		 */
10740Sstevel@tonic-gate 		if (chunklen > cansend) {
10750Sstevel@tonic-gate 			head = NULL;
10760Sstevel@tonic-gate 			fp = SCTP_CHUNK_DEST(meta);
10770Sstevel@tonic-gate 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
10780Sstevel@tonic-gate 				fp = sctp->sctp_current;
10790Sstevel@tonic-gate 			goto unsent_data;
10800Sstevel@tonic-gate 		}
		/* extra: bytes needed to pad seglen to a SCTP_ALIGN multiple. */
10810Sstevel@tonic-gate 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
10820Sstevel@tonic-gate 			extra = SCTP_ALIGN - extra;
10830Sstevel@tonic-gate 
10840Sstevel@tonic-gate 		/*
10850Sstevel@tonic-gate 		 * Pick destination address, and check cwnd.
10860Sstevel@tonic-gate 		 */
10870Sstevel@tonic-gate 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
10880Sstevel@tonic-gate 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
10890Sstevel@tonic-gate 			/*
10900Sstevel@tonic-gate 			 * Only include SACK chunk if it can be bundled
10910Sstevel@tonic-gate 			 * with a data chunk, and sent to sctp_lastdata.
10920Sstevel@tonic-gate 			 */
10930Sstevel@tonic-gate 			pathmax = lfp->cwnd - lfp->suna;
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 			fp = lfp;
10960Sstevel@tonic-gate 			if ((nmp = dupmsg(mp)) == NULL) {
10970Sstevel@tonic-gate 				head = NULL;
10980Sstevel@tonic-gate 				goto unsent_data;
10990Sstevel@tonic-gate 			}
11000Sstevel@tonic-gate 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1101252Svi117747 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1102252Svi117747 			    &error);
11030Sstevel@tonic-gate 			if (head == NULL) {
1104252Svi117747 				/*
1105252Svi117747 				 * If none of the source addresses are
1106252Svi117747 				 * available (i.e error == EHOSTUNREACH),
1107252Svi117747 				 * pretend we have sent the data. We will
1108252Svi117747 				 * eventually time out trying to retransmit
1109252Svi117747 				 * the data if the interface never comes up.
1110252Svi117747 				 * If we have already sent some stuff (i.e.,
1111252Svi117747 				 * notsent is B_FALSE) then we are fine, else
1112252Svi117747 				 * just mark this packet as sent.
1113252Svi117747 				 */
1114252Svi117747 				if (notsent && error == EHOSTUNREACH) {
1115252Svi117747 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1116252Svi117747 					    fp, chunklen, meta);
1117252Svi117747 				}
11180Sstevel@tonic-gate 				freemsg(nmp);
11193448Sdh155122 				SCTP_KSTAT(sctps, sctp_output_failed);
11200Sstevel@tonic-gate 				goto unsent_data;
11210Sstevel@tonic-gate 			}
			/* The SACK now rides in this packet; send it only once. */
11220Sstevel@tonic-gate 			seglen += sacklen;
11230Sstevel@tonic-gate 			xtralen += sacklen;
11240Sstevel@tonic-gate 			sacklen = 0;
11250Sstevel@tonic-gate 		} else {
11260Sstevel@tonic-gate 			fp = SCTP_CHUNK_DEST(meta);
11270Sstevel@tonic-gate 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
11280Sstevel@tonic-gate 				fp = sctp->sctp_current;
11290Sstevel@tonic-gate 			/*
11300Sstevel@tonic-gate 			 * If we haven't sent data to this destination for
11310Sstevel@tonic-gate 			 * a while, do slow start again.
11320Sstevel@tonic-gate 			 */
11330Sstevel@tonic-gate 			if (now - fp->lastactive > fp->rto) {
11343795Skcpoon 				SET_CWND(fp, fp->sfa_pmss,
11353795Skcpoon 				    sctps->sctps_slow_start_after_idle);
11360Sstevel@tonic-gate 			}
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate 			pathmax = fp->cwnd - fp->suna;
11390Sstevel@tonic-gate 			if (seglen + extra > pathmax) {
11400Sstevel@tonic-gate 				head = NULL;
11410Sstevel@tonic-gate 				goto unsent_data;
11420Sstevel@tonic-gate 			}
11430Sstevel@tonic-gate 			if ((nmp = dupmsg(mp)) == NULL) {
11440Sstevel@tonic-gate 				head = NULL;
11450Sstevel@tonic-gate 				goto unsent_data;
11460Sstevel@tonic-gate 			}
11470Sstevel@tonic-gate 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1148252Svi117747 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
11490Sstevel@tonic-gate 			if (head == NULL) {
1150252Svi117747 				/*
1151252Svi117747 				 * If none of the source addresses are
1152252Svi117747 				 * available (i.e error == EHOSTUNREACH),
1153252Svi117747 				 * pretend we have sent the data. We will
1154252Svi117747 				 * eventually time out trying to retransmit
1155252Svi117747 				 * the data if the interface never comes up.
1156252Svi117747 				 * If we have already sent some stuff (i.e.,
1157252Svi117747 				 * notsent is B_FALSE) then we are fine, else
1158252Svi117747 				 * just mark this packet as sent.
1159252Svi117747 				 */
1160252Svi117747 				if (notsent && error == EHOSTUNREACH) {
1161252Svi117747 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1162252Svi117747 					    fp, chunklen, meta);
1163252Svi117747 				}
11640Sstevel@tonic-gate 				freemsg(nmp);
11653448Sdh155122 				SCTP_KSTAT(sctps, sctp_output_failed);
11660Sstevel@tonic-gate 				goto unsent_data;
11670Sstevel@tonic-gate 			}
11680Sstevel@tonic-gate 		}
		/* 'head' now holds the header plus the first chunk's copy. */
11691735Skcpoon 		fp->lastactive = now;
11700Sstevel@tonic-gate 		if (pathmax > fp->sfa_pmss)
11710Sstevel@tonic-gate 			pathmax = fp->sfa_pmss;
11720Sstevel@tonic-gate 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
11730Sstevel@tonic-gate 		mp = mp->b_next;
11740Sstevel@tonic-gate 
11750Sstevel@tonic-gate 		/* Use this chunk to measure RTT? */
11760Sstevel@tonic-gate 		if (sctp->sctp_out_time == 0) {
11770Sstevel@tonic-gate 			sctp->sctp_out_time = now;
11780Sstevel@tonic-gate 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
11791735Skcpoon 			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
11800Sstevel@tonic-gate 		}
11810Sstevel@tonic-gate 		if (extra > 0) {
11824691Skcpoon 			fill = sctp_get_padding(sctp, extra);
11830Sstevel@tonic-gate 			if (fill != NULL) {
11840Sstevel@tonic-gate 				linkb(head, fill);
11850Sstevel@tonic-gate 				pad = extra;
11860Sstevel@tonic-gate 				seglen += extra;
11870Sstevel@tonic-gate 			} else {
				/* head is freed at unsent_data. */
11880Sstevel@tonic-gate 				goto unsent_data;
11890Sstevel@tonic-gate 			}
11900Sstevel@tonic-gate 		}
119110037SNick.Street@Sun.COM 		/*
119210037SNick.Street@Sun.COM 		 * Bundle chunks. We linkb() the chunks together to send
119310037SNick.Street@Sun.COM 		 * downstream in a single packet.
119410037SNick.Street@Sun.COM 		 * Partial chunks MUST NOT be bundled with full chunks, so we
119510037SNick.Street@Sun.COM 		 * rely on sctp_get_msg_to_send() to only return messages that
119610037SNick.Street@Sun.COM 		 * will fit entirely in the current packet.
119710037SNick.Street@Sun.COM 		 */
11980Sstevel@tonic-gate 		while (seglen < pathmax) {
11990Sstevel@tonic-gate 			int32_t		new_len;
12000Sstevel@tonic-gate 			int32_t		new_xtralen;
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 			while (mp != NULL) {
12030Sstevel@tonic-gate 				if (SCTP_CHUNK_CANSEND(mp))
12040Sstevel@tonic-gate 					break;
12050Sstevel@tonic-gate 				mp = mp->b_next;
12060Sstevel@tonic-gate 			}
12070Sstevel@tonic-gate 			if (mp == NULL) {
12080Sstevel@tonic-gate 				meta = sctp_get_msg_to_send(sctp, &mp,
12090Sstevel@tonic-gate 				    meta->b_next, &error, seglen,
12100Sstevel@tonic-gate 				    (seglen - xtralen) >= cansend ? 0 :
12110Sstevel@tonic-gate 				    cansend - seglen, fp);
121210037SNick.Street@Sun.COM 				if (error != 0)
121310037SNick.Street@Sun.COM 					break;
121410037SNick.Street@Sun.COM 				/* If no more eligible chunks, cease bundling */
121510037SNick.Street@Sun.COM 				if (meta == NULL)
12160Sstevel@tonic-gate 					break;
12170Sstevel@tonic-gate 				sctp->sctp_xmit_tail =  meta;
12180Sstevel@tonic-gate 			}
12190Sstevel@tonic-gate 			ASSERT(mp != NULL);
			/* Do not bundle a chunk whose message names another destination. */
12200Sstevel@tonic-gate 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
12210Sstevel@tonic-gate 			    fp != SCTP_CHUNK_DEST(meta)) {
12220Sstevel@tonic-gate 				break;
12230Sstevel@tonic-gate 			}
12240Sstevel@tonic-gate 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
12250Sstevel@tonic-gate 			chunklen = ntohs(sdc->sdh_len);
12260Sstevel@tonic-gate 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
12270Sstevel@tonic-gate 				extra = SCTP_ALIGN - extra;
12280Sstevel@tonic-gate 
			/*
			 * Tentative totals; they are committed to seglen and
			 * xtralen only once the chunk is known to fit and has
			 * been copied.  chunklen is reduced to payload only.
			 */
12290Sstevel@tonic-gate 			new_len = seglen + chunklen;
12300Sstevel@tonic-gate 			new_xtralen = xtralen + sizeof (*sdc);
12310Sstevel@tonic-gate 			chunklen -= sizeof (*sdc);
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate 			if (new_len - new_xtralen > cansend ||
12340Sstevel@tonic-gate 			    new_len + extra > pathmax) {
12350Sstevel@tonic-gate 				break;
12360Sstevel@tonic-gate 			}
12370Sstevel@tonic-gate 			if ((nmp = dupmsg(mp)) == NULL)
12380Sstevel@tonic-gate 				break;
12390Sstevel@tonic-gate 			if (extra > 0) {
12404691Skcpoon 				fill = sctp_get_padding(sctp, extra);
12410Sstevel@tonic-gate 				if (fill != NULL) {
12420Sstevel@tonic-gate 					pad += extra;
12430Sstevel@tonic-gate 					new_len += extra;
12440Sstevel@tonic-gate 					linkb(nmp, fill);
12450Sstevel@tonic-gate 				} else {
12460Sstevel@tonic-gate 					freemsg(nmp);
12470Sstevel@tonic-gate 					break;
12480Sstevel@tonic-gate 				}
12490Sstevel@tonic-gate 			}
12500Sstevel@tonic-gate 			seglen = new_len;
12510Sstevel@tonic-gate 			xtralen = new_xtralen;
12520Sstevel@tonic-gate 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
12530Sstevel@tonic-gate 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
12540Sstevel@tonic-gate 			linkb(head, nmp);
12550Sstevel@tonic-gate 			mp = mp->b_next;
12560Sstevel@tonic-gate 		}
12570Sstevel@tonic-gate 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
12580Sstevel@tonic-gate 			ipha_t *iph = (ipha_t *)head->b_rptr;
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate 			/*
12610Sstevel@tonic-gate 			 * Path MTU is different from what we thought it would
12620Sstevel@tonic-gate 			 * be when we created chunks, or IP headers have grown.
12630Sstevel@tonic-gate 			 * Need to clear the DF bit.
12640Sstevel@tonic-gate 			 */
12650Sstevel@tonic-gate 			iph->ipha_fragment_offset_and_flags = 0;
12660Sstevel@tonic-gate 		}
12670Sstevel@tonic-gate 		/* xmit segment */
		/*
		 * Only payload is charged against cansend: padding and the
		 * bytes counted in xtralen (chunk headers, SACK) are excluded.
		 */
12680Sstevel@tonic-gate 		ASSERT(cansend >= seglen - pad - xtralen);
12690Sstevel@tonic-gate 		cansend -= (seglen - pad - xtralen);
12700Sstevel@tonic-gate 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
12711676Sjpk 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
12721676Sjpk 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
12731676Sjpk 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
12741676Sjpk 		    cansend, sctp->sctp_lastack_rxd));
127511042SErik.Nordmark@Sun.COM 		sctp_set_iplen(sctp, head, fp->ixa);
127611042SErik.Nordmark@Sun.COM 		(void) conn_ip_output(head, fp->ixa);
127711042SErik.Nordmark@Sun.COM 		BUMP_LOCAL(sctp->sctp_opkts);
12780Sstevel@tonic-gate 		/* arm rto timer (if not set) */
12790Sstevel@tonic-gate 		if (!fp->timer_running)
12800Sstevel@tonic-gate 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1281252Svi117747 		notsent = B_FALSE;
12820Sstevel@tonic-gate 	}
12830Sstevel@tonic-gate 	sctp->sctp_active = now;
12840Sstevel@tonic-gate 	return;
12850Sstevel@tonic-gate unsent_data:
12860Sstevel@tonic-gate 	/* arm persist timer (if rto timer not set) */
12870Sstevel@tonic-gate 	if (!fp->timer_running)
12880Sstevel@tonic-gate 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
12890Sstevel@tonic-gate 	if (head != NULL)
12900Sstevel@tonic-gate 		freemsg(head);
12910Sstevel@tonic-gate }
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate /*
12940Sstevel@tonic-gate  * The following two functions initialize and destroy the cache
12950Sstevel@tonic-gate  * associated with the sets used for PR-SCTP.
12960Sstevel@tonic-gate  */
12970Sstevel@tonic-gate void
12980Sstevel@tonic-gate sctp_ftsn_sets_init(void)
12990Sstevel@tonic-gate {
13000Sstevel@tonic-gate 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
13010Sstevel@tonic-gate 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
13020Sstevel@tonic-gate 	    NULL, 0);
13030Sstevel@tonic-gate }
13040Sstevel@tonic-gate 
13050Sstevel@tonic-gate void
13060Sstevel@tonic-gate sctp_ftsn_sets_fini(void)
13070Sstevel@tonic-gate {
13080Sstevel@tonic-gate 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
13090Sstevel@tonic-gate }
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate /* Free PR-SCTP sets */
13130Sstevel@tonic-gate void
13140Sstevel@tonic-gate sctp_free_ftsn_set(sctp_ftsn_set_t *s)
13150Sstevel@tonic-gate {
13160Sstevel@tonic-gate 	sctp_ftsn_set_t *p;
13170Sstevel@tonic-gate 
13180Sstevel@tonic-gate 	while (s != NULL) {
13190Sstevel@tonic-gate 		p = s->next;
13200Sstevel@tonic-gate 		s->next = NULL;
13210Sstevel@tonic-gate 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
13220Sstevel@tonic-gate 		s = p;
13230Sstevel@tonic-gate 	}
13240Sstevel@tonic-gate }
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate /*
13270Sstevel@tonic-gate  * Given a message meta block, meta, this routine creates or modifies
13280Sstevel@tonic-gate  * the set that will be used to generate a Forward TSN chunk. If the
13290Sstevel@tonic-gate  * entry for stream id, sid, for this message already exists, the
13300Sstevel@tonic-gate  * sequence number, ssn, is updated if it is greater than the existing
13310Sstevel@tonic-gate  * one. If an entry for this sid does not exist, one is created if
13320Sstevel@tonic-gate  * the size does not exceed fp->sfa_pmss. We return false in case
13330Sstevel@tonic-gate  * or an error.
13340Sstevel@tonic-gate  */
13350Sstevel@tonic-gate boolean_t
13360Sstevel@tonic-gate sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
13370Sstevel@tonic-gate     uint_t *nsets, uint32_t *slen)
13380Sstevel@tonic-gate {
13390Sstevel@tonic-gate 	sctp_ftsn_set_t		*p;
13400Sstevel@tonic-gate 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
13410Sstevel@tonic-gate 	uint16_t		sid = htons(msg_hdr->smh_sid);
13420Sstevel@tonic-gate 	/* msg_hdr->smh_ssn is already in NBO */
13430Sstevel@tonic-gate 	uint16_t		ssn = msg_hdr->smh_ssn;
13440Sstevel@tonic-gate 
13450Sstevel@tonic-gate 	ASSERT(s != NULL && nsets != NULL);
13460Sstevel@tonic-gate 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 	if (*s == NULL) {
13490Sstevel@tonic-gate 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
13500Sstevel@tonic-gate 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
13510Sstevel@tonic-gate 		if (*s == NULL)
13520Sstevel@tonic-gate 			return (B_FALSE);
13530Sstevel@tonic-gate 		(*s)->ftsn_entries.ftsn_sid = sid;
13540Sstevel@tonic-gate 		(*s)->ftsn_entries.ftsn_ssn = ssn;
13550Sstevel@tonic-gate 		(*s)->next = NULL;
13560Sstevel@tonic-gate 		*nsets = 1;
13570Sstevel@tonic-gate 		*slen += sizeof (uint32_t);
13580Sstevel@tonic-gate 		return (B_TRUE);
13590Sstevel@tonic-gate 	}
13600Sstevel@tonic-gate 	for (p = *s; p->next != NULL; p = p->next) {
13610Sstevel@tonic-gate 		if (p->ftsn_entries.ftsn_sid == sid) {
13620Sstevel@tonic-gate 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
13630Sstevel@tonic-gate 				p->ftsn_entries.ftsn_ssn = ssn;
13640Sstevel@tonic-gate 			return (B_TRUE);
13650Sstevel@tonic-gate 		}
13660Sstevel@tonic-gate 	}
13670Sstevel@tonic-gate 	/* the last one */
13680Sstevel@tonic-gate 	if (p->ftsn_entries.ftsn_sid == sid) {
13690Sstevel@tonic-gate 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
13700Sstevel@tonic-gate 			p->ftsn_entries.ftsn_ssn = ssn;
13710Sstevel@tonic-gate 	} else {
13720Sstevel@tonic-gate 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
13730Sstevel@tonic-gate 			return (B_FALSE);
13740Sstevel@tonic-gate 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
13750Sstevel@tonic-gate 		    KM_NOSLEEP);
13760Sstevel@tonic-gate 		if (p->next == NULL)
13770Sstevel@tonic-gate 			return (B_FALSE);
13780Sstevel@tonic-gate 		p = p->next;
13790Sstevel@tonic-gate 		p->ftsn_entries.ftsn_sid = sid;
13800Sstevel@tonic-gate 		p->ftsn_entries.ftsn_ssn = ssn;
13810Sstevel@tonic-gate 		p->next = NULL;
13820Sstevel@tonic-gate 		(*nsets)++;
13830Sstevel@tonic-gate 		*slen += sizeof (uint32_t);
13840Sstevel@tonic-gate 	}
13850Sstevel@tonic-gate 	return (B_TRUE);
13860Sstevel@tonic-gate }
13870Sstevel@tonic-gate 
13880Sstevel@tonic-gate /*
13890Sstevel@tonic-gate  * Given a set of stream id - sequence number pairs, this routing creates
13900Sstevel@tonic-gate  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
13910Sstevel@tonic-gate  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
13920Sstevel@tonic-gate  * will add the IP/SCTP header.
13930Sstevel@tonic-gate  */
13940Sstevel@tonic-gate mblk_t *
13950Sstevel@tonic-gate sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
13960Sstevel@tonic-gate     uint_t nsets, uint32_t seglen)
13970Sstevel@tonic-gate {
13980Sstevel@tonic-gate 	mblk_t			*ftsn_mp;
13990Sstevel@tonic-gate 	sctp_chunk_hdr_t	*ch_hdr;
14000Sstevel@tonic-gate 	uint32_t		*advtsn;
14010Sstevel@tonic-gate 	uint16_t		schlen;
14020Sstevel@tonic-gate 	size_t			xtralen;
14030Sstevel@tonic-gate 	ftsn_entry_t		*ftsn_entry;
14043448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
14050Sstevel@tonic-gate 
14060Sstevel@tonic-gate 	seglen += sizeof (sctp_chunk_hdr_t);
14070Sstevel@tonic-gate 	if (fp->isv4)
14083448Sdh155122 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
14090Sstevel@tonic-gate 	else
14103448Sdh155122 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
141111042SErik.Nordmark@Sun.COM 	ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
14120Sstevel@tonic-gate 	if (ftsn_mp == NULL)
14130Sstevel@tonic-gate 		return (NULL);
14140Sstevel@tonic-gate 	ftsn_mp->b_rptr += xtralen;
14150Sstevel@tonic-gate 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
14160Sstevel@tonic-gate 
14170Sstevel@tonic-gate 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
14180Sstevel@tonic-gate 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
14190Sstevel@tonic-gate 	ch_hdr->sch_flags = 0;
14200Sstevel@tonic-gate 	/*
14210Sstevel@tonic-gate 	 * The cast here should not be an issue since seglen is
14220Sstevel@tonic-gate 	 * the length of the Forward TSN chunk.
14230Sstevel@tonic-gate 	 */
14240Sstevel@tonic-gate 	schlen = (uint16_t)seglen;
14250Sstevel@tonic-gate 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
14260Sstevel@tonic-gate 
14270Sstevel@tonic-gate 	advtsn = (uint32_t *)(ch_hdr + 1);
14280Sstevel@tonic-gate 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
14290Sstevel@tonic-gate 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
14300Sstevel@tonic-gate 	while (nsets > 0) {
14310Sstevel@tonic-gate 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
14320Sstevel@tonic-gate 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
14330Sstevel@tonic-gate 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
14340Sstevel@tonic-gate 		ftsn_entry++;
14350Sstevel@tonic-gate 		sets = sets->next;
14360Sstevel@tonic-gate 		nsets--;
14370Sstevel@tonic-gate 	}
14380Sstevel@tonic-gate 	return (ftsn_mp);
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate 
/*
 * Given a starting message, the routine steps through all the
 * messages whose TSN is not beyond sctp->sctp_adv_pap and creates
 * ftsn sets. The ftsn sets are then used to create a Forward TSN
 * chunk. All the messages, that have chunks that are included in the
 * ftsn sets, are flagged abandoned. If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 *	sctp	the association.
 *	meta	first message to consider.
 *	mp	first chunk to consider (presumably a chunk of meta).
 *	nmp	out: the packet to send. It is left untouched when there is
 *		nothing to send and is set to NULL when building the chunk
 *		or adding the protocol headers fails.
 *	fp	destination used for the packet, unless a SACK is piggybacked,
 *		in which case sctp->sctp_lastdata is used instead.
 *	seglen	out: payload length of the Forward TSN chunk (cumulative TSN
 *		plus the set entries) plus the length of a piggybacked SACK.
 *		NOTE(review): the Forward TSN chunk header itself is only
 *		added to the by-value copy inside sctp_make_ftsn_chunk() and
 *		is not reflected here - confirm that callers expect this.
 *
 * Nothing is marked abandoned until the packet has been built
 * successfully, so a failure leaves the transmit list as it was.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* Room for the cumulative TSN; every set entry adds to this. */
	*seglen = sizeof (uint32_t);

	/*
	 * First pass: collect one sid/ssn entry per ordered message that
	 * is covered by sctp_adv_pap. Nothing is modified on the transmit
	 * list yet.
	 */
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * This message could not be recorded. Pull the
			 * advanced peer ack point back to the last TSN of
			 * the messages handled so far and go with what we
			 * have. Setting meta to NULL keeps the "nothing to
			 * do" check below from firing.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		/* Remember the TSN of the last sent chunk of this message. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			/* A message that was never sent ends the walk. */
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets have been copied into the chunk (or are of no use). */
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/*
	 * Decide whether a SACK is piggybacked. Presumably sctp_ftsn being
	 * right behind sctp_lastacked means there is nothing new to
	 * acknowledge - TODO confirm.
	 */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			/* The SACK goes to where data was last received. */
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * Second pass: now that the packet exists, walk the same messages
	 * again and flag them. XXX Need to optimise this, the reason it is
	 * done here is so that we don't have to undo in case of failure.
	 */
	mp1 = mp_head;
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Chunks that were sent: account the unacked ones to fp. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* The rest of the message was never sent: abandon it. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
	}
}
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate /*
15800Sstevel@tonic-gate  * This function steps through messages starting at meta and checks if
15810Sstevel@tonic-gate  * the message is abandoned. It stops when it hits an unsent chunk or
15820Sstevel@tonic-gate  * a message that has all its chunk acked. This is the only place
15830Sstevel@tonic-gate  * where the sctp_adv_pap is moved forward to indicated abandoned
15840Sstevel@tonic-gate  * messages.
15850Sstevel@tonic-gate  */
15860Sstevel@tonic-gate void
15870Sstevel@tonic-gate sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
15880Sstevel@tonic-gate {
15890Sstevel@tonic-gate 	uint32_t	tsn = sctp->sctp_adv_pap;
15900Sstevel@tonic-gate 	sctp_data_hdr_t	*sdc;
15910Sstevel@tonic-gate 	sctp_msg_hdr_t	*msg_hdr;
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 	ASSERT(mp != NULL);
15940Sstevel@tonic-gate 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
15950Sstevel@tonic-gate 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
15960Sstevel@tonic-gate 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
15970Sstevel@tonic-gate 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
15980Sstevel@tonic-gate 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
15990Sstevel@tonic-gate 		return;
16000Sstevel@tonic-gate 	}
16010Sstevel@tonic-gate 	while (meta != NULL) {
16020Sstevel@tonic-gate 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
16030Sstevel@tonic-gate 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
16040Sstevel@tonic-gate 			tsn = ntohl(sdc->sdh_tsn);
16050Sstevel@tonic-gate 			mp = mp->b_next;
16060Sstevel@tonic-gate 		}
16070Sstevel@tonic-gate 		if (mp != NULL)
16080Sstevel@tonic-gate 			break;
16090Sstevel@tonic-gate 		/*
16100Sstevel@tonic-gate 		 * We continue checking for successive messages only if there
16110Sstevel@tonic-gate 		 * is a chunk marked for retransmission. Else, we might
16120Sstevel@tonic-gate 		 * end up sending FTSN prematurely for chunks that have been
16130Sstevel@tonic-gate 		 * sent, but not yet acked.
16140Sstevel@tonic-gate 		 */
16150Sstevel@tonic-gate 		if ((meta = meta->b_next) != NULL) {
16160Sstevel@tonic-gate 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
16170Sstevel@tonic-gate 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
16180Sstevel@tonic-gate 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
16190Sstevel@tonic-gate 				break;
16200Sstevel@tonic-gate 			}
16210Sstevel@tonic-gate 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
16220Sstevel@tonic-gate 				if (!SCTP_CHUNK_ISSENT(mp)) {
16230Sstevel@tonic-gate 					sctp->sctp_adv_pap = tsn;
16240Sstevel@tonic-gate 					return;
16250Sstevel@tonic-gate 				}
16260Sstevel@tonic-gate 				if (SCTP_CHUNK_WANT_REXMIT(mp))
16270Sstevel@tonic-gate 					break;
16280Sstevel@tonic-gate 			}
16290Sstevel@tonic-gate 			if (mp == NULL)
16300Sstevel@tonic-gate 				break;
16310Sstevel@tonic-gate 		}
16320Sstevel@tonic-gate 	}
16330Sstevel@tonic-gate 	sctp->sctp_adv_pap = tsn;
16340Sstevel@tonic-gate }
16350Sstevel@tonic-gate 
16361735Skcpoon 
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
 * bundle if
 *
 * - the chunk was sent to the given destination, fp, and is unack'ed.
 *
 * OR
 *
 * - the chunk is unsent, i.e. new data, or it has been sent and carries
 *   the retransmit flag.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED(mp) &&					\
	    ((SCTP_CHUNK_ISSENT(mp) &&					\
	    SCTP_CHUNK_DEST(mp) == (fp) &&				\
	    !SCTP_CHUNK_ISACKED(mp)) ||					\
	    ((mp)->b_flag &						\
	    (SCTP_CHUNK_FLAG_SENT | SCTP_CHUNK_FLAG_REXMIT)) !=		\
	    SCTP_CHUNK_FLAG_SENT))
16531735Skcpoon 
16540Sstevel@tonic-gate /*
16550Sstevel@tonic-gate  * Retransmit first segment which hasn't been acked with cumtsn or send
16560Sstevel@tonic-gate  * a Forward TSN chunk, if appropriate.
16570Sstevel@tonic-gate  */
16580Sstevel@tonic-gate void
16590Sstevel@tonic-gate sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
16600Sstevel@tonic-gate {
16610Sstevel@tonic-gate 	mblk_t		*mp;
16620Sstevel@tonic-gate 	mblk_t		*nmp = NULL;
16630Sstevel@tonic-gate 	mblk_t		*head;
16640Sstevel@tonic-gate 	mblk_t		*meta = sctp->sctp_xmit_head;
16650Sstevel@tonic-gate 	mblk_t		*fill;
16660Sstevel@tonic-gate 	uint32_t	seglen = 0;
16670Sstevel@tonic-gate 	uint32_t	sacklen;
16680Sstevel@tonic-gate 	uint16_t	chunklen;
16690Sstevel@tonic-gate 	int		extra;
16700Sstevel@tonic-gate 	sctp_data_hdr_t	*sdc;
16710Sstevel@tonic-gate 	sctp_faddr_t	*fp;
16720Sstevel@tonic-gate 	uint32_t	adv_pap = sctp->sctp_adv_pap;
16730Sstevel@tonic-gate 	boolean_t	do_ftsn = B_FALSE;
16740Sstevel@tonic-gate 	boolean_t	ftsn_check = B_TRUE;
16751735Skcpoon 	uint32_t	first_ua_tsn;
16761735Skcpoon 	sctp_msg_hdr_t	*mhdr;
16773448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
16788154SGeorge.Shepherd@Sun.COM 	int		error;
16790Sstevel@tonic-gate 
16800Sstevel@tonic-gate 	while (meta != NULL) {
16810Sstevel@tonic-gate 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
16820Sstevel@tonic-gate 			uint32_t	tsn;
16830Sstevel@tonic-gate 
16840Sstevel@tonic-gate 			if (!SCTP_CHUNK_ISSENT(mp))
16850Sstevel@tonic-gate 				goto window_probe;
16860Sstevel@tonic-gate 			/*
16870Sstevel@tonic-gate 			 * We break in the following cases -
16880Sstevel@tonic-gate 			 *
			 *	if the advanced peer ack point includes the next
			 *	chunk to be retransmitted - possibly the Forward
			 *	TSN was lost.
16920Sstevel@tonic-gate 			 *
16930Sstevel@tonic-gate 			 *	if we are PRSCTP aware and the next chunk to be
16940Sstevel@tonic-gate 			 *	retransmitted is now abandoned
16950Sstevel@tonic-gate 			 *
16960Sstevel@tonic-gate 			 *	if the next chunk to be retransmitted is for
16970Sstevel@tonic-gate 			 *	the dest on which the timer went off. (this
16980Sstevel@tonic-gate 			 *	message is not abandoned).
16990Sstevel@tonic-gate 			 *
17000Sstevel@tonic-gate 			 * We check for Forward TSN only for the first
17010Sstevel@tonic-gate 			 * eligible chunk to be retransmitted. The reason
17020Sstevel@tonic-gate 			 * being if the first eligible chunk is skipped (say
17030Sstevel@tonic-gate 			 * it was sent to a destination other than oldfp)
17040Sstevel@tonic-gate 			 * then we cannot advance the cum TSN via Forward
17050Sstevel@tonic-gate 			 * TSN chunk.
17060Sstevel@tonic-gate 			 *
17070Sstevel@tonic-gate 			 * Also, ftsn_check is B_TRUE only for the first
17080Sstevel@tonic-gate 			 * eligible chunk, it  will be B_FALSE for all
17090Sstevel@tonic-gate 			 * subsequent candidate messages for retransmission.
17100Sstevel@tonic-gate 			 */
17110Sstevel@tonic-gate 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
17120Sstevel@tonic-gate 			tsn = ntohl(sdc->sdh_tsn);
17130Sstevel@tonic-gate 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
17140Sstevel@tonic-gate 				if (sctp->sctp_prsctp_aware && ftsn_check) {
17150Sstevel@tonic-gate 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
17160Sstevel@tonic-gate 						ASSERT(sctp->sctp_prsctp_aware);
17170Sstevel@tonic-gate 						do_ftsn = B_TRUE;
17180Sstevel@tonic-gate 						goto out;
17190Sstevel@tonic-gate 					} else {
17200Sstevel@tonic-gate 						sctp_check_adv_ack_pt(sctp,
17210Sstevel@tonic-gate 						    meta, mp);
17220Sstevel@tonic-gate 						if (SEQ_GT(sctp->sctp_adv_pap,
17230Sstevel@tonic-gate 						    adv_pap)) {
17240Sstevel@tonic-gate 							do_ftsn = B_TRUE;
17250Sstevel@tonic-gate 							goto out;
17260Sstevel@tonic-gate 						}
17270Sstevel@tonic-gate 					}
17280Sstevel@tonic-gate 					ftsn_check = B_FALSE;
17290Sstevel@tonic-gate 				}
17300Sstevel@tonic-gate 				if (SCTP_CHUNK_DEST(mp) == oldfp)
17310Sstevel@tonic-gate 					goto out;
17320Sstevel@tonic-gate 			}
17330Sstevel@tonic-gate 		}
17340Sstevel@tonic-gate 		meta = meta->b_next;
17350Sstevel@tonic-gate 		if (meta != NULL && sctp->sctp_prsctp_aware) {
17361735Skcpoon 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
17370Sstevel@tonic-gate 
17380Sstevel@tonic-gate 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
17390Sstevel@tonic-gate 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
17400Sstevel@tonic-gate 				meta = meta->b_next;
17410Sstevel@tonic-gate 			}
17420Sstevel@tonic-gate 		}
17430Sstevel@tonic-gate 	}
17440Sstevel@tonic-gate window_probe:
17450Sstevel@tonic-gate 	/*
17460Sstevel@tonic-gate 	 * Retransmit fired for a destination which didn't have
17470Sstevel@tonic-gate 	 * any unacked data pending.
17480Sstevel@tonic-gate 	 */
17491932Svi117747 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
17500Sstevel@tonic-gate 		/*
17510Sstevel@tonic-gate 		 * Send a window probe. Inflate frwnd to allow
17520Sstevel@tonic-gate 		 * sending one segment.
17530Sstevel@tonic-gate 		 */
17541932Svi117747 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
17550Sstevel@tonic-gate 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
17563795Skcpoon 
17571932Svi117747 		/* next TSN to send */
17581932Svi117747 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
17593795Skcpoon 
17603795Skcpoon 		/*
17613795Skcpoon 		 * The above sctp_frwnd adjustment is coarse.  The "changed"
17623795Skcpoon 		 * sctp_frwnd may allow us to send more than 1 packet.  So
17633795Skcpoon 		 * tell sctp_output() to send only 1 packet.
17643795Skcpoon 		 */
17653795Skcpoon 		sctp_output(sctp, 1);
17663795Skcpoon 
17671932Svi117747 		/* Last sent TSN */
17681932Svi117747 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
17691932Svi117747 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
17701932Svi117747 		sctp->sctp_zero_win_probe = B_TRUE;
17713448Sdh155122 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
17720Sstevel@tonic-gate 	}
17730Sstevel@tonic-gate 	return;
17740Sstevel@tonic-gate out:
17750Sstevel@tonic-gate 	/*
17763795Skcpoon 	 * After a time out, assume that everything has left the network.  So
17773795Skcpoon 	 * we can clear rxt_unacked for the original peer address.
17783795Skcpoon 	 */
17793795Skcpoon 	oldfp->rxt_unacked = 0;
17803795Skcpoon 
17813795Skcpoon 	/*
17823795Skcpoon 	 * If we were probing for zero window, don't adjust retransmission
17831932Svi117747 	 * variables, but the timer is still backed off.
17841932Svi117747 	 */
17851932Svi117747 	if (sctp->sctp_zero_win_probe) {
17861932Svi117747 		mblk_t	*pkt;
17871932Svi117747 		uint_t	pkt_len;
17881932Svi117747 
17891932Svi117747 		/*
		 * Get the Zero Win Probe for retransmission, sctp_rxt_nxttsn
17911932Svi117747 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
17921932Svi117747 		 */
17931932Svi117747 		fp = oldfp;
17941932Svi117747 		if (oldfp->state != SCTP_FADDRS_ALIVE)
17951932Svi117747 			fp = sctp_rotate_faddr(sctp, oldfp);
17961932Svi117747 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
17971932Svi117747 		if (pkt != NULL) {
17981932Svi117747 			ASSERT(pkt_len <= fp->sfa_pmss);
179911042SErik.Nordmark@Sun.COM 			sctp_set_iplen(sctp, pkt, fp->ixa);
180011042SErik.Nordmark@Sun.COM 			(void) conn_ip_output(pkt, fp->ixa);
180111042SErik.Nordmark@Sun.COM 			BUMP_LOCAL(sctp->sctp_opkts);
18021932Svi117747 		} else {
18033448Sdh155122 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
18041932Svi117747 		}
18053795Skcpoon 
18063795Skcpoon 		/*
		 * The strikes will be cleared by sctp_faddr_alive() when the
18083795Skcpoon 		 * other side sends us an ack.
18093795Skcpoon 		 */
18101932Svi117747 		oldfp->strikes++;
18111932Svi117747 		sctp->sctp_strikes++;
18123795Skcpoon 
181310212SGeorge.Shepherd@Sun.COM 		SCTP_CALC_RXT(sctp, oldfp);
18141932Svi117747 		if (oldfp != fp && oldfp->suna != 0)
18151932Svi117747 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
18161932Svi117747 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
18173448Sdh155122 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
18181932Svi117747 		return;
18191932Svi117747 	}
18201932Svi117747 
18211932Svi117747 	/*
18220Sstevel@tonic-gate 	 * Enter slowstart for this destination
18230Sstevel@tonic-gate 	 */
18240Sstevel@tonic-gate 	oldfp->ssthresh = oldfp->cwnd / 2;
18250Sstevel@tonic-gate 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
18260Sstevel@tonic-gate 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
18270Sstevel@tonic-gate 	oldfp->cwnd = oldfp->sfa_pmss;
18280Sstevel@tonic-gate 	oldfp->pba = 0;
18290Sstevel@tonic-gate 	fp = sctp_rotate_faddr(sctp, oldfp);
18300Sstevel@tonic-gate 	ASSERT(fp != NULL);
18310Sstevel@tonic-gate 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
18320Sstevel@tonic-gate 
18331735Skcpoon 	first_ua_tsn = ntohl(sdc->sdh_tsn);
18340Sstevel@tonic-gate 	if (do_ftsn) {
18350Sstevel@tonic-gate 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
18360Sstevel@tonic-gate 		if (nmp == NULL) {
18370Sstevel@tonic-gate 			sctp->sctp_adv_pap = adv_pap;
18380Sstevel@tonic-gate 			goto restart_timer;
18390Sstevel@tonic-gate 		}
18400Sstevel@tonic-gate 		head = nmp;
18411735Skcpoon 		/*
18421735Skcpoon 		 * Move to the next unabandoned chunk. XXXCheck if meta will
18431735Skcpoon 		 * always be marked abandoned.
18441735Skcpoon 		 */
18451735Skcpoon 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
18461735Skcpoon 			meta = meta->b_next;
18470Sstevel@tonic-gate 		if (meta != NULL)
18481735Skcpoon 			mp = mp->b_cont;
18491735Skcpoon 		else
18501735Skcpoon 			mp = NULL;
18510Sstevel@tonic-gate 		goto try_bundle;
18520Sstevel@tonic-gate 	}
18530Sstevel@tonic-gate 	seglen = ntohs(sdc->sdh_len);
18540Sstevel@tonic-gate 	chunklen = seglen - sizeof (*sdc);
18550Sstevel@tonic-gate 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
18560Sstevel@tonic-gate 		extra = SCTP_ALIGN - extra;
18570Sstevel@tonic-gate 
18581735Skcpoon 	/* Find out if we need to piggyback SACK. */
18591735Skcpoon 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
18601735Skcpoon 		sacklen = 0;
18611735Skcpoon 	} else {
18621735Skcpoon 		sacklen = sizeof (sctp_chunk_hdr_t) +
18631735Skcpoon 		    sizeof (sctp_sack_chunk_t) +
18641735Skcpoon 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
18651735Skcpoon 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
18661735Skcpoon 			/* piggybacked SACK doesn't fit */
18671735Skcpoon 			sacklen = 0;
18681735Skcpoon 		} else {
18691735Skcpoon 			/*
18701735Skcpoon 			 * OK, we have room to send SACK back.  But we
18711735Skcpoon 			 * should send it back to the last fp where we
18721735Skcpoon 			 * receive data from, unless sctp_lastdata equals
18731735Skcpoon 			 * oldfp, then we should probably not send it
18741735Skcpoon 			 * back to that fp.  Also we should check that
18751735Skcpoon 			 * the fp is alive.
18761735Skcpoon 			 */
18771735Skcpoon 			if (sctp->sctp_lastdata != oldfp &&
18781735Skcpoon 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
18791735Skcpoon 				fp = sctp->sctp_lastdata;
18801735Skcpoon 			}
18811735Skcpoon 		}
18821735Skcpoon 	}
18831735Skcpoon 
18840Sstevel@tonic-gate 	/*
18850Sstevel@tonic-gate 	 * Cancel RTT measurement if the retransmitted TSN is before the
	 * TSN used for timing.
18870Sstevel@tonic-gate 	 */
18880Sstevel@tonic-gate 	if (sctp->sctp_out_time != 0 &&
18890Sstevel@tonic-gate 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
18900Sstevel@tonic-gate 		sctp->sctp_out_time = 0;
18910Sstevel@tonic-gate 	}
18920Sstevel@tonic-gate 	/* Clear the counter as the RTT calculation may be off. */
18930Sstevel@tonic-gate 	fp->rtt_updates = 0;
18941735Skcpoon 	oldfp->rtt_updates = 0;
18950Sstevel@tonic-gate 
18961735Skcpoon 	/*
18971735Skcpoon 	 * After a timeout, we should change the current faddr so that
18981735Skcpoon 	 * new chunks will be sent to the alternate address.
18991735Skcpoon 	 */
19001735Skcpoon 	sctp_set_faddr_current(sctp, fp);
19010Sstevel@tonic-gate 
19020Sstevel@tonic-gate 	nmp = dupmsg(mp);
19030Sstevel@tonic-gate 	if (nmp == NULL)
19040Sstevel@tonic-gate 		goto restart_timer;
19050Sstevel@tonic-gate 	if (extra > 0) {
19064691Skcpoon 		fill = sctp_get_padding(sctp, extra);
19070Sstevel@tonic-gate 		if (fill != NULL) {
19080Sstevel@tonic-gate 			linkb(nmp, fill);
19090Sstevel@tonic-gate 			seglen += extra;
19100Sstevel@tonic-gate 		} else {
19110Sstevel@tonic-gate 			freemsg(nmp);
19120Sstevel@tonic-gate 			goto restart_timer;
19130Sstevel@tonic-gate 		}
19140Sstevel@tonic-gate 	}
19150Sstevel@tonic-gate 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1916252Svi117747 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
19170Sstevel@tonic-gate 	if (head == NULL) {
19180Sstevel@tonic-gate 		freemsg(nmp);
19193448Sdh155122 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
19200Sstevel@tonic-gate 		goto restart_timer;
19210Sstevel@tonic-gate 	}
19220Sstevel@tonic-gate 	seglen += sacklen;
19230Sstevel@tonic-gate 
19240Sstevel@tonic-gate 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
19250Sstevel@tonic-gate 
19260Sstevel@tonic-gate 	mp = mp->b_next;
19271735Skcpoon 
19280Sstevel@tonic-gate try_bundle:
19293795Skcpoon 	/* We can at least and at most send 1 packet at timeout. */
19300Sstevel@tonic-gate 	while (seglen < fp->sfa_pmss) {
19310Sstevel@tonic-gate 		int32_t new_len;
19320Sstevel@tonic-gate 
19331735Skcpoon 		/* Go through the list to find more chunks to be bundled. */
19340Sstevel@tonic-gate 		while (mp != NULL) {
19351735Skcpoon 			/* Check if the chunk can be bundled. */
19361735Skcpoon 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
19370Sstevel@tonic-gate 				break;
19380Sstevel@tonic-gate 			mp = mp->b_next;
19390Sstevel@tonic-gate 		}
19401735Skcpoon 		/* Go to the next message. */
19410Sstevel@tonic-gate 		if (mp == NULL) {
19421735Skcpoon 			for (meta = meta->b_next; meta != NULL;
19431735Skcpoon 			    meta = meta->b_next) {
19441735Skcpoon 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
19451735Skcpoon 
19461735Skcpoon 				if (SCTP_IS_MSG_ABANDONED(meta) ||
19471735Skcpoon 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
19481735Skcpoon 				    sctp)) {
19491735Skcpoon 					continue;
19501735Skcpoon 				}
19511735Skcpoon 
19521735Skcpoon 				mp = meta->b_cont;
19531735Skcpoon 				goto try_bundle;
19541735Skcpoon 			}
19558154SGeorge.Shepherd@Sun.COM 			/*
19568154SGeorge.Shepherd@Sun.COM 			 * Check if there is a new message which potentially
19578154SGeorge.Shepherd@Sun.COM 			 * could be bundled with this retransmission.
19588154SGeorge.Shepherd@Sun.COM 			 */
19598154SGeorge.Shepherd@Sun.COM 			meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
19608154SGeorge.Shepherd@Sun.COM 			    seglen, fp->sfa_pmss - seglen, NULL);
19618154SGeorge.Shepherd@Sun.COM 			if (error != 0 || meta == NULL) {
19628154SGeorge.Shepherd@Sun.COM 				/* No more chunk to be bundled. */
19638154SGeorge.Shepherd@Sun.COM 				break;
19648154SGeorge.Shepherd@Sun.COM 			} else {
19658154SGeorge.Shepherd@Sun.COM 				goto try_bundle;
19668154SGeorge.Shepherd@Sun.COM 			}
19670Sstevel@tonic-gate 		}
19681735Skcpoon 
19690Sstevel@tonic-gate 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
19701735Skcpoon 		new_len = ntohs(sdc->sdh_len);
19711735Skcpoon 		chunklen = new_len - sizeof (*sdc);
19720Sstevel@tonic-gate 
19731735Skcpoon 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
19741735Skcpoon 			extra = SCTP_ALIGN - extra;
19751735Skcpoon 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
19761735Skcpoon 			break;
19771735Skcpoon 		if ((nmp = dupmsg(mp)) == NULL)
19781735Skcpoon 			break;
19790Sstevel@tonic-gate 
19801735Skcpoon 		if (extra > 0) {
19814691Skcpoon 			fill = sctp_get_padding(sctp, extra);
19820Sstevel@tonic-gate 			if (fill != NULL) {
19831735Skcpoon 				linkb(nmp, fill);
19840Sstevel@tonic-gate 			} else {
19851735Skcpoon 				freemsg(nmp);
19860Sstevel@tonic-gate 				break;
19870Sstevel@tonic-gate 			}
19880Sstevel@tonic-gate 		}
19891735Skcpoon 		linkb(head, nmp);
19900Sstevel@tonic-gate 
19910Sstevel@tonic-gate 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
19920Sstevel@tonic-gate 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
19931735Skcpoon 
19941735Skcpoon 		seglen = new_len;
19950Sstevel@tonic-gate 		mp = mp->b_next;
19960Sstevel@tonic-gate 	}
19971735Skcpoon done_bundle:
19980Sstevel@tonic-gate 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
19990Sstevel@tonic-gate 		ipha_t *iph = (ipha_t *)head->b_rptr;
20000Sstevel@tonic-gate 
20010Sstevel@tonic-gate 		/*
20020Sstevel@tonic-gate 		 * Path MTU is different from path we thought it would
20030Sstevel@tonic-gate 		 * be when we created chunks, or IP headers have grown.
20040Sstevel@tonic-gate 		 * Need to clear the DF bit.
20050Sstevel@tonic-gate 		 */
20060Sstevel@tonic-gate 		iph->ipha_fragment_offset_and_flags = 0;
20070Sstevel@tonic-gate 	}
20083795Skcpoon 	fp->rxt_unacked += seglen;
20093795Skcpoon 
20100Sstevel@tonic-gate 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
20110Sstevel@tonic-gate 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
20121676Sjpk 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
20131676Sjpk 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
20140Sstevel@tonic-gate 
20151735Skcpoon 	sctp->sctp_rexmitting = B_TRUE;
20161735Skcpoon 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
20171735Skcpoon 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
201811042SErik.Nordmark@Sun.COM 	sctp_set_iplen(sctp, head, fp->ixa);
201911042SErik.Nordmark@Sun.COM 	(void) conn_ip_output(head, fp->ixa);
202011042SErik.Nordmark@Sun.COM 	BUMP_LOCAL(sctp->sctp_opkts);
20210Sstevel@tonic-gate 
20220Sstevel@tonic-gate 	/*
20231735Skcpoon 	 * Restart the oldfp timer with exponential backoff and
20241735Skcpoon 	 * the new fp timer for the retransmitted chunks.
20250Sstevel@tonic-gate 	 */
20260Sstevel@tonic-gate restart_timer:
20270Sstevel@tonic-gate 	oldfp->strikes++;
20280Sstevel@tonic-gate 	sctp->sctp_strikes++;
202910212SGeorge.Shepherd@Sun.COM 	SCTP_CALC_RXT(sctp, oldfp);
20304818Skcpoon 	/*
20314818Skcpoon 	 * If there is still some data in the oldfp, restart the
20324818Skcpoon 	 * retransmission timer.  If there is no data, the heartbeat will
20334818Skcpoon 	 * continue to run so it will do its job in checking the reachability
20344818Skcpoon 	 * of the oldfp.
20354818Skcpoon 	 */
20364818Skcpoon 	if (oldfp != fp && oldfp->suna != 0)
20370Sstevel@tonic-gate 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
20381735Skcpoon 
20391735Skcpoon 	/*
20401735Skcpoon 	 * Should we restart the timer of the new fp?  If there is
20411735Skcpoon 	 * outstanding data to the new fp, the timer should be
20421735Skcpoon 	 * running already.  So restarting it means that the timer
20431735Skcpoon 	 * will fire later for those outstanding data.  But if
20441735Skcpoon 	 * we don't restart it, the timer will fire too early for the
20451735Skcpoon 	 * just retransmitted chunks to the new fp.  The reason is that we
20461735Skcpoon 	 * don't keep a timestamp on when a chunk is retransmitted.
20471735Skcpoon 	 * So when the timer fires, it will just search for the
20481735Skcpoon 	 * chunk with the earliest TSN sent to new fp.  This probably
20491735Skcpoon 	 * is the chunk we just retransmitted.  So for now, let's
20501735Skcpoon 	 * be conservative and restart the timer of the new fp.
20511735Skcpoon 	 */
20521735Skcpoon 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
20534818Skcpoon 
2054*11066Srafael.vanoni@sun.com 	sctp->sctp_active = ddi_get_lbolt64();
20550Sstevel@tonic-gate }
20560Sstevel@tonic-gate 
20570Sstevel@tonic-gate /*
20581735Skcpoon  * This function is called by sctp_ss_rexmit() to create a packet
20591735Skcpoon  * to be retransmitted to the given fp.  The given meta and mp
20601735Skcpoon  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
20613795Skcpoon  * first chunk to be retransmitted.  This is also called when we want
20621932Svi117747  * to retransmit a zero window probe from sctp_rexmit() or when we
20631932Svi117747  * want to retransmit the zero window probe after the window has
20641932Svi117747  * opened from sctp_got_sack().
20651735Skcpoon  */
20661932Svi117747 mblk_t *
20671735Skcpoon sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
20681735Skcpoon     uint_t *packet_len)
20691735Skcpoon {
20701735Skcpoon 	uint32_t	seglen = 0;
20711735Skcpoon 	uint16_t	chunklen;
20721735Skcpoon 	int		extra;
20731735Skcpoon 	mblk_t		*nmp;
20741735Skcpoon 	mblk_t		*head;
20751735Skcpoon 	mblk_t		*fill;
20761735Skcpoon 	sctp_data_hdr_t	*sdc;
20771735Skcpoon 	sctp_msg_hdr_t	*mhdr;
20781735Skcpoon 
20791735Skcpoon 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
20801735Skcpoon 	seglen = ntohs(sdc->sdh_len);
20811735Skcpoon 	chunklen = seglen - sizeof (*sdc);
20821735Skcpoon 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
20831735Skcpoon 		extra = SCTP_ALIGN - extra;
20841735Skcpoon 
20851735Skcpoon 	nmp = dupmsg(*mp);
20861735Skcpoon 	if (nmp == NULL)
20871735Skcpoon 		return (NULL);
20881735Skcpoon 	if (extra > 0) {
20894691Skcpoon 		fill = sctp_get_padding(sctp, extra);
20901735Skcpoon 		if (fill != NULL) {
20911735Skcpoon 			linkb(nmp, fill);
20921735Skcpoon 			seglen += extra;
20931735Skcpoon 		} else {
20941735Skcpoon 			freemsg(nmp);
20951735Skcpoon 			return (NULL);
20961735Skcpoon 		}
20971735Skcpoon 	}
20981735Skcpoon 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
20991735Skcpoon 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
21001735Skcpoon 	if (head == NULL) {
21011735Skcpoon 		freemsg(nmp);
21021735Skcpoon 		return (NULL);
21031735Skcpoon 	}
21041735Skcpoon 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
21051932Svi117747 	/*
21061932Svi117747 	 * Don't update the TSN if we are doing a Zero Win Probe.
21071932Svi117747 	 */
21081932Svi117747 	if (!sctp->sctp_zero_win_probe)
21091932Svi117747 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
21101735Skcpoon 	*mp = (*mp)->b_next;
21111735Skcpoon 
21121735Skcpoon try_bundle:
21131735Skcpoon 	while (seglen < fp->sfa_pmss) {
21141735Skcpoon 		int32_t new_len;
21151735Skcpoon 
21161735Skcpoon 		/*
21171735Skcpoon 		 * Go through the list to find more chunks to be bundled.
21181735Skcpoon 		 * We should only retransmit sent by unack'ed chunks.  Since
21191735Skcpoon 		 * they were sent before, the peer's receive window should
21201735Skcpoon 		 * be able to receive them.
21211735Skcpoon 		 */
21221735Skcpoon 		while (*mp != NULL) {
21231735Skcpoon 			/* Check if the chunk can be bundled. */
21241735Skcpoon 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
21251735Skcpoon 				break;
21261735Skcpoon 			*mp = (*mp)->b_next;
21271735Skcpoon 		}
21281735Skcpoon 		/* Go to the next message. */
21291735Skcpoon 		if (*mp == NULL) {
21301735Skcpoon 			for (*meta = (*meta)->b_next; *meta != NULL;
21311735Skcpoon 			    *meta = (*meta)->b_next) {
21321735Skcpoon 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
21331735Skcpoon 
21341735Skcpoon 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
21351735Skcpoon 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
21361735Skcpoon 				    sctp)) {
21371735Skcpoon 					continue;
21381735Skcpoon 				}
21391735Skcpoon 
21401735Skcpoon 				*mp = (*meta)->b_cont;
21411735Skcpoon 				goto try_bundle;
21421735Skcpoon 			}
21431735Skcpoon 			/* No more chunk to be bundled. */
21441735Skcpoon 			break;
21451735Skcpoon 		}
21461735Skcpoon 
21471735Skcpoon 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
21481735Skcpoon 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
21491735Skcpoon 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
21501735Skcpoon 			break;
21511735Skcpoon 		new_len = ntohs(sdc->sdh_len);
21521735Skcpoon 		chunklen = new_len - sizeof (*sdc);
21531735Skcpoon 
21541735Skcpoon 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
21551735Skcpoon 			extra = SCTP_ALIGN - extra;
21561735Skcpoon 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
21571735Skcpoon 			break;
21581735Skcpoon 		if ((nmp = dupmsg(*mp)) == NULL)
21591735Skcpoon 			break;
21601735Skcpoon 
21611735Skcpoon 		if (extra > 0) {
21624691Skcpoon 			fill = sctp_get_padding(sctp, extra);
21631735Skcpoon 			if (fill != NULL) {
21641735Skcpoon 				linkb(nmp, fill);
21651735Skcpoon 			} else {
21661735Skcpoon 				freemsg(nmp);
21671735Skcpoon 				break;
21681735Skcpoon 			}
21691735Skcpoon 		}
21701735Skcpoon 		linkb(head, nmp);
21711735Skcpoon 
21721735Skcpoon 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
21731735Skcpoon 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
21741932Svi117747 		/*
21751932Svi117747 		 * Don't update the TSN if we are doing a Zero Win Probe.
21761932Svi117747 		 */
21771932Svi117747 		if (!sctp->sctp_zero_win_probe)
21781932Svi117747 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
21791735Skcpoon 
21801735Skcpoon 		seglen = new_len;
21811735Skcpoon 		*mp = (*mp)->b_next;
21821735Skcpoon 	}
21831735Skcpoon 	*packet_len = seglen;
21843795Skcpoon 	fp->rxt_unacked += seglen;
21851735Skcpoon 	return (head);
21861735Skcpoon }
21871735Skcpoon 
21881735Skcpoon /*
21891735Skcpoon  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
21901735Skcpoon  * advances the cum_tsn but the cum_tsn is still less than what we have sent
21911735Skcpoon  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
21921735Skcpoon  * SACK.  We retransmit unacked chunks without having to wait for another
21931735Skcpoon  * timeout.  The rationale is that the SACK should not be "partial" if all the
21941735Skcpoon  * lost chunks have been retransmitted.  Since the SACK is "partial,"
21951735Skcpoon  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
21961735Skcpoon  * be missing.  It is better for us to retransmit them now instead
21971735Skcpoon  * of waiting for a timeout.
21981735Skcpoon  */
21991735Skcpoon void
22001735Skcpoon sctp_ss_rexmit(sctp_t *sctp)
22011735Skcpoon {
22021735Skcpoon 	mblk_t		*meta;
22031735Skcpoon 	mblk_t		*mp;
22041735Skcpoon 	mblk_t		*pkt;
22051735Skcpoon 	sctp_faddr_t	*fp;
22061735Skcpoon 	uint_t		pkt_len;
22071735Skcpoon 	uint32_t	tot_wnd;
22081735Skcpoon 	sctp_data_hdr_t	*sdc;
22091735Skcpoon 	int		burst;
22103448Sdh155122 	sctp_stack_t	*sctps = sctp->sctp_sctps;
22111735Skcpoon 
22121932Svi117747 	ASSERT(!sctp->sctp_zero_win_probe);
22131932Svi117747 
22141735Skcpoon 	/*
22151735Skcpoon 	 * If the last cum ack is smaller than what we have just
22161735Skcpoon 	 * retransmitted, simply return.
22171735Skcpoon 	 */
22181735Skcpoon 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
22191735Skcpoon 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
22201735Skcpoon 	else
22211735Skcpoon 		return;
22221735Skcpoon 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
22231735Skcpoon 
22241735Skcpoon 	/*
22251735Skcpoon 	 * After a timer fires, sctp_current should be set to the new
22261735Skcpoon 	 * fp where the retransmitted chunks are sent.
22271735Skcpoon 	 */
22281735Skcpoon 	fp = sctp->sctp_current;
22291735Skcpoon 
22301735Skcpoon 	/*
22313795Skcpoon 	 * Since we are retransmitting, we only need to use cwnd to determine
22323795Skcpoon 	 * how much we can send as we were allowed (by peer's receive window)
22333795Skcpoon 	 * to send those retransmitted chunks previously when they are first
22343795Skcpoon 	 * sent.  If we record how much we have retransmitted but
22353795Skcpoon 	 * unacknowledged using rxt_unacked, then the amount we can now send
22363795Skcpoon 	 * is equal to cwnd minus rxt_unacked.
22373795Skcpoon 	 *
22383795Skcpoon 	 * The field rxt_unacked is incremented when we retransmit a packet
22393795Skcpoon 	 * and decremented when we got a SACK acknowledging something.  And
22403795Skcpoon 	 * it is reset when the retransmission timer fires as we assume that
22413795Skcpoon 	 * all packets have left the network after a timeout.  If this
22423795Skcpoon 	 * assumption is not true, it means that after a timeout, we can
22433795Skcpoon 	 * get a SACK acknowledging more than rxt_unacked (its value only
22443795Skcpoon 	 * contains what is retransmitted when the timer fires).  So
22453795Skcpoon 	 * rxt_unacked will become very big (it is an unsiged int so going
22463795Skcpoon 	 * negative means that the value is huge).  This is the reason we
22473795Skcpoon 	 * always send at least 1 MSS bytes.
22483795Skcpoon 	 *
22493795Skcpoon 	 * The reason why we do not have an accurate count is that we
22503795Skcpoon 	 * only know how many packets are outstanding (using the TSN numbers).
22513795Skcpoon 	 * But we do not know how many bytes those packets contain.  To
22523795Skcpoon 	 * have an accurate count, we need to walk through the send list.
22533795Skcpoon 	 * As it is not really important to have an accurate count during
22543795Skcpoon 	 * retransmission, we skip this walk to save some time.  This should
22553795Skcpoon 	 * not make the retransmission too aggressive to cause congestion.
22561735Skcpoon 	 */
22573795Skcpoon 	if (fp->cwnd <= fp->rxt_unacked)
22583795Skcpoon 		tot_wnd = fp->sfa_pmss;
22591735Skcpoon 	else
22603795Skcpoon 		tot_wnd = fp->cwnd - fp->rxt_unacked;
22611735Skcpoon 
22621735Skcpoon 	/* Find the first unack'ed chunk */
22631735Skcpoon 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
22641735Skcpoon 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
22651735Skcpoon 
22661735Skcpoon 		if (SCTP_IS_MSG_ABANDONED(meta) ||
22671735Skcpoon 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
22681735Skcpoon 			continue;
22691735Skcpoon 		}
22701735Skcpoon 
22711735Skcpoon 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
22721735Skcpoon 			/* Again, this may not be possible */
22731735Skcpoon 			if (!SCTP_CHUNK_ISSENT(mp))
22741735Skcpoon 				return;
22751735Skcpoon 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
22761735Skcpoon 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
22771735Skcpoon 				goto found_msg;
22781735Skcpoon 		}
22791735Skcpoon 	}
22801735Skcpoon 
22811735Skcpoon 	/* Everything is abandoned... */
22821735Skcpoon 	return;
22831735Skcpoon 
22841735Skcpoon found_msg:
22851735Skcpoon 	if (!fp->timer_running)
22861735Skcpoon 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
22871735Skcpoon 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
22881735Skcpoon 	if (pkt == NULL) {
22893448Sdh155122 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
22901735Skcpoon 		return;
22911735Skcpoon 	}
22921735Skcpoon 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
22931735Skcpoon 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
22941735Skcpoon 
22951735Skcpoon 		/*
22961735Skcpoon 		 * Path MTU is different from path we thought it would
22971735Skcpoon 		 * be when we created chunks, or IP headers have grown.
22981735Skcpoon 		 *  Need to clear the DF bit.
22991735Skcpoon 		 */
23001735Skcpoon 		iph->ipha_fragment_offset_and_flags = 0;
23011735Skcpoon 	}
230211042SErik.Nordmark@Sun.COM 	sctp_set_iplen(sctp, pkt, fp->ixa);
230311042SErik.Nordmark@Sun.COM 	(void) conn_ip_output(pkt, fp->ixa);
230411042SErik.Nordmark@Sun.COM 	BUMP_LOCAL(sctp->sctp_opkts);
23051735Skcpoon 
23061735Skcpoon 	/* Check and see if there is more chunk to be retransmitted. */
23071735Skcpoon 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
23081735Skcpoon 	    meta == NULL)
23091735Skcpoon 		return;
23101735Skcpoon 	if (mp == NULL)
23111735Skcpoon 		meta = meta->b_next;
23121735Skcpoon 	if (meta == NULL)
23131735Skcpoon 		return;
23141735Skcpoon 
23151735Skcpoon 	/* Retransmit another packet if the window allows. */
23163448Sdh155122 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
23171735Skcpoon 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
23181735Skcpoon 		if (mp == NULL)
23191735Skcpoon 			mp = meta->b_cont;
23201735Skcpoon 		for (; mp != NULL; mp = mp->b_next) {
23211735Skcpoon 			/* Again, this may not be possible */
23221735Skcpoon 			if (!SCTP_CHUNK_ISSENT(mp))
23231735Skcpoon 				return;
23241735Skcpoon 			if (!SCTP_CHUNK_ISACKED(mp))
23251735Skcpoon 				goto found_msg;
23261735Skcpoon 		}
23271735Skcpoon 	}
23281735Skcpoon }
2329