xref: /csrg-svn/sys/netiso/tp_subr.c (revision 51249)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)tp_subr.c	7.16 (Berkeley) 10/02/91
8  */
9 
10 /***********************************************************
11 		Copyright IBM Corporation 1987
12 
13                       All Rights Reserved
14 
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22 
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30 
31 ******************************************************************/
32 
33 /*
34  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35  */
36 /*
37  * ARGO TP
38  *
39  * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40  * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41  *
42  * The main work of data transfer is done here.
43  * These routines are called from tp.trans.
44  * They include the routines that check the validity of acks and Xacks,
45  * (tp_goodack() and tp_goodXack() )
46  * take packets from socket buffers and send them (tp_send()),
47  * drop the data from the socket buffers (tp_sbdrop()),
48  * and put incoming packet data into socket buffers (tp_stash()).
49  */
50 
51 #include "param.h"
52 #include "mbuf.h"
53 #include "socket.h"
54 #include "socketvar.h"
55 #include "protosw.h"
56 #include "errno.h"
57 #include "types.h"
58 #include "time.h"
59 #include "kernel.h"
60 
61 #include "tp_ip.h"
62 #include "iso.h"
63 #include "argo_debug.h"
64 #include "tp_timer.h"
65 #include "tp_param.h"
66 #include "tp_stat.h"
67 #include "tp_pcb.h"
68 #include "tp_tpdu.h"
69 #include "tp_trace.h"
70 #include "tp_meas.h"
71 #include "tp_seq.h"
72 
/* Routines this file calls before (or without) defining them. */
int		tp_emit();
int		tp_sbdrop();
void	tp_send();

/*
 * tp_goodack() starts a retransmission from tp_snduna when its count of
 * duplicate acks (tp_dupacks) reaches this value.
 */
int		tprexmtthresh = 3;
76 
77 /*
78  * CALLED FROM:
79  *	tp.trans, when an XAK arrives
80  * FUNCTION and ARGUMENTS:
81  * 	Determines if the sequence number (seq) from the XAK
82  * 	acks anything new.  If so, drop the appropriate tpdu
83  * 	from the XPD send queue.
84  * RETURN VALUE:
85  * 	Returns 1 if it did this, 0 if the ack caused no action.
86  */
87 int
88 tp_goodXack(tpcb, seq)
89 	struct tp_pcb	*tpcb;
90 	SeqNum 			seq;
91 {
92 
93 	IFTRACE(D_XPD)
94 		tptraceTPCB(TPPTgotXack,
95 			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
96 			tpcb->tp_snduna);
97 	ENDTRACE
98 
99 	if ( seq == tpcb->tp_Xuna ) {
100 			tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
101 
102 			/* DROP 1 packet from the Xsnd socket buf - just so happens
103 			 * that only one packet can be there at any time
104 			 * so drop the whole thing.  If you allow > 1 packet
105 			 * the socket buffer, then you'll have to keep
106 			 * track of how many characters went w/ each XPD tpdu, so this
107 			 * will get messier
108 			 */
109 			IFDEBUG(D_XPD)
110 				dump_mbuf(tpcb->tp_Xsnd.sb_mb,
111 					"tp_goodXack Xsnd before sbdrop");
112 			ENDDEBUG
113 
114 			IFTRACE(D_XPD)
115 				tptraceTPCB(TPPTmisc,
116 					"goodXack: dropping cc ",
117 					(int)(tpcb->tp_Xsnd.sb_cc),
118 					0,0,0);
119 			ENDTRACE
120 			sbdroprecord(&tpcb->tp_Xsnd);
121 			return 1;
122 	}
123 	return 0;
124 }
125 
126 /*
127  * CALLED FROM:
128  *  tp_good_ack()
129  * FUNCTION and ARGUMENTS:
130  *  updates
131  *  smoothed average round trip time (*rtt)
132  *  roundtrip time variance (*rtv) - actually deviation, not variance
133  *  given the new value (diff)
134  * RETURN VALUE:
135  * void
136  */
137 
138 void
139 tp_rtt_rtv(tpcb)
140 register struct tp_pcb *tpcb;
141 {
142 	int new, old = tpcb->tp_dt_ticks;
143 	int delta, elapsed = tick - tpcb->tp_rttemit;
144 
145 	if (tpcb->tp_rtt != 0) {
146 		/*
147 		 * rtt is the smoothed round trip time in machine clock ticks (hz).
148 		 * It is stored as a fixed point number, unscaled (unlike the tcp
149 		 * srtt).  The rationale here is that it is only significant to the
150 		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
151 		 * so there is no need to scale.  The smoothing is done according
152 		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
153 		 */
154 		delta = elapsed - tpcb->tp_rtt;
155 		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
156 			tpcb->tp_rtt = 1;
157 		/*
158 		 * rtv is a smoothed accumulated mean difference, unscaled
159 		 * for reasons expressed above.
160 		 * It is smoothed with an alpha of .75, and the round trip timer
161 		 * will be set to rtt + 4*rtv, also as TCP does.
162 		 */
163 		if (delta < 0)
164 			delta = -delta;
165 		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
166 			tpcb->tp_rtv = 1;
167 	} else {
168 		/*
169 		 * No rtt measurement yet - use the unsmoothed rtt.
170 		 * Set the variance to half the rtt (so our first
171 		 * retransmit happens at 3*rtt)
172 		 */
173 		tpcb->tp_rtt = elapsed;
174 		tpcb->tp_rtv = elapsed >> 1;
175 	}
176 	tpcb->tp_rttemit = 0;
177 	tpcb->tp_rxtshift = 0;
178 	/*
179 	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
180 	 * Because of the way we do the smoothing, srtt and rttvar
181 	 * will each average +1/2 tick of bias.  When we compute
182 	 * the retransmit timer, we want 1/2 tick of rounding and
183 	 * 1 extra tick because of +-1/2 tick uncertainty in the
184 	 * firing of the timer.  The bias will give us exactly the
185 	 * 1.5 tick we need.  But, because the bias is
186 	 * statistical, we have to test that we don't drop below
187 	 * the minimum feasible timer (which is 2 ticks)."
188 	 */
189 	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
190 		tpcb->tp_peer_acktime, 128 /* XXX */);
191 	IFTRACE(D_RTT)
192 		tptraceTPCB(TPPTmisc, "oldticks ,rtv, rtt, newticks",
193 			old, rtv, rtt, new);
194 	ENDTRACE
195 	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
196 }
197 
198 /*
199  * CALLED FROM:
200  *  tp.trans when an AK arrives
201  * FUNCTION and ARGUMENTS:
202  * 	Given (cdt), the credit from the AK tpdu, and
203  *	(seq), the sequence number from the AK tpdu,
204  *  tp_goodack() determines if the AK acknowledges something in the send
205  * 	window, and if so, drops the appropriate packets from the retransmission
206  *  list, computes the round trip time, and updates the retransmission timer
207  *  based on the new smoothed round trip time.
208  * RETURN VALUE:
209  * 	Returns 1 if
210  * 	EITHER it actually acked something heretofore unacknowledged
211  * 	OR no news but the credit should be processed.
212  * 	If something heretofore unacked was acked with this sequence number,
213  * 	the appropriate tpdus are dropped from the retransmission control list,
214  * 	by calling tp_sbdrop().
215  * 	No need to see the tpdu itself.
216  */
217 int
218 tp_goodack(tpcb, cdt, seq, subseq)
219 	register struct tp_pcb	*tpcb;
220 	u_int					cdt;
221 	register SeqNum			seq;
222 	u_int					subseq;
223 {
224 	int 	old_fcredit;
225 	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
226 	u_int	bytes_acked;
227 
228 	IFDEBUG(D_ACKRECV)
229 		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
230 			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
231 	ENDDEBUG
232 	IFTRACE(D_ACKRECV)
233 		tptraceTPCB(TPPTgotack,
234 			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
235 	ENDTRACE
236 
237 	IFPERF(tpcb)
238 		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
239 	ENDPERF
240 
241 	if (seq == tpcb->tp_snduna) {
242 		if (subseq < tpcb->tp_r_subseq ||
243 			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
244 		discard_the_ack:
245 			IFDEBUG(D_ACKRECV)
246 				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
247 					tpcb, subseq, tpcb->tp_r_subseq);
248 			ENDDEBUG
249 			goto done;
250 		}
251 		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
252 			tpcb->tp_r_subseq = subseq;
253 			if (tpcb->tp_timer[TM_data_retrans] == 0)
254 				tpcb->tp_dupacks = 0;
255 			else if (++tpcb->tp_dupacks == tprexmtthresh) {
256 				/* partner went out of his way to signal with different
257 				   subsequences that he has the same lack of an expected
258 				   packet.  This may be an early indiciation of a loss */
259 
260 				SeqNum onxt = tpcb->tp_sndnxt;
261 				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
262 				u_int win = min(tpcb->tp_fcredit,
263 							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
264 				if (win < 2)
265 					win = 2;
266 				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
267 				tpcb->tp_timer[TM_data_retrans] = 0;
268 				tpcb->tp_rttemit = 0;
269 				tpcb->tp_sndnxt = tpcb->tp_snduna;
270 				tpcb->tp_sndnxt_m = 0;
271 				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
272 				tp_send(tpcb);
273 				tpcb->tp_cong_win = tpcb->tp_ssthresh +
274 					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
275 				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
276 					tpcb->tp_sndnxt = onxt;
277 					tpcb->tp_sndnxt_m = onxt_m;
278 				}
279 
280 			} else if (tpcb->tp_dupacks > tprexmtthresh) {
281 				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
282 			}
283 			goto done;
284 		}
285 	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
286 		goto discard_the_ack;
287 	/*
288 	 * If the congestion window was inflated to account
289 	 * for the other side's cached packets, retract it.
290 	 */
291 	if (tpcb->tp_dupacks > tprexmtthresh &&
292 		tpcb->tp_cong_win > tpcb->tp_ssthresh)
293 			tpcb->tp_cong_win = tpcb->tp_ssthresh;
294 	tpcb->tp_r_subseq = subseq;
295 	old_fcredit = tpcb->tp_fcredit;
296 	tpcb->tp_fcredit = cdt;
297 	if (cdt > tpcb->tp_maxfcredit)
298 		tpcb->tp_maxfcredit = cdt;
299 	tpcb->tp_dupacks = 0;
300 
301 	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
302 
303 		tpsbcheck(tpcb, 0);
304 		bytes_acked = tp_sbdrop(tpcb, seq);
305 		tpsbcheck(tpcb, 1);
306 		/*
307 		 * If transmit timer is running and timed sequence
308 		 * number was acked, update smoothed round trip time.
309 		 * Since we now have an rtt measurement, cancel the
310 		 * timer backoff (cf., Phil Karn's retransmit alg.).
311 		 * Recompute the initial retransmit timer.
312 		 */
313 		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
314 			tp_rtt_rtv(tpcb);
315 		/*
316 		 * If all outstanding data is acked, stop retransmit timer.
317 		 * If there is more data to be acked, restart retransmit
318 		 * timer, using current (possibly backed-off) value.
319 		 * OSI combines the keepalive and persistance functions.
320 		 * So, there is no persistance timer per se, to restart.
321 		 */
322 		tpcb->tp_timer[TM_data_retrans] =
323 			(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
324 		/*
325 		 * When new data is acked, open the congestion window.
326 		 * If the window gives us less than ssthresh packets
327 		 * in flight, open exponentially (maxseg per packet).
328 		 * Otherwise open linearly: maxseg per window
329 		 * (maxseg^2 / cwnd per packet), plus a constant
330 		 * fraction of a packet (maxseg/8) to help larger windows
331 		 * open quickly enough.
332 		 */
333 		{
334 			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
335 
336 			incr = min(incr, bytes_acked);
337 			if (cw > tpcb->tp_ssthresh)
338 				incr = incr * incr / cw + incr / 8;
339 			tpcb->tp_cong_win =
340 				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
341 		}
342 		tpcb->tp_snduna = seq;
343 		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
344 				tpcb->tp_sndnxt = seq;
345 				tpcb->tp_sndnxt_m = 0;
346 		}
347 		bang++;
348 	}
349 
350 	if( cdt != 0 && old_fcredit == 0 ) {
351 		tpcb->tp_sendfcc = 1;
352 	}
353 	if (cdt == 0) {
354 		if (old_fcredit != 0)
355 			IncStat(ts_zfcdt);
356 		/* The following might mean that the window shrunk */
357 		if (tpcb->tp_timer[TM_data_retrans]) {
358 			tpcb->tp_timer[TM_data_retrans] = 0;
359 			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
360 			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
361 				tpcb->tp_sndnxt = tpcb->tp_snduna;
362 				tpcb->tp_sndnxt_m = 0;
363 			}
364 		}
365 	}
366 	tpcb->tp_fcredit = cdt;
367 	bang |= (old_fcredit < cdt);
368 
369 done:
370 	IFDEBUG(D_ACKRECV)
371 		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
372 			bang, cdt, old_fcredit, tpcb->tp_cong_win);
373 	ENDDEBUG
374 	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
375 		tp_send(tpcb);
376 	return (bang);
377 }
378 
379 /*
380  * CALLED FROM:
381  *  tp_goodack()
382  * FUNCTION and ARGUMENTS:
383  *  drops everything up TO but not INCLUDING seq # (seq)
384  *  from the retransmission queue.
385  */
386 tp_sbdrop(tpcb, seq)
387 	register struct 	tp_pcb 			*tpcb;
388 	SeqNum					seq;
389 {
390 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
391 	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
392 	int	oldcc = sb->sb_cc, oldi = i;
393 
394 	if (i >= tpcb->tp_seqhalf)
395 		printf("tp_spdropping too much -- should panic");
396 	while (i-- > 0)
397 		sbdroprecord(sb);
398 	IFDEBUG(D_ACKRECV)
399 		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
400 			oldi, oldcc - sb->sb_cc, tpcb, seq);
401 	ENDDEBUG
402 	if (sb->sb_flags & SB_NOTIFY)
403 		sowwakeup(tpcb->tp_sock);
404 	return (oldcc - sb->sb_cc);
405 }
406 
407 /*
408  * CALLED FROM:
409  * 	tp.trans on user send request, arrival of AK and arrival of XAK
410  * FUNCTION and ARGUMENTS:
411  * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
412  * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
413  * 			c) it hits seq number (highseq) limited by cong or credit.
414  *
415  * 	If you want XPD to buffer > 1 du per socket buffer, you can
416  * 	modifiy this to issue XPD tpdus also, but then it'll have
417  * 	to take some argument(s) to distinguish between the type of DU to
418  * 	hand tp_emit.
419  *
420  * 	When something is sent for the first time, its time-of-send
421  * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
422  *  When the ack arrives, the smoothed round-trip time is figured
423  *  using this value.
424  */
425 void
426 tp_send(tpcb)
427 	register struct tp_pcb	*tpcb;
428 {
429 	register int			len;
430 	register struct mbuf	*m;
431 	struct mbuf				*mb = 0;
432 	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
433 	unsigned int			eotsdu = 0;
434 	SeqNum					highseq, checkseq;
435 	int						idle, idleticks, off, cong_win;
436 #ifdef TP_PERF_MEAS
437 	int			 			send_start_time = tick;
438 	SeqNum					oldnxt = tpcb->tp_sndnxt;
439 #endif TP_PERF_MEAS
440 
441 	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
442 	if (idle) {
443 		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
444 		if (idleticks > tpcb->tp_dt_ticks)
445 			/*
446 			 * We have been idle for "a while" and no acks are
447 			 * expected to clock out any data we send --
448 			 * slow start to get ack "clock" running again.
449 			 */
450 			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
451 	}
452 
453 	cong_win = tpcb->tp_cong_win;
454 	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
455 	if (tpcb->tp_Xsnd.sb_mb)
456 		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
457 
458 	IFDEBUG(D_DATA)
459 		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
460 				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
461 	ENDDEBUG
462 	IFTRACE(D_DATA)
463 		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
464 			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
465 		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
466 			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
467 	ENDTRACE
468 	IFTRACE(D_DATA)
469 		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
470 			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
471 	ENDTRACE
472 
473 	if (tpcb->tp_sndnxt_m)
474 		m = tpcb->tp_sndnxt_m;
475 	else {
476 		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
477 		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
478 			off--;
479 	}
480 send:
481 	/*
482 	 * Avoid silly window syndrome here . . . figure out how!
483 	 */
484 	checkseq = tpcb->tp_sndnum;
485 	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
486 		checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
487 
488 	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
489 
490 		eotsdu = (m->m_flags & M_EOR) != 0;
491 		len = m->m_pkthdr.len;
492 		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
493 			len < (tpcb->tp_l_tpdusize / 2))
494 				break;  /* Nagle . . . . . */
495 		cong_win -= len;
496 		/* make a copy - mb goes into the retransmission list
497 		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
498 		 */
499 		mb = m;
500 		m = m_copy(mb, 0, M_COPYALL);
501 		if (m == MNULL)
502 				break;
503 		IFTRACE(D_STASH)
504 			tptraceTPCB( TPPTmisc,
505 				"tp_send mcopy nxt high eotsdu len",
506 				tpcb->tp_sndnxt, highseq, eotsdu, len);
507 		ENDTRACE
508 
509 		IFDEBUG(D_DATA)
510 			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
511 				tpcb, tpcb->tp_sndnxt);
512 		ENDDEBUG
513 		/* when headers are precomputed, may need to fill
514 			   in checksum here */
515 		if (tpcb->tp_sock->so_error =
516 			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
517 			/* error */
518 			break;
519 		}
520 		m = mb->m_nextpkt;
521 		tpcb->tp_sndnxt_m = m;
522 		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
523 			SEQ_INC(tpcb, tpcb->tp_sndnew);
524 			/*
525 			 * Time this transmission if not a retransmission and
526 			 * not currently timing anything.
527 			 */
528 			if (tpcb->tp_rttemit == 0) {
529 				tpcb->tp_rttemit = tick;
530 				tpcb->tp_rttseq = tpcb->tp_sndnxt;
531 			}
532 			tpcb->tp_sndnxt = tpcb->tp_sndnew;
533 		} else
534 			SEQ_INC(tpcb, tpcb->tp_sndnxt);
535 		/*
536 		 * Set retransmit timer if not currently set.
537 		 * Initial value for retransmit timer is smoothed
538 		 * round-trip time + 2 * round-trip time variance.
539 		 * Initialize shift counter which is used for backoff
540 		 * of retransmit time.
541 		 */
542 		if (tpcb->tp_timer[TM_data_retrans] == 0) {
543 			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
544 			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
545 			tpcb->tp_rxtshift = 0;
546 		}
547 	}
548 	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
549 		tpcb->tp_oktonagle = 0;
550 #ifdef TP_PERF_MEAS
551 	IFPERF(tpcb)
552 		{
553 			register int npkts;
554 			int	 elapsed = tick - send_start_time, *t;
555 			struct timeval now;
556 
557 			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
558 
559 			if (npkts > 0)
560 				tpcb->tp_Nwindow++;
561 
562 			if (npkts > TP_PM_MAX)
563 				npkts = TP_PM_MAX;
564 
565 			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
566 			*t += (t - elapsed) >> TP_RTT_ALPHA;
567 
568 			if (mb == 0) {
569 				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
570 			} else {
571 				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
572 				/* not true with congestion-window being used */
573 			}
574 			now.tv_sec = elapsed / hz;
575 			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
576 			tpmeas( tpcb->tp_lref,
577 					TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
578 		}
579 	ENDPERF
580 #endif TP_PERF_MEAS
581 
582 
583 	IFTRACE(D_DATA)
584 		tptraceTPCB( TPPTmisc,
585 			"tp_send at end: new nxt eotsdu error",
586 			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
587 
588 	ENDTRACE
589 }
590 
591 int TPNagleok;
592 int TPNagled;
593 
594 tp_packetize(tpcb, m, eotsdu)
595 register struct tp_pcb *tpcb;
596 register struct mbuf *m;
597 int eotsdu;
598 {
599 	register struct mbuf *n;
600 	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
601 	int	maxsize = tpcb->tp_l_tpdusize
602 			- tp_headersize(DT_TPDU_type, tpcb)
603 			- (tpcb->tp_use_checksum?4:0) ;
604 	int totlen = m->m_pkthdr.len;
605 	struct mbuf *m_split();
606 	/*
607 	 * Pre-packetize the data in the sockbuf
608 	 * according to negotiated mtu.  Do it here
609 	 * where we can safely wait for mbufs.
610 	 *
611 	 * This presumes knowledge of sockbuf conventions.
612 	 * TODO: allocate space for header and fill it in (once!).
613 	 */
614 	IFDEBUG(D_DATA)
615 		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
616 			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
617 	ENDTRACE
618 	if (tpcb->tp_oktonagle) {
619 		if ((n = sb->sb_mb) == 0)
620 			panic("tp_packetize");
621 		while (n->m_act)
622 			n = n->m_act;
623 		if (n->m_flags & M_EOR)
624 			panic("tp_packetize 2");
625 		SEQ_INC(tpcb, tpcb->tp_sndnum);
626 		if (totlen + n->m_pkthdr.len < maxsize) {
627 			/* There is an unsent packet with space, combine data */
628 			struct mbuf *old_n = n;
629 			tpsbcheck(tpcb,3);
630 			n->m_pkthdr.len += totlen;
631 			while (n->m_next)
632 				n = n->m_next;
633 			sbcompress(sb, m, n);
634 			tpsbcheck(tpcb,4);
635 			n = old_n;
636 			TPNagled++;
637 			goto out;
638 		}
639 	}
640 	while (m) {
641 		n = m;
642 		if (totlen > maxsize) {
643 			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
644 				panic("tp_packetize");
645 		} else
646 			m = 0;
647 		totlen -= maxsize;
648 		tpsbcheck(tpcb, 5);
649 		sbappendrecord(sb, n);
650 		tpsbcheck(tpcb, 6);
651 		SEQ_INC(tpcb, tpcb->tp_sndnum);
652 	}
653 out:
654 	if (eotsdu) {
655 		n->m_flags |= M_EOR;  /* XXX belongs at end */
656 		tpcb->tp_oktonagle = 0;
657 	} else {
658 		SEQ_DEC(tpcb, tpcb->tp_sndnum);
659 		tpcb->tp_oktonagle = 1;
660 		TPNagleok++;
661 	}
662 	IFDEBUG(D_DATA)
663 		printf("SEND out: oktonagle %d sndnum 0x%x\n",
664 			tpcb->tp_oktonagle, tpcb->tp_sndnum);
665 	ENDTRACE
666 	return 0;
667 }
668 
669 
670 /*
671  * NAME: tp_stash()
672  * CALLED FROM:
673  *	tp.trans on arrival of a DT tpdu
674  * FUNCTION, ARGUMENTS, and RETURN VALUE:
675  * 	Returns 1 if
676  *		a) something new arrived and it's got eotsdu_reached bit on,
677  * 		b) this arrival was caused other out-of-sequence things to be
678  *    	accepted, or
679  * 		c) this arrival is the highest seq # for which we last gave credit
680  *   	(sender just sent a whole window)
681  *  In other words, returns 1 if tp should send an ack immediately, 0 if
682  *  the ack can wait a while.
683  *
684  * Note: this implementation no longer renegs on credit, (except
685  * when debugging option D_RENEG is on, for the purpose of testing
686  * ack subsequencing), so we don't  need to check for incoming tpdus
687  * being in a reneged portion of the window.
688  */
689 
690 tp_stash(tpcb, e)
691 	register struct tp_pcb		*tpcb;
692 	register struct tp_event	*e;
693 {
694 	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
695 									/* 0--> delay acks until full window */
696 									/* 1--> ack each tpdu */
697 #ifndef lint
698 #define E e->ATTR(DT_TPDU)
699 #else lint
700 #define E e->ev_union.EV_DT_TPDU
701 #endif lint
702 
703 	if ( E.e_eot ) {
704 		register struct mbuf *n = E.e_data;
705 		n->m_flags |= M_EOR;
706 		n->m_act = 0;
707 	}
708 		IFDEBUG(D_STASH)
709 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
710 				"stash: so_rcv before appending");
711 			dump_mbuf(E.e_data,
712 				"stash: e_data before appending");
713 		ENDDEBUG
714 
715 	IFPERF(tpcb)
716 		PStat(tpcb, Nb_from_ll) += E.e_datalen;
717 		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
718 			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
719 	ENDPERF
720 
721 	if (E.e_seq == tpcb->tp_rcvnxt) {
722 
723 		IFDEBUG(D_STASH)
724 			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
725 			E.e_seq, E.e_datalen, E.e_eot);
726 		ENDDEBUG
727 
728 		IFTRACE(D_STASH)
729 			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
730 			E.e_seq, E.e_datalen, E.e_eot, 0);
731 		ENDTRACE
732 
733 		SET_DELACK(tpcb);
734 
735 		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
736 
737 		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
738 		/*
739 		 * move chains from the reassembly queue to the socket buffer
740 		 */
741 		if (tpcb->tp_rsycnt) {
742 			register struct mbuf **mp;
743 			struct mbuf **mplim;
744 
745 			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
746 			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
747 
748 			while (tpcb->tp_rsycnt && *mp) {
749 				sbappend(&tpcb->tp_sock->so_rcv, *mp);
750 				tpcb->tp_rsycnt--;
751 				*mp = 0;
752 				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
753 				ack_reason |= ACK_REORDER;
754 				if (++mp == mplim)
755 					mp = tpcb->tp_rsyq;
756 			}
757 		}
758 		IFDEBUG(D_STASH)
759 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
760 				"stash: so_rcv after appending");
761 		ENDDEBUG
762 
763 	} else {
764 		register struct mbuf **mp;
765 		SeqNum uwe;
766 
767 		IFTRACE(D_STASH)
768 			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
769 			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
770 		ENDTRACE
771 
772 		if (tpcb->tp_rsyq == 0)
773 			tp_rsyset(tpcb);
774 		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
775 		if (tpcb->tp_rsyq == 0 ||
776 						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
777 			ack_reason = ACK_DONT;
778 			m_freem(E.e_data);
779 		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
780 			IFDEBUG(D_STASH)
781 				printf("tp_stash - drop & ack\n");
782 			ENDDEBUG
783 
784 			/* retransmission - drop it and force an ack */
785 			IncStat(ts_dt_dup);
786 			IFPERF(tpcb)
787 				IncPStat(tpcb, tps_n_ack_cuz_dup);
788 			ENDPERF
789 
790 			m_freem(E.e_data);
791 			ack_reason |= ACK_DUP;
792 		} else {
793 			*mp = E.e_data;
794 			tpcb->tp_rsycnt++;
795 			ack_reason = ACK_DONT;
796 		}
797 	}
798 	/* there were some comments of historical interest here. */
799 	{
800 		LOCAL_CREDIT(tpcb);
801 
802 		if ( E.e_seq ==  tpcb->tp_sent_uwe )
803 			ack_reason |= ACK_STRAT_FULLWIN;
804 
805 		IFTRACE(D_STASH)
806 			tptraceTPCB(TPPTmisc,
807 				"end of stash, eot, ack_reason, sent_uwe ",
808 				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
809 		ENDTRACE
810 
811 		if ( ack_reason == ACK_DONT ) {
812 			IncStat( ts_ackreason[ACK_DONT] );
813 			return 0;
814 		} else {
815 			IFPERF(tpcb)
816 				if(ack_reason & ACK_STRAT_EACH) {
817 					IncPStat(tpcb, tps_n_ack_cuz_strat);
818 				} else if(ack_reason & ACK_STRAT_FULLWIN) {
819 					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
820 				} else if(ack_reason & ACK_REORDER) {
821 					IncPStat(tpcb, tps_n_ack_cuz_reorder);
822 				}
823 				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
824 							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
825 			ENDPERF
826 			{
827 				register int i;
828 
829 				/* keep track of all reasons that apply */
830 				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
831 					if( ack_reason & (1<<i) )
832 						IncStat( ts_ackreason[i] );
833 				}
834 			}
835 			return 1;
836 		}
837 	}
838 }
839 
840 /*
841  * tp_rsyflush - drop all the packets on the reassembly queue.
842  * Do this when closing the socket, or when somebody has changed
843  * the space avaible in the receive socket (XXX).
844  */
845 tp_rsyflush(tpcb)
846 register struct tp_pcb *tpcb;
847 {
848 	register struct mbuf *m, **mp;
849 	if (tpcb->tp_rsycnt) {
850 		for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
851 									 --mp >= tpcb->tp_rsyq; )
852 			if (*mp) {
853 				tpcb->tp_rsycnt--;
854 				m_freem(*mp);
855 			}
856 		if (tpcb->tp_rsycnt)
857 			panic("tp_rsyflush");
858 	}
859 	free((caddr_t)tpcb->tp_rsyq, M_PCB);
860 	tpcb->tp_rsyq = 0;
861 }
862 
863 tp_rsyset(tpcb)
864 register struct tp_pcb *tpcb;
865 {
866 	register struct socket *so = tpcb->tp_sock;
867 	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
868 	int old_credit = tpcb->tp_maxlcredit;
869 	caddr_t	rsyq;
870 
871 	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
872 		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
873 
874 	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
875 		return;
876 	maxcredit *= sizeof(struct mbuf *);
877 	if (tpcb->tp_rsyq)
878 		tp_rsyflush(tpcb);
879 	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
880 		bzero(rsyq, maxcredit);
881 	tpcb->tp_rsyq = (struct mbuf **)rsyq;
882 }
883 
884 tpsbcheck(tpcb, i)
885 struct tp_pcb *tpcb;
886 {
887 	register struct mbuf *n, *m;
888 	register int len = 0, mbcnt = 0, pktlen;
889 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
890 
891 	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
892 		if ((n->m_flags & M_PKTHDR) == 0)
893 			panic("tpsbcheck nohdr");
894 		pktlen = len + n->m_pkthdr.len;
895 	    for (m = n; m; m = m->m_next) {
896 			len += m->m_len;
897 			mbcnt += MSIZE;
898 			if (m->m_flags & M_EXT)
899 				mbcnt += m->m_ext.ext_size;
900 		}
901 		if (len != pktlen) {
902 			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
903 				i, len, pktlen, n);
904 			panic("tpsbcheck short");
905 		}
906 	}
907 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
908 		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
909 		    mbcnt, sb->sb_mbcnt);
910 		panic("tpsbcheck");
911 	}
912 }
913