xref: /csrg-svn/sys/netiso/tp_subr.c (revision 51204)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)tp_subr.c	7.15 (Berkeley) 09/26/91
8  */
9 
10 /***********************************************************
11 		Copyright IBM Corporation 1987
12 
13                       All Rights Reserved
14 
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22 
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30 
31 ******************************************************************/
32 
33 /*
34  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35  */
36 /*
37  * ARGO TP
38  *
39  * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40  * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41  *
42  * The main work of data transfer is done here.
43  * These routines are called from tp.trans.
44  * They include the routines that check the validity of acks and Xacks,
45  * (tp_goodack() and tp_goodXack() )
46  * take packets from socket buffers and send them (tp_send()),
47  * drop the data from the socket buffers (tp_sbdrop()),
48  * and put incoming packet data into socket buffers (tp_stash()).
49  */
50 
51 #include "param.h"
52 #include "mbuf.h"
53 #include "socket.h"
54 #include "socketvar.h"
55 #include "protosw.h"
56 #include "errno.h"
57 #include "types.h"
58 #include "time.h"
59 #include "kernel.h"
60 
61 #include "tp_ip.h"
62 #include "iso.h"
63 #include "argo_debug.h"
64 #include "tp_timer.h"
65 #include "tp_param.h"
66 #include "tp_stat.h"
67 #include "tp_pcb.h"
68 #include "tp_tpdu.h"
69 #include "tp_trace.h"
70 #include "tp_meas.h"
71 #include "tp_seq.h"
72 
/* Routines referenced before their definitions (K&R style declarations). */
int		tp_sbdrop();
int		tp_emit();
void	tp_send();

/*
 * Number of consecutive duplicate acks (see tp_goodack()) that triggers
 * an early retransmission of the oldest unacknowledged tpdu.
 */
int		tprexmtthresh = 3;
76 
77 /*
78  * CALLED FROM:
79  *	tp.trans, when an XAK arrives
80  * FUNCTION and ARGUMENTS:
81  * 	Determines if the sequence number (seq) from the XAK
82  * 	acks anything new.  If so, drop the appropriate tpdu
83  * 	from the XPD send queue.
84  * RETURN VALUE:
85  * 	Returns 1 if it did this, 0 if the ack caused no action.
86  */
87 int
88 tp_goodXack(tpcb, seq)
89 	struct tp_pcb	*tpcb;
90 	SeqNum 			seq;
91 {
92 
93 	IFTRACE(D_XPD)
94 		tptraceTPCB(TPPTgotXack,
95 			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
96 			tpcb->tp_snduna);
97 	ENDTRACE
98 
99 	if ( seq == tpcb->tp_Xuna ) {
100 			tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
101 
102 			/* DROP 1 packet from the Xsnd socket buf - just so happens
103 			 * that only one packet can be there at any time
104 			 * so drop the whole thing.  If you allow > 1 packet
105 			 * the socket buffer, then you'll have to keep
106 			 * track of how many characters went w/ each XPD tpdu, so this
107 			 * will get messier
108 			 */
109 			IFDEBUG(D_XPD)
110 				dump_mbuf(tpcb->tp_Xsnd.sb_mb,
111 					"tp_goodXack Xsnd before sbdrop");
112 			ENDDEBUG
113 
114 			IFTRACE(D_XPD)
115 				tptraceTPCB(TPPTmisc,
116 					"goodXack: dropping cc ",
117 					(int)(tpcb->tp_Xsnd.sb_cc),
118 					0,0,0);
119 			ENDTRACE
120 			sbdroprecord(&tpcb->tp_Xsnd);
121 			return 1;
122 	}
123 	return 0;
124 }
125 
126 /*
127  * CALLED FROM:
128  *  tp_good_ack()
129  * FUNCTION and ARGUMENTS:
130  *  updates
131  *  smoothed average round trip time (*rtt)
132  *  roundtrip time variance (*rtv) - actually deviation, not variance
133  *  given the new value (diff)
134  * RETURN VALUE:
135  * void
136  */
137 
138 void
139 tp_rtt_rtv(tpcb)
140 register struct tp_pcb *tpcb;
141 {
142 	int new, old = tpcb->tp_dt_ticks;
143 	int delta, elapsed = tick - tpcb->tp_rttemit;
144 
145 	if (tpcb->tp_rtt != 0) {
146 		/*
147 		 * rtt is the smoothed round trip time in machine clock ticks (hz).
148 		 * it is stored as a fixed point number, unscaled (unlike the tcp
149 		 * srtt.  The rationale here is that it is only significant to the
150 		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
151 		 * so there is no need to scale.  The smoothing is done according
152 		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
153 		 */
154 		delta = elapsed - tpcb->tp_rtt;
155 		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
156 			tpcb->tp_rtt = 1;
157 		/*
158 		 * rtv is a smoothed accumulated mean difference, unscaled
159 		 * for reasons expressed above.
160 		 * It is smoothed with an alpha of .75, and the round trip timer
161 		 * will be set to rtt + 4*rtv, also as TCP does.
162 		 */
163 		if (delta < 0)
164 			delta = -delta;
165 		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
166 			tpcb->tp_rtv = 1;
167 	} else {
168 		/*
169 		 * No rtt measurement yet - use the unsmoothed rtt.
170 		 * Set the variance to half the rtt (so our first
171 		 * retransmit happens at 3*rtt)
172 		 */
173 		tpcb->tp_rtt = elapsed;
174 		tpcb->tp_rtv = elapsed >> 1;
175 	}
176 	tpcb->tp_rttemit = 0;
177 	tpcb->tp_rxtshift = 0;
178 	/*
179 	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
180 	 * Because of the way we do the smoothing, srtt and rttvar
181 	 * will each average +1/2 tick of bias.  When we compute
182 	 * the retransmit timer, we want 1/2 tick of rounding and
183 	 * 1 extra tick because of +-1/2 tick uncertainty in the
184 	 * firing of the timer.  The bias will give us exactly the
185 	 * 1.5 tick we need.  But, because the bias is
186 	 * statistical, we have to test that we don't drop below
187 	 * the minimum feasible timer (which is 2 ticks)."
188 	 */
189 	new = (((tpcb->tp_rtt + (tpcb->tp_rtv << 2)) * PR_SLOWHZ) + hz) / hz;
190 	new = MAX(new + 1, tpcb->tp_peer_acktime);
191 	new = MAX(new, 2);
192 	IFTRACE(D_RTT)
193 		tptraceTPCB(TPPTmisc, "oldticks ,rtv, rtt, newticks",
194 			old, rtv, rtt, new);
195 	ENDTRACE
196 	tpcb->tp_rxtcur = tpcb->tp_dt_ticks = new;
197 }
198 
199 /*
200  * CALLED FROM:
201  *  tp.trans when an AK arrives
202  * FUNCTION and ARGUMENTS:
203  * 	Given (cdt), the credit from the AK tpdu, and
204  *	(seq), the sequence number from the AK tpdu,
205  *  tp_goodack() determines if the AK acknowledges something in the send
206  * 	window, and if so, drops the appropriate packets from the retransmission
207  *  list, computes the round trip time, and updates the retransmission timer
208  *  based on the new smoothed round trip time.
209  * RETURN VALUE:
210  * 	Returns 1 if
211  * 	EITHER it actually acked something heretofore unacknowledged
212  * 	OR no news but the credit should be processed.
213  * 	If something heretofore unacked was acked with this sequence number,
214  * 	the appropriate tpdus are dropped from the retransmission control list,
215  * 	by calling tp_sbdrop().
216  * 	No need to see the tpdu itself.
217  */
218 int
219 tp_goodack(tpcb, cdt, seq, subseq)
220 	register struct tp_pcb	*tpcb;
221 	u_int					cdt;
222 	register SeqNum			seq;
223 	u_int					subseq;
224 {
225 	int 	old_fcredit;
226 	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
227 	u_int	bytes_acked;
228 
229 	IFDEBUG(D_ACKRECV)
230 		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
231 			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
232 	ENDDEBUG
233 	IFTRACE(D_ACKRECV)
234 		tptraceTPCB(TPPTgotack,
235 			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
236 	ENDTRACE
237 
238 	IFPERF(tpcb)
239 		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
240 	ENDPERF
241 
242 	if (seq == tpcb->tp_snduna) {
243 		if (subseq < tpcb->tp_r_subseq ||
244 			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
245 		discard_the_ack:
246 			IFDEBUG(D_ACKRECV)
247 				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
248 					tpcb, subseq, tpcb->tp_r_subseq);
249 			ENDDEBUG
250 			goto done;
251 		}
252 		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
253 			tpcb->tp_r_subseq = subseq;
254 			if (tpcb->tp_timer[TM_data_retrans] == 0)
255 				tpcb->tp_dupacks = 0;
256 			else if (++tpcb->tp_dupacks == tprexmtthresh) {
257 				/* partner went out of his way to signal with different
258 				   subsequences that he has the same lack of an expected
259 				   packet.  This may be an early indiciation of a loss */
260 
261 				SeqNum onxt = tpcb->tp_sndnxt;
262 				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
263 				u_int win = min(tpcb->tp_fcredit,
264 							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
265 				if (win < 2)
266 					win = 2;
267 				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
268 				tpcb->tp_timer[TM_data_retrans] = 0;
269 				tpcb->tp_rttemit = 0;
270 				tpcb->tp_sndnxt = tpcb->tp_snduna;
271 				tpcb->tp_sndnxt_m = 0;
272 				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
273 				tp_send(tpcb);
274 				tpcb->tp_cong_win = tpcb->tp_ssthresh +
275 					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
276 				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
277 					tpcb->tp_sndnxt = onxt;
278 					tpcb->tp_sndnxt_m = onxt_m;
279 				}
280 
281 			} else if (tpcb->tp_dupacks > tprexmtthresh) {
282 				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
283 			}
284 			goto done;
285 		}
286 	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
287 		goto discard_the_ack;
288 	/*
289 	 * If the congestion window was inflated to account
290 	 * for the other side's cached packets, retract it.
291 	 */
292 	if (tpcb->tp_dupacks > tprexmtthresh &&
293 		tpcb->tp_cong_win > tpcb->tp_ssthresh)
294 			tpcb->tp_cong_win = tpcb->tp_ssthresh;
295 	tpcb->tp_r_subseq = subseq;
296 	old_fcredit = tpcb->tp_fcredit;
297 	tpcb->tp_fcredit = cdt;
298 	if (cdt > tpcb->tp_maxfcredit)
299 		tpcb->tp_maxfcredit = cdt;
300 	tpcb->tp_dupacks = 0;
301 
302 	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
303 
304 		tpsbcheck(tpcb, 0);
305 		bytes_acked = tp_sbdrop(tpcb, seq);
306 		tpsbcheck(tpcb, 1);
307 		/*
308 		 * If transmit timer is running and timed sequence
309 		 * number was acked, update smoothed round trip time.
310 		 * Since we now have an rtt measurement, cancel the
311 		 * timer backoff (cf., Phil Karn's retransmit alg.).
312 		 * Recompute the initial retransmit timer.
313 		 */
314 		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
315 			tp_rtt_rtv(tpcb);
316 		/*
317 		 * If all outstanding data is acked, stop retransmit timer.
318 		 * If there is more data to be acked, restart retransmit
319 		 * timer, using current (possibly backed-off) value.
320 		 * OSI combines the keepalive and persistance functions.
321 		 * So, there is no persistance timer per se, to restart.
322 		 */
323 		tpcb->tp_timer[TM_data_retrans] =
324 			(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
325 		/*
326 		 * When new data is acked, open the congestion window.
327 		 * If the window gives us less than ssthresh packets
328 		 * in flight, open exponentially (maxseg per packet).
329 		 * Otherwise open linearly: maxseg per window
330 		 * (maxseg^2 / cwnd per packet), plus a constant
331 		 * fraction of a packet (maxseg/8) to help larger windows
332 		 * open quickly enough.
333 		 */
334 		{
335 			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
336 
337 			incr = min(incr, bytes_acked);
338 			if (cw > tpcb->tp_ssthresh)
339 				incr = incr * incr / cw + incr / 8;
340 			tpcb->tp_cong_win =
341 				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
342 		}
343 		tpcb->tp_snduna = seq;
344 		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
345 				tpcb->tp_sndnxt = seq;
346 				tpcb->tp_sndnxt_m = 0;
347 		}
348 		bang++;
349 	}
350 
351 	if( cdt != 0 && old_fcredit == 0 ) {
352 		tpcb->tp_sendfcc = 1;
353 	}
354 	if( cdt == 0 && old_fcredit != 0 ) {
355 		IncStat(ts_zfcdt);
356 	}
357 	tpcb->tp_fcredit = cdt;
358 	bang |= (old_fcredit < cdt);
359 
360 done:
361 	IFDEBUG(D_ACKRECV)
362 		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
363 			bang, cdt, old_fcredit, tpcb->tp_cong_win);
364 	ENDDEBUG
365 	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
366 		tp_send(tpcb);
367 	return (bang);
368 }
369 
370 /*
371  * CALLED FROM:
372  *  tp_goodack()
373  * FUNCTION and ARGUMENTS:
374  *  drops everything up TO but not INCLUDING seq # (seq)
375  *  from the retransmission queue.
376  */
377 tp_sbdrop(tpcb, seq)
378 	register struct 	tp_pcb 			*tpcb;
379 	SeqNum					seq;
380 {
381 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
382 	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
383 	int	oldcc = sb->sb_cc, oldi = i;
384 
385 	if (i >= tpcb->tp_seqhalf)
386 		printf("tp_spdropping too much -- should panic");
387 	while (i-- > 0)
388 		sbdroprecord(sb);
389 	IFDEBUG(D_ACKRECV)
390 		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
391 			oldi, oldcc - sb->sb_cc, tpcb, seq);
392 	ENDDEBUG
393 	if (sb->sb_flags & SB_NOTIFY)
394 		sowwakeup(tpcb->tp_sock);
395 	return (oldcc - sb->sb_cc);
396 }
397 
398 /*
399  * CALLED FROM:
400  * 	tp.trans on user send request, arrival of AK and arrival of XAK
401  * FUNCTION and ARGUMENTS:
402  * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
403  * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
404  * 			c) it hits seq number (highseq) limited by cong or credit.
405  *
406  * 	If you want XPD to buffer > 1 du per socket buffer, you can
407  * 	modifiy this to issue XPD tpdus also, but then it'll have
408  * 	to take some argument(s) to distinguish between the type of DU to
409  * 	hand tp_emit.
410  *
411  * 	When something is sent for the first time, its time-of-send
412  * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
413  *  When the ack arrives, the smoothed round-trip time is figured
414  *  using this value.
415  */
416 void
417 tp_send(tpcb)
418 	register struct tp_pcb	*tpcb;
419 {
420 	register int			len;
421 	register struct mbuf	*m;
422 	struct mbuf				*mb = 0;
423 	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
424 	unsigned int			eotsdu = 0;
425 	SeqNum					highseq, checkseq;
426 	int						idle, idleticks, off, cong_win;
427 #ifdef TP_PERF_MEAS
428 	int			 			send_start_time = tick;
429 	SeqNum					oldnxt = tpcb->tp_sndnxt;
430 #endif TP_PERF_MEAS
431 
432 	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
433 	if (idle) {
434 		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
435 		if (idleticks > tpcb->tp_dt_ticks)
436 			/*
437 			 * We have been idle for "a while" and no acks are
438 			 * expected to clock out any data we send --
439 			 * slow start to get ack "clock" running again.
440 			 */
441 			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
442 	}
443 
444 	cong_win = tpcb->tp_cong_win;
445 	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
446 	if (tpcb->tp_Xsnd.sb_mb)
447 		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
448 
449 	IFDEBUG(D_DATA)
450 		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
451 				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
452 	ENDDEBUG
453 	IFTRACE(D_DATA)
454 		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
455 			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
456 		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
457 			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
458 	ENDTRACE
459 	IFTRACE(D_DATA)
460 		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
461 			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
462 	ENDTRACE
463 
464 	if (tpcb->tp_sndnxt_m)
465 		m = tpcb->tp_sndnxt_m;
466 	else {
467 		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
468 		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
469 			off--;
470 	}
471 send:
472 	/*
473 	 * Avoid silly window syndrome here . . . figure out how!
474 	 */
475 	checkseq = tpcb->tp_sndnum;
476 	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
477 		checkseq = highseq;
478 
479 	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
480 
481 		eotsdu = (m->m_flags & M_EOR) != 0;
482 		len = m->m_pkthdr.len;
483 		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
484 			len < (tpcb->tp_l_tpdusize / 2))
485 				break;  /* Nagle . . . . . */
486 		cong_win -= len;
487 		/* make a copy - mb goes into the retransmission list
488 		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
489 		 */
490 		mb = m;
491 		m = m_copy(mb, 0, M_COPYALL);
492 		if (m == MNULL)
493 				break;
494 		IFTRACE(D_STASH)
495 			tptraceTPCB( TPPTmisc,
496 				"tp_send mcopy nxt high eotsdu len",
497 				tpcb->tp_sndnxt, highseq, eotsdu, len);
498 		ENDTRACE
499 
500 		IFDEBUG(D_DATA)
501 			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
502 				tpcb, tpcb->tp_sndnxt);
503 		ENDDEBUG
504 		/* when headers are precomputed, may need to fill
505 			   in checksum here */
506 		if (tpcb->tp_sock->so_error =
507 			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
508 			/* error */
509 			break;
510 		}
511 		m = mb->m_nextpkt;
512 		tpcb->tp_sndnxt_m = m;
513 		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
514 			SEQ_INC(tpcb, tpcb->tp_sndnew);
515 			/*
516 			 * Time this transmission if not a retransmission and
517 			 * not currently timing anything.
518 			 */
519 			if (tpcb->tp_rttemit == 0) {
520 				tpcb->tp_rttemit = tick;
521 				tpcb->tp_rttseq = tpcb->tp_sndnxt;
522 			}
523 			tpcb->tp_sndnxt = tpcb->tp_sndnew;
524 		} else
525 			SEQ_INC(tpcb, tpcb->tp_sndnxt);
526 		/*
527 		 * Set retransmit timer if not currently set.
528 		 * Initial value for retransmit timer is smoothed
529 		 * round-trip time + 2 * round-trip time variance.
530 		 * Initialize shift counter which is used for backoff
531 		 * of retransmit time.
532 		 */
533 		if (tpcb->tp_timer[TM_data_retrans] == 0) {
534 			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
535 			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
536 			tpcb->tp_rxtshift = 0;
537 		}
538 	}
539 	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
540 		tpcb->tp_oktonagle = 0;
541 #ifdef TP_PERF_MEAS
542 	IFPERF(tpcb)
543 		{
544 			register int npkts;
545 			int	 elapsed = tick - send_start_time, *t;
546 			struct timeval now;
547 
548 			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
549 
550 			if (npkts > 0)
551 				tpcb->tp_Nwindow++;
552 
553 			if (npkts > TP_PM_MAX)
554 				npkts = TP_PM_MAX;
555 
556 			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
557 			*t += (t - elapsed) >> TP_RTT_ALPHA;
558 
559 			if (mb == 0) {
560 				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
561 			} else {
562 				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
563 				/* not true with congestion-window being used */
564 			}
565 			now.tv_sec = elapsed / hz;
566 			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
567 			tpmeas( tpcb->tp_lref,
568 					TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
569 		}
570 	ENDPERF
571 #endif TP_PERF_MEAS
572 
573 
574 	IFTRACE(D_DATA)
575 		tptraceTPCB( TPPTmisc,
576 			"tp_send at end: new nxt eotsdu error",
577 			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
578 
579 	ENDTRACE
580 }
581 
582 int TPNagleok;
583 int TPNagled;
584 
585 tp_packetize(tpcb, m, eotsdu)
586 register struct tp_pcb *tpcb;
587 register struct mbuf *m;
588 int eotsdu;
589 {
590 	register struct mbuf *n;
591 	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
592 	int	maxsize = tpcb->tp_l_tpdusize
593 			- tp_headersize(DT_TPDU_type, tpcb)
594 			- (tpcb->tp_use_checksum?4:0) ;
595 	int totlen = m->m_pkthdr.len;
596 	struct mbuf *m_split();
597 	/*
598 	 * Pre-packetize the data in the sockbuf
599 	 * according to negotiated mtu.  Do it here
600 	 * where we can safely wait for mbufs.
601 	 *
602 	 * This presumes knowledge of sockbuf conventions.
603 	 * TODO: allocate space for header and fill it in (once!).
604 	 */
605 	IFDEBUG(D_DATA)
606 		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
607 			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
608 	ENDTRACE
609 	if (tpcb->tp_oktonagle) {
610 		if ((n = sb->sb_mb) == 0)
611 			panic("tp_packetize");
612 		while (n->m_act)
613 			n = n->m_act;
614 		if (n->m_flags & M_EOR)
615 			panic("tp_packetize 2");
616 		SEQ_INC(tpcb, tpcb->tp_sndnum);
617 		if (totlen + n->m_pkthdr.len < maxsize) {
618 			/* There is an unsent packet with space, combine data */
619 			struct mbuf *old_n = n;
620 			tpsbcheck(tpcb,3);
621 			n->m_pkthdr.len += totlen;
622 			while (n->m_next)
623 				n = n->m_next;
624 			sbcompress(sb, m, n);
625 			tpsbcheck(tpcb,4);
626 			n = old_n;
627 			TPNagled++;
628 			goto out;
629 		}
630 	}
631 	while (m) {
632 		n = m;
633 		if (totlen > maxsize) {
634 			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
635 				panic("tp_packetize");
636 		} else
637 			m = 0;
638 		totlen -= maxsize;
639 		tpsbcheck(tpcb, 5);
640 		sbappendrecord(sb, n);
641 		tpsbcheck(tpcb, 6);
642 		SEQ_INC(tpcb, tpcb->tp_sndnum);
643 	}
644 out:
645 	if (eotsdu) {
646 		n->m_flags |= M_EOR;  /* XXX belongs at end */
647 		tpcb->tp_oktonagle = 0;
648 	} else {
649 		SEQ_DEC(tpcb, tpcb->tp_sndnum);
650 		tpcb->tp_oktonagle = 1;
651 		TPNagleok++;
652 	}
653 	IFDEBUG(D_DATA)
654 		printf("SEND out: oktonagle %d sndnum 0x%x\n",
655 			tpcb->tp_oktonagle, tpcb->tp_sndnum);
656 	ENDTRACE
657 	return 0;
658 }
659 
660 
661 /*
662  * NAME: tp_stash()
663  * CALLED FROM:
664  *	tp.trans on arrival of a DT tpdu
665  * FUNCTION, ARGUMENTS, and RETURN VALUE:
666  * 	Returns 1 if
667  *		a) something new arrived and it's got eotsdu_reached bit on,
668  * 		b) this arrival was caused other out-of-sequence things to be
669  *    	accepted, or
670  * 		c) this arrival is the highest seq # for which we last gave credit
671  *   	(sender just sent a whole window)
672  *  In other words, returns 1 if tp should send an ack immediately, 0 if
673  *  the ack can wait a while.
674  *
675  * Note: this implementation no longer renegs on credit, (except
676  * when debugging option D_RENEG is on, for the purpose of testing
677  * ack subsequencing), so we don't  need to check for incoming tpdus
678  * being in a reneged portion of the window.
679  */
680 
681 tp_stash(tpcb, e)
682 	register struct tp_pcb		*tpcb;
683 	register struct tp_event	*e;
684 {
685 	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
686 									/* 0--> delay acks until full window */
687 									/* 1--> ack each tpdu */
688 #ifndef lint
689 #define E e->ATTR(DT_TPDU)
690 #else lint
691 #define E e->ev_union.EV_DT_TPDU
692 #endif lint
693 
694 	if ( E.e_eot ) {
695 		register struct mbuf *n = E.e_data;
696 		n->m_flags |= M_EOR;
697 		n->m_act = 0;
698 	}
699 		IFDEBUG(D_STASH)
700 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
701 				"stash: so_rcv before appending");
702 			dump_mbuf(E.e_data,
703 				"stash: e_data before appending");
704 		ENDDEBUG
705 
706 	IFPERF(tpcb)
707 		PStat(tpcb, Nb_from_ll) += E.e_datalen;
708 		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
709 			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
710 	ENDPERF
711 
712 	if (E.e_seq == tpcb->tp_rcvnxt) {
713 
714 		IFDEBUG(D_STASH)
715 			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
716 			E.e_seq, E.e_datalen, E.e_eot);
717 		ENDDEBUG
718 
719 		IFTRACE(D_STASH)
720 			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
721 			E.e_seq, E.e_datalen, E.e_eot, 0);
722 		ENDTRACE
723 
724 		SET_DELACK(tpcb);
725 
726 		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
727 
728 		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
729 		/*
730 		 * move chains from the reassembly queue to the socket buffer
731 		 */
732 		if (tpcb->tp_rsycnt) {
733 			register struct mbuf **mp;
734 			struct mbuf **mplim;
735 
736 			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
737 			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
738 
739 			while (tpcb->tp_rsycnt && *mp) {
740 				sbappend(&tpcb->tp_sock->so_rcv, *mp);
741 				tpcb->tp_rsycnt--;
742 				*mp = 0;
743 				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
744 				ack_reason |= ACK_REORDER;
745 				if (++mp == mplim)
746 					mp = tpcb->tp_rsyq;
747 			}
748 		}
749 		IFDEBUG(D_STASH)
750 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
751 				"stash: so_rcv after appending");
752 		ENDDEBUG
753 
754 	} else {
755 		register struct mbuf **mp;
756 		SeqNum uwe;
757 
758 		IFTRACE(D_STASH)
759 			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
760 			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
761 		ENDTRACE
762 
763 		if (tpcb->tp_rsyq == 0)
764 			tp_rsyset(tpcb);
765 		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
766 		if (tpcb->tp_rsyq == 0 ||
767 						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
768 			ack_reason = ACK_DONT;
769 			m_freem(E.e_data);
770 		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
771 			IFDEBUG(D_STASH)
772 				printf("tp_stash - drop & ack\n");
773 			ENDDEBUG
774 
775 			/* retransmission - drop it and force an ack */
776 			IncStat(ts_dt_dup);
777 			IFPERF(tpcb)
778 				IncPStat(tpcb, tps_n_ack_cuz_dup);
779 			ENDPERF
780 
781 			m_freem(E.e_data);
782 			ack_reason |= ACK_DUP;
783 		} else {
784 			*mp = E.e_data;
785 			tpcb->tp_rsycnt++;
786 			ack_reason = ACK_DONT;
787 		}
788 	}
789 	/* there were some comments of historical interest here. */
790 	{
791 		LOCAL_CREDIT(tpcb);
792 
793 		if ( E.e_seq ==  tpcb->tp_sent_uwe )
794 			ack_reason |= ACK_STRAT_FULLWIN;
795 
796 		IFTRACE(D_STASH)
797 			tptraceTPCB(TPPTmisc,
798 				"end of stash, eot, ack_reason, sent_uwe ",
799 				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
800 		ENDTRACE
801 
802 		if ( ack_reason == ACK_DONT ) {
803 			IncStat( ts_ackreason[ACK_DONT] );
804 			return 0;
805 		} else {
806 			IFPERF(tpcb)
807 				if(ack_reason & ACK_STRAT_EACH) {
808 					IncPStat(tpcb, tps_n_ack_cuz_strat);
809 				} else if(ack_reason & ACK_STRAT_FULLWIN) {
810 					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
811 				} else if(ack_reason & ACK_REORDER) {
812 					IncPStat(tpcb, tps_n_ack_cuz_reorder);
813 				}
814 				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
815 							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
816 			ENDPERF
817 			{
818 				register int i;
819 
820 				/* keep track of all reasons that apply */
821 				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
822 					if( ack_reason & (1<<i) )
823 						IncStat( ts_ackreason[i] );
824 				}
825 			}
826 			return 1;
827 		}
828 	}
829 }
830 
831 /*
832  * tp_rsyflush - drop all the packets on the reassembly queue.
833  * Do this when closing the socket, or when somebody has changed
834  * the space avaible in the receive socket (XXX).
835  */
836 tp_rsyflush(tpcb)
837 register struct tp_pcb *tpcb;
838 {
839 	register struct mbuf *m, **mp;
840 	if (tpcb->tp_rsycnt) {
841 		for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
842 									 --mp >= tpcb->tp_rsyq; )
843 			if (*mp) {
844 				tpcb->tp_rsycnt--;
845 				m_freem(*mp);
846 			}
847 		if (tpcb->tp_rsycnt)
848 			panic("tp_rsyflush");
849 	}
850 	free((caddr_t)tpcb->tp_rsyq, M_PCB);
851 	tpcb->tp_rsyq = 0;
852 }
853 
854 tp_rsyset(tpcb)
855 register struct tp_pcb *tpcb;
856 {
857 	register struct socket *so = tpcb->tp_sock;
858 	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
859 	int old_credit = tpcb->tp_maxlcredit;
860 	caddr_t	rsyq;
861 
862 	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
863 		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
864 
865 	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
866 		return;
867 	maxcredit *= sizeof(struct mbuf *);
868 	if (tpcb->tp_rsyq)
869 		tp_rsyflush(tpcb);
870 	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
871 		bzero(rsyq, maxcredit);
872 	tpcb->tp_rsyq = (struct mbuf **)rsyq;
873 }
874 
875 tpsbcheck(tpcb, i)
876 struct tp_pcb *tpcb;
877 {
878 	register struct mbuf *n, *m;
879 	register int len = 0, mbcnt = 0, pktlen;
880 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
881 
882 	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
883 		if ((n->m_flags & M_PKTHDR) == 0)
884 			panic("tpsbcheck nohdr");
885 		pktlen = len + n->m_pkthdr.len;
886 	    for (m = n; m; m = m->m_next) {
887 			len += m->m_len;
888 			mbcnt += MSIZE;
889 			if (m->m_flags & M_EXT)
890 				mbcnt += m->m_ext.ext_size;
891 		}
892 		if (len != pktlen) {
893 			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
894 				i, len, pktlen, n);
895 			panic("tpsbcheck short");
896 		}
897 	}
898 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
899 		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
900 		    mbcnt, sb->sb_mbcnt);
901 		panic("tpsbcheck");
902 	}
903 }
904