/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)tp_subr.c	7.16 (Berkeley) 10/02/91
 */

/***********************************************************
		Copyright IBM Corporation 1987

                      All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of IBM not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.

******************************************************************/

/*
 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
 */
/*
 * ARGO TP
 *
 * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
 * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
 *
 * The main work of data transfer is done here.
 * These routines are called from tp.trans.
 * They include the routines that check the validity of acks and Xacks,
 * (tp_goodack() and tp_goodXack() )
 * take packets from socket buffers and send them (tp_send()),
 * drop the data from the socket buffers (tp_sbdrop()),
 * and put incoming packet data into socket buffers (tp_stash()).
49 */ 50 51 #include "param.h" 52 #include "mbuf.h" 53 #include "socket.h" 54 #include "socketvar.h" 55 #include "protosw.h" 56 #include "errno.h" 57 #include "types.h" 58 #include "time.h" 59 #include "kernel.h" 60 61 #include "tp_ip.h" 62 #include "iso.h" 63 #include "argo_debug.h" 64 #include "tp_timer.h" 65 #include "tp_param.h" 66 #include "tp_stat.h" 67 #include "tp_pcb.h" 68 #include "tp_tpdu.h" 69 #include "tp_trace.h" 70 #include "tp_meas.h" 71 #include "tp_seq.h" 72 73 int tp_emit(), tp_sbdrop(); 74 int tprexmtthresh = 3; 75 void tp_send(); 76 77 /* 78 * CALLED FROM: 79 * tp.trans, when an XAK arrives 80 * FUNCTION and ARGUMENTS: 81 * Determines if the sequence number (seq) from the XAK 82 * acks anything new. If so, drop the appropriate tpdu 83 * from the XPD send queue. 84 * RETURN VALUE: 85 * Returns 1 if it did this, 0 if the ack caused no action. 86 */ 87 int 88 tp_goodXack(tpcb, seq) 89 struct tp_pcb *tpcb; 90 SeqNum seq; 91 { 92 93 IFTRACE(D_XPD) 94 tptraceTPCB(TPPTgotXack, 95 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew, 96 tpcb->tp_snduna); 97 ENDTRACE 98 99 if ( seq == tpcb->tp_Xuna ) { 100 tpcb->tp_Xuna = tpcb->tp_Xsndnxt; 101 102 /* DROP 1 packet from the Xsnd socket buf - just so happens 103 * that only one packet can be there at any time 104 * so drop the whole thing. 
If you allow > 1 packet 105 * the socket buffer, then you'll have to keep 106 * track of how many characters went w/ each XPD tpdu, so this 107 * will get messier 108 */ 109 IFDEBUG(D_XPD) 110 dump_mbuf(tpcb->tp_Xsnd.sb_mb, 111 "tp_goodXack Xsnd before sbdrop"); 112 ENDDEBUG 113 114 IFTRACE(D_XPD) 115 tptraceTPCB(TPPTmisc, 116 "goodXack: dropping cc ", 117 (int)(tpcb->tp_Xsnd.sb_cc), 118 0,0,0); 119 ENDTRACE 120 sbdroprecord(&tpcb->tp_Xsnd); 121 return 1; 122 } 123 return 0; 124 } 125 126 /* 127 * CALLED FROM: 128 * tp_good_ack() 129 * FUNCTION and ARGUMENTS: 130 * updates 131 * smoothed average round trip time (*rtt) 132 * roundtrip time variance (*rtv) - actually deviation, not variance 133 * given the new value (diff) 134 * RETURN VALUE: 135 * void 136 */ 137 138 void 139 tp_rtt_rtv(tpcb) 140 register struct tp_pcb *tpcb; 141 { 142 int new, old = tpcb->tp_dt_ticks; 143 int delta, elapsed = tick - tpcb->tp_rttemit; 144 145 if (tpcb->tp_rtt != 0) { 146 /* 147 * rtt is the smoothed round trip time in machine clock ticks (hz). 148 * It is stored as a fixed point number, unscaled (unlike the tcp 149 * srtt). The rationale here is that it is only significant to the 150 * nearest unit of slowtimo, which is at least 8 machine clock ticks 151 * so there is no need to scale. The smoothing is done according 152 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8). 153 */ 154 delta = elapsed - tpcb->tp_rtt; 155 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0) 156 tpcb->tp_rtt = 1; 157 /* 158 * rtv is a smoothed accumulated mean difference, unscaled 159 * for reasons expressed above. 160 * It is smoothed with an alpha of .75, and the round trip timer 161 * will be set to rtt + 4*rtv, also as TCP does. 162 */ 163 if (delta < 0) 164 delta = -delta; 165 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0) 166 tpcb->tp_rtv = 1; 167 } else { 168 /* 169 * No rtt measurement yet - use the unsmoothed rtt. 
170 * Set the variance to half the rtt (so our first 171 * retransmit happens at 3*rtt) 172 */ 173 tpcb->tp_rtt = elapsed; 174 tpcb->tp_rtv = elapsed >> 1; 175 } 176 tpcb->tp_rttemit = 0; 177 tpcb->tp_rxtshift = 0; 178 /* 179 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar. 180 * Because of the way we do the smoothing, srtt and rttvar 181 * will each average +1/2 tick of bias. When we compute 182 * the retransmit timer, we want 1/2 tick of rounding and 183 * 1 extra tick because of +-1/2 tick uncertainty in the 184 * firing of the timer. The bias will give us exactly the 185 * 1.5 tick we need. But, because the bias is 186 * statistical, we have to test that we don't drop below 187 * the minimum feasible timer (which is 2 ticks)." 188 */ 189 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb), 190 tpcb->tp_peer_acktime, 128 /* XXX */); 191 IFTRACE(D_RTT) 192 tptraceTPCB(TPPTmisc, "oldticks ,rtv, rtt, newticks", 193 old, rtv, rtt, new); 194 ENDTRACE 195 tpcb->tp_rxtcur = tpcb->tp_dt_ticks; 196 } 197 198 /* 199 * CALLED FROM: 200 * tp.trans when an AK arrives 201 * FUNCTION and ARGUMENTS: 202 * Given (cdt), the credit from the AK tpdu, and 203 * (seq), the sequence number from the AK tpdu, 204 * tp_goodack() determines if the AK acknowledges something in the send 205 * window, and if so, drops the appropriate packets from the retransmission 206 * list, computes the round trip time, and updates the retransmission timer 207 * based on the new smoothed round trip time. 208 * RETURN VALUE: 209 * Returns 1 if 210 * EITHER it actually acked something heretofore unacknowledged 211 * OR no news but the credit should be processed. 212 * If something heretofore unacked was acked with this sequence number, 213 * the appropriate tpdus are dropped from the retransmission control list, 214 * by calling tp_sbdrop(). 215 * No need to see the tpdu itself. 
216 */ 217 int 218 tp_goodack(tpcb, cdt, seq, subseq) 219 register struct tp_pcb *tpcb; 220 u_int cdt; 221 register SeqNum seq; 222 u_int subseq; 223 { 224 int old_fcredit; 225 int bang = 0; /* bang --> ack for something heretofore unacked */ 226 u_int bytes_acked; 227 228 IFDEBUG(D_ACKRECV) 229 printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n", 230 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt); 231 ENDDEBUG 232 IFTRACE(D_ACKRECV) 233 tptraceTPCB(TPPTgotack, 234 seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq); 235 ENDTRACE 236 237 IFPERF(tpcb) 238 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0); 239 ENDPERF 240 241 if (seq == tpcb->tp_snduna) { 242 if (subseq < tpcb->tp_r_subseq || 243 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) { 244 discard_the_ack: 245 IFDEBUG(D_ACKRECV) 246 printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n", 247 tpcb, subseq, tpcb->tp_r_subseq); 248 ENDDEBUG 249 goto done; 250 } 251 if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) { 252 tpcb->tp_r_subseq = subseq; 253 if (tpcb->tp_timer[TM_data_retrans] == 0) 254 tpcb->tp_dupacks = 0; 255 else if (++tpcb->tp_dupacks == tprexmtthresh) { 256 /* partner went out of his way to signal with different 257 subsequences that he has the same lack of an expected 258 packet. 
This may be an early indiciation of a loss */ 259 260 SeqNum onxt = tpcb->tp_sndnxt; 261 struct mbuf *onxt_m = tpcb->tp_sndnxt_m; 262 u_int win = min(tpcb->tp_fcredit, 263 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2; 264 if (win < 2) 265 win = 2; 266 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize; 267 tpcb->tp_timer[TM_data_retrans] = 0; 268 tpcb->tp_rttemit = 0; 269 tpcb->tp_sndnxt = tpcb->tp_snduna; 270 tpcb->tp_sndnxt_m = 0; 271 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 272 tp_send(tpcb); 273 tpcb->tp_cong_win = tpcb->tp_ssthresh + 274 tpcb->tp_dupacks * tpcb->tp_l_tpdusize; 275 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) { 276 tpcb->tp_sndnxt = onxt; 277 tpcb->tp_sndnxt_m = onxt_m; 278 } 279 280 } else if (tpcb->tp_dupacks > tprexmtthresh) { 281 tpcb->tp_cong_win += tpcb->tp_l_tpdusize; 282 } 283 goto done; 284 } 285 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna)) 286 goto discard_the_ack; 287 /* 288 * If the congestion window was inflated to account 289 * for the other side's cached packets, retract it. 290 */ 291 if (tpcb->tp_dupacks > tprexmtthresh && 292 tpcb->tp_cong_win > tpcb->tp_ssthresh) 293 tpcb->tp_cong_win = tpcb->tp_ssthresh; 294 tpcb->tp_r_subseq = subseq; 295 old_fcredit = tpcb->tp_fcredit; 296 tpcb->tp_fcredit = cdt; 297 if (cdt > tpcb->tp_maxfcredit) 298 tpcb->tp_maxfcredit = cdt; 299 tpcb->tp_dupacks = 0; 300 301 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) { 302 303 tpsbcheck(tpcb, 0); 304 bytes_acked = tp_sbdrop(tpcb, seq); 305 tpsbcheck(tpcb, 1); 306 /* 307 * If transmit timer is running and timed sequence 308 * number was acked, update smoothed round trip time. 309 * Since we now have an rtt measurement, cancel the 310 * timer backoff (cf., Phil Karn's retransmit alg.). 311 * Recompute the initial retransmit timer. 312 */ 313 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq)) 314 tp_rtt_rtv(tpcb); 315 /* 316 * If all outstanding data is acked, stop retransmit timer. 
317 * If there is more data to be acked, restart retransmit 318 * timer, using current (possibly backed-off) value. 319 * OSI combines the keepalive and persistance functions. 320 * So, there is no persistance timer per se, to restart. 321 */ 322 tpcb->tp_timer[TM_data_retrans] = 323 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur; 324 /* 325 * When new data is acked, open the congestion window. 326 * If the window gives us less than ssthresh packets 327 * in flight, open exponentially (maxseg per packet). 328 * Otherwise open linearly: maxseg per window 329 * (maxseg^2 / cwnd per packet), plus a constant 330 * fraction of a packet (maxseg/8) to help larger windows 331 * open quickly enough. 332 */ 333 { 334 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize; 335 336 incr = min(incr, bytes_acked); 337 if (cw > tpcb->tp_ssthresh) 338 incr = incr * incr / cw + incr / 8; 339 tpcb->tp_cong_win = 340 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat); 341 } 342 tpcb->tp_snduna = seq; 343 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) { 344 tpcb->tp_sndnxt = seq; 345 tpcb->tp_sndnxt_m = 0; 346 } 347 bang++; 348 } 349 350 if( cdt != 0 && old_fcredit == 0 ) { 351 tpcb->tp_sendfcc = 1; 352 } 353 if (cdt == 0) { 354 if (old_fcredit != 0) 355 IncStat(ts_zfcdt); 356 /* The following might mean that the window shrunk */ 357 if (tpcb->tp_timer[TM_data_retrans]) { 358 tpcb->tp_timer[TM_data_retrans] = 0; 359 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks; 360 if (tpcb->tp_sndnxt != tpcb->tp_snduna) { 361 tpcb->tp_sndnxt = tpcb->tp_snduna; 362 tpcb->tp_sndnxt_m = 0; 363 } 364 } 365 } 366 tpcb->tp_fcredit = cdt; 367 bang |= (old_fcredit < cdt); 368 369 done: 370 IFDEBUG(D_ACKRECV) 371 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n", 372 bang, cdt, old_fcredit, tpcb->tp_cong_win); 373 ENDDEBUG 374 /* if (bang) XXXXX Very bad to remove this test, but somethings broken */ 375 tp_send(tpcb); 376 return (bang); 377 } 378 379 /* 380 * CALLED FROM: 381 * tp_goodack() 382 * 
FUNCTION and ARGUMENTS: 383 * drops everything up TO but not INCLUDING seq # (seq) 384 * from the retransmission queue. 385 */ 386 tp_sbdrop(tpcb, seq) 387 register struct tp_pcb *tpcb; 388 SeqNum seq; 389 { 390 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 391 register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna); 392 int oldcc = sb->sb_cc, oldi = i; 393 394 if (i >= tpcb->tp_seqhalf) 395 printf("tp_spdropping too much -- should panic"); 396 while (i-- > 0) 397 sbdroprecord(sb); 398 IFDEBUG(D_ACKRECV) 399 printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n", 400 oldi, oldcc - sb->sb_cc, tpcb, seq); 401 ENDDEBUG 402 if (sb->sb_flags & SB_NOTIFY) 403 sowwakeup(tpcb->tp_sock); 404 return (oldcc - sb->sb_cc); 405 } 406 407 /* 408 * CALLED FROM: 409 * tp.trans on user send request, arrival of AK and arrival of XAK 410 * FUNCTION and ARGUMENTS: 411 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt). 412 * Emits until a) runs out of data, or b) runs into an XPD mark, or 413 * c) it hits seq number (highseq) limited by cong or credit. 414 * 415 * If you want XPD to buffer > 1 du per socket buffer, you can 416 * modifiy this to issue XPD tpdus also, but then it'll have 417 * to take some argument(s) to distinguish between the type of DU to 418 * hand tp_emit. 419 * 420 * When something is sent for the first time, its time-of-send 421 * is stashed (in system clock ticks rather than pf_slowtimo ticks). 422 * When the ack arrives, the smoothed round-trip time is figured 423 * using this value. 
424 */ 425 void 426 tp_send(tpcb) 427 register struct tp_pcb *tpcb; 428 { 429 register int len; 430 register struct mbuf *m; 431 struct mbuf *mb = 0; 432 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 433 unsigned int eotsdu = 0; 434 SeqNum highseq, checkseq; 435 int idle, idleticks, off, cong_win; 436 #ifdef TP_PERF_MEAS 437 int send_start_time = tick; 438 SeqNum oldnxt = tpcb->tp_sndnxt; 439 #endif TP_PERF_MEAS 440 441 idle = (tpcb->tp_snduna == tpcb->tp_sndnew); 442 if (idle) { 443 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact]; 444 if (idleticks > tpcb->tp_dt_ticks) 445 /* 446 * We have been idle for "a while" and no acks are 447 * expected to clock out any data we send -- 448 * slow start to get ack "clock" running again. 449 */ 450 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 451 } 452 453 cong_win = tpcb->tp_cong_win; 454 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna); 455 if (tpcb->tp_Xsnd.sb_mb) 456 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew); 457 458 IFDEBUG(D_DATA) 459 printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n", 460 tpcb, tpcb->tp_sndnxt, cong_win, highseq); 461 ENDDEBUG 462 IFTRACE(D_DATA) 463 tptraceTPCB( TPPTmisc, "tp_send sndnew snduna", 464 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0); 465 tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin", 466 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win); 467 ENDTRACE 468 IFTRACE(D_DATA) 469 tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin", 470 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win); 471 ENDTRACE 472 473 if (tpcb->tp_sndnxt_m) 474 m = tpcb->tp_sndnxt_m; 475 else { 476 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna); 477 for (m = sb->sb_mb; m && off > 0; m = m->m_next) 478 off--; 479 } 480 send: 481 /* 482 * Avoid silly window syndrome here . . . figure out how! 483 */ 484 checkseq = tpcb->tp_sndnum; 485 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq)) 486 checkseq = highseq; /* i.e. 
DON'T retain highest assigned packet */ 487 488 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) { 489 490 eotsdu = (m->m_flags & M_EOR) != 0; 491 len = m->m_pkthdr.len; 492 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 && 493 len < (tpcb->tp_l_tpdusize / 2)) 494 break; /* Nagle . . . . . */ 495 cong_win -= len; 496 /* make a copy - mb goes into the retransmission list 497 * while m gets emitted. m_copy won't copy a zero-length mbuf. 498 */ 499 mb = m; 500 m = m_copy(mb, 0, M_COPYALL); 501 if (m == MNULL) 502 break; 503 IFTRACE(D_STASH) 504 tptraceTPCB( TPPTmisc, 505 "tp_send mcopy nxt high eotsdu len", 506 tpcb->tp_sndnxt, highseq, eotsdu, len); 507 ENDTRACE 508 509 IFDEBUG(D_DATA) 510 printf("tp_sending tpcb 0x%x nxt 0x%x\n", 511 tpcb, tpcb->tp_sndnxt); 512 ENDDEBUG 513 /* when headers are precomputed, may need to fill 514 in checksum here */ 515 if (tpcb->tp_sock->so_error = 516 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) { 517 /* error */ 518 break; 519 } 520 m = mb->m_nextpkt; 521 tpcb->tp_sndnxt_m = m; 522 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) { 523 SEQ_INC(tpcb, tpcb->tp_sndnew); 524 /* 525 * Time this transmission if not a retransmission and 526 * not currently timing anything. 527 */ 528 if (tpcb->tp_rttemit == 0) { 529 tpcb->tp_rttemit = tick; 530 tpcb->tp_rttseq = tpcb->tp_sndnxt; 531 } 532 tpcb->tp_sndnxt = tpcb->tp_sndnew; 533 } else 534 SEQ_INC(tpcb, tpcb->tp_sndnxt); 535 /* 536 * Set retransmit timer if not currently set. 537 * Initial value for retransmit timer is smoothed 538 * round-trip time + 2 * round-trip time variance. 539 * Initialize shift counter which is used for backoff 540 * of retransmit time. 
541 */ 542 if (tpcb->tp_timer[TM_data_retrans] == 0) { 543 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks; 544 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks; 545 tpcb->tp_rxtshift = 0; 546 } 547 } 548 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum)) 549 tpcb->tp_oktonagle = 0; 550 #ifdef TP_PERF_MEAS 551 IFPERF(tpcb) 552 { 553 register int npkts; 554 int elapsed = tick - send_start_time, *t; 555 struct timeval now; 556 557 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt); 558 559 if (npkts > 0) 560 tpcb->tp_Nwindow++; 561 562 if (npkts > TP_PM_MAX) 563 npkts = TP_PM_MAX; 564 565 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]); 566 *t += (t - elapsed) >> TP_RTT_ALPHA; 567 568 if (mb == 0) { 569 IncPStat(tpcb, tps_win_lim_by_data[npkts] ); 570 } else { 571 IncPStat(tpcb, tps_win_lim_by_cdt[npkts] ); 572 /* not true with congestion-window being used */ 573 } 574 now.tv_sec = elapsed / hz; 575 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz; 576 tpmeas( tpcb->tp_lref, 577 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts); 578 } 579 ENDPERF 580 #endif TP_PERF_MEAS 581 582 583 IFTRACE(D_DATA) 584 tptraceTPCB( TPPTmisc, 585 "tp_send at end: new nxt eotsdu error", 586 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error); 587 588 ENDTRACE 589 } 590 591 int TPNagleok; 592 int TPNagled; 593 594 tp_packetize(tpcb, m, eotsdu) 595 register struct tp_pcb *tpcb; 596 register struct mbuf *m; 597 int eotsdu; 598 { 599 register struct mbuf *n; 600 register struct sockbuf *sb = &tpcb->tp_sock->so_snd; 601 int maxsize = tpcb->tp_l_tpdusize 602 - tp_headersize(DT_TPDU_type, tpcb) 603 - (tpcb->tp_use_checksum?4:0) ; 604 int totlen = m->m_pkthdr.len; 605 struct mbuf *m_split(); 606 /* 607 * Pre-packetize the data in the sockbuf 608 * according to negotiated mtu. Do it here 609 * where we can safely wait for mbufs. 610 * 611 * This presumes knowledge of sockbuf conventions. 612 * TODO: allocate space for header and fill it in (once!). 
613 */ 614 IFDEBUG(D_DATA) 615 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n", 616 maxsize, totlen, eotsdu, tpcb->tp_sndnum); 617 ENDTRACE 618 if (tpcb->tp_oktonagle) { 619 if ((n = sb->sb_mb) == 0) 620 panic("tp_packetize"); 621 while (n->m_act) 622 n = n->m_act; 623 if (n->m_flags & M_EOR) 624 panic("tp_packetize 2"); 625 SEQ_INC(tpcb, tpcb->tp_sndnum); 626 if (totlen + n->m_pkthdr.len < maxsize) { 627 /* There is an unsent packet with space, combine data */ 628 struct mbuf *old_n = n; 629 tpsbcheck(tpcb,3); 630 n->m_pkthdr.len += totlen; 631 while (n->m_next) 632 n = n->m_next; 633 sbcompress(sb, m, n); 634 tpsbcheck(tpcb,4); 635 n = old_n; 636 TPNagled++; 637 goto out; 638 } 639 } 640 while (m) { 641 n = m; 642 if (totlen > maxsize) { 643 if ((m = m_split(n, maxsize, M_WAIT)) == 0) 644 panic("tp_packetize"); 645 } else 646 m = 0; 647 totlen -= maxsize; 648 tpsbcheck(tpcb, 5); 649 sbappendrecord(sb, n); 650 tpsbcheck(tpcb, 6); 651 SEQ_INC(tpcb, tpcb->tp_sndnum); 652 } 653 out: 654 if (eotsdu) { 655 n->m_flags |= M_EOR; /* XXX belongs at end */ 656 tpcb->tp_oktonagle = 0; 657 } else { 658 SEQ_DEC(tpcb, tpcb->tp_sndnum); 659 tpcb->tp_oktonagle = 1; 660 TPNagleok++; 661 } 662 IFDEBUG(D_DATA) 663 printf("SEND out: oktonagle %d sndnum 0x%x\n", 664 tpcb->tp_oktonagle, tpcb->tp_sndnum); 665 ENDTRACE 666 return 0; 667 } 668 669 670 /* 671 * NAME: tp_stash() 672 * CALLED FROM: 673 * tp.trans on arrival of a DT tpdu 674 * FUNCTION, ARGUMENTS, and RETURN VALUE: 675 * Returns 1 if 676 * a) something new arrived and it's got eotsdu_reached bit on, 677 * b) this arrival was caused other out-of-sequence things to be 678 * accepted, or 679 * c) this arrival is the highest seq # for which we last gave credit 680 * (sender just sent a whole window) 681 * In other words, returns 1 if tp should send an ack immediately, 0 if 682 * the ack can wait a while. 
683 * 684 * Note: this implementation no longer renegs on credit, (except 685 * when debugging option D_RENEG is on, for the purpose of testing 686 * ack subsequencing), so we don't need to check for incoming tpdus 687 * being in a reneged portion of the window. 688 */ 689 690 tp_stash(tpcb, e) 691 register struct tp_pcb *tpcb; 692 register struct tp_event *e; 693 { 694 register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH; 695 /* 0--> delay acks until full window */ 696 /* 1--> ack each tpdu */ 697 #ifndef lint 698 #define E e->ATTR(DT_TPDU) 699 #else lint 700 #define E e->ev_union.EV_DT_TPDU 701 #endif lint 702 703 if ( E.e_eot ) { 704 register struct mbuf *n = E.e_data; 705 n->m_flags |= M_EOR; 706 n->m_act = 0; 707 } 708 IFDEBUG(D_STASH) 709 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 710 "stash: so_rcv before appending"); 711 dump_mbuf(E.e_data, 712 "stash: e_data before appending"); 713 ENDDEBUG 714 715 IFPERF(tpcb) 716 PStat(tpcb, Nb_from_ll) += E.e_datalen; 717 tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time, 718 E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen); 719 ENDPERF 720 721 if (E.e_seq == tpcb->tp_rcvnxt) { 722 723 IFDEBUG(D_STASH) 724 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n", 725 E.e_seq, E.e_datalen, E.e_eot); 726 ENDDEBUG 727 728 IFTRACE(D_STASH) 729 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot", 730 E.e_seq, E.e_datalen, E.e_eot, 0); 731 ENDTRACE 732 733 SET_DELACK(tpcb); 734 735 sbappend(&tpcb->tp_sock->so_rcv, E.e_data); 736 737 SEQ_INC( tpcb, tpcb->tp_rcvnxt ); 738 /* 739 * move chains from the reassembly queue to the socket buffer 740 */ 741 if (tpcb->tp_rsycnt) { 742 register struct mbuf **mp; 743 struct mbuf **mplim; 744 745 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit); 746 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit; 747 748 while (tpcb->tp_rsycnt && *mp) { 749 sbappend(&tpcb->tp_sock->so_rcv, *mp); 750 tpcb->tp_rsycnt--; 751 *mp = 0; 752 SEQ_INC(tpcb, tpcb->tp_rcvnxt); 753 ack_reason |= 
ACK_REORDER; 754 if (++mp == mplim) 755 mp = tpcb->tp_rsyq; 756 } 757 } 758 IFDEBUG(D_STASH) 759 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 760 "stash: so_rcv after appending"); 761 ENDDEBUG 762 763 } else { 764 register struct mbuf **mp; 765 SeqNum uwe; 766 767 IFTRACE(D_STASH) 768 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt", 769 E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0); 770 ENDTRACE 771 772 if (tpcb->tp_rsyq == 0) 773 tp_rsyset(tpcb); 774 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit); 775 if (tpcb->tp_rsyq == 0 || 776 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) { 777 ack_reason = ACK_DONT; 778 m_freem(E.e_data); 779 } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) { 780 IFDEBUG(D_STASH) 781 printf("tp_stash - drop & ack\n"); 782 ENDDEBUG 783 784 /* retransmission - drop it and force an ack */ 785 IncStat(ts_dt_dup); 786 IFPERF(tpcb) 787 IncPStat(tpcb, tps_n_ack_cuz_dup); 788 ENDPERF 789 790 m_freem(E.e_data); 791 ack_reason |= ACK_DUP; 792 } else { 793 *mp = E.e_data; 794 tpcb->tp_rsycnt++; 795 ack_reason = ACK_DONT; 796 } 797 } 798 /* there were some comments of historical interest here. 
*/ 799 { 800 LOCAL_CREDIT(tpcb); 801 802 if ( E.e_seq == tpcb->tp_sent_uwe ) 803 ack_reason |= ACK_STRAT_FULLWIN; 804 805 IFTRACE(D_STASH) 806 tptraceTPCB(TPPTmisc, 807 "end of stash, eot, ack_reason, sent_uwe ", 808 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0); 809 ENDTRACE 810 811 if ( ack_reason == ACK_DONT ) { 812 IncStat( ts_ackreason[ACK_DONT] ); 813 return 0; 814 } else { 815 IFPERF(tpcb) 816 if(ack_reason & ACK_STRAT_EACH) { 817 IncPStat(tpcb, tps_n_ack_cuz_strat); 818 } else if(ack_reason & ACK_STRAT_FULLWIN) { 819 IncPStat(tpcb, tps_n_ack_cuz_fullwin); 820 } else if(ack_reason & ACK_REORDER) { 821 IncPStat(tpcb, tps_n_ack_cuz_reorder); 822 } 823 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0, 824 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0); 825 ENDPERF 826 { 827 register int i; 828 829 /* keep track of all reasons that apply */ 830 for( i=1; i<_ACK_NUM_REASONS_ ;i++) { 831 if( ack_reason & (1<<i) ) 832 IncStat( ts_ackreason[i] ); 833 } 834 } 835 return 1; 836 } 837 } 838 } 839 840 /* 841 * tp_rsyflush - drop all the packets on the reassembly queue. 842 * Do this when closing the socket, or when somebody has changed 843 * the space avaible in the receive socket (XXX). 844 */ 845 tp_rsyflush(tpcb) 846 register struct tp_pcb *tpcb; 847 { 848 register struct mbuf *m, **mp; 849 if (tpcb->tp_rsycnt) { 850 for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit; 851 --mp >= tpcb->tp_rsyq; ) 852 if (*mp) { 853 tpcb->tp_rsycnt--; 854 m_freem(*mp); 855 } 856 if (tpcb->tp_rsycnt) 857 panic("tp_rsyflush"); 858 } 859 free((caddr_t)tpcb->tp_rsyq, M_PCB); 860 tpcb->tp_rsyq = 0; 861 } 862 863 tp_rsyset(tpcb) 864 register struct tp_pcb *tpcb; 865 { 866 register struct socket *so = tpcb->tp_sock; 867 int maxcredit = tpcb->tp_xtd_format ? 
0xffff : 0xf; 868 int old_credit = tpcb->tp_maxlcredit; 869 caddr_t rsyq; 870 871 tpcb->tp_maxlcredit = maxcredit = min(maxcredit, 872 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize); 873 874 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0) 875 return; 876 maxcredit *= sizeof(struct mbuf *); 877 if (tpcb->tp_rsyq) 878 tp_rsyflush(tpcb); 879 if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT)) 880 bzero(rsyq, maxcredit); 881 tpcb->tp_rsyq = (struct mbuf **)rsyq; 882 } 883 884 tpsbcheck(tpcb, i) 885 struct tp_pcb *tpcb; 886 { 887 register struct mbuf *n, *m; 888 register int len = 0, mbcnt = 0, pktlen; 889 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 890 891 for (n = sb->sb_mb; n; n = n->m_nextpkt) { 892 if ((n->m_flags & M_PKTHDR) == 0) 893 panic("tpsbcheck nohdr"); 894 pktlen = len + n->m_pkthdr.len; 895 for (m = n; m; m = m->m_next) { 896 len += m->m_len; 897 mbcnt += MSIZE; 898 if (m->m_flags & M_EXT) 899 mbcnt += m->m_ext.ext_size; 900 } 901 if (len != pktlen) { 902 printf("test %d; len %d != pktlen %d on mbuf 0x%x\n", 903 i, len, pktlen, n); 904 panic("tpsbcheck short"); 905 } 906 } 907 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 908 printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc, 909 mbcnt, sb->sb_mbcnt); 910 panic("tpsbcheck"); 911 } 912 } 913