1*25202Skarels #ifdef RCSIDENT 2*25202Skarels static char rcsident[] = "$Header: tcp_procs.c,v 1.32 85/07/31 09:34:27 walsh Exp $"; 3*25202Skarels #endif 4*25202Skarels 5*25202Skarels #include "../h/param.h" 6*25202Skarels #include "../h/systm.h" 7*25202Skarels #include "../h/mbuf.h" 8*25202Skarels #include "../h/socket.h" 9*25202Skarels #include "../h/socketvar.h" 10*25202Skarels #include "../h/syslog.h" 11*25202Skarels 12*25202Skarels #include "../net/if.h" 13*25202Skarels #include "../net/route.h" 14*25202Skarels 15*25202Skarels #include "../bbnnet/in.h" 16*25202Skarels #include "../bbnnet/in_pcb.h" 17*25202Skarels #include "../bbnnet/in_var.h" 18*25202Skarels #include "../bbnnet/net.h" 19*25202Skarels #include "../bbnnet/fsm.h" 20*25202Skarels #include "../bbnnet/tcp.h" 21*25202Skarels #include "../bbnnet/seq.h" 22*25202Skarels #include "../bbnnet/ip.h" 23*25202Skarels #include "../bbnnet/macros.h" 24*25202Skarels #ifdef HMPTRAPS 25*25202Skarels #include "../bbnnet/hmp_traps.h" 26*25202Skarels #endif 27*25202Skarels 28*25202Skarels /* 29*25202Skarels * TCP finite state machine procedures. 30*25202Skarels * 31*25202Skarels * Called from finite state machine action routines, these do most of the work 32*25202Skarels * of the protocol. They in turn call primitive routines (in tcp_prim) to 33*25202Skarels * perform lower level functions. 34*25202Skarels */ 35*25202Skarels 36*25202Skarels 37*25202Skarels /* 38*25202Skarels * This works cooperatively with t_close for freeing up data on receive/send 39*25202Skarels * buffers. 
40*25202Skarels */ 41*25202Skarels tcp_pcbdisconnect(inp) 42*25202Skarels struct inpcb *inp; 43*25202Skarels { 44*25202Skarels register struct tcpcb *tp; 45*25202Skarels 46*25202Skarels if (tp = (struct tcpcb *) inp->inp_ppcb) 47*25202Skarels { 48*25202Skarels inp->inp_ppcb = (caddr_t) NULL; 49*25202Skarels 50*25202Skarels /* 51*25202Skarels * free all data on receive queues 52*25202Skarels */ 53*25202Skarels { 54*25202Skarels register struct th *t, *next; 55*25202Skarels 56*25202Skarels t = tp->t_rcv_next; 57*25202Skarels while (t != (struct th *)tp) 58*25202Skarels { 59*25202Skarels next = t->t_next; 60*25202Skarels m_freem(dtom(t)); 61*25202Skarels t = next; 62*25202Skarels } 63*25202Skarels } 64*25202Skarels { 65*25202Skarels register struct mbuf *m, *next; 66*25202Skarels 67*25202Skarels m = tp->t_rcv_unack; 68*25202Skarels while (m != NULL) 69*25202Skarels { 70*25202Skarels next = m->m_act; 71*25202Skarels m_freem(m); 72*25202Skarels m = next; 73*25202Skarels } 74*25202Skarels } 75*25202Skarels 76*25202Skarels if (tp->oob_data) 77*25202Skarels m_freem(tp->oob_data); 78*25202Skarels 79*25202Skarels if (tp->t_template) 80*25202Skarels m_free(dtom(tp->t_template)); 81*25202Skarels 82*25202Skarels m_free(dtom(tp)); 83*25202Skarels } 84*25202Skarels } 85*25202Skarels 86*25202Skarels /* 87*25202Skarels * Delete TCB and free all resources used by the connection. Called after 88*25202Skarels * the close protocol is complete. 89*25202Skarels */ 90*25202Skarels t_close(tp, state) 91*25202Skarels register struct tcpcb *tp; 92*25202Skarels short state; 93*25202Skarels { 94*25202Skarels register struct inpcb *inp; 95*25202Skarels 96*25202Skarels /* 97*25202Skarels * in_pcbdetach() calls soisdisconnected(), which wakes up the 98*25202Skarels * process if it's sleeping. Need only pass user error code if 99*25202Skarels * appropriate (like ENETRESET) and hope he'll close the file 100*25202Skarels * descriptor. 
Don't need to clear timers since they're in the 101*25202Skarels * tcpcb to be deleted. 102*25202Skarels */ 103*25202Skarels inp = tp->t_in_pcb; 104*25202Skarels if (!tp->usr_abort) 105*25202Skarels inp->inp_socket->so_error = state; 106*25202Skarels in_pcbdetach(inp, tcp_pcbdisconnect); 107*25202Skarels } 108*25202Skarels 109*25202Skarels short max_ack_skipped = 1; 110*25202Skarels 111*25202Skarels /* 112*25202Skarels * We are in a position where, perhaps, we should send a TCP segment (packet). 113*25202Skarels * The important decisions are: 114*25202Skarels * 1) How big a segment should we send? This is important since most 115*25202Skarels * overhead occurs at the packet level (interrupts, queueing, 116*25202Skarels * header field checks...) and not at the byte level. 117*25202Skarels * 2) Is it worth it to send this packet? Are we sending enough data 118*25202Skarels * or would we be better off waiting for some more to queue up? 119*25202Skarels * 120*25202Skarels * The above requirements are the point of view when called in response to 121*25202Skarels * a user's write request. We are also called on packet arrival in order 122*25202Skarels * to send an ack (with piggy-backed data), and to respond to window openings 123*25202Skarels * by sending any pent up data. 124*25202Skarels * 125*25202Skarels * Send a TCP segment. Send data from left window edge of send buffer up to 126*25202Skarels * window size or end (whichever is less). Set retransmission timers. 127*25202Skarels * 128*25202Skarels * The Ford/Nagle algorithms might be thought of (if outstanding data, only 129*25202Skarels * send if packet would be large), but they are primarily for telnet and 130*25202Skarels * that doesn't go with ideas in comments down by push. Has idea of tcp 131*25202Skarels * changed since RFC? 
 */
send_tcp(tp, ctl)
register struct tcpcb *tp;
int ctl;
{
	register sequence last, wind;
	register int snd_flags;
	register int len;
	struct sockbuf *sosnd;
	int forced, error;
	int sendalot;

	sosnd = &tp->t_in_pcb->inp_socket->so_snd;
	sendalot = FALSE;
	snd_flags = 0;
	tp->snd_lst = tp->snd_nxt;
	forced = FALSE;
	/*
	 * Send SYN if this is first data (ISS).  The SYN occupies one
	 * unit of sequence space.
	 */
	if (SEQ_EQ(tp->snd_nxt, tp->iss))
	{
		snd_flags |= T_SYN;
		tp->snd_lst++;
	}
	/*
	 * Get seq # of last datum in send buffer.
	 */
	last = tp->snd_una;
	if (!tp->syn_acked)
		last++;			/* don't forget SYN */
	last += sosnd->sb_cc;
	/*
	 * If no data to send in buffer, just do FIN check, otherwise see
	 * how much we should send in segment.
	 */
	if (SEQ_GEQ(tp->snd_nxt, last))
	{
		/*
		 * Should send FIN?  Don't unless we haven't already sent
		 * one (seq_fin still at its iss sentinel, or snd_nxt has
		 * not passed it).
		 */
		if (tp->snd_fin &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}
	else if (tp->syn_acked)
	{
		/*
		 * We can't send more than we have (SYN + data represented
		 * by last).  Nor can we send more than the other end is
		 * prepared to receive (represented by the window in snd_wnd
		 * and wind).
		 *
		 * Only send a segment if there is something in the buffer,
		 * and a non-zero window has been received.
		 */
		wind = tp->snd_una + tp->snd_wnd;
		tp->snd_lst = SEQ_MIN(last, wind);

		/*
		 * Make sure the segment is not larger than the remote end
		 * can handle.  Though they may advertise a 4K window,
		 * perhaps they can only fill it 512 bytes at a time due to
		 * some buffering or device driver constraint.
		 *
		 * If we're both on the local net, the maxseg is probably
		 * the mtu of the local network, and this will avoid some
		 * IP fragmentation.
		 *
		 * ">=" so that sendalot gets set when we fill the segment
		 * exactly.
		 */
		if ((len = tp->snd_lst - tp->snd_nxt) >= tp->t_maxseg)
		{
			tp->snd_lst -= len - tp->t_maxseg;
			sendalot = TRUE;
		}

		/*
		 * If we're not on the same net or on similar nets
		 * immediately connected by a gateway, the negotiated maxseg
		 * may cause fragmentation.  Fragmentation per se is not
		 * bad, but tinygrams can cause problems and are
		 * inefficient.  So, send something that if it fragments,
		 * will produce reasonably sized fragments.  Avoid excessive
		 * fragmentation to reduce the probability the datagram
		 * fails to reassemble.
		 */
		if (tp->t_maxfrag)
		{
			len = tp->t_maxfrag*3;
			if ((tp->snd_lst - tp->snd_nxt) > len)
			{
				tp->snd_lst = tp->snd_nxt + len;
				sendalot = TRUE;
			}
		}

		if (SEQ_GT(tp->snd_end, tp->snd_una) &&
		    SEQ_LEQ(tp->snd_end, tp->snd_lst))
			/*
			 * There is data to send, and it should be PUSHed.
			 * PUSHed segments avoid the SWS algorithm since it
			 * might delay transmission.  PUSHed data MUST go
			 * out ASAP.  Note: To avoid performance degradation,
			 * bulk data transfers should not have PUSH on.
			 */
			snd_flags |= T_PUSH;
		else if (tp->snd_wnd > 0)
		{
			/*
			 * Avoid the silly window syndrome (sending small
			 * packets).  Making sure the usable window is at
			 * least some % of the offered window ensures we're
			 * sending a relatively (for this connection) good
			 * sized segment.
			 *
			 * If sbspace(sosnd) == 0, then the user
			 * is blocked for send resources, and we won't be
			 * able to send a larger packet later, so send it now.
			 * (Hmm, still true?  How about the wakeup after we
			 * trim the acked data?)
			 *
			 * SWS and persistence interaction:
			 * If there is outstanding data, snd_nxt - snd_una
			 * will be > 0 and we'll have retransmit timers
			 * running forcing eventual window updates.  If there
			 * is no outstanding data, then we'll send some and
			 * start up the retransmit timers.  So, any time
			 * we run through this segment of code instead of
			 * the next one, we've got some good timers running.
			 */
			if (!tp->rexmt && !tp->ack_due && !tp->snd_fin &&
			    !sendalot &&
			    sbspace(sosnd) > 0 &&
			    ((100*(tp->snd_nxt-tp->snd_una))/tp->snd_wnd)
			    > tp->sws_qff)
				tp->snd_lst = tp->snd_nxt;	/* suppress the runt */
		}
		else
		{
			/*
			 * We have stuff to send, but can't since the other
			 * end can't handle it right now.  We start up the
			 * persistence timer in case their window-opening
			 * ack is lost.  When the timer goes off, we send
			 * a byte to force a window update.  Wait for the
			 * timer in order to give him a chance to deal with
			 * the remotely buffered data and send us an update.
			 * (We'll get here on acks that stop rxmit timers
			 * but that contain zero window since the remote user
			 * has not picked up data yet.)
			 *
			 * If we're called due to a write() or packet arrival,
			 * this is how we enter the persistence state.  If
			 * we're called because the persist timer went off,
			 * the timer is restarted to keep persisting.
			 */
			if (tp->t_timers[TPERSIST] == 0)
				tp->t_timers[TPERSIST] = MIN(TCP_tvMAXPERSIST,
				    MAX(TCP_tvMINPERSIST, tp->t_srtt*3));

			if (tp->force_one)
			{
				/* persist timer went off: force one byte out */
				tp->snd_lst = tp->snd_nxt + 1;
				forced = TRUE;
			}
		}

		/* must send FIN and no more data left to send after this */

		if (tp->snd_fin && !forced && SEQ_EQ(tp->snd_lst, last) &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}

	/*
	 * Now, we have determined how large a segment to send if our only
	 * purpose is to get data to the other side.  If there is something
	 * to send, do it and update timers for rexmt.
	 */
	len = tp->snd_lst - tp->snd_nxt;
	if (len > 0)
	{	/* then SEQ_LT(tp->snd_nxt, tp->snd_lst) */

		error = send_pkt (tp, snd_flags, len);

		/*
		 * SEQ_LEQ(snd_nxt, t_xmt_val): if this is a retransmission
		 * of the round-trip-time measuring byte, then remeasure the
		 * round trip time.  (Keep rtt from drifting upward on lossy
		 * networks.)
		 *
		 * SEQ_GT(snd_una, t_xmt_val): Measure the rtt if the last
		 * timed byte has been acked.
		 */
		if (tp->syn_acked && (SEQ_LEQ(tp->snd_nxt, tp->t_xmt_val) ||
		    SEQ_GT(tp->snd_una, tp->t_xmt_val)))
		{
			if (tp->t_srtt != 0)
				tp->t_timers[TXMT] = 0;
			tp->t_xmt_val = tp->snd_nxt;
		}

		/*
		 * If the window was full, and we're just forcing a byte
		 * out to try to get a new window, then don't use
		 * retransmission timeouts.  The other side can take as
		 * long as it wants to process the data it's currently got.
		 */
		if (! forced)
		{
			/*
			 * Set timers for retransmission.  If we already have
			 * some outstanding data, then don't reset the timer.
			 * Think of the case where we send one byte every 1/4
			 * second and only the first byte is lost: we would
			 * need to wait until we filled the window before the
			 * retransmission timer started to decrease and go
			 * off.
			 */
			if (tp->t_timers[TREXMT] == 0)
				tp->t_timers[TREXMT] = tp->t_rxmitime;

			if (tp->t_timers[TREXMTTL] == 0)
				tp->t_timers[TREXMTTL] = tp->t_rttltimeo;

			/*
			 * and remember that the next segment out begins
			 * further into the stream if this one got out.
			 */
			if (! error)
				tp->snd_nxt = tp->snd_lst;
		}

#if T_DELACK > 0
		t_cancel(tp, TDELACK);		/* data carried the ack */
		tp->force_ack = FALSE;
		tp->ack_skipped = 0;
#endif
		tp->ack_due = FALSE;
		tp->snd_hi = SEQ_MAX(tp->snd_lst, tp->snd_hi);
		if (!error)
			return(TRUE);
	}

	/*
	 * If ctl, make sure to send something so the ACK gets through.
	 * Attempt to reduce ACK traffic by delaying ACKs with no data
	 * slightly.  Naive ack traffic can account for about 10% of what
	 * the receiving tcp is doing.
	 *
	 * Bidirectional connection (telnet) => ack piggy-backs the
	 * application's response.
	 *
	 * Unidirectional connection (ftp) => advertise a large enough
	 * window so that either #skipped (tp->ack_skipped) or our estimate
	 * of what he thinks the window is causes an ack.  The estimate
	 * assumes most packets get through.  This also assumes that the
	 * sender buffers enough to take advantage of the estimated usable
	 * window, so we'll assume a minimum send buffer provided by his
	 * operating system.  (Remember, his OS has to buffer it until we
	 * ack it.)
	 *
	 * So, the test assumes his send buffer is > MINTCPBUF bytes large
	 * and his silly window algorithm cuts in at < 50% of window.
	 *
	 * Use of the fasttimeout facility is a possibility.
	 */
	if (ctl == TCP_CTL)
	{
#if T_DELACK > 0
		if (tp->force_ack ||
		    (tp->ack_skipped >= max_ack_skipped) ||
		    ((tp->rcv_nxt - tp->lastack) > MIN(MINTCPBUF, tp->rcv_wnd>>1)))
		{
			/* send a bare ack now */
			(void) send_pkt(tp, 0, 0);
			t_cancel(tp, TDELACK);
			tp->force_ack = FALSE;
			tp->ack_skipped = 0;
			tp->ack_due = FALSE;
			tcpstat.t_ackonly ++;
		}
		else
		{
			/* delay the ack, hoping to piggy-back it later */
			tp->ack_skipped ++;
			if (tp->t_timers[TDELACK] == 0)
				tp->t_timers[TDELACK] = T_DELACK;
		}
#else
		(void) send_pkt(tp, 0, 0);
		tp->ack_due = FALSE;
		tcpstat.t_ackonly ++;
#endif
	}
	return(FALSE);
}

/*
 * Process incoming ACKs.  Remove data from the send queue up to the
 * acknowledgement.  Also handles the round-trip timer for retransmissions
 * and acknowledgement of SYN, and clears the urgent flag if required.
 *
 * NOTE(review): RCV_ACK below is a macro, not a function -- its arguments
 * (tp, n) are evaluated many times, so they must be side-effect free.
 */

#ifdef BBNPING
#define BBNPING_RESET(inp, len) \
	if (len > 0){ \
		/* \
		 * We've sent him NEW data, perhaps by a gateway, that he \
		 * has successfully received.  If that's the case, then \
		 * we know the route works and we don't have to ping that \
		 * gateway. \
		 * \
		 * see check_ping() \
		 */ \
		register struct rtentry *rt; \
\
		if (rt = inp->inp_route.ro_rt) \
			if (rt->rt_flags & RTF_GATEWAY) \
				rt->irt_pings = (-1); \
	}
#else
#define BBNPING_RESET(x,y) /* */
#endif

#ifdef MBUF_DEBUG
#define LENCHECK \
	if ((len > sosnd->sb_cc) || (len < 0)){ \
		printf("len %d sb_cc %d flags 0x%x state %d\n", \
		    len, sosnd->sb_cc, n->t_flags, tp->t_state); \
		if (len < 0) \
			len = 0; \
		else \
			len = sosnd->sb_cc; \
	}
#else
#define LENCHECK /* */
#endif

/* smoothed rtt: exponential decay, 60/40-ish weighted average */
#define smooth(tp) (((75*(tp)->t_timers[TXMT]) + (125*(tp)->t_srtt)) / 200)

#define RCV_ACK(tp, n) \
{ \
	register struct inpcb *inp; \
	register struct sockbuf *sosnd; \
	register len; \
\
	inp = tp->t_in_pcb; \
	sosnd = &inp->inp_socket->so_snd; \
	len = n->t_ackno - tp->snd_una; \
\
	tp->snd_una = n->t_ackno; \
	if (SEQ_GT(tp->snd_una, tp->snd_nxt)) \
		tp->snd_nxt = tp->snd_una; \
\
	/* \
	 * if urgent data has been acked, reset urgent flag \
	 */ \
\
	if (tp->snd_urg && SEQ_GEQ(tp->snd_una, tp->snd_urp)) \
		tp->snd_urg = FALSE; \
\
	if (tp->syn_acked) { \
		/* if the timed message has been acknowledged, use the time \
		   to set the retransmission time value; exponential decay, \
		   60/40 weighted average */ \
\
		if (SEQ_GEQ(tp->snd_una, tp->t_xmt_val)) { \
			if (tp->t_srtt == 0) \
				tp->t_srtt = tp->t_timers[TXMT]; \
			else \
				tp->t_srtt = smooth(tp); \
			tp->t_rxmitime = MIN(TCP_tvRXMAX, \
			    MAX(TCP_tvRXMIN, (3*tp->t_srtt)/2)); \
		} \
	} else { \
		/* handle ack of opening syn (tell user) */ \
\
		if (SEQ_GT(tp->snd_una, tp->iss)) { \
			tp->syn_acked = TRUE; \
			len--;		/* ignore SYN */ \
			t_cancel(tp, TINIT);	/* cancel init timer */ \
		} \
	} \
\
	/* remove acknowledged data from send buff */ \
	if (ack_fin(tp, n)) \
		len --; \
	LENCHECK \
	sbdrop (sosnd, len); \
	BBNPING_RESET(inp, len) \
	sbwakeup (sosnd);	/* wakeup iff > x% of buffering avail? */ \
\
	/* handle ack of closing fin */ \
\
	if (SEQ_NEQ(tp->seq_fin, tp->iss) && SEQ_GT(tp->snd_una, tp->seq_fin)) \
		tp->snd_fin = FALSE; \
	t_cancel(tp, TREXMT);	/* cancel retransmit timer */ \
	t_cancel(tp, TREXMTTL);	/* cancel retransmit too long timer */ \
	tp->cancelled = TRUE; \
}


/*
 * Process incoming segments: SYN, ACK, window update, urgent pointer,
 * data, and FIN, then transmit anything (data or bare ack) that became
 * due as a result.
 */
rcv_tcp(tp, n, ctl)
register struct tcpcb *tp;
register struct th *n;
int ctl;
{
	int sentsomedata;

	tp->dropped_txt = FALSE;
	tp->ack_due = FALSE;
	tp->new_window = FALSE;
	/*
	 * Process SYN: record the initial receive sequence and ack it.
	 */
	if (!tp->syn_rcvd && n->t_flags&T_SYN)
	{
		tp->snd_wl = tp->rcv_urp = tp->irs = n->t_seq;
		tp->rcv_urpend = tp->rcv_urp -1;
		tp->rcv_nxt = n->t_seq + 1;
		tp->syn_rcvd = TRUE;
		tp->ack_due = TRUE;
	}

	if (tp->syn_rcvd)
	{
		/*
		 * Process ACK if data not already acked previously.  (Take
		 * ACKed data off the send queue, and reset rexmt timers.)
		 */
		if (n->t_flags&T_ACK && SEQ_GT(n->t_ackno, tp->snd_una))
			RCV_ACK(tp, n)

		/*
		 * Check for new window.  RCV_ACK did not change syn_rcvd.
		 */
		if (SEQ_GEQ(n->t_seq, tp->snd_wl))
		{
			tp->snd_wl = n->t_seq;
			tp->snd_wnd = n->t_win;
			tp->new_window = TRUE;
			t_cancel(tp, TPERSIST);	/* cancel persist timer */
		}
	}

	/*
	 * For data packets only (vs. ctl), process data and URG.
	 */
	if (ctl == TCP_DATA)
	{
		/*
		 * Remember how much urgent data for present_data
		 */
		if (n->t_flags & T_URG)
		{
			/*
			 * if last <= urpend, then is a retransmission
			 * bytes [n->t_seq ... last] are urgent
			 */
			register sequence last;

			last = n->t_seq + n->t_urp;
			if (SEQ_GT(last, tp->rcv_urpend))
			{
				/*
				 * Can only remember one contiguous region;
				 * a disjoint new region replaces the old one.
				 */
				if (SEQ_GT(n->t_seq, tp->rcv_urpend+1))
				{
					struct socket *so;

					tp->rcv_urp = n->t_seq;
					if (tp->oob_data)
					{
						m_freem(tp->oob_data);
						tp->oob_data = NULL;
					}

					so = tp->t_in_pcb->inp_socket;
					so->so_oobmark = so->so_rcv.sb_cc +
					    (tp->rcv_urp-tp->rcv_nxt);
					if (so->so_oobmark == 0)
						so->so_state |= SS_RCVATMARK;
				}
				tp->rcv_urpend = last;
			}
		}

		if (n->t_len != 0)
			rcv_text(tp, n);	/* accept and sequence data */

		/*
		 * Delay extraction of out-of-band data until
		 * present_data() so we don't have to worry about
		 * duplication...
		 */

#ifdef bsd41
		/*
		 * Process PUSH, mark end of data chain.
		 *
		 * Not done in 4.2.  TCP is a byte stream, without record
		 * boundaries, so we don't have to mark for sbappend(), which
		 * preserves marks, and soreceive(), which terminates reads
		 * at marks.  Data IS pushed nevertheless since soreceive
		 * gives the user all that is available and returns.
		 */
		if (n->t_flags&T_PUSH && !tp->dropped_txt &&
		    tp->t_rcv_prev != (struct th *)tp)
		{

			/* Find last mbuf on received data chain and mark */

			m = dtom(tp->t_rcv_prev);
			if (m != NULL)
			{
				while (m->m_next != NULL)
					m = m->m_next;
				m->m_act = (struct mbuf *) 1;
			}
		}
#endif
	}
	/*
	 * Process FIN, check for duplicates and make sure all data is in.
	 */
	if (n->t_flags&T_FIN && !tp->dropped_txt)
	{
		if (tp->fin_rcvd)
			tp->ack_due = TRUE;
		else
		{
			/*
			 * Check if we really have FIN
			 * (rcv buf filled in, no drops)
			 */
			register sequence last;

			last = firstempty(tp);
			if ((tp->t_rcv_prev == (struct th *)tp &&
			    SEQ_EQ(last, t_end(n)+1)) ||
			    SEQ_EQ(last, t_end(tp->t_rcv_prev)+1))
			{
				tp->fin_rcvd = TRUE;
				uwake(tp->t_in_pcb);
			}
			/*
			 * If FIN, then set to ACK: incr rcv_nxt, since FIN
			 * occupies sequence space
			 */
			if (tp->fin_rcvd && SEQ_GEQ(tp->rcv_nxt, last))
			{
				tp->rcv_nxt = last + 1;
				tp->ack_due = TRUE;
			}
		}
	}
	/*
	 * If an ACK is required or the rcv window has changed, try to send
	 * something.
	 */
	sentsomedata = FALSE;
	if (tp->ack_due)
		sentsomedata = send_tcp(tp, TCP_CTL);
	else if (tp->new_window)
		sentsomedata = send_tcp(tp, TCP_DATA);
	/*
	 * tp->cancelled => retransmit, rttl timers are now zero
	 *
	 * If we didn't send any data, we might not have retransmit, rttl
	 * timers running.  If we still have unACKed data and we turned off
	 * the timers above, then ensure the timers are running.
	 */
	if (!sentsomedata && is_unacked(tp) && tp->cancelled)
	{
		tp->t_timers[TREXMT] = tp->t_rxmitime;
		tp->t_timers[TREXMTTL] = tp->t_rttltimeo;
		tp->cancelled = FALSE;
	}
}

#undef BBNPING_RESET
#undef LENCHECK

/*
 * Process incoming data.  Put the segments on the sequencing queue in
 * order, taking care of overlaps and duplicates.  Data is removed from
 * the sequence queue by present_data when the sequence is complete (no
 * holes at top).  Drop data that falls outside the buffer quota if tight
 * for space.  Otherwise, process and recycle data held in tcp_input.
 */
/*
 * rcv_text -- accept the text (data) portion of an incoming segment.
 *
 * tp: connection control block; t: incoming segment header (mbuf-resident).
 *
 * Steps, as implemented below:
 *   1. trim off any prefix of the segment already received (seq < rcv_nxt);
 *      if nothing new remains, just schedule an ack and return.
 *   2. if the receive buffer lacks room, first reclaim space by freeing
 *      fragments from the bottom of the sequencing queue (only when the new
 *      segment sequences before the queue head), then truncate the tail of
 *      the new segment if still short.
 *   3. splice what remains into the per-connection sequencing queue,
 *      resolving overlaps with queued fragments on both sides.
 *   4. advance rcv_nxt over the contiguous prefix (FIRSTEMPTY) and mark an
 *      ack due.
 *   5. while buffer space remains, pull deferred segments off t_rcv_unack
 *      and reschedule them via w_alloc(INRECV, ...).
 */
rcv_text(tp, t)
register struct tcpcb *tp;
register struct th *t;
{
    register i;
    register struct sockbuf *sorcv;
    register struct mbuf *m;
    register struct th *q;
    struct th *p;
    struct mbuf *n;
    struct th *savq;
    int j, oldkeep;
    sequence last;

    /* throw away any data we have already received */

    if ((i = tp->rcv_nxt - t->t_seq) > 0)
    {
        if (i < t->t_len)
        {
            /* partial duplicate: drop the first i bytes in place */
            t->t_seq += i;
            t->t_len -= i;
            m_adj(dtom(t), i);
        }
        else
        {
            /* entirely old data: count it and re-ack (ack may have been lost) */
            tp->t_olddata++;
            tp->ack_due = TRUE; /* send ack just in case */
#ifdef HMPTRAPS
            /* hmp_trap(T_TCP_DUP, (caddr_t)0,0); */
#endif
            return;
        }
    }

    last = t_end(t);    /* last seq # in incoming seg */

    /* # buffers available to con */

    sorcv = &tp->t_in_pcb->inp_socket->so_rcv;
    i = sbspace(sorcv);
    if (i < 0)
        i = 0;

    /* enough resources to process segment? used to walk mbuf chain to
     * count up data bytes. let's be smart and use t_len */

    j = t->t_len;
    if (j > i)
    {

        /* if segment preceeds top of sequencing queue, try to take
           buffers from bottom of queue */

        q = tp->t_rcv_next;
        if (q != (struct th *)tp && SEQ_LT(tp->rcv_nxt, q->t_seq) &&
            SEQ_LT(t->t_seq, q->t_seq))

            /* walk queue bottom-up, freeing fragments until room i >= j */
            for (p = tp->t_rcv_prev; i < j && p != (struct th *)tp;)
            {
                savq = p->t_prev;
                TCP_DEQ(p, tp);
#ifdef HMPTRAPS
                /* hmp_trap(T_TCP_UDROP, (caddr_t)0,0); */
#endif
                /* credit each freed mbuf's length back to available space */
                for (m = dtom(p); m != NULL; m = m_free(m))
                    i += m->m_len;
                p = savq;
            }

        /* if still not enough room, drop text from end of new segment */

        if (j > i)
        {

            /* skip over the mbufs that fit (i counts down to <= 0) */
            for (m = dtom(t); i > 0 && m != NULL; m = m->m_next)
                i -= m->m_len;

            /* zero-length the remainder; last tracks the new end seq # */
            while (m != NULL)
            {
                t->t_len -= m->m_len;
                last -= m->m_len;
                m->m_len = 0;
                m = m->m_next;
            }
            tp->dropped_txt = TRUE;
#ifdef HMPTRAPS
            /* hmp_trap(T_TCP_RDROP, (caddr_t)0,0); */
#endif
            /* nothing left of the segment at all */
            if (SEQ_LT(last, t->t_seq))
                return;
        }
    }

    /* merge incoming data into the sequence queue */

    q = tp->t_rcv_next; /* -> top of sequencing queue */

    /* skip frags which new doesn't overlap at end */

    while ((q != (struct th *)tp) && SEQ_GT(t->t_seq, t_end(q)))
        q = q->t_next;

    if (q == (struct th *)tp)
    {   /* frag at end of chain */

        if (SEQ_GEQ(last, tp->rcv_nxt))
        {
            /* tcp_net_keep tells the caller not to free the segment's
               mbufs: they now live on the sequencing queue */
            tcp_net_keep = TRUE;
            TCP_ENQ(t, tp->t_rcv_prev, tp);
        }

    }
    else
    {

#ifdef HMPTRAPS
        /* we've received an out-of-order packet: trap! */

        /* hmp_trap(T_TCP_ORDER, (caddr_t)0,0); */

#endif
        /* frag doesn't overlap any on chain */

        if (SEQ_LT(last, q->t_seq))
        {
            tcp_net_keep = TRUE;
            TCP_ENQ(t, q->t_prev, tp);

            /* new overlaps beginning of next frag only */

        }
        else if (SEQ_LT(last, t_end(q)))
        {
            /* trim the overlapping tail of the new segment (negative
               m_adj length trims from the end), then enqueue before q */
            if ((i = last - q->t_seq + 1) < t->t_len)
            {
                t->t_len -= i;
                m_adj(dtom(t), -i);
                tcp_net_keep = TRUE;
                TCP_ENQ(t, q->t_prev, tp);
            }

            /* new overlaps end of previous frag */

        }
        else
        {
            savq = q;
            if (SEQ_LEQ(t->t_seq, q->t_seq))
            {   /* complete cover */
                savq = q->t_prev;
                TCP_DEQ(q, tp);
                m_freem(dtom(q));

            }
            else
            {   /* overlap */
                /* drop the head of the new segment that duplicates q */
                if ((i = t_end(q) - t->t_seq + 1) < t->t_len)
                {
                    t->t_seq += i;
                    t->t_len -= i;
                    m_adj(dtom(t), i);
                }
                else
                    t->t_len = 0;
            }

            /* new overlaps at beginning of successor frags */

            q = savq->t_next;
            while ((q != (struct th *)tp) && (t->t_len != 0) &&
                SEQ_LEQ(q->t_seq, last))

                /* complete cover */

                if (SEQ_LEQ(t_end(q), last))
                {
                    p = q->t_next;
                    TCP_DEQ(q, tp);
                    m_freem(dtom(q));
                    q = p;
                }
                else
                {   /* overlap */
                    /* trim tail of new segment duplicated by q, then stop */
                    if ((i = last-q->t_seq+1) < t->t_len)
                    {
                        t->t_len -= i;
                        m_adj(dtom(t), -i);
                    }
                    else
                        t->t_len = 0;
                    break;
                }

            /* enqueue whatever is left of new before successors */

            if (t->t_len != 0)
            {
                tcp_net_keep = TRUE;
                TCP_ENQ(t, savq, tp);
            }
        }
    }

    /* set to ack completed data (no gaps) */

    FIRSTEMPTY(tp, tp->rcv_nxt);
    tp->ack_due = TRUE;

    /* if any room remaining in rcv buf, take any unprocessed
       messages and schedule for later processing */

    if ((m = tp->t_rcv_unack) != NULL && (i = sbspace(sorcv)) > 0)
        do
        {

            /* schedule work request */

            /* strip the IP + TCP headers from the front of the mbuf;
               t_off is the TCP data offset in words (<< TCP_OFFSHIFT) */
            t = mtod(m, struct th *);
            j = (t->t_off << TCP_OFFSHIFT) + sizeof(struct ip);
            m->m_off += j;
            m->m_len -= j;
            tp->t_rcv_unack = m->m_act;
            m->m_act = (struct mbuf *)0;
            /* w_alloc may clobber tcp_net_keep; preserve it around the call */
            oldkeep = tcp_net_keep;
            tcpstat.t_unack++;
            w_alloc(INRECV, 0, tp, t);
            tcp_net_keep = oldkeep;

            /* remaining buffer space */

            for (n = m; n != NULL; n = n->m_next)
                i -= n->m_len;
        }
        while ((m = tp->t_rcv_unack) != NULL && i > 0);
}

/*
 * Send a reset segment in response to segment n on connection tp.
 *
 * Never sent in response to a segment that itself carries RST (avoids
 * reset wars).  If n carried an ACK, the reset is sent with snd_nxt taken
 * from n's ack field so the peer will accept it.
 *
 * For a "wildcard" listener (unbound local/foreign address or port), the
 * pcb's addresses are temporarily filled in from the offending packet so
 * a template/route can be built, then restored afterwards.
 */
send_rst(tp, n)
register struct tcpcb *tp;
register struct th *n;
{
    register struct inpcb *inp;
    struct in_addr src, dst;
    u_short port;
    int temp_rst;

    /* don't send a reset in response to a reset */

    if (n->t_flags&T_RST || (inp = tp->t_in_pcb) == NULL)
        return;

    tp->snd_rst = TRUE;
    temp_rst = FALSE;
    if (n->t_flags&T_ACK)
        tp->snd_nxt = n->t_ackno;

    /* if reset required from "wildcard" listener, take addresses and
       port from incoming packet */

    if (inp->inp_laddr.s_addr == 0 || inp->inp_faddr.s_addr == 0 ||
        inp->inp_fport == 0)
    {
        /* save the wildcard values so they can be restored below */
        src = inp->inp_laddr;
        dst = inp->inp_faddr;
        port = inp->inp_fport;
        /* swap sense: our local addr is the packet's destination */
        inp->inp_laddr = n->t_d;
        inp->inp_faddr = n->t_s;
        inp->inp_fport = n->t_src;
        tp->t_template = tcp_template(tp);
        temp_rst = TRUE;
    }
    tp->syn_rcvd = FALSE;
    if (tp->t_template)
        (void) send_pkt(tp, 0, 0);
    else
        printf("send_rst: no template\n");
    /* the RST carried any pending ack; clear ack bookkeeping */
    tp->ack_due = FALSE;
    tp->snd_rst = FALSE;
#if T_DELACK > 0
    tp->force_ack = FALSE;
    t_cancel(tp, TDELACK);
    tp->ack_skipped = 0;
#endif

    /* restore "wildcard" addresses */

    if (temp_rst)
    {
        inp->inp_laddr = src;
        inp->inp_faddr = dst;
        inp->inp_fport = port;
        tp->snd_nxt = tp->iss;
        /* drop route and template built for the temporary addresses */
        if (inp->inp_route.ro_rt != NULL)
        {
            rtfree(inp->inp_route.ro_rt);
            inp->inp_route.ro_rt = NULL;
        }
        if (tp->t_template)
        {
            m_free(dtom(tp->t_template));
            tp->t_template = NULL;
        }
    }
}

/*
 * Remove out-of-band (urgent) data from the mbuf chain mp and move it to
 * tp->oob_data, notifying the socket layer via sohasoutofband().
 *
 * The urgent bytes occupy sequence range [rcv_urp, rcv_urpend]; their byte
 * offset within mp is derived from so_oobmark relative to the bytes already
 * in the receive buffer (sorcv->sb_cc).  The urgent bytes are copied into
 * fresh mbufs and closed up out of the input stream in place with bcopy.
 *
 * Returns the (possibly updated) head of the input chain; the head changes
 * when the leading mbuf is emptied and freed.
 */
struct mbuf *extract_oob(tp, mp, sorcv)
struct tcpcb *tp;
struct mbuf *mp;
struct sockbuf *sorcv;
{
    struct socket *so;
    struct mbuf *top, *here, *m;
    int off, len, tmp;

    m = mp;
    so = tp->t_in_pcb->inp_socket;
    /*
     * skip over bytes that preceed out of band data.
     */
    if ((off = so->so_oobmark - sorcv->sb_cc) < 0)
    {
        /* shouldn't happen: mark is behind data already buffered.
           Log, forget the urgent range, and leave the chain untouched. */
        log(KERN_RECOV, "extract_oob: neg off\n");
        tp->rcv_urpend = tp->rcv_urp = tp->irs;
        return (mp);
    }

    /* advance m/off to the mbuf containing the first urgent byte */
    while (m && (off > 0))
    {
        if (m->m_len <= off)
        {
            off -= m->m_len;
            m = m->m_next;
        }
        else
            break;
    }

    if (!m)
        return (mp);

    /*
     * copy out of band data. removing it from input stream.
     */
    len = tp->rcv_urpend - tp->rcv_urp + 1; /* # urgent bytes */
    top = here = NULL;
    while (m && (len > 0))
    {
        char *p;
        struct mbuf *newm;
        int dropped;

        tmp = MIN(m->m_len - off, len);
        /* tmp == # urgent bytes in this mbuf */
        len -= tmp;
        tp->rcv_urp += tmp;

        p = mtod(m, caddr_t) + off; /* points at first urgent byte */
        dropped = FALSE;

        while (tmp > 0)
        {
            unsigned nbytes;

            /* in case this mbuf uses pages */
            nbytes = MIN(tmp, MLEN);

            if (! dropped)
            {
                if (newm = m_get(M_WAIT, MT_DATA))
                {
                    bcopy (p, mtod(newm, char *), nbytes);
                    newm->m_len = nbytes;

                    /* append newm to the chain being built at top/here */
                    if (!top)
                        top = here = newm;
                    else
                    {
                        here->m_next = newm;
                        here = here->m_next;
                    }
                }
                else
                    /* potential unreliability: once an allocation fails,
                       remaining urgent bytes are removed but not saved */
                    dropped = TRUE;
            }

            /* close the gap: slide the trailing non-urgent bytes down */
            bcopy(p+nbytes, p, (unsigned)(m->m_len -off -nbytes));
            m->m_len -= nbytes;
            tmp -= nbytes;
        }

        if (m->m_len <= 0)
        {
            /*
             * So soreceive never sees a zero length mbuf
             * with m_act set. (PUSHED URGENT data packet)
             */
            if (m == mp)
                mp = m = m_free(m);
            else
                m = m_free(m);
        }
        else
            m = m->m_next;

        off = 0;
    }

    if (top)
    {
        /* append to any urgent data already pending on the connection */
        if (tp->oob_data)
            m_cat (tp->oob_data, top);
        else
            tp->oob_data = top;
        sohasoutofband(so);
    }

    return (mp);
}

/*
 * Accept data for the user to receive. Moves data from sequenced tcp
 * segments from the sequencing queue to the user's receive queue (in the
 * ucb). Observes locking on receive queue.
 *
 * Body is entirely the project macro PRESENT_DATA (see tcp.h/macros.h);
 * this function exists so the operation is also callable as a function.
 */
present_data(tp)
register struct tcpcb *tp;
{
    PRESENT_DATA(tp)
}