123190Smckusick /* 2*44375Skarels * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. 332787Sbostic * All rights reserved. 423190Smckusick * 532787Sbostic * Redistribution and use in source and binary forms are permitted 634854Sbostic * provided that the above copyright notice and this paragraph are 734854Sbostic * duplicated in all such forms and that any documentation, 834854Sbostic * advertising materials, and other materials related to such 934854Sbostic * distribution and use acknowledge that the software was developed 1034854Sbostic * by the University of California, Berkeley. The name of the 1134854Sbostic * University may not be used to endorse or promote products derived 1234854Sbostic * from this software without specific prior written permission. 1334854Sbostic * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1434854Sbostic * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1534854Sbostic * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1632787Sbostic * 17*44375Skarels * @(#)tcp_input.c 7.23 (Berkeley) 06/28/90 1823190Smckusick */ 194601Swnj 2017062Sbloom #include "param.h" 2117062Sbloom #include "systm.h" 2237320Skarels #include "malloc.h" 2317062Sbloom #include "mbuf.h" 2417062Sbloom #include "protosw.h" 2517062Sbloom #include "socket.h" 2617062Sbloom #include "socketvar.h" 2717062Sbloom #include "errno.h" 2810894Ssam 2910894Ssam #include "../net/if.h" 3010894Ssam #include "../net/route.h" 3110894Ssam 3217062Sbloom #include "in.h" 3317062Sbloom #include "in_systm.h" 3417062Sbloom #include "ip.h" 3540687Skarels #include "in_pcb.h" 3617062Sbloom #include "ip_var.h" 3717062Sbloom #include "tcp.h" 3817062Sbloom #include "tcp_fsm.h" 3917062Sbloom #include "tcp_seq.h" 4017062Sbloom #include "tcp_timer.h" 4117062Sbloom #include "tcp_var.h" 4217062Sbloom #include "tcpip.h" 4317062Sbloom #include "tcp_debug.h" 444601Swnj 45*44375Skarels #define VAN 4632098Skarels int tcprexmtthresh = 3; 47*44375Skarels int tcppredack; /* XXX debugging: times hdr predict ok for acks */ 48*44375Skarels int tcppreddat; /* XXX # times header prediction ok for data packets */ 49*44375Skarels int tcppcbcachemiss; 505267Sroot struct tcpiphdr tcp_saveti; 51*44375Skarels struct inpcb *tcp_last_inpcb = &tcb; 524601Swnj 535267Sroot struct tcpcb *tcp_newtcpcb(); 5424816Skarels 555065Swnj /* 5624816Skarels * Insert segment ti into reassembly queue of tcp with 5724816Skarels * control block tp. Return TH_FIN if reassembly now includes 5824816Skarels * a segment with FIN. The macro form does the common case inline 5924816Skarels * (segment is the next to be received on an established connection, 6024816Skarels * and the queue is empty), avoiding linkage into and removal 6124816Skarels * from the queue and repetition of various conversions. 6234278Skarels * Set DELACK for segments received in order, but ack immediately 6334278Skarels * when segments are out of order (so fast retransmit can work). 6424816Skarels */ 6524816Skarels #define TCP_REASS(tp, ti, m, so, flags) { \ 6624816Skarels if ((ti)->ti_seq == (tp)->rcv_nxt && \ 6724816Skarels (tp)->seg_next == (struct tcpiphdr *)(tp) && \ 6824816Skarels (tp)->t_state == TCPS_ESTABLISHED) { \ 6934278Skarels tp->t_flags |= TF_DELACK; \ 7024816Skarels (tp)->rcv_nxt += (ti)->ti_len; \ 7124816Skarels flags = (ti)->ti_flags & TH_FIN; \ 7230525Skarels tcpstat.tcps_rcvpack++;\ 7330525Skarels tcpstat.tcps_rcvbyte += (ti)->ti_len;\ 7424816Skarels sbappend(&(so)->so_rcv, (m)); \ 7524816Skarels sorwakeup(so); \ 7634278Skarels } else { \ 77*44375Skarels (flags) = tcp_reass((tp), (ti), (m)); \ 7834278Skarels tp->t_flags |= TF_ACKNOW; \ 7934278Skarels } \ 8024816Skarels } 8124816Skarels 82*44375Skarels tcp_reass(tp, ti, m) 8324816Skarels register struct tcpcb *tp; 8424816Skarels register struct tcpiphdr *ti; 85*44375Skarels struct mbuf *m; 8624816Skarels { 8724816Skarels register struct tcpiphdr *q; 8824816Skarels struct socket *so = tp->t_inpcb->inp_socket; 8924816Skarels int flags; 9024816Skarels 9124816Skarels /* 9224816Skarels * Call with ti==0 after become established to 9324816Skarels * force pre-ESTABLISHED data up to user socket. 9424816Skarels */ 9524816Skarels if (ti == 0) 9624816Skarels goto present; 9724816Skarels 9824816Skarels /* 9924816Skarels * Find a segment which begins after this one does. 10024816Skarels */ 10124816Skarels for (q = tp->seg_next; q != (struct tcpiphdr *)tp; 10224816Skarels q = (struct tcpiphdr *)q->ti_next) 10324816Skarels if (SEQ_GT(q->ti_seq, ti->ti_seq)) 10424816Skarels break; 10524816Skarels 10624816Skarels /* 10724816Skarels * If there is a preceding segment, it may provide some of 10824816Skarels * our data already. If so, drop the data from the incoming 10924816Skarels * segment. If it provides all of our data, drop us. 11024816Skarels */ 11124816Skarels if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { 11224816Skarels register int i; 11324816Skarels q = (struct tcpiphdr *)q->ti_prev; 11424816Skarels /* conversion to int (in i) handles seq wraparound */ 11524816Skarels i = q->ti_seq + q->ti_len - ti->ti_seq; 11624816Skarels if (i > 0) { 11730525Skarels if (i >= ti->ti_len) { 11830525Skarels tcpstat.tcps_rcvduppack++; 11930525Skarels tcpstat.tcps_rcvdupbyte += ti->ti_len; 120*44375Skarels m_freem(m); 121*44375Skarels return (0); 12230525Skarels } 123*44375Skarels m_adj(m, i); 12424816Skarels ti->ti_len -= i; 12524816Skarels ti->ti_seq += i; 12624816Skarels } 12724816Skarels q = (struct tcpiphdr *)(q->ti_next); 12824816Skarels } 12930525Skarels tcpstat.tcps_rcvoopack++; 13030525Skarels tcpstat.tcps_rcvoobyte += ti->ti_len; 131*44375Skarels REASS_MBUF(ti) = m; /* XXX */ 13224816Skarels 13324816Skarels /* 13424816Skarels * While we overlap succeeding segments trim them or, 13524816Skarels * if they are completely covered, dequeue them. 13624816Skarels */ 13724816Skarels while (q != (struct tcpiphdr *)tp) { 13824816Skarels register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; 13924816Skarels if (i <= 0) 14024816Skarels break; 14124816Skarels if (i < q->ti_len) { 14224816Skarels q->ti_seq += i; 14324816Skarels q->ti_len -= i; 144*44375Skarels m_adj(REASS_MBUF(q), i); 14524816Skarels break; 14624816Skarels } 14724816Skarels q = (struct tcpiphdr *)q->ti_next; 148*44375Skarels m = REASS_MBUF((struct tcpiphdr *)q->ti_prev); 14924816Skarels remque(q->ti_prev); 15024816Skarels m_freem(m); 15124816Skarels } 15224816Skarels 15324816Skarels /* 15424816Skarels * Stick new segment in its place. 15524816Skarels */ 15624816Skarels insque(ti, q->ti_prev); 15724816Skarels 15824816Skarels present: 15924816Skarels /* 16024816Skarels * Present data to user, advancing rcv_nxt through 16124816Skarels * completed sequence space. 16224816Skarels */ 16324816Skarels if (TCPS_HAVERCVDSYN(tp->t_state) == 0) 16424816Skarels return (0); 16524816Skarels ti = tp->seg_next; 16624816Skarels if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) 16724816Skarels return (0); 16824816Skarels if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) 16924816Skarels return (0); 17024816Skarels do { 17124816Skarels tp->rcv_nxt += ti->ti_len; 17224816Skarels flags = ti->ti_flags & TH_FIN; 17324816Skarels remque(ti); 174*44375Skarels m = REASS_MBUF(ti); 17524816Skarels ti = (struct tcpiphdr *)ti->ti_next; 17624816Skarels if (so->so_state & SS_CANTRCVMORE) 17724816Skarels m_freem(m); 17824816Skarels else 17924816Skarels sbappend(&so->so_rcv, m); 18024816Skarels } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); 18124816Skarels sorwakeup(so); 18224816Skarels return (flags); 18324816Skarels } 18424816Skarels 18524816Skarels /* 1865065Swnj * TCP input routine, follows pages 65-76 of the 1875065Swnj * protocol specification dated September, 1981 very closely. 1885065Swnj */ 18937320Skarels tcp_input(m, iphlen) 19037320Skarels register struct mbuf *m; 19137320Skarels int iphlen; 1924601Swnj { 1934924Swnj register struct tcpiphdr *ti; 194*44375Skarels register struct inpcb *inp; 1955440Swnj struct mbuf *om = 0; 1964924Swnj int len, tlen, off; 1975391Swnj register struct tcpcb *tp = 0; 1984924Swnj register int tiflags; 1994803Swnj struct socket *so; 20031721Skarels int todrop, acked, ourfinisacked, needoutput = 0; 2015267Sroot short ostate; 2026028Sroot struct in_addr laddr; 20310769Ssam int dropsocket = 0; 20430525Skarels int iss = 0; 2054924Swnj 20630525Skarels tcpstat.tcps_rcvtotal++; 2074924Swnj /* 2085244Sroot * Get IP and TCP header together in first mbuf. 2095244Sroot * Note: IP leaves IP header in first mbuf. 2104924Swnj */ 2115020Sroot ti = mtod(m, struct tcpiphdr *); 21237320Skarels if (iphlen > sizeof (struct ip)) 21337320Skarels ip_stripoptions(m, (struct mbuf *)0); 214*44375Skarels if (m->m_len < sizeof (struct tcpiphdr)) { 2155307Sroot if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { 21630525Skarels tcpstat.tcps_rcvshort++; 2175307Sroot return; 2185085Swnj } 2195085Swnj ti = mtod(m, struct tcpiphdr *); 2205085Swnj } 2214601Swnj 2224601Swnj /* 2235244Sroot * Checksum extended TCP header and data. 2244601Swnj */ 2254924Swnj tlen = ((struct ip *)ti)->ip_len; 2264924Swnj len = sizeof (struct ip) + tlen; 22737320Skarels ti->ti_next = ti->ti_prev = 0; 22837320Skarels ti->ti_x1 = 0; 22937320Skarels ti->ti_len = (u_short)tlen; 230*44375Skarels HTONS(ti->ti_len); 23137320Skarels if (ti->ti_sum = in_cksum(m, len)) { 23237320Skarels tcpstat.tcps_rcvbadsum++; 23337320Skarels goto drop; 2344601Swnj } 2354601Swnj 2364601Swnj /* 2375244Sroot * Check that TCP offset makes sense, 238*44375Skarels * pull out TCP options and adjust length. XXX 2394601Swnj */ 2404924Swnj off = ti->ti_off << 2; 2415231Swnj if (off < sizeof (struct tcphdr) || off > tlen) { 24230525Skarels tcpstat.tcps_rcvbadoff++; 2435085Swnj goto drop; 2444924Swnj } 2456211Swnj tlen -= off; 2466211Swnj ti->ti_len = tlen; 2475440Swnj if (off > sizeof (struct tcphdr)) { 24824816Skarels if (m->m_len < sizeof(struct ip) + off) { 24924816Skarels if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { 25030525Skarels tcpstat.tcps_rcvshort++; 25124816Skarels return; 25224816Skarels } 25324816Skarels ti = mtod(m, struct tcpiphdr *); 2545440Swnj } 2559642Ssam om = m_get(M_DONTWAIT, MT_DATA); 2565440Swnj if (om == 0) 2575440Swnj goto drop; 2585440Swnj om->m_len = off - sizeof (struct tcphdr); 2595440Swnj { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr); 2606161Ssam bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len); 2615440Swnj m->m_len -= om->m_len; 26237320Skarels m->m_pkthdr.len -= om->m_len; 2636161Ssam bcopy(op+om->m_len, op, 2646161Ssam (unsigned)(m->m_len-sizeof (struct tcpiphdr))); 2655440Swnj } 2665440Swnj } 2675065Swnj tiflags = ti->ti_flags; 2684924Swnj 2696093Sroot /* 2705244Sroot * Convert TCP protocol specific fields to host format. 2715085Swnj */ 272*44375Skarels NTOHL(ti->ti_seq); 273*44375Skarels NTOHL(ti->ti_ack); 274*44375Skarels NTOHS(ti->ti_win); 275*44375Skarels NTOHS(ti->ti_urp); 2765085Swnj 2775085Swnj /* 2788271Sroot * Locate pcb for segment. 2794924Swnj */ 28030525Skarels findpcb: 281*44375Skarels inp = tcp_last_inpcb; 282*44375Skarels if (inp->inp_lport != ti->ti_dport || 283*44375Skarels inp->inp_fport != ti->ti_sport || 284*44375Skarels inp->inp_faddr.s_addr != ti->ti_src.s_addr || 285*44375Skarels inp->inp_laddr.s_addr != ti->ti_dst.s_addr) { 286*44375Skarels inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport, 287*44375Skarels ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD); 288*44375Skarels if (inp) 289*44375Skarels tcp_last_inpcb = inp; 290*44375Skarels ++tcppcbcachemiss; 291*44375Skarels } 2925065Swnj 2935065Swnj /* 2945065Swnj * If the state is CLOSED (i.e., TCB does not exist) then 2955244Sroot * all data in the incoming segment is discarded. 29632098Skarels * If the TCB exists but is in CLOSED state, it is embryonic, 29732098Skarels * but should either do a listen or a connect soon. 2985065Swnj */ 2995300Sroot if (inp == 0) 3005085Swnj goto dropwithreset; 3015065Swnj tp = intotcpcb(inp); 3025300Sroot if (tp == 0) 3035085Swnj goto dropwithreset; 30432098Skarels if (tp->t_state == TCPS_CLOSED) 30532098Skarels goto drop; 3065109Swnj so = inp->inp_socket; 307*44375Skarels if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { 308*44375Skarels if (so->so_options & SO_DEBUG) { 309*44375Skarels ostate = tp->t_state; 310*44375Skarels tcp_saveti = *ti; 311*44375Skarels } 312*44375Skarels if (so->so_options & SO_ACCEPTCONN) { 313*44375Skarels so = sonewconn(so, 0); 314*44375Skarels if (so == 0) 315*44375Skarels goto drop; 316*44375Skarels /* 317*44375Skarels * This is ugly, but .... 318*44375Skarels * 319*44375Skarels * Mark socket as temporary until we're 320*44375Skarels * committed to keeping it. The code at 321*44375Skarels * ``drop'' and ``dropwithreset'' check the 322*44375Skarels * flag dropsocket to see if the temporary 323*44375Skarels * socket created here should be discarded. 324*44375Skarels * We mark the socket as discardable until 325*44375Skarels * we're committed to it below in TCPS_LISTEN. 326*44375Skarels */ 327*44375Skarels dropsocket++; 328*44375Skarels inp = (struct inpcb *)so->so_pcb; 329*44375Skarels inp->inp_laddr = ti->ti_dst; 330*44375Skarels inp->inp_lport = ti->ti_dport; 331*44375Skarels #if BSD>=43 332*44375Skarels inp->inp_options = ip_srcroute(); 333*44375Skarels #endif 334*44375Skarels tp = intotcpcb(inp); 335*44375Skarels tp->t_state = TCPS_LISTEN; 336*44375Skarels } 3375267Sroot } 3384601Swnj 3394601Swnj /* 3405162Swnj * Segment received on connection. 3415162Swnj * Reset idle time and keep-alive timer. 3425162Swnj */ 3435162Swnj tp->t_idle = 0; 34433745Skarels tp->t_timer[TCPT_KEEP] = tcp_keepidle; 3455162Swnj 346*44375Skarels #ifndef VAN 3475162Swnj /* 34817272Skarels * Process options if not in LISTEN state, 34917272Skarels * else do it below (after getting remote address). 3505440Swnj */ 35117272Skarels if (om && tp->t_state != TCPS_LISTEN) { 35217272Skarels tcp_dooptions(tp, om, ti); 3535440Swnj om = 0; 3545440Swnj } 355*44375Skarels #endif VAN 356*44375Skarels /* 357*44375Skarels * Header prediction: check for the two common cases 358*44375Skarels * of a uni-directional data xfer. If the packet has 359*44375Skarels * no control flags, is in-sequence, the window didn't 360*44375Skarels * change and we're not retransmitting, it's a 361*44375Skarels * candidate. If the length is zero and the ack moved 362*44375Skarels * forward, we're the sender side of the xfer. Just 363*44375Skarels * free the data acked & wake any higher level process 364*44375Skarels * that was blocked waiting for space. If the length 365*44375Skarels * is non-zero and the ack didn't move, we're the 366*44375Skarels * receiver side. If we're getting packets in-order 367*44375Skarels * (the reassembly queue is empty), add the data to 368*44375Skarels * the socket buffer and note that we need a delayed ack. 369*44375Skarels */ 370*44375Skarels if (tp->t_state == TCPS_ESTABLISHED && 371*44375Skarels (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && 372*44375Skarels ti->ti_seq == tp->rcv_nxt && 373*44375Skarels ti->ti_win && ti->ti_win == tp->snd_wnd && 374*44375Skarels tp->snd_nxt == tp->snd_max) { 375*44375Skarels if (ti->ti_len == 0) { 376*44375Skarels if (SEQ_GT(ti->ti_ack, tp->snd_una) && 377*44375Skarels SEQ_LEQ(ti->ti_ack, tp->snd_max) && 378*44375Skarels tp->snd_cwnd >= tp->snd_wnd) { 379*44375Skarels /* 380*44375Skarels * this is a pure ack for outstanding data. 381*44375Skarels */ 382*44375Skarels ++tcppredack; 383*44375Skarels if (tp->t_rtt && SEQ_GT(ti->ti_ack,tp->t_rtseq)) 384*44375Skarels tcp_xmit_timer(tp); 385*44375Skarels acked = ti->ti_ack - tp->snd_una; 386*44375Skarels tcpstat.tcps_rcvackpack++; 387*44375Skarels tcpstat.tcps_rcvackbyte += acked; 388*44375Skarels sbdrop(&so->so_snd, acked); 389*44375Skarels tp->snd_una = ti->ti_ack; 390*44375Skarels m_freem(m); 3915440Swnj 392*44375Skarels /* 393*44375Skarels * If all outstanding data are acked, stop 394*44375Skarels * retransmit timer, otherwise restart timer 395*44375Skarels * using current (possibly backed-off) value. 396*44375Skarels * If process is waiting for space, 397*44375Skarels * wakeup/selwakeup/signal. If data 398*44375Skarels * are ready to send, let tcp_output 399*44375Skarels * decide between more output or persist. 400*44375Skarels */ 401*44375Skarels if (tp->snd_una == tp->snd_max) 402*44375Skarels tp->t_timer[TCPT_REXMT] = 0; 403*44375Skarels else if (tp->t_timer[TCPT_PERSIST] == 0) 404*44375Skarels tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 405*44375Skarels 406*44375Skarels if (so->so_snd.sb_flags & SB_NOTIFY) 407*44375Skarels sowwakeup(so); 408*44375Skarels if (so->so_snd.sb_cc) 409*44375Skarels (void) tcp_output(tp); 410*44375Skarels return; 411*44375Skarels } 412*44375Skarels } else if (ti->ti_ack == tp->snd_una && 413*44375Skarels tp->seg_next == (struct tcpiphdr *)tp && 414*44375Skarels ti->ti_len <= sbspace(&so->so_rcv)) { 415*44375Skarels /* 416*44375Skarels * this is a pure, in-sequence data packet 417*44375Skarels * with nothing on the reassembly queue and 418*44375Skarels * we have enough buffer space to take it. 419*44375Skarels */ 420*44375Skarels ++tcppreddat; 421*44375Skarels tp->rcv_nxt += ti->ti_len; 422*44375Skarels tcpstat.tcps_rcvpack++; 423*44375Skarels tcpstat.tcps_rcvbyte += ti->ti_len; 424*44375Skarels /* 425*44375Skarels * Drop TCP and IP headers then add data 426*44375Skarels * to socket buffer 427*44375Skarels */ 428*44375Skarels m->m_data += sizeof(struct tcpiphdr); 429*44375Skarels m->m_len -= sizeof(struct tcpiphdr); 430*44375Skarels sbappend(&so->so_rcv, m); 431*44375Skarels sorwakeup(so); 432*44375Skarels tp->t_flags |= TF_DELACK; 433*44375Skarels return; 434*44375Skarels } 435*44375Skarels } 436*44375Skarels 4375440Swnj /* 438*44375Skarels * Drop TCP and IP headers; TCP options were dropped above. 439*44375Skarels */ 440*44375Skarels m->m_data += sizeof(struct tcpiphdr); 441*44375Skarels m->m_len -= sizeof(struct tcpiphdr); 442*44375Skarels 443*44375Skarels /* 4445085Swnj * Calculate amount of space in receive window, 4455085Swnj * and then do TCP input processing. 44624816Skarels * Receive window is amount of space in rcv queue, 44724816Skarels * but not less than advertised window. 4484601Swnj */ 44925939Skarels { int win; 4504601Swnj 45125939Skarels win = sbspace(&so->so_rcv); 45225939Skarels if (win < 0) 45325939Skarels win = 0; 45437320Skarels tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); 45525939Skarels } 45625939Skarels 4574601Swnj switch (tp->t_state) { 4584601Swnj 4595065Swnj /* 4605065Swnj * If the state is LISTEN then ignore segment if it contains an RST. 4615065Swnj * If the segment contains an ACK then it is bad and send a RST. 4625065Swnj * If it does not contain a SYN then it is not interesting; drop it. 46325197Skarels * Don't bother responding if the destination was a broadcast. 4645085Swnj * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial 4655065Swnj * tp->iss, and send a segment: 4665085Swnj * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 4675065Swnj * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 4685065Swnj * Fill in remote peer address fields if not previously specified. 4695065Swnj * Enter SYN_RECEIVED state, and process any other fields of this 4705244Sroot * segment in this state. 4715065Swnj */ 4728271Sroot case TCPS_LISTEN: { 47310145Ssam struct mbuf *am; 4748271Sroot register struct sockaddr_in *sin; 4758271Sroot 4765065Swnj if (tiflags & TH_RST) 4775065Swnj goto drop; 4785300Sroot if (tiflags & TH_ACK) 4795085Swnj goto dropwithreset; 4805300Sroot if ((tiflags & TH_SYN) == 0) 4815065Swnj goto drop; 48237320Skarels if (m->m_flags & M_BCAST) 48325197Skarels goto drop; 484*44375Skarels am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ 48510145Ssam if (am == NULL) 48610145Ssam goto drop; 48710145Ssam am->m_len = sizeof (struct sockaddr_in); 4888599Sroot sin = mtod(am, struct sockaddr_in *); 4898271Sroot sin->sin_family = AF_INET; 49037320Skarels sin->sin_len = sizeof(*sin); 4918271Sroot sin->sin_addr = ti->ti_src; 4928271Sroot sin->sin_port = ti->ti_sport; 4936028Sroot laddr = inp->inp_laddr; 49410145Ssam if (inp->inp_laddr.s_addr == INADDR_ANY) 4956028Sroot inp->inp_laddr = ti->ti_dst; 4968599Sroot if (in_pcbconnect(inp, am)) { 4976028Sroot inp->inp_laddr = laddr; 4988716Sroot (void) m_free(am); 4995244Sroot goto drop; 5006028Sroot } 5018716Sroot (void) m_free(am); 5025244Sroot tp->t_template = tcp_template(tp); 5035244Sroot if (tp->t_template == 0) { 50426386Skarels tp = tcp_drop(tp, ENOBUFS); 50517264Skarels dropsocket = 0; /* socket is already gone */ 5065244Sroot goto drop; 5075244Sroot } 50817272Skarels if (om) { 50917272Skarels tcp_dooptions(tp, om, ti); 51017272Skarels om = 0; 51117272Skarels } 51230525Skarels if (iss) 51330525Skarels tp->iss = iss; 51430525Skarels else 51530525Skarels tp->iss = tcp_iss; 51630525Skarels tcp_iss += TCP_ISSINCR/2; 5175065Swnj tp->irs = ti->ti_seq; 5185085Swnj tcp_sendseqinit(tp); 5195085Swnj tcp_rcvseqinit(tp); 52025939Skarels tp->t_flags |= TF_ACKNOW; 5215065Swnj tp->t_state = TCPS_SYN_RECEIVED; 52233745Skarels tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 52310769Ssam dropsocket = 0; /* committed to socket */ 52430525Skarels tcpstat.tcps_accepts++; 5255085Swnj goto trimthenstep6; 5268271Sroot } 5274601Swnj 5285065Swnj /* 5295065Swnj * If the state is SYN_SENT: 5305065Swnj * if seg contains an ACK, but not for our SYN, drop the input. 5315065Swnj * if seg contains a RST, then drop the connection. 5325065Swnj * if seg does not contain SYN, then drop it. 5335065Swnj * Otherwise this is an acceptable SYN segment 5345065Swnj * initialize tp->rcv_nxt and tp->irs 5355065Swnj * if seg contains ack then advance tp->snd_una 5365065Swnj * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 5375065Swnj * arrange for segment to be acked (eventually) 5385065Swnj * continue processing rest of data/controls, beginning with URG 5395065Swnj */ 5405065Swnj case TCPS_SYN_SENT: 541*44375Skarels #ifdef VAN 542*44375Skarels if (om) { 543*44375Skarels tcp_dooptions(tp, om, ti); 544*44375Skarels om = 0; 545*44375Skarels } 546*44375Skarels #endif VAN 5475065Swnj if ((tiflags & TH_ACK) && 54824816Skarels (SEQ_LEQ(ti->ti_ack, tp->iss) || 5495231Swnj SEQ_GT(ti->ti_ack, tp->snd_max))) 5505085Swnj goto dropwithreset; 5515065Swnj if (tiflags & TH_RST) { 55210394Ssam if (tiflags & TH_ACK) 55310394Ssam tp = tcp_drop(tp, ECONNREFUSED); 5545065Swnj goto drop; 5554601Swnj } 5565065Swnj if ((tiflags & TH_SYN) == 0) 5575065Swnj goto drop; 55830974Skarels if (tiflags & TH_ACK) { 55930974Skarels tp->snd_una = ti->ti_ack; 56030974Skarels if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 56130974Skarels tp->snd_nxt = tp->snd_una; 56230974Skarels } 5635244Sroot tp->t_timer[TCPT_REXMT] = 0; 5645065Swnj tp->irs = ti->ti_seq; 5655085Swnj tcp_rcvseqinit(tp); 5665085Swnj tp->t_flags |= TF_ACKNOW; 56730974Skarels if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { 56830525Skarels tcpstat.tcps_connects++; 5695244Sroot soisconnected(so); 5705065Swnj tp->t_state = TCPS_ESTABLISHED; 571*44375Skarels (void) tcp_reass(tp, (struct tcpiphdr *)0, 572*44375Skarels (struct mbuf *)0); 57332098Skarels /* 57432098Skarels * if we didn't have to retransmit the SYN, 57532098Skarels * use its rtt as our initial srtt & rtt var. 57632098Skarels */ 577*44375Skarels if (tp->t_rtt) 578*44375Skarels tcp_xmit_timer(tp); 5795162Swnj } else 5805085Swnj tp->t_state = TCPS_SYN_RECEIVED; 5815085Swnj 5825085Swnj trimthenstep6: 5835085Swnj /* 5845231Swnj * Advance ti->ti_seq to correspond to first data byte. 5855085Swnj * If data, trim to stay within window, 5865085Swnj * dropping FIN if necessary. 5875085Swnj */ 5885231Swnj ti->ti_seq++; 5895085Swnj if (ti->ti_len > tp->rcv_wnd) { 5905085Swnj todrop = ti->ti_len - tp->rcv_wnd; 5915085Swnj m_adj(m, -todrop); 5925085Swnj ti->ti_len = tp->rcv_wnd; 59325939Skarels tiflags &= ~TH_FIN; 59430525Skarels tcpstat.tcps_rcvpackafterwin++; 59530525Skarels tcpstat.tcps_rcvbyteafterwin += todrop; 5965065Swnj } 5975263Swnj tp->snd_wl1 = ti->ti_seq - 1; 59825939Skarels tp->rcv_up = ti->ti_seq; 5995085Swnj goto step6; 600*44375Skarels #ifdef VAN 601*44375Skarels 602*44375Skarels default: 603*44375Skarels if (om) { 604*44375Skarels tcp_dooptions(tp, om, ti); 605*44375Skarels om = 0; 606*44375Skarels } 607*44375Skarels #endif VAN 6085065Swnj } 6094601Swnj 6105065Swnj /* 61130525Skarels * States other than LISTEN or SYN_SENT. 61230525Skarels * First check that at least some bytes of segment are within 61330525Skarels * receive window. If segment begins before rcv_nxt, 61430525Skarels * drop leading data (and SYN); if nothing left, just ack. 61516222Skarels */ 61630525Skarels todrop = tp->rcv_nxt - ti->ti_seq; 61730525Skarels if (todrop > 0) { 61830525Skarels if (tiflags & TH_SYN) { 61930525Skarels tiflags &= ~TH_SYN; 62030525Skarels ti->ti_seq++; 62130525Skarels if (ti->ti_urp > 1) 62230525Skarels ti->ti_urp--; 62330525Skarels else 62430525Skarels tiflags &= ~TH_URG; 62530525Skarels todrop--; 62630525Skarels } 62730525Skarels if (todrop > ti->ti_len || 62830525Skarels todrop == ti->ti_len && (tiflags&TH_FIN) == 0) { 62933745Skarels tcpstat.tcps_rcvduppack++; 63033745Skarels tcpstat.tcps_rcvdupbyte += ti->ti_len; 63131730Skarels /* 63233745Skarels * If segment is just one to the left of the window, 63333745Skarels * check two special cases: 63433745Skarels * 1. Don't toss RST in response to 4.2-style keepalive. 63533745Skarels * 2. If the only thing to drop is a FIN, we can drop 63633745Skarels * it, but check the ACK or we will get into FIN 63733745Skarels * wars if our FINs crossed (both CLOSING). 63833745Skarels * In either case, send ACK to resynchronize, 63933745Skarels * but keep on processing for RST or ACK. 64031730Skarels */ 64133745Skarels if ((tiflags & TH_FIN && todrop == ti->ti_len + 1) 64233745Skarels #ifdef TCP_COMPAT_42 64333745Skarels || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1) 64431730Skarels #endif 64533745Skarels ) { 64633745Skarels todrop = ti->ti_len; 64733745Skarels tiflags &= ~TH_FIN; 64833745Skarels tp->t_flags |= TF_ACKNOW; 64933745Skarels } else 65033745Skarels goto dropafterack; 65132034Skarels } else { 65232034Skarels tcpstat.tcps_rcvpartduppack++; 65332034Skarels tcpstat.tcps_rcvpartdupbyte += todrop; 65430525Skarels } 65530525Skarels m_adj(m, todrop); 65630525Skarels ti->ti_seq += todrop; 65730525Skarels ti->ti_len -= todrop; 65830525Skarels if (ti->ti_urp > todrop) 65930525Skarels ti->ti_urp -= todrop; 66030525Skarels else { 66130525Skarels tiflags &= ~TH_URG; 66230525Skarels ti->ti_urp = 0; 66330525Skarels } 66416222Skarels } 66516222Skarels 66632612Skarels /* 66733745Skarels * If new data are received on a connection after the 66832612Skarels * user processes are gone, then RST the other end. 66932612Skarels */ 67032612Skarels if ((so->so_state & SS_NOFDREF) && 67132612Skarels tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { 67232612Skarels tp = tcp_close(tp); 67332612Skarels tcpstat.tcps_rcvafterclose++; 67432612Skarels goto dropwithreset; 67532612Skarels } 67632612Skarels 67733445Skarels /* 67833445Skarels * If segment ends after window, drop trailing data 67933445Skarels * (and PUSH and FIN); if nothing left, just ACK. 68033445Skarels */ 68133445Skarels todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); 68233445Skarels if (todrop > 0) { 68333445Skarels tcpstat.tcps_rcvpackafterwin++; 68433445Skarels if (todrop >= ti->ti_len) { 68530525Skarels tcpstat.tcps_rcvbyteafterwin += ti->ti_len; 68633445Skarels /* 68733445Skarels * If a new connection request is received 68833445Skarels * while in TIME_WAIT, drop the old connection 68933445Skarels * and start over if the sequence numbers 69033445Skarels * are above the previous ones. 69133445Skarels */ 69233445Skarels if (tiflags & TH_SYN && 69333445Skarels tp->t_state == TCPS_TIME_WAIT && 69433445Skarels SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { 69533445Skarels iss = tp->rcv_nxt + TCP_ISSINCR; 696*44375Skarels tp = tcp_close(tp); 69733445Skarels goto findpcb; 69833445Skarels } 69933445Skarels /* 70033445Skarels * If window is closed can only take segments at 70133445Skarels * window edge, and have to drop data and PUSH from 70233445Skarels * incoming segments. Continue processing, but 70333445Skarels * remember to ack. Otherwise, drop segment 70433445Skarels * and ack. 70533445Skarels */ 70633445Skarels if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { 70733445Skarels tp->t_flags |= TF_ACKNOW; 70830525Skarels tcpstat.tcps_rcvwinprobe++; 70933445Skarels } else 7105065Swnj goto dropafterack; 71133445Skarels } else 71230525Skarels tcpstat.tcps_rcvbyteafterwin += todrop; 71333445Skarels m_adj(m, -todrop); 71433445Skarels ti->ti_len -= todrop; 71533445Skarels tiflags &= ~(TH_PUSH|TH_FIN); 7165065Swnj } 7174601Swnj 7185065Swnj /* 7195065Swnj * If the RST bit is set examine the state: 7205065Swnj * SYN_RECEIVED STATE: 7215065Swnj * If passive open, return to LISTEN state. 7225065Swnj * If active open, inform user that connection was refused. 7235065Swnj * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 7245065Swnj * Inform user that connection was reset, and close tcb. 7255065Swnj * CLOSING, LAST_ACK, TIME_WAIT STATES 7265065Swnj * Close the tcb. 7275065Swnj */ 7285065Swnj if (tiflags&TH_RST) switch (tp->t_state) { 7295267Sroot 7305065Swnj case TCPS_SYN_RECEIVED: 73133745Skarels so->so_error = ECONNREFUSED; 73233745Skarels goto close; 7334601Swnj 7345065Swnj case TCPS_ESTABLISHED: 7355065Swnj case TCPS_FIN_WAIT_1: 7365065Swnj case TCPS_FIN_WAIT_2: 7375065Swnj case TCPS_CLOSE_WAIT: 73833745Skarels so->so_error = ECONNRESET; 73933745Skarels close: 74033745Skarels tp->t_state = TCPS_CLOSED; 74133745Skarels tcpstat.tcps_drops++; 74233745Skarels tp = tcp_close(tp); 7435065Swnj goto drop; 7445065Swnj 7455065Swnj case TCPS_CLOSING: 7465065Swnj case TCPS_LAST_ACK: 7475065Swnj case TCPS_TIME_WAIT: 74810394Ssam tp = tcp_close(tp); 7495065Swnj goto drop; 7504601Swnj } 7514601Swnj 7524601Swnj /* 7535065Swnj * If a SYN is in the window, then this is an 7545065Swnj * error and we send an RST and drop the connection. 7554601Swnj */ 7565065Swnj if (tiflags & TH_SYN) { 75710394Ssam tp = tcp_drop(tp, ECONNRESET); 7585085Swnj goto dropwithreset; 7594601Swnj } 7604601Swnj 7614601Swnj /* 7625065Swnj * If the ACK bit is off we drop the segment and return. 7634601Swnj */ 7645085Swnj if ((tiflags & TH_ACK) == 0) 7655065Swnj goto drop; 7665065Swnj 7675065Swnj /* 7685065Swnj * Ack processing. 7695065Swnj */ 7704601Swnj switch (tp->t_state) { 7714601Swnj 7725065Swnj /* 7735065Swnj * In SYN_RECEIVED state if the ack ACKs our SYN then enter 77431721Skarels * ESTABLISHED state and continue processing, otherwise 7755065Swnj * send an RST. 7765065Swnj */ 7775065Swnj case TCPS_SYN_RECEIVED: 7785085Swnj if (SEQ_GT(tp->snd_una, ti->ti_ack) || 7795231Swnj SEQ_GT(ti->ti_ack, tp->snd_max)) 7805085Swnj goto dropwithreset; 78130525Skarels tcpstat.tcps_connects++; 7825085Swnj soisconnected(so); 7835085Swnj tp->t_state = TCPS_ESTABLISHED; 784*44375Skarels (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); 7855244Sroot tp->snd_wl1 = ti->ti_seq - 1; 7865085Swnj /* fall into ... */ 7874601Swnj 7885065Swnj /* 7895065Swnj * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 7905065Swnj * ACKs. If the ack is in the range 7915231Swnj * tp->snd_una < ti->ti_ack <= tp->snd_max 7925065Swnj * then advance tp->snd_una to ti->ti_ack and drop 7935065Swnj * data from the retransmission queue. If this ACK reflects 7945065Swnj * more up to date window information we update our window information. 7955065Swnj */ 7965065Swnj case TCPS_ESTABLISHED: 7975065Swnj case TCPS_FIN_WAIT_1: 7985065Swnj case TCPS_FIN_WAIT_2: 7995065Swnj case TCPS_CLOSE_WAIT: 8005065Swnj case TCPS_CLOSING: 8015244Sroot case TCPS_LAST_ACK: 8025244Sroot case TCPS_TIME_WAIT: 8035085Swnj 80430525Skarels if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { 80532098Skarels if (ti->ti_len == 0 && ti->ti_win == tp->snd_wnd) { 80630525Skarels tcpstat.tcps_rcvdupack++; 80732098Skarels /* 808*44375Skarels * If we have outstanding data (other than 809*44375Skarels * a window probe), this is a completely 81032098Skarels * duplicate ack (ie, window info didn't 81132098Skarels * change), the ack is the biggest we've 81232098Skarels * seen and we've seen exactly our rexmt 81332098Skarels * threshhold of them, assume a packet 81432098Skarels * has been dropped and retransmit it. 81532098Skarels * Kludge snd_nxt & the congestion 81632098Skarels * window so we send only this one 817*44375Skarels * packet. 818*44375Skarels * 819*44375Skarels * We know we're losing at the current 820*44375Skarels * window size so do congestion avoidance 821*44375Skarels * (set ssthresh to half the current window 822*44375Skarels * and pull our congestion window back to 823*44375Skarels * the new ssthresh). 824*44375Skarels * 825*44375Skarels * Dup acks mean that packets have left the 826*44375Skarels * network (they're now cached at the receiver) 827*44375Skarels * so bump cwnd by the amount in the receiver 828*44375Skarels * to keep a constant cwnd packets in the 829*44375Skarels * network. 83032098Skarels */ 83132098Skarels if (tp->t_timer[TCPT_REXMT] == 0 || 83232098Skarels ti->ti_ack != tp->snd_una) 83332098Skarels tp->t_dupacks = 0; 83432098Skarels else if (++tp->t_dupacks == tcprexmtthresh) { 83532098Skarels tcp_seq onxt = tp->snd_nxt; 83632376Skarels u_int win = 83737320Skarels min(tp->snd_wnd, tp->snd_cwnd) / 2 / 83832376Skarels tp->t_maxseg; 83932098Skarels 84032376Skarels if (win < 2) 84132376Skarels win = 2; 84232376Skarels tp->snd_ssthresh = win * tp->t_maxseg; 84332098Skarels tp->t_timer[TCPT_REXMT] = 0; 84432098Skarels tp->t_rtt = 0; 84532098Skarels tp->snd_nxt = ti->ti_ack; 84632098Skarels tp->snd_cwnd = tp->t_maxseg; 84732098Skarels (void) tcp_output(tp); 848*44375Skarels tp->snd_cwnd = tp->snd_ssthresh + 849*44375Skarels tp->t_maxseg * tp->t_dupacks; 85032098Skarels if (SEQ_GT(onxt, tp->snd_nxt)) 85132098Skarels tp->snd_nxt = onxt; 85232098Skarels goto drop; 853*44375Skarels } else if (tp->t_dupacks > tcprexmtthresh) { 854*44375Skarels tp->snd_cwnd += tp->t_maxseg; 855*44375Skarels (void) tcp_output(tp); 856*44375Skarels goto drop; 85732098Skarels } 85832098Skarels } else 85932098Skarels tp->t_dupacks = 0; 8605065Swnj break; 86130525Skarels } 862*44375Skarels /* 863*44375Skarels * If the congestion window was inflated to account 864*44375Skarels * for the other side's cached packets, retract it. 865*44375Skarels */ 866*44375Skarels if (tp->t_dupacks > tcprexmtthresh && 867*44375Skarels tp->snd_cwnd > tp->snd_ssthresh) 868*44375Skarels tp->snd_cwnd = tp->snd_ssthresh; 86932098Skarels tp->t_dupacks = 0; 87030525Skarels if (SEQ_GT(ti->ti_ack, tp->snd_max)) { 87130525Skarels tcpstat.tcps_rcvacktoomuch++; 8725065Swnj goto dropafterack; 87330525Skarels } 8745085Swnj acked = ti->ti_ack - tp->snd_una; 87530525Skarels tcpstat.tcps_rcvackpack++; 87630525Skarels tcpstat.tcps_rcvackbyte += acked; 8775951Swnj 8785951Swnj /* 8795951Swnj * If transmit timer is running and timed sequence 8805951Swnj * number was acked, update smoothed round trip time. 88132034Skarels * Since we now have an rtt measurement, cancel the 88232034Skarels * timer backoff (cf., Phil Karn's retransmit alg.). 88332034Skarels * Recompute the initial retransmit timer. 8845951Swnj */ 885*44375Skarels if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) 886*44375Skarels tcp_xmit_timer(tp); 88731726Skarels 88826824Skarels /* 88926824Skarels * If all outstanding data is acked, stop retransmit 89026824Skarels * timer and remember to restart (more output or persist). 89126824Skarels * If there is more data to be acked, restart retransmit 89232034Skarels * timer, using current (possibly backed-off) value. 89326824Skarels */ 89426824Skarels if (ti->ti_ack == tp->snd_max) { 8955244Sroot tp->t_timer[TCPT_REXMT] = 0; 89626824Skarels needoutput = 1; 89732034Skarels } else if (tp->t_timer[TCPT_PERSIST] == 0) 89832034Skarels tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 89917360Skarels /* 90032098Skarels * When new data is acked, open the congestion window. 90132098Skarels * If the window gives us less than ssthresh packets 90232098Skarels * in flight, open exponentially (maxseg per packet). 903*44375Skarels * Otherwise open linearly: maxseg per window 904*44375Skarels * (maxseg^2 / cwnd per packet), plus a constant 905*44375Skarels * fraction of a packet (maxseg/8) to help larger windows 906*44375Skarels * open quickly enough. 90717360Skarels */ 90832098Skarels { 909*44375Skarels register u_int cw = tp->snd_cwnd; 910*44375Skarels register u_int incr = tp->t_maxseg; 91132098Skarels 912*44375Skarels if (cw > tp->snd_ssthresh) 913*44375Skarels incr = incr * incr / cw + incr / 8; 914*44375Skarels tp->snd_cwnd = min(cw + incr, TCP_MAXWIN); 91532098Skarels } 9165307Sroot if (acked > so->so_snd.sb_cc) { 91715386Ssam tp->snd_wnd -= so->so_snd.sb_cc; 91826386Skarels sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); 91931721Skarels ourfinisacked = 1; 9205307Sroot } else { 9216161Ssam sbdrop(&so->so_snd, acked); 9225307Sroot tp->snd_wnd -= acked; 92331721Skarels ourfinisacked = 0; 9245307Sroot } 925*44375Skarels if (so->so_snd.sb_flags & SB_NOTIFY) 926*44375Skarels sowwakeup(so); 9275231Swnj tp->snd_una = ti->ti_ack; 9285357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 9295357Sroot tp->snd_nxt = tp->snd_una; 9305162Swnj 9314601Swnj switch (tp->t_state) { 9324601Swnj 9335065Swnj /* 9345065Swnj * In FIN_WAIT_1 STATE in addition to the processing 9355065Swnj * for the ESTABLISHED state if our FIN is now acknowledged 9365085Swnj * then enter FIN_WAIT_2. 9375065Swnj */ 9385065Swnj case TCPS_FIN_WAIT_1: 9395896Swnj if (ourfinisacked) { 9405896Swnj /* 9415896Swnj * If we can't receive any more 9425896Swnj * data, then closing user can proceed. 94324816Skarels * Starting the timer is contrary to the 94424816Skarels * specification, but if we don't get a FIN 94524816Skarels * we'll hang forever. 9465896Swnj */ 94724816Skarels if (so->so_state & SS_CANTRCVMORE) { 9485896Swnj soisdisconnected(so); 94933745Skarels tp->t_timer[TCPT_2MSL] = tcp_maxidle; 95024816Skarels } 9515085Swnj tp->t_state = TCPS_FIN_WAIT_2; 9525896Swnj } 9534601Swnj break; 9544601Swnj 9555065Swnj /* 9565065Swnj * In CLOSING STATE in addition to the processing for 9575065Swnj * the ESTABLISHED state if the ACK acknowledges our FIN 9585065Swnj * then enter the TIME-WAIT state, otherwise ignore 9595065Swnj * the segment. 9605065Swnj */ 9615065Swnj case TCPS_CLOSING: 9625244Sroot if (ourfinisacked) { 9635065Swnj tp->t_state = TCPS_TIME_WAIT; 9645244Sroot tcp_canceltimers(tp); 9655244Sroot tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 9665244Sroot soisdisconnected(so); 9675244Sroot } 9685244Sroot break; 9694601Swnj 9705065Swnj /* 97131743Skarels * In LAST_ACK, we may still be waiting for data to drain 97231743Skarels * and/or to be acked, as well as for the ack of our FIN. 97331743Skarels * If our FIN is now acknowledged, delete the TCB, 97431743Skarels * enter the closed state and return. 9755065Swnj */ 9765065Swnj case TCPS_LAST_ACK: 97731743Skarels if (ourfinisacked) { 97810394Ssam tp = tcp_close(tp); 97931743Skarels goto drop; 98031743Skarels } 98131743Skarels break; 9824601Swnj 9835065Swnj /* 9845065Swnj * In TIME_WAIT state the only thing that should arrive 9855065Swnj * is a retransmission of the remote FIN. Acknowledge 9865065Swnj * it and restart the finack timer. 9875065Swnj */ 9885065Swnj case TCPS_TIME_WAIT: 9895162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 9905065Swnj goto dropafterack; 9914601Swnj } 9925085Swnj } 9934601Swnj 9945065Swnj step6: 9955065Swnj /* 9965244Sroot * Update window information. 99725939Skarels * Don't look at window if no ACK: TAC's send garbage on first SYN. 9985244Sroot */ 99925939Skarels if ((tiflags & TH_ACK) && 100025939Skarels (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && 10015391Swnj (SEQ_LT(tp->snd_wl2, ti->ti_ack) || 100225939Skarels tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd))) { 100330525Skarels /* keep track of pure window updates */ 100430525Skarels if (ti->ti_len == 0 && 100532098Skarels tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd) 100630525Skarels tcpstat.tcps_rcvwinupd++; 10075244Sroot tp->snd_wnd = ti->ti_win; 10085244Sroot tp->snd_wl1 = ti->ti_seq; 10095244Sroot tp->snd_wl2 = ti->ti_ack; 101025260Skarels if (tp->snd_wnd > tp->max_sndwnd) 101125260Skarels tp->max_sndwnd = tp->snd_wnd; 101226824Skarels needoutput = 1; 101326824Skarels } 10145244Sroot 10155244Sroot /* 10165547Swnj * Process segments with URG. 10175065Swnj */ 10187267Swnj if ((tiflags & TH_URG) && ti->ti_urp && 10197267Swnj TCPS_HAVERCVDFIN(tp->t_state) == 0) { 10205547Swnj /* 102125939Skarels * This is a kludge, but if we receive and accept 102213121Ssam * random urgent pointers, we'll crash in 102313121Ssam * soreceive. It's hard to imagine someone 102413121Ssam * actually wanting to send this much urgent data. 102512441Ssam */ 102625652Skarels if (ti->ti_urp + so->so_rcv.sb_cc > SB_MAX) { 102712441Ssam ti->ti_urp = 0; /* XXX */ 102812441Ssam tiflags &= ~TH_URG; /* XXX */ 102925939Skarels goto dodata; /* XXX */ 103012441Ssam } 103112441Ssam /* 10325547Swnj * If this segment advances the known urgent pointer, 10335547Swnj * then mark the data stream. This should not happen 10345547Swnj * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 10355547Swnj * a FIN has been received from the remote side. 10365547Swnj * In these states we ignore the URG. 103727190Skarels * 103827190Skarels * According to RFC961 (Assigned Protocols), 103927190Skarels * the urgent pointer points to the last octet 104027190Skarels * of urgent data. We continue, however, 104127190Skarels * to consider it to indicate the first octet 1042*44375Skarels * of data past the urgent section as the original 1043*44375Skarels * spec states (in one of two places). 10445547Swnj */ 10455547Swnj if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { 10465547Swnj tp->rcv_up = ti->ti_seq + ti->ti_urp; 10475547Swnj so->so_oobmark = so->so_rcv.sb_cc + 10485547Swnj (tp->rcv_up - tp->rcv_nxt) - 1; 10495547Swnj if (so->so_oobmark == 0) 10505547Swnj so->so_state |= SS_RCVATMARK; 10518313Sroot sohasoutofband(so); 105224816Skarels tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); 10535440Swnj } 10545547Swnj /* 10555547Swnj * Remove out of band data so doesn't get presented to user. 10565547Swnj * This can happen independent of advancing the URG pointer, 10575547Swnj * but if two URG's are pending at once, some out-of-band 10585547Swnj * data may creep in... ick. 10595547Swnj */ 1060*44375Skarels if (ti->ti_urp <= ti->ti_len 1061*44375Skarels #ifdef SO_OOBINLINE 1062*44375Skarels && (so->so_options & SO_OOBINLINE) == 0 1063*44375Skarels #endif 1064*44375Skarels ) 1065*44375Skarels tcp_pulloutofband(so, ti, m); 106625939Skarels } else 106725939Skarels /* 106825939Skarels * If no out of band data is expected, 106925939Skarels * pull receive urgent pointer along 107025939Skarels * with the receive window. 107125939Skarels */ 107225939Skarels if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) 107325939Skarels tp->rcv_up = tp->rcv_nxt; 107425939Skarels dodata: /* XXX */ 10754601Swnj 10764601Swnj /* 10775065Swnj * Process the segment text, merging it into the TCP sequencing queue, 10785065Swnj * and arranging for acknowledgment of receipt if necessary. 10795065Swnj * This process logically involves adjusting tp->rcv_wnd as data 10805065Swnj * is presented to the user (this happens in tcp_usrreq.c, 10815065Swnj * case PRU_RCVD). If a FIN has already been received on this 10825065Swnj * connection then we just ignore the text. 10834601Swnj */ 108417946Skarels if ((ti->ti_len || (tiflags&TH_FIN)) && 108517946Skarels TCPS_HAVERCVDFIN(tp->t_state) == 0) { 108624816Skarels TCP_REASS(tp, ti, m, so, tiflags); 108725260Skarels /* 108825260Skarels * Note the amount of data that peer has sent into 108925260Skarels * our window, in order to estimate the sender's 109025260Skarels * buffer size. 109125260Skarels */ 109232098Skarels len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); 10935244Sroot } else { 10944924Swnj m_freem(m); 10955263Swnj tiflags &= ~TH_FIN; 10965244Sroot } 10974601Swnj 10984601Swnj /* 10995263Swnj * If FIN is received ACK the FIN and let the user know 11005263Swnj * that the connection is closing. 11014601Swnj */ 11025263Swnj if (tiflags & TH_FIN) { 11035244Sroot if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 11045244Sroot socantrcvmore(so); 11055244Sroot tp->t_flags |= TF_ACKNOW; 11065244Sroot tp->rcv_nxt++; 11075244Sroot } 11085065Swnj switch (tp->t_state) { 11094601Swnj 11105065Swnj /* 11115065Swnj * In SYN_RECEIVED and ESTABLISHED STATES 11125065Swnj * enter the CLOSE_WAIT state. 11134884Swnj */ 11145065Swnj case TCPS_SYN_RECEIVED: 11155065Swnj case TCPS_ESTABLISHED: 11165065Swnj tp->t_state = TCPS_CLOSE_WAIT; 11175065Swnj break; 11184884Swnj 11195065Swnj /* 11205085Swnj * If still in FIN_WAIT_1 STATE FIN has not been acked so 11215085Swnj * enter the CLOSING state. 11224884Swnj */ 11235065Swnj case TCPS_FIN_WAIT_1: 11245085Swnj tp->t_state = TCPS_CLOSING; 11255065Swnj break; 11264601Swnj 11275065Swnj /* 11285065Swnj * In FIN_WAIT_2 state enter the TIME_WAIT state, 11295065Swnj * starting the time-wait timer, turning off the other 11305065Swnj * standard timers. 11315065Swnj */ 11325065Swnj case TCPS_FIN_WAIT_2: 11335244Sroot tp->t_state = TCPS_TIME_WAIT; 11345074Swnj tcp_canceltimers(tp); 11355162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 11365244Sroot soisdisconnected(so); 11375065Swnj break; 11385065Swnj 11394884Swnj /* 11405065Swnj * In TIME_WAIT state restart the 2 MSL time_wait timer. 11414884Swnj */ 11425065Swnj case TCPS_TIME_WAIT: 11435162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 11445065Swnj break; 11455085Swnj } 11464601Swnj } 11475267Sroot if (so->so_options & SO_DEBUG) 11485267Sroot tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); 11495085Swnj 11505085Swnj /* 11515085Swnj * Return any desired output. 11525085Swnj */ 115326824Skarels if (needoutput || (tp->t_flags & TF_ACKNOW)) 115425939Skarels (void) tcp_output(tp); 11555065Swnj return; 11565085Swnj 11575065Swnj dropafterack: 11585085Swnj /* 11596211Swnj * Generate an ACK dropping incoming segment if it occupies 11606211Swnj * sequence space, where the ACK reflects our state. 11615085Swnj */ 116226057Skarels if (tiflags & TH_RST) 11635085Swnj goto drop; 116431749Skarels m_freem(m); 116531721Skarels tp->t_flags |= TF_ACKNOW; 116631721Skarels (void) tcp_output(tp); 11675231Swnj return; 11685085Swnj 11695085Swnj dropwithreset: 117011731Ssam if (om) { 11716161Ssam (void) m_free(om); 117211731Ssam om = 0; 117311731Ssam } 11745085Swnj /* 11755244Sroot * Generate a RST, dropping incoming segment. 11765085Swnj * Make ACK acceptable to originator of segment. 117725197Skarels * Don't bother to respond if destination was broadcast. 11785085Swnj */ 117937320Skarels if ((tiflags & TH_RST) || m->m_flags & M_BCAST) 11805085Swnj goto drop; 11815085Swnj if (tiflags & TH_ACK) 118237320Skarels tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); 11835085Swnj else { 11845085Swnj if (tiflags & TH_SYN) 11855085Swnj ti->ti_len++; 118637320Skarels tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, 11876211Swnj TH_RST|TH_ACK); 11885085Swnj } 118910769Ssam /* destroy temporarily created socket */ 119010769Ssam if (dropsocket) 119110769Ssam (void) soabort(so); 11925231Swnj return; 11935085Swnj 11945065Swnj drop: 119511730Ssam if (om) 119611730Ssam (void) m_free(om); 11975085Swnj /* 11985085Swnj * Drop space held by incoming segment and return. 11995085Swnj */ 12006303Sroot if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 12016303Sroot tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 12025065Swnj m_freem(m); 120310769Ssam /* destroy temporarily created socket */ 120410769Ssam if (dropsocket) 120510769Ssam (void) soabort(so); 12065267Sroot return; 12075065Swnj } 12085065Swnj 120917272Skarels tcp_dooptions(tp, om, ti) 12105440Swnj struct tcpcb *tp; 12115440Swnj struct mbuf *om; 121217272Skarels struct tcpiphdr *ti; 12135419Swnj { 12145440Swnj register u_char *cp; 1215*44375Skarels u_short mss; 12165440Swnj int opt, optlen, cnt; 12175419Swnj 12185440Swnj cp = mtod(om, u_char *); 12195440Swnj cnt = om->m_len; 12205440Swnj for (; cnt > 0; cnt -= optlen, cp += optlen) { 12215440Swnj opt = cp[0]; 12225440Swnj if (opt == TCPOPT_EOL) 12235440Swnj break; 12245440Swnj if (opt == TCPOPT_NOP) 12255440Swnj optlen = 1; 122612169Ssam else { 12275440Swnj optlen = cp[1]; 122812169Ssam if (optlen <= 0) 122912169Ssam break; 123012169Ssam } 12315440Swnj switch (opt) { 12325440Swnj 12335440Swnj default: 1234*44375Skarels continue; 12355440Swnj 12365440Swnj case TCPOPT_MAXSEG: 12375440Swnj if (optlen != 4) 12385440Swnj continue; 123917272Skarels if (!(ti->ti_flags & TH_SYN)) 124017272Skarels continue; 1241*44375Skarels bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); 1242*44375Skarels NTOHS(mss); 1243*44375Skarels (void) tcp_mss(tp, mss); /* sets t_maxseg */ 12445440Swnj break; 12455419Swnj } 12465419Swnj } 12476161Ssam (void) m_free(om); 12485419Swnj } 12495419Swnj 12505419Swnj /* 12515547Swnj * Pull out of band byte out of a segment so 12525547Swnj * it doesn't appear in the user's data queue. 12535547Swnj * It is still reflected in the segment length for 12545547Swnj * sequencing purposes. 12555547Swnj */ 1256*44375Skarels tcp_pulloutofband(so, ti, m) 12575547Swnj struct socket *so; 12585547Swnj struct tcpiphdr *ti; 1259*44375Skarels register struct mbuf *m; 12605547Swnj { 12616116Swnj int cnt = ti->ti_urp - 1; 12625547Swnj 12635547Swnj while (cnt >= 0) { 12645547Swnj if (m->m_len > cnt) { 12655547Swnj char *cp = mtod(m, caddr_t) + cnt; 12665547Swnj struct tcpcb *tp = sototcpcb(so); 12675547Swnj 12685547Swnj tp->t_iobc = *cp; 12695547Swnj tp->t_oobflags |= TCPOOB_HAVEDATA; 12706161Ssam bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); 12715547Swnj m->m_len--; 12725547Swnj return; 12735547Swnj } 12745547Swnj cnt -= m->m_len; 12755547Swnj m = m->m_next; 12765547Swnj if (m == 0) 12775547Swnj break; 12785547Swnj } 12795547Swnj panic("tcp_pulloutofband"); 12805547Swnj } 12815547Swnj 12825547Swnj /* 1283*44375Skarels * Collect new round-trip time estimate 1284*44375Skarels * and update averages and current timeout. 128517272Skarels */ 1286*44375Skarels tcp_xmit_timer(tp) 128723975Skarels register struct tcpcb *tp; 128817272Skarels { 1289*44375Skarels register short delta; 1290*44375Skarels 1291*44375Skarels tcpstat.tcps_rttupdated++; 1292*44375Skarels if (tp->t_srtt != 0) { 1293*44375Skarels /* 1294*44375Skarels * srtt is stored as fixed point with 3 bits after the 1295*44375Skarels * binary point (i.e., scaled by 8). The following magic 1296*44375Skarels * is equivalent to the smoothing algorithm in rfc793 with 1297*44375Skarels * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed 1298*44375Skarels * point). Adjust t_rtt to origin 0. 1299*44375Skarels */ 1300*44375Skarels delta = tp->t_rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); 1301*44375Skarels if ((tp->t_srtt += delta) <= 0) 1302*44375Skarels tp->t_srtt = 1; 1303*44375Skarels /* 1304*44375Skarels * We accumulate a smoothed rtt variance (actually, a 1305*44375Skarels * smoothed mean difference), then set the retransmit 1306*44375Skarels * timer to smoothed rtt + 4 times the smoothed variance. 1307*44375Skarels * rttvar is stored as fixed point with 2 bits after the 1308*44375Skarels * binary point (scaled by 4). The following is 1309*44375Skarels * equivalent to rfc793 smoothing with an alpha of .75 1310*44375Skarels * (rttvar = rttvar*3/4 + |delta| / 4). This replaces 1311*44375Skarels * rfc793's wired-in beta. 1312*44375Skarels */ 1313*44375Skarels if (delta < 0) 1314*44375Skarels delta = -delta; 1315*44375Skarels delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); 1316*44375Skarels if ((tp->t_rttvar += delta) <= 0) 1317*44375Skarels tp->t_rttvar = 1; 1318*44375Skarels } else { 1319*44375Skarels /* 1320*44375Skarels * No rtt measurement yet - use the unsmoothed rtt. 1321*44375Skarels * Set the variance to half the rtt (so our first 1322*44375Skarels * retransmit happens at 2*rtt) 1323*44375Skarels */ 1324*44375Skarels tp->t_srtt = tp->t_rtt << TCP_RTT_SHIFT; 1325*44375Skarels tp->t_rttvar = tp->t_rtt << (TCP_RTTVAR_SHIFT - 1); 1326*44375Skarels } 1327*44375Skarels tp->t_rtt = 0; 1328*44375Skarels tp->t_rxtshift = 0; 1329*44375Skarels 1330*44375Skarels /* 1331*44375Skarels * the retransmit should happen at rtt + 4 * rttvar. 1332*44375Skarels * Because of the way we do the smoothing, srtt and rttvar 1333*44375Skarels * will each average +1/2 tick of bias. When we compute 1334*44375Skarels * the retransmit timer, we want 1/2 tick of rounding and 1335*44375Skarels * 1 extra tick because of +-1/2 tick uncertainty in the 1336*44375Skarels * firing of the timer. The bias will give us exactly the 1337*44375Skarels * 1.5 tick we need. But, because the bias is 1338*44375Skarels * statistical, we have to test that we don't drop below 1339*44375Skarels * the minimum feasible timer (which is 2 ticks). 1340*44375Skarels */ 1341*44375Skarels TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), 1342*44375Skarels tp->t_rttmin, TCPTV_REXMTMAX); 1343*44375Skarels 1344*44375Skarels /* 1345*44375Skarels * We received an ack for a packet that wasn't retransmitted; 1346*44375Skarels * it is probably safe to discard any error indications we've 1347*44375Skarels * received recently. This isn't quite right, but close enough 1348*44375Skarels * for now (a route might have failed after we sent a segment, 1349*44375Skarels * and the return path might not be symmetrical). 1350*44375Skarels */ 1351*44375Skarels tp->t_softerror = 0; 1352*44375Skarels } 1353*44375Skarels 1354*44375Skarels /* 1355*44375Skarels * Determine a reasonable value for maxseg size. 1356*44375Skarels * If the route is known, check route for mtu. 1357*44375Skarels * If none, use an mss that can be handled on the outgoing 1358*44375Skarels * interface without forcing IP to fragment; if bigger than 1359*44375Skarels * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES 1360*44375Skarels * to utilize large mbufs. If no route is found, route has no mtu, 1361*44375Skarels * or the destination isn't local, use a default, hopefully conservative 1362*44375Skarels * size (usually 512 or the default IP max size, but no more than the mtu 1363*44375Skarels * of the interface), as we can't discover anything about intervening 1364*44375Skarels * gateways or networks. We also initialize the congestion/slow start 1365*44375Skarels * window to be a single segment if the destination isn't local. 1366*44375Skarels * While looking at the routing entry, we also initialize other path-dependent 1367*44375Skarels * parameters from pre-set or cached values in the routing entry. 1368*44375Skarels */ 1369*44375Skarels 1370*44375Skarels tcp_mss(tp, offer) 1371*44375Skarels register struct tcpcb *tp; 1372*44375Skarels u_short offer; 1373*44375Skarels { 137417272Skarels struct route *ro; 1375*44375Skarels register struct rtentry *rt; 137617272Skarels struct ifnet *ifp; 1377*44375Skarels register int rtt, mss; 1378*44375Skarels u_long bufsize; 137917272Skarels struct inpcb *inp; 1380*44375Skarels struct socket *so; 1381*44375Skarels extern int tcp_mssdflt, tcp_rttdflt; 138217272Skarels 138317272Skarels inp = tp->t_inpcb; 138417272Skarels ro = &inp->inp_route; 1385*44375Skarels 1386*44375Skarels if ((rt = ro->ro_rt) == (struct rtentry *)0) { 138717272Skarels /* No route yet, so try to acquire one */ 138817272Skarels if (inp->inp_faddr.s_addr != INADDR_ANY) { 138917272Skarels ro->ro_dst.sa_family = AF_INET; 139037320Skarels ro->ro_dst.sa_len = sizeof(ro->ro_dst); 139117272Skarels ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 139217272Skarels inp->inp_faddr; 139317272Skarels rtalloc(ro); 139417272Skarels } 1395*44375Skarels if ((rt = ro->ro_rt) == (struct rtentry *)0) 1396*44375Skarels return (tcp_mssdflt); 139717272Skarels } 1398*44375Skarels ifp = rt->rt_ifp; 1399*44375Skarels so = inp->inp_socket; 140017272Skarels 1401*44375Skarels #ifdef RTV_MTU /* if route characteristics exist ... */ 1402*44375Skarels /* 1403*44375Skarels * While we're here, check if there's an initial rtt 1404*44375Skarels * or rttvar. Convert from the route-table units 1405*44375Skarels * to scaled multiples of the slow timeout timer. 1406*44375Skarels */ 1407*44375Skarels if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { 1408*44375Skarels if (rt->rt_rmx.rmx_locks & RTV_MTU) 1409*44375Skarels tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ); 1410*44375Skarels tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); 1411*44375Skarels if (rt->rt_rmx.rmx_rttvar) 1412*44375Skarels tp->t_rttvar = rt->rt_rmx.rmx_rttvar / 1413*44375Skarels (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); 1414*44375Skarels else 1415*44375Skarels /* default variation is +- 1 rtt */ 1416*44375Skarels tp->t_rttvar = 1417*44375Skarels tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; 1418*44375Skarels TCPT_RANGESET(tp->t_rxtcur, 1419*44375Skarels ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, 1420*44375Skarels tp->t_rttmin, TCPTV_REXMTMAX); 1421*44375Skarels } 1422*44375Skarels /* 1423*44375Skarels * if there's an mtu associated with the route, use it 1424*44375Skarels */ 1425*44375Skarels if (rt->rt_rmx.rmx_mtu) 1426*44375Skarels mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); 1427*44375Skarels else 1428*44375Skarels #endif /* RTV_MTU */ 1429*44375Skarels { 1430*44375Skarels mss = ifp->if_mtu - sizeof(struct tcpiphdr); 143131726Skarels #if (MCLBYTES & (MCLBYTES - 1)) == 0 1432*44375Skarels if (mss > MCLBYTES) 1433*44375Skarels mss &= ~(MCLBYTES-1); 143417272Skarels #else 1435*44375Skarels if (mss > MCLBYTES) 1436*44375Skarels mss = mss / MCLBYTES * MCLBYTES; 143717272Skarels #endif 1438*44375Skarels if (!in_localaddr(inp->inp_faddr)) 1439*44375Skarels mss = min(mss, tcp_mssdflt); 1440*44375Skarels } 1441*44375Skarels /* 1442*44375Skarels * The current mss, t_maxseg, is initialized to the default value. 1443*44375Skarels * If we compute a smaller value, reduce the current mss. 1444*44375Skarels * If we compute a larger value, return it for use in sending 1445*44375Skarels * a max seg size option, but don't store it for use 1446*44375Skarels * unless we received an offer at least that large from peer. 1447*44375Skarels * However, do not accept offers under 32 bytes. 1448*44375Skarels */ 1449*44375Skarels if (offer) 1450*44375Skarels mss = min(mss, offer); 1451*44375Skarels mss = max(mss, 32); /* sanity */ 1452*44375Skarels if (mss < tp->t_maxseg || offer != 0) { 1453*44375Skarels /* 1454*44375Skarels * If there's a pipesize, change the socket buffer 1455*44375Skarels * to that size. Make the socket buffers an integral 1456*44375Skarels * number of mss units; if the mss is larger than 1457*44375Skarels * the socket buffer, decrease the mss. 1458*44375Skarels */ 1459*44375Skarels #ifdef RTV_SPIPE 1460*44375Skarels if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) 1461*44375Skarels #endif 1462*44375Skarels bufsize = so->so_snd.sb_hiwat; 1463*44375Skarels if (bufsize < mss) 1464*44375Skarels mss = bufsize; 1465*44375Skarels else { 1466*44375Skarels bufsize = min(bufsize, SB_MAX) / mss * mss; 1467*44375Skarels (void) sbreserve(&so->so_snd, bufsize); 1468*44375Skarels } 1469*44375Skarels tp->t_maxseg = mss; 147032098Skarels 1471*44375Skarels #ifdef RTV_RPIPE 1472*44375Skarels if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) 1473*44375Skarels #endif 1474*44375Skarels bufsize = so->so_rcv.sb_hiwat; 1475*44375Skarels if (bufsize > mss) { 1476*44375Skarels bufsize = min(bufsize, SB_MAX) / mss * mss; 1477*44375Skarels (void) sbreserve(&so->so_rcv, bufsize); 1478*44375Skarels } 1479*44375Skarels } 148032034Skarels tp->snd_cwnd = mss; 1481*44375Skarels 1482*44375Skarels #ifdef RTV_SSTHRESH 1483*44375Skarels if (rt->rt_rmx.rmx_ssthresh) { 1484*44375Skarels /* 1485*44375Skarels * There's some sort of gateway or interface 1486*44375Skarels * buffer limit on the path. Use this to set 1487*44375Skarels * the slow start threshhold, but set the 1488*44375Skarels * threshold to no less than 2*mss. 1489*44375Skarels */ 1490*44375Skarels tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); 1491*44375Skarels } 1492*44375Skarels #endif /* RTV_MTU */ 149332034Skarels return (mss); 149417272Skarels } 1495