1*17360Skarels /* tcp_input.c 6.8 84/11/14 */ 24601Swnj 317062Sbloom #include "param.h" 417062Sbloom #include "systm.h" 517062Sbloom #include "mbuf.h" 617062Sbloom #include "protosw.h" 717062Sbloom #include "socket.h" 817062Sbloom #include "socketvar.h" 917062Sbloom #include "errno.h" 1010894Ssam 1110894Ssam #include "../net/if.h" 1210894Ssam #include "../net/route.h" 1310894Ssam 1417062Sbloom #include "in.h" 1517062Sbloom #include "in_pcb.h" 1617062Sbloom #include "in_systm.h" 1717062Sbloom #include "ip.h" 1817062Sbloom #include "ip_var.h" 1917062Sbloom #include "tcp.h" 2017062Sbloom #include "tcp_fsm.h" 2117062Sbloom #include "tcp_seq.h" 2217062Sbloom #include "tcp_timer.h" 2317062Sbloom #include "tcp_var.h" 2417062Sbloom #include "tcpip.h" 2517062Sbloom #include "tcp_debug.h" 264601Swnj 275300Sroot int tcpprintfs = 0; 284679Swnj int tcpcksum = 1; 295267Sroot struct tcpiphdr tcp_saveti; 305440Swnj extern tcpnodelack; 314601Swnj 325267Sroot struct tcpcb *tcp_newtcpcb(); 335065Swnj /* 345065Swnj * TCP input routine, follows pages 65-76 of the 355065Swnj * protocol specification dated September, 1981 very closely. 365065Swnj */ 374924Swnj tcp_input(m0) 384924Swnj struct mbuf *m0; 394601Swnj { 404924Swnj register struct tcpiphdr *ti; 414924Swnj struct inpcb *inp; 424924Swnj register struct mbuf *m; 435440Swnj struct mbuf *om = 0; 444924Swnj int len, tlen, off; 455391Swnj register struct tcpcb *tp = 0; 464924Swnj register int tiflags; 474803Swnj struct socket *so; 485109Swnj int todrop, acked; 495267Sroot short ostate; 506028Sroot struct in_addr laddr; 5110769Ssam int dropsocket = 0; 524924Swnj 534924Swnj /* 545244Sroot * Get IP and TCP header together in first mbuf. 555244Sroot * Note: IP leaves IP header in first mbuf. 564924Swnj */ 574924Swnj m = m0; 585020Sroot ti = mtod(m, struct tcpiphdr *); 595244Sroot if (((struct ip *)ti)->ip_hl > (sizeof (struct ip) >> 2)) 605208Swnj ip_stripoptions((struct ip *)ti, (struct mbuf *)0); 615307Sroot if (m->m_off > MMAXOFF || m->m_len < sizeof (struct tcpiphdr)) { 625307Sroot if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { 635085Swnj tcpstat.tcps_hdrops++; 645307Sroot return; 655085Swnj } 665085Swnj ti = mtod(m, struct tcpiphdr *); 675085Swnj } 684601Swnj 694601Swnj /* 705244Sroot * Checksum extended TCP header and data. 714601Swnj */ 724924Swnj tlen = ((struct ip *)ti)->ip_len; 734924Swnj len = sizeof (struct ip) + tlen; 744679Swnj if (tcpcksum) { 754924Swnj ti->ti_next = ti->ti_prev = 0; 764924Swnj ti->ti_x1 = 0; 775223Swnj ti->ti_len = (u_short)tlen; 786161Ssam ti->ti_len = htons((u_short)ti->ti_len); 795231Swnj if (ti->ti_sum = in_cksum(m, len)) { 8011830Ssam if (tcpprintfs) 8111830Ssam printf("tcp sum: src %x\n", ti->ti_src); 824924Swnj tcpstat.tcps_badsum++; 835085Swnj goto drop; 844601Swnj } 854601Swnj } 864601Swnj 874601Swnj /* 885244Sroot * Check that TCP offset makes sense, 895440Swnj * pull out TCP options and adjust length. 904601Swnj */ 914924Swnj off = ti->ti_off << 2; 925231Swnj if (off < sizeof (struct tcphdr) || off > tlen) { 9311830Ssam if (tcpprintfs) 9411830Ssam printf("tcp off: src %x off %d\n", ti->ti_src, off); 954924Swnj tcpstat.tcps_badoff++; 965085Swnj goto drop; 974924Swnj } 986211Swnj tlen -= off; 996211Swnj ti->ti_len = tlen; 1005440Swnj if (off > sizeof (struct tcphdr)) { 1015440Swnj if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { 1025440Swnj tcpstat.tcps_hdrops++; 10311730Ssam return; 1045440Swnj } 1055440Swnj ti = mtod(m, struct tcpiphdr *); 1069642Ssam om = m_get(M_DONTWAIT, MT_DATA); 1075440Swnj if (om == 0) 1085440Swnj goto drop; 1095440Swnj om->m_len = off - sizeof (struct tcphdr); 1105440Swnj { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr); 1116161Ssam bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len); 1125440Swnj m->m_len -= om->m_len; 1136161Ssam bcopy(op+om->m_len, op, 1146161Ssam (unsigned)(m->m_len-sizeof (struct tcpiphdr))); 1155440Swnj } 1165440Swnj } 1175065Swnj tiflags = ti->ti_flags; 1184924Swnj 1196093Sroot /* 1206211Swnj * Drop TCP and IP headers. 1216093Sroot */ 1226093Sroot off += sizeof (struct ip); 1236093Sroot m->m_off += off; 1246093Sroot m->m_len -= off; 1256093Sroot 1264924Swnj /* 1275244Sroot * Convert TCP protocol specific fields to host format. 1285085Swnj */ 1295085Swnj ti->ti_seq = ntohl(ti->ti_seq); 1305085Swnj ti->ti_ack = ntohl(ti->ti_ack); 1315085Swnj ti->ti_win = ntohs(ti->ti_win); 1325085Swnj ti->ti_urp = ntohs(ti->ti_urp); 1335085Swnj 1345085Swnj /* 1358271Sroot * Locate pcb for segment. 1364924Swnj */ 1375065Swnj inp = in_pcblookup 1386028Sroot (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport, 1396028Sroot INPLOOKUP_WILDCARD); 1405065Swnj 1415065Swnj /* 1425065Swnj * If the state is CLOSED (i.e., TCB does not exist) then 1435244Sroot * all data in the incoming segment is discarded. 1445065Swnj */ 1455300Sroot if (inp == 0) 1465085Swnj goto dropwithreset; 1475065Swnj tp = intotcpcb(inp); 1485300Sroot if (tp == 0) 1495085Swnj goto dropwithreset; 1505109Swnj so = inp->inp_socket; 1515267Sroot if (so->so_options & SO_DEBUG) { 1525267Sroot ostate = tp->t_state; 1535267Sroot tcp_saveti = *ti; 1545267Sroot } 1557510Sroot if (so->so_options & SO_ACCEPTCONN) { 1567510Sroot so = sonewconn(so); 1577510Sroot if (so == 0) 1587510Sroot goto drop; 15910769Ssam /* 16010769Ssam * This is ugly, but .... 16110769Ssam * 16210769Ssam * Mark socket as temporary until we're 16310769Ssam * committed to keeping it. The code at 16410769Ssam * ``drop'' and ``dropwithreset'' check the 16510769Ssam * flag dropsocket to see if the temporary 16610769Ssam * socket created here should be discarded. 16710769Ssam * We mark the socket as discardable until 16810769Ssam * we're committed to it below in TCPS_LISTEN. 16910769Ssam */ 17010769Ssam dropsocket++; 1717510Sroot inp = (struct inpcb *)so->so_pcb; 1727510Sroot inp->inp_laddr = ti->ti_dst; 1737510Sroot inp->inp_lport = ti->ti_dport; 1747510Sroot tp = intotcpcb(inp); 1757510Sroot tp->t_state = TCPS_LISTEN; 1767510Sroot } 1774601Swnj 1784601Swnj /* 1795162Swnj * Segment received on connection. 1805162Swnj * Reset idle time and keep-alive timer. 1815162Swnj */ 1825162Swnj tp->t_idle = 0; 1835162Swnj tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; 1845162Swnj 1855162Swnj /* 18617272Skarels * Process options if not in LISTEN state, 18717272Skarels * else do it below (after getting remote address). 1885440Swnj */ 18917272Skarels if (om && tp->t_state != TCPS_LISTEN) { 19017272Skarels tcp_dooptions(tp, om, ti); 1915440Swnj om = 0; 1925440Swnj } 1935440Swnj 1945440Swnj /* 1955085Swnj * Calculate amount of space in receive window, 1965085Swnj * and then do TCP input processing. 1974601Swnj */ 1985085Swnj tp->rcv_wnd = sbspace(&so->so_rcv); 1995231Swnj if (tp->rcv_wnd < 0) 2005231Swnj tp->rcv_wnd = 0; 2014601Swnj 2024601Swnj switch (tp->t_state) { 2034601Swnj 2045065Swnj /* 2055065Swnj * If the state is LISTEN then ignore segment if it contains an RST. 2065065Swnj * If the segment contains an ACK then it is bad and send a RST. 2075065Swnj * If it does not contain a SYN then it is not interesting; drop it. 2085085Swnj * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial 2095065Swnj * tp->iss, and send a segment: 2105085Swnj * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 2115065Swnj * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 2125065Swnj * Fill in remote peer address fields if not previously specified. 2135065Swnj * Enter SYN_RECEIVED state, and process any other fields of this 2145244Sroot * segment in this state. 2155065Swnj */ 2168271Sroot case TCPS_LISTEN: { 21710145Ssam struct mbuf *am; 2188271Sroot register struct sockaddr_in *sin; 2198271Sroot 2205065Swnj if (tiflags & TH_RST) 2215065Swnj goto drop; 2225300Sroot if (tiflags & TH_ACK) 2235085Swnj goto dropwithreset; 2245300Sroot if ((tiflags & TH_SYN) == 0) 2255065Swnj goto drop; 22610145Ssam am = m_get(M_DONTWAIT, MT_SONAME); 22710145Ssam if (am == NULL) 22810145Ssam goto drop; 22910145Ssam am->m_len = sizeof (struct sockaddr_in); 2308599Sroot sin = mtod(am, struct sockaddr_in *); 2318271Sroot sin->sin_family = AF_INET; 2328271Sroot sin->sin_addr = ti->ti_src; 2338271Sroot sin->sin_port = ti->ti_sport; 2346028Sroot laddr = inp->inp_laddr; 23510145Ssam if (inp->inp_laddr.s_addr == INADDR_ANY) 2366028Sroot inp->inp_laddr = ti->ti_dst; 2378599Sroot if (in_pcbconnect(inp, am)) { 2386028Sroot inp->inp_laddr = laddr; 2398716Sroot (void) m_free(am); 2405244Sroot goto drop; 2416028Sroot } 2428716Sroot (void) m_free(am); 2435244Sroot tp->t_template = tcp_template(tp); 2445244Sroot if (tp->t_template == 0) { 2455244Sroot in_pcbdisconnect(inp); 24617264Skarels dropsocket = 0; /* socket is already gone */ 2476028Sroot inp->inp_laddr = laddr; 2486320Swnj tp = 0; 2495244Sroot goto drop; 2505244Sroot } 25117272Skarels if (om) { 25217272Skarels tcp_dooptions(tp, om, ti); 25317272Skarels om = 0; 25417272Skarels } 2555085Swnj tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 2565065Swnj tp->irs = ti->ti_seq; 2575085Swnj tcp_sendseqinit(tp); 2585085Swnj tcp_rcvseqinit(tp); 2595065Swnj tp->t_state = TCPS_SYN_RECEIVED; 2605244Sroot tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; 26110769Ssam dropsocket = 0; /* committed to socket */ 2625085Swnj goto trimthenstep6; 2638271Sroot } 2644601Swnj 2655065Swnj /* 2665065Swnj * If the state is SYN_SENT: 2675065Swnj * if seg contains an ACK, but not for our SYN, drop the input. 2685065Swnj * if seg contains a RST, then drop the connection. 2695065Swnj * if seg does not contain SYN, then drop it. 2705065Swnj * Otherwise this is an acceptable SYN segment 2715065Swnj * initialize tp->rcv_nxt and tp->irs 2725065Swnj * if seg contains ack then advance tp->snd_una 2735065Swnj * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 2745065Swnj * arrange for segment to be acked (eventually) 2755065Swnj * continue processing rest of data/controls, beginning with URG 2765065Swnj */ 2775065Swnj case TCPS_SYN_SENT: 2785065Swnj if ((tiflags & TH_ACK) && 2795300Sroot /* this should be SEQ_LT; is SEQ_LEQ for BBN vax TCP only */ 2805300Sroot (SEQ_LT(ti->ti_ack, tp->iss) || 2815231Swnj SEQ_GT(ti->ti_ack, tp->snd_max))) 2825085Swnj goto dropwithreset; 2835065Swnj if (tiflags & TH_RST) { 28410394Ssam if (tiflags & TH_ACK) 28510394Ssam tp = tcp_drop(tp, ECONNREFUSED); 2865065Swnj goto drop; 2874601Swnj } 2885065Swnj if ((tiflags & TH_SYN) == 0) 2895065Swnj goto drop; 2905231Swnj tp->snd_una = ti->ti_ack; 2915357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 2925357Sroot tp->snd_nxt = tp->snd_una; 2935244Sroot tp->t_timer[TCPT_REXMT] = 0; 2945065Swnj tp->irs = ti->ti_seq; 2955085Swnj tcp_rcvseqinit(tp); 2965085Swnj tp->t_flags |= TF_ACKNOW; 2975162Swnj if (SEQ_GT(tp->snd_una, tp->iss)) { 2985244Sroot soisconnected(so); 2995065Swnj tp->t_state = TCPS_ESTABLISHED; 30017272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 3015162Swnj (void) tcp_reass(tp, (struct tcpiphdr *)0); 3025162Swnj } else 3035085Swnj tp->t_state = TCPS_SYN_RECEIVED; 3045085Swnj goto trimthenstep6; 3055085Swnj 3065085Swnj trimthenstep6: 3075085Swnj /* 3085231Swnj * Advance ti->ti_seq to correspond to first data byte. 3095085Swnj * If data, trim to stay within window, 3105085Swnj * dropping FIN if necessary. 3115085Swnj */ 3125231Swnj ti->ti_seq++; 3135085Swnj if (ti->ti_len > tp->rcv_wnd) { 3145085Swnj todrop = ti->ti_len - tp->rcv_wnd; 3155085Swnj m_adj(m, -todrop); 3165085Swnj ti->ti_len = tp->rcv_wnd; 3175085Swnj ti->ti_flags &= ~TH_FIN; 3185065Swnj } 3195263Swnj tp->snd_wl1 = ti->ti_seq - 1; 3205085Swnj goto step6; 3215065Swnj } 3224601Swnj 3235065Swnj /* 32416222Skarels * If data is received on a connection after the 32516222Skarels * user processes are gone, then RST the other end. 32616222Skarels */ 32716222Skarels if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && 32816222Skarels ti->ti_len) { 32916222Skarels tp = tcp_close(tp); 33016222Skarels goto dropwithreset; 33116222Skarels } 33216222Skarels 33316222Skarels /* 3345065Swnj * States other than LISTEN or SYN_SENT. 3355065Swnj * First check that at least some bytes of segment are within 3365065Swnj * receive window. 3375065Swnj */ 3385065Swnj if (tp->rcv_wnd == 0) { 3395065Swnj /* 3405065Swnj * If window is closed can only take segments at 3415231Swnj * window edge, and have to drop data and PUSH from 3425065Swnj * incoming segments. 3435065Swnj */ 3445300Sroot if (tp->rcv_nxt != ti->ti_seq) 3455065Swnj goto dropafterack; 3465085Swnj if (ti->ti_len > 0) { 3475690Swnj m_adj(m, ti->ti_len); 3485085Swnj ti->ti_len = 0; 3495085Swnj ti->ti_flags &= ~(TH_PUSH|TH_FIN); 3505065Swnj } 3515065Swnj } else { 3525065Swnj /* 3535231Swnj * If segment begins before rcv_nxt, drop leading 3545065Swnj * data (and SYN); if nothing left, just ack. 3555065Swnj */ 3565690Swnj todrop = tp->rcv_nxt - ti->ti_seq; 3575690Swnj if (todrop > 0) { 3585085Swnj if (tiflags & TH_SYN) { 3595300Sroot tiflags &= ~TH_SYN; 3605690Swnj ti->ti_flags &= ~TH_SYN; 3615085Swnj ti->ti_seq++; 3625085Swnj if (ti->ti_urp > 1) 3635085Swnj ti->ti_urp--; 3645085Swnj else 3655085Swnj tiflags &= ~TH_URG; 3665085Swnj todrop--; 3675085Swnj } 3686211Swnj if (todrop > ti->ti_len || 3696211Swnj todrop == ti->ti_len && (tiflags&TH_FIN) == 0) 3705065Swnj goto dropafterack; 3715065Swnj m_adj(m, todrop); 3725065Swnj ti->ti_seq += todrop; 3735065Swnj ti->ti_len -= todrop; 3745085Swnj if (ti->ti_urp > todrop) 3755085Swnj ti->ti_urp -= todrop; 3765085Swnj else { 3775085Swnj tiflags &= ~TH_URG; 3785690Swnj ti->ti_flags &= ~TH_URG; 3795690Swnj ti->ti_urp = 0; 3805085Swnj } 3815065Swnj } 3825065Swnj /* 3835065Swnj * If segment ends after window, drop trailing data 3845085Swnj * (and PUSH and FIN); if nothing left, just ACK. 3855065Swnj */ 3865690Swnj todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); 3875690Swnj if (todrop > 0) { 3886211Swnj if (todrop >= ti->ti_len) 3895065Swnj goto dropafterack; 3905065Swnj m_adj(m, -todrop); 3915065Swnj ti->ti_len -= todrop; 3925085Swnj ti->ti_flags &= ~(TH_PUSH|TH_FIN); 3935065Swnj } 3945065Swnj } 3954601Swnj 3965065Swnj /* 3975065Swnj * If the RST bit is set examine the state: 3985065Swnj * SYN_RECEIVED STATE: 3995065Swnj * If passive open, return to LISTEN state. 4005065Swnj * If active open, inform user that connection was refused. 4015065Swnj * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 4025065Swnj * Inform user that connection was reset, and close tcb. 4035065Swnj * CLOSING, LAST_ACK, TIME_WAIT STATES 4045065Swnj * Close the tcb. 4055065Swnj */ 4065065Swnj if (tiflags&TH_RST) switch (tp->t_state) { 4075267Sroot 4085065Swnj case TCPS_SYN_RECEIVED: 40910394Ssam tp = tcp_drop(tp, ECONNREFUSED); 4105065Swnj goto drop; 4114601Swnj 4125065Swnj case TCPS_ESTABLISHED: 4135065Swnj case TCPS_FIN_WAIT_1: 4145065Swnj case TCPS_FIN_WAIT_2: 4155065Swnj case TCPS_CLOSE_WAIT: 41610394Ssam tp = tcp_drop(tp, ECONNRESET); 4175065Swnj goto drop; 4185065Swnj 4195065Swnj case TCPS_CLOSING: 4205065Swnj case TCPS_LAST_ACK: 4215065Swnj case TCPS_TIME_WAIT: 42210394Ssam tp = tcp_close(tp); 4235065Swnj goto drop; 4244601Swnj } 4254601Swnj 4264601Swnj /* 4275065Swnj * If a SYN is in the window, then this is an 4285065Swnj * error and we send an RST and drop the connection. 4294601Swnj */ 4305065Swnj if (tiflags & TH_SYN) { 43110394Ssam tp = tcp_drop(tp, ECONNRESET); 4325085Swnj goto dropwithreset; 4334601Swnj } 4344601Swnj 4354601Swnj /* 4365065Swnj * If the ACK bit is off we drop the segment and return. 4374601Swnj */ 4385085Swnj if ((tiflags & TH_ACK) == 0) 4395065Swnj goto drop; 4405065Swnj 4415065Swnj /* 4425065Swnj * Ack processing. 4435065Swnj */ 4444601Swnj switch (tp->t_state) { 4454601Swnj 4465065Swnj /* 4475065Swnj * In SYN_RECEIVED state if the ack ACKs our SYN then enter 4485065Swnj * ESTABLISHED state and continue processing, othewise 4495065Swnj * send an RST. 4505065Swnj */ 4515065Swnj case TCPS_SYN_RECEIVED: 4525085Swnj if (SEQ_GT(tp->snd_una, ti->ti_ack) || 4535231Swnj SEQ_GT(ti->ti_ack, tp->snd_max)) 4545085Swnj goto dropwithreset; 4555244Sroot tp->snd_una++; /* SYN acked */ 4565357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 4575357Sroot tp->snd_nxt = tp->snd_una; 4585244Sroot tp->t_timer[TCPT_REXMT] = 0; 4595085Swnj soisconnected(so); 4605085Swnj tp->t_state = TCPS_ESTABLISHED; 46117272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 4625162Swnj (void) tcp_reass(tp, (struct tcpiphdr *)0); 4635244Sroot tp->snd_wl1 = ti->ti_seq - 1; 4645085Swnj /* fall into ... */ 4654601Swnj 4665065Swnj /* 4675065Swnj * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 4685065Swnj * ACKs. If the ack is in the range 4695231Swnj * tp->snd_una < ti->ti_ack <= tp->snd_max 4705065Swnj * then advance tp->snd_una to ti->ti_ack and drop 4715065Swnj * data from the retransmission queue. If this ACK reflects 4725065Swnj * more up to date window information we update our window information. 4735065Swnj */ 4745065Swnj case TCPS_ESTABLISHED: 4755065Swnj case TCPS_FIN_WAIT_1: 4765065Swnj case TCPS_FIN_WAIT_2: 4775065Swnj case TCPS_CLOSE_WAIT: 4785065Swnj case TCPS_CLOSING: 4795244Sroot case TCPS_LAST_ACK: 4805244Sroot case TCPS_TIME_WAIT: 4815085Swnj #define ourfinisacked (acked > 0) 4825085Swnj 4835244Sroot if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) 4845065Swnj break; 4855300Sroot if (SEQ_GT(ti->ti_ack, tp->snd_max)) 4865065Swnj goto dropafterack; 4875085Swnj acked = ti->ti_ack - tp->snd_una; 4885951Swnj 4895951Swnj /* 4905951Swnj * If transmit timer is running and timed sequence 4915951Swnj * number was acked, update smoothed round trip time. 4925951Swnj */ 4935951Swnj if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) { 4945951Swnj if (tp->t_srtt == 0) 4955951Swnj tp->t_srtt = tp->t_rtt; 4965951Swnj else 4975951Swnj tp->t_srtt = 4985951Swnj tcp_alpha * tp->t_srtt + 4995951Swnj (1 - tcp_alpha) * tp->t_rtt; 5005951Swnj tp->t_rtt = 0; 5015951Swnj } 5025951Swnj 5035307Sroot if (ti->ti_ack == tp->snd_max) 5045244Sroot tp->t_timer[TCPT_REXMT] = 0; 5055307Sroot else { 5065244Sroot TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 5075244Sroot tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 5085300Sroot tp->t_rxtshift = 0; 5095085Swnj } 510*17360Skarels /* 511*17360Skarels * When new data is acked, open the congestion window a bit. 512*17360Skarels */ 513*17360Skarels if (acked > 0) 514*17360Skarels tp->snd_cwnd = MIN(11 * tp->snd_cwnd / 10, 65535); 5155307Sroot if (acked > so->so_snd.sb_cc) { 51615386Ssam tp->snd_wnd -= so->so_snd.sb_cc; 5175307Sroot sbdrop(&so->so_snd, so->so_snd.sb_cc); 5185307Sroot } else { 5196161Ssam sbdrop(&so->so_snd, acked); 5205307Sroot tp->snd_wnd -= acked; 5215307Sroot acked = 0; 5225307Sroot } 5236434Swnj if ((so->so_snd.sb_flags & SB_WAIT) || so->so_snd.sb_sel) 5245300Sroot sowwakeup(so); 5255231Swnj tp->snd_una = ti->ti_ack; 5265357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 5275357Sroot tp->snd_nxt = tp->snd_una; 5285162Swnj 5294601Swnj switch (tp->t_state) { 5304601Swnj 5315065Swnj /* 5325065Swnj * In FIN_WAIT_1 STATE in addition to the processing 5335065Swnj * for the ESTABLISHED state if our FIN is now acknowledged 5345085Swnj * then enter FIN_WAIT_2. 5355065Swnj */ 5365065Swnj case TCPS_FIN_WAIT_1: 5375896Swnj if (ourfinisacked) { 5385896Swnj /* 5395896Swnj * If we can't receive any more 5405896Swnj * data, then closing user can proceed. 5415896Swnj */ 5425896Swnj if (so->so_state & SS_CANTRCVMORE) 5435896Swnj soisdisconnected(so); 5445085Swnj tp->t_state = TCPS_FIN_WAIT_2; 54517264Skarels /* 54617264Skarels * This is contrary to the specification, 54717264Skarels * but if we haven't gotten our FIN in 54817264Skarels * 5 minutes, it's not forthcoming. 54917264Skarels */ 55017316Skarels tp->t_timer[TCPT_2MSL] = 5 * 60 * PR_SLOWHZ; 5515896Swnj } 5524601Swnj break; 5534601Swnj 5545065Swnj /* 5555065Swnj * In CLOSING STATE in addition to the processing for 5565065Swnj * the ESTABLISHED state if the ACK acknowledges our FIN 5575065Swnj * then enter the TIME-WAIT state, otherwise ignore 5585065Swnj * the segment. 5595065Swnj */ 5605065Swnj case TCPS_CLOSING: 5615244Sroot if (ourfinisacked) { 5625065Swnj tp->t_state = TCPS_TIME_WAIT; 5635244Sroot tcp_canceltimers(tp); 5645244Sroot tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 5655244Sroot soisdisconnected(so); 5665244Sroot } 5675244Sroot break; 5684601Swnj 5695065Swnj /* 5705085Swnj * The only thing that can arrive in LAST_ACK state 5715085Swnj * is an acknowledgment of our FIN. If our FIN is now 5725085Swnj * acknowledged, delete the TCB, enter the closed state 5735085Swnj * and return. 5745065Swnj */ 5755065Swnj case TCPS_LAST_ACK: 57610394Ssam if (ourfinisacked) 57710394Ssam tp = tcp_close(tp); 5785065Swnj goto drop; 5794601Swnj 5805065Swnj /* 5815065Swnj * In TIME_WAIT state the only thing that should arrive 5825065Swnj * is a retransmission of the remote FIN. Acknowledge 5835065Swnj * it and restart the finack timer. 5845065Swnj */ 5855065Swnj case TCPS_TIME_WAIT: 5865162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 5875065Swnj goto dropafterack; 5884601Swnj } 5895085Swnj #undef ourfinisacked 5905085Swnj } 5914601Swnj 5925065Swnj step6: 5935065Swnj /* 5945244Sroot * Update window information. 5955244Sroot */ 5965300Sroot if (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && 5975391Swnj (SEQ_LT(tp->snd_wl2, ti->ti_ack) || 5985300Sroot tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)) { 5995244Sroot tp->snd_wnd = ti->ti_win; 6005244Sroot tp->snd_wl1 = ti->ti_seq; 6015244Sroot tp->snd_wl2 = ti->ti_ack; 6028599Sroot if (tp->snd_wnd != 0) 6035244Sroot tp->t_timer[TCPT_PERSIST] = 0; 6045244Sroot } 6055244Sroot 6065244Sroot /* 6075547Swnj * Process segments with URG. 6085065Swnj */ 6097267Swnj if ((tiflags & TH_URG) && ti->ti_urp && 6107267Swnj TCPS_HAVERCVDFIN(tp->t_state) == 0) { 6115547Swnj /* 61213121Ssam * This is a kludge, but if we receive accept 61313121Ssam * random urgent pointers, we'll crash in 61413121Ssam * soreceive. It's hard to imagine someone 61513121Ssam * actually wanting to send this much urgent data. 61612441Ssam */ 617*17360Skarels if (ti->ti_urp + (unsigned) so->so_rcv.sb_cc > 32767) { 61812441Ssam ti->ti_urp = 0; /* XXX */ 61912441Ssam tiflags &= ~TH_URG; /* XXX */ 62012441Ssam ti->ti_flags &= ~TH_URG; /* XXX */ 62113121Ssam goto badurp; /* XXX */ 62212441Ssam } 62312441Ssam /* 6245547Swnj * If this segment advances the known urgent pointer, 6255547Swnj * then mark the data stream. This should not happen 6265547Swnj * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 6275547Swnj * a FIN has been received from the remote side. 6285547Swnj * In these states we ignore the URG. 6295547Swnj */ 6305547Swnj if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { 6315547Swnj tp->rcv_up = ti->ti_seq + ti->ti_urp; 6325547Swnj so->so_oobmark = so->so_rcv.sb_cc + 6335547Swnj (tp->rcv_up - tp->rcv_nxt) - 1; 6345547Swnj if (so->so_oobmark == 0) 6355547Swnj so->so_state |= SS_RCVATMARK; 6368313Sroot sohasoutofband(so); 6375547Swnj tp->t_oobflags &= ~TCPOOB_HAVEDATA; 6385440Swnj } 6395547Swnj /* 6405547Swnj * Remove out of band data so doesn't get presented to user. 6415547Swnj * This can happen independent of advancing the URG pointer, 6425547Swnj * but if two URG's are pending at once, some out-of-band 6435547Swnj * data may creep in... ick. 6445547Swnj */ 6457510Sroot if (ti->ti_urp <= ti->ti_len) 6465547Swnj tcp_pulloutofband(so, ti); 6475419Swnj } 64813121Ssam badurp: /* XXX */ 6494601Swnj 6504601Swnj /* 6515065Swnj * Process the segment text, merging it into the TCP sequencing queue, 6525065Swnj * and arranging for acknowledgment of receipt if necessary. 6535065Swnj * This process logically involves adjusting tp->rcv_wnd as data 6545065Swnj * is presented to the user (this happens in tcp_usrreq.c, 6555065Swnj * case PRU_RCVD). If a FIN has already been received on this 6565065Swnj * connection then we just ignore the text. 6574601Swnj */ 658*17360Skarels if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 659*17360Skarels if (ti->ti_len || (tiflags&TH_FIN)) 660*17360Skarels tiflags = tcp_reass(tp, ti); 661*17360Skarels else 662*17360Skarels m_freem(m); 6635440Swnj if (tcpnodelack == 0) 6645440Swnj tp->t_flags |= TF_DELACK; 6655440Swnj else 6665440Swnj tp->t_flags |= TF_ACKNOW; 6675244Sroot } else { 6684924Swnj m_freem(m); 6695263Swnj tiflags &= ~TH_FIN; 6705244Sroot } 6714601Swnj 6724601Swnj /* 6735263Swnj * If FIN is received ACK the FIN and let the user know 6745263Swnj * that the connection is closing. 6754601Swnj */ 6765263Swnj if (tiflags & TH_FIN) { 6775244Sroot if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 6785244Sroot socantrcvmore(so); 6795244Sroot tp->t_flags |= TF_ACKNOW; 6805244Sroot tp->rcv_nxt++; 6815244Sroot } 6825065Swnj switch (tp->t_state) { 6834601Swnj 6845065Swnj /* 6855065Swnj * In SYN_RECEIVED and ESTABLISHED STATES 6865065Swnj * enter the CLOSE_WAIT state. 6874884Swnj */ 6885065Swnj case TCPS_SYN_RECEIVED: 6895065Swnj case TCPS_ESTABLISHED: 6905065Swnj tp->t_state = TCPS_CLOSE_WAIT; 6915065Swnj break; 6924884Swnj 6935065Swnj /* 6945085Swnj * If still in FIN_WAIT_1 STATE FIN has not been acked so 6955085Swnj * enter the CLOSING state. 6964884Swnj */ 6975065Swnj case TCPS_FIN_WAIT_1: 6985085Swnj tp->t_state = TCPS_CLOSING; 6995065Swnj break; 7004601Swnj 7015065Swnj /* 7025065Swnj * In FIN_WAIT_2 state enter the TIME_WAIT state, 7035065Swnj * starting the time-wait timer, turning off the other 7045065Swnj * standard timers. 7055065Swnj */ 7065065Swnj case TCPS_FIN_WAIT_2: 7075244Sroot tp->t_state = TCPS_TIME_WAIT; 7085074Swnj tcp_canceltimers(tp); 7095162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 7105244Sroot soisdisconnected(so); 7115065Swnj break; 7125065Swnj 7134884Swnj /* 7145065Swnj * In TIME_WAIT state restart the 2 MSL time_wait timer. 7154884Swnj */ 7165065Swnj case TCPS_TIME_WAIT: 7175162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 7185065Swnj break; 7195085Swnj } 7204601Swnj } 7215267Sroot if (so->so_options & SO_DEBUG) 7225267Sroot tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); 7235085Swnj 7245085Swnj /* 7255085Swnj * Return any desired output. 7265085Swnj */ 7276161Ssam (void) tcp_output(tp); 7285065Swnj return; 7295085Swnj 7305065Swnj dropafterack: 7315085Swnj /* 7326211Swnj * Generate an ACK dropping incoming segment if it occupies 7336211Swnj * sequence space, where the ACK reflects our state. 7345085Swnj */ 7356211Swnj if ((tiflags&TH_RST) || 7366211Swnj tlen == 0 && (tiflags&(TH_SYN|TH_FIN)) == 0) 7375085Swnj goto drop; 7386303Sroot if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 7396303Sroot tcp_trace(TA_RESPOND, ostate, tp, &tcp_saveti, 0); 7405391Swnj tcp_respond(tp, ti, tp->rcv_nxt, tp->snd_nxt, TH_ACK); 7415231Swnj return; 7425085Swnj 7435085Swnj dropwithreset: 74411731Ssam if (om) { 7456161Ssam (void) m_free(om); 74611731Ssam om = 0; 74711731Ssam } 7485085Swnj /* 7495244Sroot * Generate a RST, dropping incoming segment. 7505085Swnj * Make ACK acceptable to originator of segment. 7515085Swnj */ 7525085Swnj if (tiflags & TH_RST) 7535085Swnj goto drop; 7545085Swnj if (tiflags & TH_ACK) 7555391Swnj tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST); 7565085Swnj else { 7575085Swnj if (tiflags & TH_SYN) 7585085Swnj ti->ti_len++; 7596211Swnj tcp_respond(tp, ti, ti->ti_seq+ti->ti_len, (tcp_seq)0, 7606211Swnj TH_RST|TH_ACK); 7615085Swnj } 76210769Ssam /* destroy temporarily created socket */ 76310769Ssam if (dropsocket) 76410769Ssam (void) soabort(so); 7655231Swnj return; 7665085Swnj 7675065Swnj drop: 76811730Ssam if (om) 76911730Ssam (void) m_free(om); 7705085Swnj /* 7715085Swnj * Drop space held by incoming segment and return. 7725085Swnj */ 7736303Sroot if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 7746303Sroot tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 7755065Swnj m_freem(m); 77610769Ssam /* destroy temporarily created socket */ 77710769Ssam if (dropsocket) 77810769Ssam (void) soabort(so); 7795267Sroot return; 7805065Swnj } 7815065Swnj 78217272Skarels tcp_dooptions(tp, om, ti) 7835440Swnj struct tcpcb *tp; 7845440Swnj struct mbuf *om; 78517272Skarels struct tcpiphdr *ti; 7865419Swnj { 7875440Swnj register u_char *cp; 7885440Swnj int opt, optlen, cnt; 7895419Swnj 7905440Swnj cp = mtod(om, u_char *); 7915440Swnj cnt = om->m_len; 7925440Swnj for (; cnt > 0; cnt -= optlen, cp += optlen) { 7935440Swnj opt = cp[0]; 7945440Swnj if (opt == TCPOPT_EOL) 7955440Swnj break; 7965440Swnj if (opt == TCPOPT_NOP) 7975440Swnj optlen = 1; 79812169Ssam else { 7995440Swnj optlen = cp[1]; 80012169Ssam if (optlen <= 0) 80112169Ssam break; 80212169Ssam } 8035440Swnj switch (opt) { 8045440Swnj 8055440Swnj default: 8065440Swnj break; 8075440Swnj 8085440Swnj case TCPOPT_MAXSEG: 8095440Swnj if (optlen != 4) 8105440Swnj continue; 81117272Skarels if (!(ti->ti_flags & TH_SYN)) 81217272Skarels continue; 8135440Swnj tp->t_maxseg = *(u_short *)(cp + 2); 8146161Ssam tp->t_maxseg = ntohs((u_short)tp->t_maxseg); 81517272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 8165440Swnj break; 8175419Swnj } 8185419Swnj } 8196161Ssam (void) m_free(om); 8205419Swnj } 8215419Swnj 8225419Swnj /* 8235547Swnj * Pull out of band byte out of a segment so 8245547Swnj * it doesn't appear in the user's data queue. 8255547Swnj * It is still reflected in the segment length for 8265547Swnj * sequencing purposes. 8275547Swnj */ 8285547Swnj tcp_pulloutofband(so, ti) 8295547Swnj struct socket *so; 8305547Swnj struct tcpiphdr *ti; 8315547Swnj { 8325547Swnj register struct mbuf *m; 8336116Swnj int cnt = ti->ti_urp - 1; 8345547Swnj 8355547Swnj m = dtom(ti); 8365547Swnj while (cnt >= 0) { 8375547Swnj if (m->m_len > cnt) { 8385547Swnj char *cp = mtod(m, caddr_t) + cnt; 8395547Swnj struct tcpcb *tp = sototcpcb(so); 8405547Swnj 8415547Swnj tp->t_iobc = *cp; 8425547Swnj tp->t_oobflags |= TCPOOB_HAVEDATA; 8436161Ssam bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); 8445547Swnj m->m_len--; 8455547Swnj return; 8465547Swnj } 8475547Swnj cnt -= m->m_len; 8485547Swnj m = m->m_next; 8495547Swnj if (m == 0) 8505547Swnj break; 8515547Swnj } 8525547Swnj panic("tcp_pulloutofband"); 8535547Swnj } 8545547Swnj 8555547Swnj /* 8565065Swnj * Insert segment ti into reassembly queue of tcp with 8575065Swnj * control block tp. Return TH_FIN if reassembly now includes 8585065Swnj * a segment with FIN. 8595065Swnj */ 8605109Swnj tcp_reass(tp, ti) 8615065Swnj register struct tcpcb *tp; 8625065Swnj register struct tcpiphdr *ti; 8635065Swnj { 8645065Swnj register struct tcpiphdr *q; 8655085Swnj struct socket *so = tp->t_inpcb->inp_socket; 8665263Swnj struct mbuf *m; 8675263Swnj int flags; 8685065Swnj 8695065Swnj /* 8705162Swnj * Call with ti==0 after become established to 8715162Swnj * force pre-ESTABLISHED data up to user socket. 8725065Swnj */ 8735162Swnj if (ti == 0) 8745065Swnj goto present; 8754601Swnj 8765065Swnj /* 8775065Swnj * Find a segment which begins after this one does. 8785065Swnj */ 8795065Swnj for (q = tp->seg_next; q != (struct tcpiphdr *)tp; 8805065Swnj q = (struct tcpiphdr *)q->ti_next) 8815065Swnj if (SEQ_GT(q->ti_seq, ti->ti_seq)) 8825065Swnj break; 8834601Swnj 8845065Swnj /* 8855065Swnj * If there is a preceding segment, it may provide some of 8865065Swnj * our data already. If so, drop the data from the incoming 8875065Swnj * segment. If it provides all of our data, drop us. 8885065Swnj */ 8895065Swnj if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { 8905065Swnj register int i; 8915690Swnj q = (struct tcpiphdr *)q->ti_prev; 8925065Swnj /* conversion to int (in i) handles seq wraparound */ 8935065Swnj i = q->ti_seq + q->ti_len - ti->ti_seq; 8945065Swnj if (i > 0) { 8954924Swnj if (i >= ti->ti_len) 8965065Swnj goto drop; 8977338Swnj m_adj(dtom(ti), i); 8985065Swnj ti->ti_len -= i; 8994924Swnj ti->ti_seq += i; 9004601Swnj } 9015065Swnj q = (struct tcpiphdr *)(q->ti_next); 9025065Swnj } 9034601Swnj 9045065Swnj /* 9055065Swnj * While we overlap succeeding segments trim them or, 9065065Swnj * if they are completely covered, dequeue them. 9075065Swnj */ 9085690Swnj while (q != (struct tcpiphdr *)tp) { 9095065Swnj register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; 9105690Swnj if (i <= 0) 9115690Swnj break; 9125065Swnj if (i < q->ti_len) { 9135690Swnj q->ti_seq += i; 9145065Swnj q->ti_len -= i; 9155065Swnj m_adj(dtom(q), i); 9165065Swnj break; 9174601Swnj } 9185065Swnj q = (struct tcpiphdr *)q->ti_next; 9195623Swnj m = dtom(q->ti_prev); 9205065Swnj remque(q->ti_prev); 9215623Swnj m_freem(m); 9225065Swnj } 9234601Swnj 9245065Swnj /* 9255065Swnj * Stick new segment in its place. 9265065Swnj */ 9275065Swnj insque(ti, q->ti_prev); 9284601Swnj 9295065Swnj present: 9305065Swnj /* 9315244Sroot * Present data to user, advancing rcv_nxt through 9325244Sroot * completed sequence space. 9335065Swnj */ 9345263Swnj if (TCPS_HAVERCVDSYN(tp->t_state) == 0) 9355244Sroot return (0); 9364924Swnj ti = tp->seg_next; 9375263Swnj if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) 9385263Swnj return (0); 9395263Swnj if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) 9405263Swnj return (0); 9415263Swnj do { 9425244Sroot tp->rcv_nxt += ti->ti_len; 9435244Sroot flags = ti->ti_flags & TH_FIN; 9444924Swnj remque(ti); 9455263Swnj m = dtom(ti); 9464924Swnj ti = (struct tcpiphdr *)ti->ti_next; 9475263Swnj if (so->so_state & SS_CANTRCVMORE) 9486161Ssam m_freem(m); 94910145Ssam else 9505263Swnj sbappend(&so->so_rcv, m); 9515263Swnj } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); 9525263Swnj sorwakeup(so); 9535065Swnj return (flags); 9545065Swnj drop: 9555065Swnj m_freem(dtom(ti)); 9565263Swnj return (0); 9574601Swnj } 95817272Skarels 95917272Skarels /* 96017272Skarels * Determine a reasonable value for maxseg size. 96117272Skarels * If the route is known, use one that can be handled 96217272Skarels * on the given interface without forcing IP to fragment. 96317272Skarels * If bigger than a page (CLSIZE), round down to nearest pagesize 96417272Skarels * to utilize pagesize mbufs. 96517272Skarels * If interface pointer is unavailable, or the destination isn't local, 96617272Skarels * use a conservative size (512 or the default IP max size), 96717272Skarels * as we can't discover anything about intervening gateways or networks. 96817272Skarels * 96917272Skarels * This is ugly, and doesn't belong at this level, but has to happen somehow. 97017272Skarels */ 97117272Skarels tcp_mss(tp) 97217272Skarels register struct tcpcb *tp; 97317272Skarels { 97417272Skarels struct route *ro; 97517272Skarels struct ifnet *ifp; 97617272Skarels int mss; 97717272Skarels struct inpcb *inp; 97817272Skarels 97917272Skarels inp = tp->t_inpcb; 98017272Skarels ro = &inp->inp_route; 98117272Skarels if ((ro->ro_rt == (struct rtentry *)0) || 98217272Skarels (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) { 98317272Skarels /* No route yet, so try to acquire one */ 98417272Skarels if (inp->inp_faddr.s_addr != INADDR_ANY) { 98517272Skarels ro->ro_dst.sa_family = AF_INET; 98617272Skarels ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 98717272Skarels inp->inp_faddr; 98817272Skarels rtalloc(ro); 98917272Skarels } 99017272Skarels if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0) 99117316Skarels return (TCP_MSS); 99217272Skarels } 99317272Skarels 99417272Skarels mss = ifp->if_mtu - sizeof(struct tcpiphdr); 99517272Skarels #if (CLBYTES & (CLBYTES - 1)) == 0 99617272Skarels if (mss > CLBYTES) 99717272Skarels mss &= ~(CLBYTES-1); 99817272Skarels #else 99917272Skarels if (mss > CLBYTES) 100017272Skarels mss = mss / CLBYTES * CLBYTES; 100117272Skarels #endif 100217272Skarels if (in_localaddr(tp->t_inpcb->inp_faddr)) 100317272Skarels return(mss); 100417316Skarels return (MIN(mss, TCP_MSS)); 100517272Skarels } 1006