123190Smckusick /* 223190Smckusick * Copyright (c) 1982 Regents of the University of California. 323190Smckusick * All rights reserved. The Berkeley software License Agreement 423190Smckusick * specifies the terms and conditions for redistribution. 523190Smckusick * 6*23975Skarels * @(#)tcp_input.c 6.12 (Berkeley) 07/19/85 723190Smckusick */ 84601Swnj 917062Sbloom #include "param.h" 1017062Sbloom #include "systm.h" 1117062Sbloom #include "mbuf.h" 1217062Sbloom #include "protosw.h" 1317062Sbloom #include "socket.h" 1417062Sbloom #include "socketvar.h" 1517062Sbloom #include "errno.h" 1610894Ssam 1710894Ssam #include "../net/if.h" 1810894Ssam #include "../net/route.h" 1910894Ssam 2017062Sbloom #include "in.h" 2117062Sbloom #include "in_pcb.h" 2217062Sbloom #include "in_systm.h" 2317062Sbloom #include "ip.h" 2417062Sbloom #include "ip_var.h" 2517062Sbloom #include "tcp.h" 2617062Sbloom #include "tcp_fsm.h" 2717062Sbloom #include "tcp_seq.h" 2817062Sbloom #include "tcp_timer.h" 2917062Sbloom #include "tcp_var.h" 3017062Sbloom #include "tcpip.h" 3117062Sbloom #include "tcp_debug.h" 324601Swnj 335300Sroot int tcpprintfs = 0; 344679Swnj int tcpcksum = 1; 355267Sroot struct tcpiphdr tcp_saveti; 365440Swnj extern tcpnodelack; 374601Swnj 385267Sroot struct tcpcb *tcp_newtcpcb(); 395065Swnj /* 405065Swnj * TCP input routine, follows pages 65-76 of the 415065Swnj * protocol specification dated September, 1981 very closely. 425065Swnj */ 434924Swnj tcp_input(m0) 444924Swnj struct mbuf *m0; 454601Swnj { 464924Swnj register struct tcpiphdr *ti; 474924Swnj struct inpcb *inp; 484924Swnj register struct mbuf *m; 495440Swnj struct mbuf *om = 0; 504924Swnj int len, tlen, off; 515391Swnj register struct tcpcb *tp = 0; 524924Swnj register int tiflags; 534803Swnj struct socket *so; 545109Swnj int todrop, acked; 555267Sroot short ostate; 566028Sroot struct in_addr laddr; 5710769Ssam int dropsocket = 0; 584924Swnj 594924Swnj /* 605244Sroot * Get IP and TCP header together in first mbuf. 615244Sroot * Note: IP leaves IP header in first mbuf. 624924Swnj */ 634924Swnj m = m0; 645020Sroot ti = mtod(m, struct tcpiphdr *); 655244Sroot if (((struct ip *)ti)->ip_hl > (sizeof (struct ip) >> 2)) 665208Swnj ip_stripoptions((struct ip *)ti, (struct mbuf *)0); 675307Sroot if (m->m_off > MMAXOFF || m->m_len < sizeof (struct tcpiphdr)) { 685307Sroot if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { 695085Swnj tcpstat.tcps_hdrops++; 705307Sroot return; 715085Swnj } 725085Swnj ti = mtod(m, struct tcpiphdr *); 735085Swnj } 744601Swnj 754601Swnj /* 765244Sroot * Checksum extended TCP header and data. 774601Swnj */ 784924Swnj tlen = ((struct ip *)ti)->ip_len; 794924Swnj len = sizeof (struct ip) + tlen; 804679Swnj if (tcpcksum) { 814924Swnj ti->ti_next = ti->ti_prev = 0; 824924Swnj ti->ti_x1 = 0; 835223Swnj ti->ti_len = (u_short)tlen; 846161Ssam ti->ti_len = htons((u_short)ti->ti_len); 855231Swnj if (ti->ti_sum = in_cksum(m, len)) { 8611830Ssam if (tcpprintfs) 8711830Ssam printf("tcp sum: src %x\n", ti->ti_src); 884924Swnj tcpstat.tcps_badsum++; 895085Swnj goto drop; 904601Swnj } 914601Swnj } 924601Swnj 934601Swnj /* 945244Sroot * Check that TCP offset makes sense, 955440Swnj * pull out TCP options and adjust length. 964601Swnj */ 974924Swnj off = ti->ti_off << 2; 985231Swnj if (off < sizeof (struct tcphdr) || off > tlen) { 9911830Ssam if (tcpprintfs) 10011830Ssam printf("tcp off: src %x off %d\n", ti->ti_src, off); 1014924Swnj tcpstat.tcps_badoff++; 1025085Swnj goto drop; 1034924Swnj } 1046211Swnj tlen -= off; 1056211Swnj ti->ti_len = tlen; 1065440Swnj if (off > sizeof (struct tcphdr)) { 1075440Swnj if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { 1085440Swnj tcpstat.tcps_hdrops++; 10911730Ssam return; 1105440Swnj } 1115440Swnj ti = mtod(m, struct tcpiphdr *); 1129642Ssam om = m_get(M_DONTWAIT, MT_DATA); 1135440Swnj if (om == 0) 1145440Swnj goto drop; 1155440Swnj om->m_len = off - sizeof (struct tcphdr); 1165440Swnj { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr); 1176161Ssam bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len); 1185440Swnj m->m_len -= om->m_len; 1196161Ssam bcopy(op+om->m_len, op, 1206161Ssam (unsigned)(m->m_len-sizeof (struct tcpiphdr))); 1215440Swnj } 1225440Swnj } 1235065Swnj tiflags = ti->ti_flags; 1244924Swnj 1256093Sroot /* 1266211Swnj * Drop TCP and IP headers. 1276093Sroot */ 1286093Sroot off += sizeof (struct ip); 1296093Sroot m->m_off += off; 1306093Sroot m->m_len -= off; 1316093Sroot 1324924Swnj /* 1335244Sroot * Convert TCP protocol specific fields to host format. 1345085Swnj */ 1355085Swnj ti->ti_seq = ntohl(ti->ti_seq); 1365085Swnj ti->ti_ack = ntohl(ti->ti_ack); 1375085Swnj ti->ti_win = ntohs(ti->ti_win); 1385085Swnj ti->ti_urp = ntohs(ti->ti_urp); 1395085Swnj 1405085Swnj /* 1418271Sroot * Locate pcb for segment. 1424924Swnj */ 1435065Swnj inp = in_pcblookup 1446028Sroot (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport, 1456028Sroot INPLOOKUP_WILDCARD); 1465065Swnj 1475065Swnj /* 1485065Swnj * If the state is CLOSED (i.e., TCB does not exist) then 1495244Sroot * all data in the incoming segment is discarded. 1505065Swnj */ 1515300Sroot if (inp == 0) 1525085Swnj goto dropwithreset; 1535065Swnj tp = intotcpcb(inp); 1545300Sroot if (tp == 0) 1555085Swnj goto dropwithreset; 1565109Swnj so = inp->inp_socket; 1575267Sroot if (so->so_options & SO_DEBUG) { 1585267Sroot ostate = tp->t_state; 1595267Sroot tcp_saveti = *ti; 1605267Sroot } 1617510Sroot if (so->so_options & SO_ACCEPTCONN) { 1627510Sroot so = sonewconn(so); 1637510Sroot if (so == 0) 1647510Sroot goto drop; 16510769Ssam /* 16610769Ssam * This is ugly, but .... 16710769Ssam * 16810769Ssam * Mark socket as temporary until we're 16910769Ssam * committed to keeping it. The code at 17010769Ssam * ``drop'' and ``dropwithreset'' check the 17110769Ssam * flag dropsocket to see if the temporary 17210769Ssam * socket created here should be discarded. 17310769Ssam * We mark the socket as discardable until 17410769Ssam * we're committed to it below in TCPS_LISTEN. 17510769Ssam */ 17610769Ssam dropsocket++; 1777510Sroot inp = (struct inpcb *)so->so_pcb; 1787510Sroot inp->inp_laddr = ti->ti_dst; 1797510Sroot inp->inp_lport = ti->ti_dport; 1807510Sroot tp = intotcpcb(inp); 1817510Sroot tp->t_state = TCPS_LISTEN; 1827510Sroot } 1834601Swnj 1844601Swnj /* 1855162Swnj * Segment received on connection. 1865162Swnj * Reset idle time and keep-alive timer. 1875162Swnj */ 1885162Swnj tp->t_idle = 0; 1895162Swnj tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; 1905162Swnj 1915162Swnj /* 19217272Skarels * Process options if not in LISTEN state, 19317272Skarels * else do it below (after getting remote address). 1945440Swnj */ 19517272Skarels if (om && tp->t_state != TCPS_LISTEN) { 19617272Skarels tcp_dooptions(tp, om, ti); 1975440Swnj om = 0; 1985440Swnj } 1995440Swnj 2005440Swnj /* 2015085Swnj * Calculate amount of space in receive window, 2025085Swnj * and then do TCP input processing. 2034601Swnj */ 2045085Swnj tp->rcv_wnd = sbspace(&so->so_rcv); 2055231Swnj if (tp->rcv_wnd < 0) 2065231Swnj tp->rcv_wnd = 0; 2074601Swnj 2084601Swnj switch (tp->t_state) { 2094601Swnj 2105065Swnj /* 2115065Swnj * If the state is LISTEN then ignore segment if it contains an RST. 2125065Swnj * If the segment contains an ACK then it is bad and send a RST. 2135065Swnj * If it does not contain a SYN then it is not interesting; drop it. 2145085Swnj * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial 2155065Swnj * tp->iss, and send a segment: 2165085Swnj * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 2175065Swnj * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 2185065Swnj * Fill in remote peer address fields if not previously specified. 2195065Swnj * Enter SYN_RECEIVED state, and process any other fields of this 2205244Sroot * segment in this state. 2215065Swnj */ 2228271Sroot case TCPS_LISTEN: { 22310145Ssam struct mbuf *am; 2248271Sroot register struct sockaddr_in *sin; 2258271Sroot 2265065Swnj if (tiflags & TH_RST) 2275065Swnj goto drop; 2285300Sroot if (tiflags & TH_ACK) 2295085Swnj goto dropwithreset; 2305300Sroot if ((tiflags & TH_SYN) == 0) 2315065Swnj goto drop; 23210145Ssam am = m_get(M_DONTWAIT, MT_SONAME); 23310145Ssam if (am == NULL) 23410145Ssam goto drop; 23510145Ssam am->m_len = sizeof (struct sockaddr_in); 2368599Sroot sin = mtod(am, struct sockaddr_in *); 2378271Sroot sin->sin_family = AF_INET; 2388271Sroot sin->sin_addr = ti->ti_src; 2398271Sroot sin->sin_port = ti->ti_sport; 2406028Sroot laddr = inp->inp_laddr; 24110145Ssam if (inp->inp_laddr.s_addr == INADDR_ANY) 2426028Sroot inp->inp_laddr = ti->ti_dst; 2438599Sroot if (in_pcbconnect(inp, am)) { 2446028Sroot inp->inp_laddr = laddr; 2458716Sroot (void) m_free(am); 2465244Sroot goto drop; 2476028Sroot } 2488716Sroot (void) m_free(am); 2495244Sroot tp->t_template = tcp_template(tp); 2505244Sroot if (tp->t_template == 0) { 2515244Sroot in_pcbdisconnect(inp); 25217264Skarels dropsocket = 0; /* socket is already gone */ 2536028Sroot inp->inp_laddr = laddr; 2546320Swnj tp = 0; 2555244Sroot goto drop; 2565244Sroot } 25717272Skarels if (om) { 25817272Skarels tcp_dooptions(tp, om, ti); 25917272Skarels om = 0; 26017272Skarels } 2615085Swnj tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 2625065Swnj tp->irs = ti->ti_seq; 2635085Swnj tcp_sendseqinit(tp); 2645085Swnj tcp_rcvseqinit(tp); 2655065Swnj tp->t_state = TCPS_SYN_RECEIVED; 2665244Sroot tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; 26710769Ssam dropsocket = 0; /* committed to socket */ 2685085Swnj goto trimthenstep6; 2698271Sroot } 2704601Swnj 2715065Swnj /* 2725065Swnj * If the state is SYN_SENT: 2735065Swnj * if seg contains an ACK, but not for our SYN, drop the input. 2745065Swnj * if seg contains a RST, then drop the connection. 2755065Swnj * if seg does not contain SYN, then drop it. 2765065Swnj * Otherwise this is an acceptable SYN segment 2775065Swnj * initialize tp->rcv_nxt and tp->irs 2785065Swnj * if seg contains ack then advance tp->snd_una 2795065Swnj * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 2805065Swnj * arrange for segment to be acked (eventually) 2815065Swnj * continue processing rest of data/controls, beginning with URG 2825065Swnj */ 2835065Swnj case TCPS_SYN_SENT: 2845065Swnj if ((tiflags & TH_ACK) && 2855300Sroot /* this should be SEQ_LT; is SEQ_LEQ for BBN vax TCP only */ 2865300Sroot (SEQ_LT(ti->ti_ack, tp->iss) || 2875231Swnj SEQ_GT(ti->ti_ack, tp->snd_max))) 2885085Swnj goto dropwithreset; 2895065Swnj if (tiflags & TH_RST) { 29010394Ssam if (tiflags & TH_ACK) 29110394Ssam tp = tcp_drop(tp, ECONNREFUSED); 2925065Swnj goto drop; 2934601Swnj } 2945065Swnj if ((tiflags & TH_SYN) == 0) 2955065Swnj goto drop; 2965231Swnj tp->snd_una = ti->ti_ack; 2975357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 2985357Sroot tp->snd_nxt = tp->snd_una; 2995244Sroot tp->t_timer[TCPT_REXMT] = 0; 3005065Swnj tp->irs = ti->ti_seq; 3015085Swnj tcp_rcvseqinit(tp); 3025085Swnj tp->t_flags |= TF_ACKNOW; 3035162Swnj if (SEQ_GT(tp->snd_una, tp->iss)) { 3045244Sroot soisconnected(so); 3055065Swnj tp->t_state = TCPS_ESTABLISHED; 30617272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 3075162Swnj (void) tcp_reass(tp, (struct tcpiphdr *)0); 3085162Swnj } else 3095085Swnj tp->t_state = TCPS_SYN_RECEIVED; 3105085Swnj goto trimthenstep6; 3115085Swnj 3125085Swnj trimthenstep6: 3135085Swnj /* 3145231Swnj * Advance ti->ti_seq to correspond to first data byte. 3155085Swnj * If data, trim to stay within window, 3165085Swnj * dropping FIN if necessary. 3175085Swnj */ 3185231Swnj ti->ti_seq++; 3195085Swnj if (ti->ti_len > tp->rcv_wnd) { 3205085Swnj todrop = ti->ti_len - tp->rcv_wnd; 3215085Swnj m_adj(m, -todrop); 3225085Swnj ti->ti_len = tp->rcv_wnd; 3235085Swnj ti->ti_flags &= ~TH_FIN; 3245065Swnj } 3255263Swnj tp->snd_wl1 = ti->ti_seq - 1; 3265085Swnj goto step6; 3275065Swnj } 3284601Swnj 3295065Swnj /* 33016222Skarels * If data is received on a connection after the 33116222Skarels * user processes are gone, then RST the other end. 33216222Skarels */ 33316222Skarels if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && 33416222Skarels ti->ti_len) { 33516222Skarels tp = tcp_close(tp); 33616222Skarels goto dropwithreset; 33716222Skarels } 33816222Skarels 33916222Skarels /* 3405065Swnj * States other than LISTEN or SYN_SENT. 3415065Swnj * First check that at least some bytes of segment are within 3425065Swnj * receive window. 3435065Swnj */ 3445065Swnj if (tp->rcv_wnd == 0) { 3455065Swnj /* 3465065Swnj * If window is closed can only take segments at 3475231Swnj * window edge, and have to drop data and PUSH from 3485065Swnj * incoming segments. 3495065Swnj */ 3505300Sroot if (tp->rcv_nxt != ti->ti_seq) 3515065Swnj goto dropafterack; 3525085Swnj if (ti->ti_len > 0) { 3535690Swnj m_adj(m, ti->ti_len); 3545085Swnj ti->ti_len = 0; 3555085Swnj ti->ti_flags &= ~(TH_PUSH|TH_FIN); 3565065Swnj } 3575065Swnj } else { 3585065Swnj /* 3595231Swnj * If segment begins before rcv_nxt, drop leading 3605065Swnj * data (and SYN); if nothing left, just ack. 3615065Swnj */ 3625690Swnj todrop = tp->rcv_nxt - ti->ti_seq; 3635690Swnj if (todrop > 0) { 3645085Swnj if (tiflags & TH_SYN) { 3655300Sroot tiflags &= ~TH_SYN; 3665690Swnj ti->ti_flags &= ~TH_SYN; 3675085Swnj ti->ti_seq++; 3685085Swnj if (ti->ti_urp > 1) 3695085Swnj ti->ti_urp--; 3705085Swnj else 3715085Swnj tiflags &= ~TH_URG; 3725085Swnj todrop--; 3735085Swnj } 3746211Swnj if (todrop > ti->ti_len || 3756211Swnj todrop == ti->ti_len && (tiflags&TH_FIN) == 0) 3765065Swnj goto dropafterack; 3775065Swnj m_adj(m, todrop); 3785065Swnj ti->ti_seq += todrop; 3795065Swnj ti->ti_len -= todrop; 3805085Swnj if (ti->ti_urp > todrop) 3815085Swnj ti->ti_urp -= todrop; 3825085Swnj else { 3835085Swnj tiflags &= ~TH_URG; 3845690Swnj ti->ti_flags &= ~TH_URG; 3855690Swnj ti->ti_urp = 0; 3865085Swnj } 3875065Swnj } 3885065Swnj /* 3895065Swnj * If segment ends after window, drop trailing data 3905085Swnj * (and PUSH and FIN); if nothing left, just ACK. 3915065Swnj */ 3925690Swnj todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); 3935690Swnj if (todrop > 0) { 3946211Swnj if (todrop >= ti->ti_len) 3955065Swnj goto dropafterack; 3965065Swnj m_adj(m, -todrop); 3975065Swnj ti->ti_len -= todrop; 3985085Swnj ti->ti_flags &= ~(TH_PUSH|TH_FIN); 3995065Swnj } 4005065Swnj } 4014601Swnj 4025065Swnj /* 4035065Swnj * If the RST bit is set examine the state: 4045065Swnj * SYN_RECEIVED STATE: 4055065Swnj * If passive open, return to LISTEN state. 4065065Swnj * If active open, inform user that connection was refused. 4075065Swnj * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 4085065Swnj * Inform user that connection was reset, and close tcb. 4095065Swnj * CLOSING, LAST_ACK, TIME_WAIT STATES 4105065Swnj * Close the tcb. 4115065Swnj */ 4125065Swnj if (tiflags&TH_RST) switch (tp->t_state) { 4135267Sroot 4145065Swnj case TCPS_SYN_RECEIVED: 41510394Ssam tp = tcp_drop(tp, ECONNREFUSED); 4165065Swnj goto drop; 4174601Swnj 4185065Swnj case TCPS_ESTABLISHED: 4195065Swnj case TCPS_FIN_WAIT_1: 4205065Swnj case TCPS_FIN_WAIT_2: 4215065Swnj case TCPS_CLOSE_WAIT: 42210394Ssam tp = tcp_drop(tp, ECONNRESET); 4235065Swnj goto drop; 4245065Swnj 4255065Swnj case TCPS_CLOSING: 4265065Swnj case TCPS_LAST_ACK: 4275065Swnj case TCPS_TIME_WAIT: 42810394Ssam tp = tcp_close(tp); 4295065Swnj goto drop; 4304601Swnj } 4314601Swnj 4324601Swnj /* 4335065Swnj * If a SYN is in the window, then this is an 4345065Swnj * error and we send an RST and drop the connection. 4354601Swnj */ 4365065Swnj if (tiflags & TH_SYN) { 43710394Ssam tp = tcp_drop(tp, ECONNRESET); 4385085Swnj goto dropwithreset; 4394601Swnj } 4404601Swnj 4414601Swnj /* 4425065Swnj * If the ACK bit is off we drop the segment and return. 4434601Swnj */ 4445085Swnj if ((tiflags & TH_ACK) == 0) 4455065Swnj goto drop; 4465065Swnj 4475065Swnj /* 4485065Swnj * Ack processing. 4495065Swnj */ 4504601Swnj switch (tp->t_state) { 4514601Swnj 4525065Swnj /* 4535065Swnj * In SYN_RECEIVED state if the ack ACKs our SYN then enter 4545065Swnj * ESTABLISHED state and continue processing, othewise 4555065Swnj * send an RST. 4565065Swnj */ 4575065Swnj case TCPS_SYN_RECEIVED: 4585085Swnj if (SEQ_GT(tp->snd_una, ti->ti_ack) || 4595231Swnj SEQ_GT(ti->ti_ack, tp->snd_max)) 4605085Swnj goto dropwithreset; 4615244Sroot tp->snd_una++; /* SYN acked */ 4625357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 4635357Sroot tp->snd_nxt = tp->snd_una; 4645244Sroot tp->t_timer[TCPT_REXMT] = 0; 4655085Swnj soisconnected(so); 4665085Swnj tp->t_state = TCPS_ESTABLISHED; 46717272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 4685162Swnj (void) tcp_reass(tp, (struct tcpiphdr *)0); 4695244Sroot tp->snd_wl1 = ti->ti_seq - 1; 4705085Swnj /* fall into ... */ 4714601Swnj 4725065Swnj /* 4735065Swnj * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 4745065Swnj * ACKs. If the ack is in the range 4755231Swnj * tp->snd_una < ti->ti_ack <= tp->snd_max 4765065Swnj * then advance tp->snd_una to ti->ti_ack and drop 4775065Swnj * data from the retransmission queue. If this ACK reflects 4785065Swnj * more up to date window information we update our window information. 4795065Swnj */ 4805065Swnj case TCPS_ESTABLISHED: 4815065Swnj case TCPS_FIN_WAIT_1: 4825065Swnj case TCPS_FIN_WAIT_2: 4835065Swnj case TCPS_CLOSE_WAIT: 4845065Swnj case TCPS_CLOSING: 4855244Sroot case TCPS_LAST_ACK: 4865244Sroot case TCPS_TIME_WAIT: 4875085Swnj #define ourfinisacked (acked > 0) 4885085Swnj 4895244Sroot if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) 4905065Swnj break; 4915300Sroot if (SEQ_GT(ti->ti_ack, tp->snd_max)) 4925065Swnj goto dropafterack; 4935085Swnj acked = ti->ti_ack - tp->snd_una; 4945951Swnj 4955951Swnj /* 4965951Swnj * If transmit timer is running and timed sequence 4975951Swnj * number was acked, update smoothed round trip time. 4985951Swnj */ 4995951Swnj if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) { 5005951Swnj if (tp->t_srtt == 0) 5015951Swnj tp->t_srtt = tp->t_rtt; 5025951Swnj else 5035951Swnj tp->t_srtt = 5045951Swnj tcp_alpha * tp->t_srtt + 5055951Swnj (1 - tcp_alpha) * tp->t_rtt; 5065951Swnj tp->t_rtt = 0; 5075951Swnj } 5085951Swnj 5095307Sroot if (ti->ti_ack == tp->snd_max) 5105244Sroot tp->t_timer[TCPT_REXMT] = 0; 5115307Sroot else { 5125244Sroot TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 5135244Sroot tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 5145300Sroot tp->t_rxtshift = 0; 5155085Swnj } 51617360Skarels /* 51717360Skarels * When new data is acked, open the congestion window a bit. 51817360Skarels */ 51917360Skarels if (acked > 0) 52017360Skarels tp->snd_cwnd = MIN(11 * tp->snd_cwnd / 10, 65535); 5215307Sroot if (acked > so->so_snd.sb_cc) { 52215386Ssam tp->snd_wnd -= so->so_snd.sb_cc; 5235307Sroot sbdrop(&so->so_snd, so->so_snd.sb_cc); 5245307Sroot } else { 5256161Ssam sbdrop(&so->so_snd, acked); 5265307Sroot tp->snd_wnd -= acked; 5275307Sroot acked = 0; 5285307Sroot } 5296434Swnj if ((so->so_snd.sb_flags & SB_WAIT) || so->so_snd.sb_sel) 5305300Sroot sowwakeup(so); 5315231Swnj tp->snd_una = ti->ti_ack; 5325357Sroot if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 5335357Sroot tp->snd_nxt = tp->snd_una; 5345162Swnj 5354601Swnj switch (tp->t_state) { 5364601Swnj 5375065Swnj /* 5385065Swnj * In FIN_WAIT_1 STATE in addition to the processing 5395065Swnj * for the ESTABLISHED state if our FIN is now acknowledged 5405085Swnj * then enter FIN_WAIT_2. 5415065Swnj */ 5425065Swnj case TCPS_FIN_WAIT_1: 5435896Swnj if (ourfinisacked) { 5445896Swnj /* 5455896Swnj * If we can't receive any more 5465896Swnj * data, then closing user can proceed. 5475896Swnj */ 5485896Swnj if (so->so_state & SS_CANTRCVMORE) 5495896Swnj soisdisconnected(so); 5505085Swnj tp->t_state = TCPS_FIN_WAIT_2; 55117264Skarels /* 55217264Skarels * This is contrary to the specification, 55317264Skarels * but if we haven't gotten our FIN in 55417264Skarels * 5 minutes, it's not forthcoming. 55521118Skarels tp->t_timer[TCPT_2MSL] = 5 * 60 * PR_SLOWHZ; 55621118Skarels * MUST WORRY ABOUT ONE-WAY CONNECTIONS. 55717264Skarels */ 5585896Swnj } 5594601Swnj break; 5604601Swnj 5615065Swnj /* 5625065Swnj * In CLOSING STATE in addition to the processing for 5635065Swnj * the ESTABLISHED state if the ACK acknowledges our FIN 5645065Swnj * then enter the TIME-WAIT state, otherwise ignore 5655065Swnj * the segment. 5665065Swnj */ 5675065Swnj case TCPS_CLOSING: 5685244Sroot if (ourfinisacked) { 5695065Swnj tp->t_state = TCPS_TIME_WAIT; 5705244Sroot tcp_canceltimers(tp); 5715244Sroot tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 5725244Sroot soisdisconnected(so); 5735244Sroot } 5745244Sroot break; 5754601Swnj 5765065Swnj /* 5775085Swnj * The only thing that can arrive in LAST_ACK state 5785085Swnj * is an acknowledgment of our FIN. If our FIN is now 5795085Swnj * acknowledged, delete the TCB, enter the closed state 5805085Swnj * and return. 5815065Swnj */ 5825065Swnj case TCPS_LAST_ACK: 58310394Ssam if (ourfinisacked) 58410394Ssam tp = tcp_close(tp); 5855065Swnj goto drop; 5864601Swnj 5875065Swnj /* 5885065Swnj * In TIME_WAIT state the only thing that should arrive 5895065Swnj * is a retransmission of the remote FIN. Acknowledge 5905065Swnj * it and restart the finack timer. 5915065Swnj */ 5925065Swnj case TCPS_TIME_WAIT: 5935162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 5945065Swnj goto dropafterack; 5954601Swnj } 5965085Swnj #undef ourfinisacked 5975085Swnj } 5984601Swnj 5995065Swnj step6: 6005065Swnj /* 6015244Sroot * Update window information. 6025244Sroot */ 6035300Sroot if (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && 6045391Swnj (SEQ_LT(tp->snd_wl2, ti->ti_ack) || 6055300Sroot tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)) { 6065244Sroot tp->snd_wnd = ti->ti_win; 6075244Sroot tp->snd_wl1 = ti->ti_seq; 6085244Sroot tp->snd_wl2 = ti->ti_ack; 6095244Sroot } 6105244Sroot 6115244Sroot /* 6125547Swnj * Process segments with URG. 6135065Swnj */ 6147267Swnj if ((tiflags & TH_URG) && ti->ti_urp && 6157267Swnj TCPS_HAVERCVDFIN(tp->t_state) == 0) { 6165547Swnj /* 61713121Ssam * This is a kludge, but if we receive accept 61813121Ssam * random urgent pointers, we'll crash in 61913121Ssam * soreceive. It's hard to imagine someone 62013121Ssam * actually wanting to send this much urgent data. 62112441Ssam */ 62217360Skarels if (ti->ti_urp + (unsigned) so->so_rcv.sb_cc > 32767) { 62312441Ssam ti->ti_urp = 0; /* XXX */ 62412441Ssam tiflags &= ~TH_URG; /* XXX */ 62512441Ssam ti->ti_flags &= ~TH_URG; /* XXX */ 62613121Ssam goto badurp; /* XXX */ 62712441Ssam } 62812441Ssam /* 6295547Swnj * If this segment advances the known urgent pointer, 6305547Swnj * then mark the data stream. This should not happen 6315547Swnj * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 6325547Swnj * a FIN has been received from the remote side. 6335547Swnj * In these states we ignore the URG. 6345547Swnj */ 6355547Swnj if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { 6365547Swnj tp->rcv_up = ti->ti_seq + ti->ti_urp; 6375547Swnj so->so_oobmark = so->so_rcv.sb_cc + 6385547Swnj (tp->rcv_up - tp->rcv_nxt) - 1; 6395547Swnj if (so->so_oobmark == 0) 6405547Swnj so->so_state |= SS_RCVATMARK; 6418313Sroot sohasoutofband(so); 6425547Swnj tp->t_oobflags &= ~TCPOOB_HAVEDATA; 6435440Swnj } 6445547Swnj /* 6455547Swnj * Remove out of band data so doesn't get presented to user. 6465547Swnj * This can happen independent of advancing the URG pointer, 6475547Swnj * but if two URG's are pending at once, some out-of-band 6485547Swnj * data may creep in... ick. 6495547Swnj */ 6507510Sroot if (ti->ti_urp <= ti->ti_len) 6515547Swnj tcp_pulloutofband(so, ti); 6525419Swnj } 65313121Ssam badurp: /* XXX */ 6544601Swnj 6554601Swnj /* 6565065Swnj * Process the segment text, merging it into the TCP sequencing queue, 6575065Swnj * and arranging for acknowledgment of receipt if necessary. 6585065Swnj * This process logically involves adjusting tp->rcv_wnd as data 6595065Swnj * is presented to the user (this happens in tcp_usrreq.c, 6605065Swnj * case PRU_RCVD). If a FIN has already been received on this 6615065Swnj * connection then we just ignore the text. 6624601Swnj */ 66317946Skarels if ((ti->ti_len || (tiflags&TH_FIN)) && 66417946Skarels TCPS_HAVERCVDFIN(tp->t_state) == 0) { 66517946Skarels tiflags = tcp_reass(tp, ti); 6665440Swnj if (tcpnodelack == 0) 6675440Swnj tp->t_flags |= TF_DELACK; 6685440Swnj else 6695440Swnj tp->t_flags |= TF_ACKNOW; 6705244Sroot } else { 6714924Swnj m_freem(m); 6725263Swnj tiflags &= ~TH_FIN; 6735244Sroot } 6744601Swnj 6754601Swnj /* 6765263Swnj * If FIN is received ACK the FIN and let the user know 6775263Swnj * that the connection is closing. 6784601Swnj */ 6795263Swnj if (tiflags & TH_FIN) { 6805244Sroot if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 6815244Sroot socantrcvmore(so); 6825244Sroot tp->t_flags |= TF_ACKNOW; 6835244Sroot tp->rcv_nxt++; 6845244Sroot } 6855065Swnj switch (tp->t_state) { 6864601Swnj 6875065Swnj /* 6885065Swnj * In SYN_RECEIVED and ESTABLISHED STATES 6895065Swnj * enter the CLOSE_WAIT state. 6904884Swnj */ 6915065Swnj case TCPS_SYN_RECEIVED: 6925065Swnj case TCPS_ESTABLISHED: 6935065Swnj tp->t_state = TCPS_CLOSE_WAIT; 6945065Swnj break; 6954884Swnj 6965065Swnj /* 6975085Swnj * If still in FIN_WAIT_1 STATE FIN has not been acked so 6985085Swnj * enter the CLOSING state. 6994884Swnj */ 7005065Swnj case TCPS_FIN_WAIT_1: 7015085Swnj tp->t_state = TCPS_CLOSING; 7025065Swnj break; 7034601Swnj 7045065Swnj /* 7055065Swnj * In FIN_WAIT_2 state enter the TIME_WAIT state, 7065065Swnj * starting the time-wait timer, turning off the other 7075065Swnj * standard timers. 7085065Swnj */ 7095065Swnj case TCPS_FIN_WAIT_2: 7105244Sroot tp->t_state = TCPS_TIME_WAIT; 7115074Swnj tcp_canceltimers(tp); 7125162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 7135244Sroot soisdisconnected(so); 7145065Swnj break; 7155065Swnj 7164884Swnj /* 7175065Swnj * In TIME_WAIT state restart the 2 MSL time_wait timer. 7184884Swnj */ 7195065Swnj case TCPS_TIME_WAIT: 7205162Swnj tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 7215065Swnj break; 7225085Swnj } 7234601Swnj } 7245267Sroot if (so->so_options & SO_DEBUG) 7255267Sroot tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); 7265085Swnj 7275085Swnj /* 7285085Swnj * Return any desired output. 7295085Swnj */ 7306161Ssam (void) tcp_output(tp); 7315065Swnj return; 7325085Swnj 7335065Swnj dropafterack: 7345085Swnj /* 7356211Swnj * Generate an ACK dropping incoming segment if it occupies 7366211Swnj * sequence space, where the ACK reflects our state. 7375085Swnj */ 7386211Swnj if ((tiflags&TH_RST) || 7396211Swnj tlen == 0 && (tiflags&(TH_SYN|TH_FIN)) == 0) 7405085Swnj goto drop; 7416303Sroot if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 7426303Sroot tcp_trace(TA_RESPOND, ostate, tp, &tcp_saveti, 0); 7435391Swnj tcp_respond(tp, ti, tp->rcv_nxt, tp->snd_nxt, TH_ACK); 7445231Swnj return; 7455085Swnj 7465085Swnj dropwithreset: 74711731Ssam if (om) { 7486161Ssam (void) m_free(om); 74911731Ssam om = 0; 75011731Ssam } 7515085Swnj /* 7525244Sroot * Generate a RST, dropping incoming segment. 7535085Swnj * Make ACK acceptable to originator of segment. 7545085Swnj */ 7555085Swnj if (tiflags & TH_RST) 7565085Swnj goto drop; 7575085Swnj if (tiflags & TH_ACK) 7585391Swnj tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST); 7595085Swnj else { 7605085Swnj if (tiflags & TH_SYN) 7615085Swnj ti->ti_len++; 7626211Swnj tcp_respond(tp, ti, ti->ti_seq+ti->ti_len, (tcp_seq)0, 7636211Swnj TH_RST|TH_ACK); 7645085Swnj } 76510769Ssam /* destroy temporarily created socket */ 76610769Ssam if (dropsocket) 76710769Ssam (void) soabort(so); 7685231Swnj return; 7695085Swnj 7705065Swnj drop: 77111730Ssam if (om) 77211730Ssam (void) m_free(om); 7735085Swnj /* 7745085Swnj * Drop space held by incoming segment and return. 7755085Swnj */ 7766303Sroot if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 7776303Sroot tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 7785065Swnj m_freem(m); 77910769Ssam /* destroy temporarily created socket */ 78010769Ssam if (dropsocket) 78110769Ssam (void) soabort(so); 7825267Sroot return; 7835065Swnj } 7845065Swnj 78517272Skarels tcp_dooptions(tp, om, ti) 7865440Swnj struct tcpcb *tp; 7875440Swnj struct mbuf *om; 78817272Skarels struct tcpiphdr *ti; 7895419Swnj { 7905440Swnj register u_char *cp; 7915440Swnj int opt, optlen, cnt; 7925419Swnj 7935440Swnj cp = mtod(om, u_char *); 7945440Swnj cnt = om->m_len; 7955440Swnj for (; cnt > 0; cnt -= optlen, cp += optlen) { 7965440Swnj opt = cp[0]; 7975440Swnj if (opt == TCPOPT_EOL) 7985440Swnj break; 7995440Swnj if (opt == TCPOPT_NOP) 8005440Swnj optlen = 1; 80112169Ssam else { 8025440Swnj optlen = cp[1]; 80312169Ssam if (optlen <= 0) 80412169Ssam break; 80512169Ssam } 8065440Swnj switch (opt) { 8075440Swnj 8085440Swnj default: 8095440Swnj break; 8105440Swnj 8115440Swnj case TCPOPT_MAXSEG: 8125440Swnj if (optlen != 4) 8135440Swnj continue; 81417272Skarels if (!(ti->ti_flags & TH_SYN)) 81517272Skarels continue; 8165440Swnj tp->t_maxseg = *(u_short *)(cp + 2); 8176161Ssam tp->t_maxseg = ntohs((u_short)tp->t_maxseg); 81817272Skarels tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); 8195440Swnj break; 8205419Swnj } 8215419Swnj } 8226161Ssam (void) m_free(om); 8235419Swnj } 8245419Swnj 8255419Swnj /* 8265547Swnj * Pull out of band byte out of a segment so 8275547Swnj * it doesn't appear in the user's data queue. 8285547Swnj * It is still reflected in the segment length for 8295547Swnj * sequencing purposes. 8305547Swnj */ 8315547Swnj tcp_pulloutofband(so, ti) 8325547Swnj struct socket *so; 8335547Swnj struct tcpiphdr *ti; 8345547Swnj { 8355547Swnj register struct mbuf *m; 8366116Swnj int cnt = ti->ti_urp - 1; 8375547Swnj 8385547Swnj m = dtom(ti); 8395547Swnj while (cnt >= 0) { 8405547Swnj if (m->m_len > cnt) { 8415547Swnj char *cp = mtod(m, caddr_t) + cnt; 8425547Swnj struct tcpcb *tp = sototcpcb(so); 8435547Swnj 8445547Swnj tp->t_iobc = *cp; 8455547Swnj tp->t_oobflags |= TCPOOB_HAVEDATA; 8466161Ssam bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); 8475547Swnj m->m_len--; 8485547Swnj return; 8495547Swnj } 8505547Swnj cnt -= m->m_len; 8515547Swnj m = m->m_next; 8525547Swnj if (m == 0) 8535547Swnj break; 8545547Swnj } 8555547Swnj panic("tcp_pulloutofband"); 8565547Swnj } 8575547Swnj 8585547Swnj /* 8595065Swnj * Insert segment ti into reassembly queue of tcp with 8605065Swnj * control block tp. Return TH_FIN if reassembly now includes 8615065Swnj * a segment with FIN. 8625065Swnj */ 8635109Swnj tcp_reass(tp, ti) 8645065Swnj register struct tcpcb *tp; 8655065Swnj register struct tcpiphdr *ti; 8665065Swnj { 8675065Swnj register struct tcpiphdr *q; 8685085Swnj struct socket *so = tp->t_inpcb->inp_socket; 8695263Swnj struct mbuf *m; 8705263Swnj int flags; 8715065Swnj 8725065Swnj /* 8735162Swnj * Call with ti==0 after become established to 8745162Swnj * force pre-ESTABLISHED data up to user socket. 8755065Swnj */ 8765162Swnj if (ti == 0) 8775065Swnj goto present; 8784601Swnj 8795065Swnj /* 8805065Swnj * Find a segment which begins after this one does. 8815065Swnj */ 8825065Swnj for (q = tp->seg_next; q != (struct tcpiphdr *)tp; 8835065Swnj q = (struct tcpiphdr *)q->ti_next) 8845065Swnj if (SEQ_GT(q->ti_seq, ti->ti_seq)) 8855065Swnj break; 8864601Swnj 8875065Swnj /* 8885065Swnj * If there is a preceding segment, it may provide some of 8895065Swnj * our data already. If so, drop the data from the incoming 8905065Swnj * segment. If it provides all of our data, drop us. 8915065Swnj */ 8925065Swnj if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { 8935065Swnj register int i; 8945690Swnj q = (struct tcpiphdr *)q->ti_prev; 8955065Swnj /* conversion to int (in i) handles seq wraparound */ 8965065Swnj i = q->ti_seq + q->ti_len - ti->ti_seq; 8975065Swnj if (i > 0) { 8984924Swnj if (i >= ti->ti_len) 8995065Swnj goto drop; 9007338Swnj m_adj(dtom(ti), i); 9015065Swnj ti->ti_len -= i; 9024924Swnj ti->ti_seq += i; 9034601Swnj } 9045065Swnj q = (struct tcpiphdr *)(q->ti_next); 9055065Swnj } 9064601Swnj 9075065Swnj /* 9085065Swnj * While we overlap succeeding segments trim them or, 9095065Swnj * if they are completely covered, dequeue them. 9105065Swnj */ 9115690Swnj while (q != (struct tcpiphdr *)tp) { 9125065Swnj register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; 9135690Swnj if (i <= 0) 9145690Swnj break; 9155065Swnj if (i < q->ti_len) { 9165690Swnj q->ti_seq += i; 9175065Swnj q->ti_len -= i; 9185065Swnj m_adj(dtom(q), i); 9195065Swnj break; 9204601Swnj } 9215065Swnj q = (struct tcpiphdr *)q->ti_next; 9225623Swnj m = dtom(q->ti_prev); 9235065Swnj remque(q->ti_prev); 9245623Swnj m_freem(m); 9255065Swnj } 9264601Swnj 9275065Swnj /* 9285065Swnj * Stick new segment in its place. 9295065Swnj */ 9305065Swnj insque(ti, q->ti_prev); 9314601Swnj 9325065Swnj present: 9335065Swnj /* 9345244Sroot * Present data to user, advancing rcv_nxt through 9355244Sroot * completed sequence space. 9365065Swnj */ 9375263Swnj if (TCPS_HAVERCVDSYN(tp->t_state) == 0) 9385244Sroot return (0); 9394924Swnj ti = tp->seg_next; 9405263Swnj if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) 9415263Swnj return (0); 9425263Swnj if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) 9435263Swnj return (0); 9445263Swnj do { 9455244Sroot tp->rcv_nxt += ti->ti_len; 9465244Sroot flags = ti->ti_flags & TH_FIN; 9474924Swnj remque(ti); 9485263Swnj m = dtom(ti); 9494924Swnj ti = (struct tcpiphdr *)ti->ti_next; 9505263Swnj if (so->so_state & SS_CANTRCVMORE) 9516161Ssam m_freem(m); 95210145Ssam else 9535263Swnj sbappend(&so->so_rcv, m); 9545263Swnj } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); 9555263Swnj sorwakeup(so); 9565065Swnj return (flags); 9575065Swnj drop: 9585065Swnj m_freem(dtom(ti)); 9595263Swnj return (0); 9604601Swnj } 96117272Skarels 96217272Skarels /* 96317272Skarels * Determine a reasonable value for maxseg size. 96417272Skarels * If the route is known, use one that can be handled 96517272Skarels * on the given interface without forcing IP to fragment. 966*23975Skarels * If bigger than a page (CLBYTES), round down to nearest pagesize 96717272Skarels * to utilize pagesize mbufs. 96817272Skarels * If interface pointer is unavailable, or the destination isn't local, 969*23975Skarels * use a conservative size (512 or the default IP max size, but no more 970*23975Skarels * than the mtu of the interface through which we route), 97117272Skarels * as we can't discover anything about intervening gateways or networks. 97217272Skarels * 97317272Skarels * This is ugly, and doesn't belong at this level, but has to happen somehow. 97417272Skarels */ 97517272Skarels tcp_mss(tp) 976*23975Skarels register struct tcpcb *tp; 97717272Skarels { 97817272Skarels struct route *ro; 97917272Skarels struct ifnet *ifp; 98017272Skarels int mss; 98117272Skarels struct inpcb *inp; 98217272Skarels 98317272Skarels inp = tp->t_inpcb; 98417272Skarels ro = &inp->inp_route; 98517272Skarels if ((ro->ro_rt == (struct rtentry *)0) || 98617272Skarels (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) { 98717272Skarels /* No route yet, so try to acquire one */ 98817272Skarels if (inp->inp_faddr.s_addr != INADDR_ANY) { 98917272Skarels ro->ro_dst.sa_family = AF_INET; 99017272Skarels ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 99117272Skarels inp->inp_faddr; 99217272Skarels rtalloc(ro); 99317272Skarels } 99417272Skarels if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0) 99517316Skarels return (TCP_MSS); 99617272Skarels } 99717272Skarels 99817272Skarels mss = ifp->if_mtu - sizeof(struct tcpiphdr); 99917272Skarels #if (CLBYTES & (CLBYTES - 1)) == 0 100017272Skarels if (mss > CLBYTES) 100117272Skarels mss &= ~(CLBYTES-1); 100217272Skarels #else 100317272Skarels if (mss > CLBYTES) 100417272Skarels mss = mss / CLBYTES * CLBYTES; 100517272Skarels #endif 1006*23975Skarels if (in_localaddr(inp->inp_faddr)) 1007*23975Skarels return (mss); 100817316Skarels return (MIN(mss, TCP_MSS)); 100917272Skarels } 1010