123193Smckusick /* 2*44378Skarels * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. 332789Sbostic * All rights reserved. 423193Smckusick * 532789Sbostic * Redistribution and use in source and binary forms are permitted 634855Sbostic * provided that the above copyright notice and this paragraph are 734855Sbostic * duplicated in all such forms and that any documentation, 834855Sbostic * advertising materials, and other materials related to such 934855Sbostic * distribution and use acknowledge that the software was developed 1034855Sbostic * by the University of California, Berkeley. The name of the 1134855Sbostic * University may not be used to endorse or promote products derived 1234855Sbostic * from this software without specific prior written permission. 1334855Sbostic * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1434855Sbostic * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1534855Sbostic * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1632789Sbostic * 17*44378Skarels * @(#)tcp_subr.c 7.17 (Berkeley) 06/28/90 1823193Smckusick */ 195068Swnj 2017064Sbloom #include "param.h" 2117064Sbloom #include "systm.h" 2240691Skarels #include "malloc.h" 2317064Sbloom #include "mbuf.h" 2417064Sbloom #include "socket.h" 2517064Sbloom #include "socketvar.h" 2617064Sbloom #include "protosw.h" 2717064Sbloom #include "errno.h" 2810896Ssam 2910896Ssam #include "../net/route.h" 3010896Ssam #include "../net/if.h" 3110896Ssam 3217064Sbloom #include "in.h" 3317064Sbloom #include "in_systm.h" 3417064Sbloom #include "ip.h" 3540691Skarels #include "in_pcb.h" 3617064Sbloom #include "ip_var.h" 3717064Sbloom #include "ip_icmp.h" 3817064Sbloom #include "tcp.h" 3917064Sbloom #include "tcp_fsm.h" 4017064Sbloom #include "tcp_seq.h" 4117064Sbloom #include "tcp_timer.h" 4217064Sbloom #include "tcp_var.h" 4317064Sbloom #include "tcpip.h" 445068Swnj 45*44378Skarels /* patchable/settable parameters for tcp */ 4631395Skarels int tcp_ttl = TCP_TTL; 47*44378Skarels int tcp_mssdflt = TCP_MSS; 48*44378Skarels int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 4931395Skarels 50*44378Skarels extern struct inpcb *tcp_last_inpcb; 51*44378Skarels 525068Swnj /* 535068Swnj * Tcp initialization 545068Swnj */ 555068Swnj tcp_init() 565068Swnj { 575068Swnj 585068Swnj tcp_iss = 1; /* wrong */ 595068Swnj tcb.inp_next = tcb.inp_prev = &tcb; 6040691Skarels if (max_protohdr < sizeof(struct tcpiphdr)) 6140691Skarels max_protohdr = sizeof(struct tcpiphdr); 6240691Skarels if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) 6340691Skarels panic("tcp_init"); 645068Swnj } 655068Swnj 665068Swnj /* 675068Swnj * Create template to be used to send tcp packets on a connection. 685068Swnj * Call after host entry created, allocates an mbuf and fills 695068Swnj * in a skeletal tcp/ip header, minimizing the amount of work 705068Swnj * necessary when the connection is used. 715068Swnj */ 725068Swnj struct tcpiphdr * 735068Swnj tcp_template(tp) 745068Swnj struct tcpcb *tp; 755068Swnj { 765068Swnj register struct inpcb *inp = tp->t_inpcb; 775068Swnj register struct mbuf *m; 785068Swnj register struct tcpiphdr *n; 795068Swnj 8026815Skarels if ((n = tp->t_template) == 0) { 8132101Skarels m = m_get(M_DONTWAIT, MT_HEADER); 8226815Skarels if (m == NULL) 8326815Skarels return (0); 8426815Skarels m->m_len = sizeof (struct tcpiphdr); 8526815Skarels n = mtod(m, struct tcpiphdr *); 8626815Skarels } 875068Swnj n->ti_next = n->ti_prev = 0; 885068Swnj n->ti_x1 = 0; 895068Swnj n->ti_pr = IPPROTO_TCP; 905068Swnj n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); 915068Swnj n->ti_src = inp->inp_laddr; 925068Swnj n->ti_dst = inp->inp_faddr; 935068Swnj n->ti_sport = inp->inp_lport; 945068Swnj n->ti_dport = inp->inp_fport; 955068Swnj n->ti_seq = 0; 965089Swnj n->ti_ack = 0; 975068Swnj n->ti_x2 = 0; 985068Swnj n->ti_off = 5; 995068Swnj n->ti_flags = 0; 1005068Swnj n->ti_win = 0; 1015068Swnj n->ti_sum = 0; 1025068Swnj n->ti_urp = 0; 1035068Swnj return (n); 1045068Swnj } 1055068Swnj 1065068Swnj /* 1075164Swnj * Send a single message to the TCP at address specified by 108*44378Skarels * the given TCP/IP header. If m == 0, then we make a copy 1095164Swnj * of the tcpiphdr at ti and send directly to the addressed host. 1105164Swnj * This is used to force keep alive messages out using the TCP 1115164Swnj * template for a connection tp->t_template. If flags are given 1125164Swnj * then we send a message back to the TCP which originated the 1135164Swnj * segment ti, and discard the mbuf containing it and any other 1145164Swnj * attached mbufs. 1155164Swnj * 1165164Swnj * In any case the ack and sequence number of the transmitted 1175164Swnj * segment are as specified by the parameters. 1185068Swnj */ 11940691Skarels tcp_respond(tp, ti, m, ack, seq, flags) 1205392Swnj struct tcpcb *tp; 1215068Swnj register struct tcpiphdr *ti; 12240691Skarels register struct mbuf *m; 1235089Swnj tcp_seq ack, seq; 1245068Swnj int flags; 1255068Swnj { 1266212Swnj int win = 0, tlen; 1276353Ssam struct route *ro = 0; 1285068Swnj 1296353Ssam if (tp) { 1305392Swnj win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); 1316353Ssam ro = &tp->t_inpcb->inp_route; 1326353Ssam } 13340691Skarels if (m == 0) { 13440691Skarels m = m_gethdr(M_DONTWAIT, MT_HEADER); 13510144Ssam if (m == NULL) 1365164Swnj return; 13731727Skarels #ifdef TCP_COMPAT_42 13831727Skarels tlen = 1; 13931727Skarels #else 14031727Skarels tlen = 0; 14131727Skarels #endif 14240691Skarels m->m_data += max_linkhdr; 1435164Swnj *mtod(m, struct tcpiphdr *) = *ti; 1445164Swnj ti = mtod(m, struct tcpiphdr *); 1455164Swnj flags = TH_ACK; 1465164Swnj } else { 1475164Swnj m_freem(m->m_next); 1485164Swnj m->m_next = 0; 14940691Skarels m->m_data = (caddr_t)ti; 150*44378Skarels m->m_len = sizeof (struct tcpiphdr); 15130762Skarels tlen = 0; 1525089Swnj #define xchg(a,b,type) { type t; t=a; a=b; b=t; } 1535164Swnj xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long); 1545164Swnj xchg(ti->ti_dport, ti->ti_sport, u_short); 1555068Swnj #undef xchg 1565164Swnj } 157*44378Skarels m->m_len = sizeof (struct tcpiphdr) + tlen; 1585089Swnj ti->ti_next = ti->ti_prev = 0; 1595089Swnj ti->ti_x1 = 0; 1609185Ssam ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); 1618942Sroot ti->ti_seq = htonl(seq); 1628942Sroot ti->ti_ack = htonl(ack); 1635089Swnj ti->ti_x2 = 0; 1645089Swnj ti->ti_off = sizeof (struct tcphdr) >> 2; 1655068Swnj ti->ti_flags = flags; 1669185Ssam ti->ti_win = htons((u_short)win); 1675392Swnj ti->ti_urp = 0; 1686304Sroot ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + tlen); 1696212Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + tlen; 17031395Skarels ((struct ip *)ti)->ip_ttl = tcp_ttl; 1716353Ssam (void) ip_output(m, (struct mbuf *)0, ro, 0); 1725068Swnj } 1735075Swnj 1745089Swnj /* 1755089Swnj * Create a new TCP control block, making an 1765089Swnj * empty reassembly queue and hooking it to the argument 1775089Swnj * protocol control block. 1785089Swnj */ 1795075Swnj struct tcpcb * 1805075Swnj tcp_newtcpcb(inp) 1815075Swnj struct inpcb *inp; 1825075Swnj { 1839644Ssam struct mbuf *m = m_getclr(M_DONTWAIT, MT_PCB); 1845075Swnj register struct tcpcb *tp; 1855075Swnj 18610144Ssam if (m == NULL) 18710144Ssam return ((struct tcpcb *)0); 1885075Swnj tp = mtod(m, struct tcpcb *); 1895075Swnj tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; 190*44378Skarels tp->t_maxseg = tcp_mssdflt; 191*44378Skarels 1926470Sroot tp->t_flags = 0; /* sends options! */ 1935075Swnj tp->t_inpcb = inp; 19431726Skarels /* 19531757Skarels * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no 19631757Skarels * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives 19731757Skarels * reasonable initial retransmit time. 19831726Skarels */ 19931757Skarels tp->t_srtt = TCPTV_SRTTBASE; 200*44378Skarels tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2; 201*44378Skarels tp->t_rttmin = TCPTV_MIN; 20232374Skarels TCPT_RANGESET(tp->t_rxtcur, 20332374Skarels ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, 20432374Skarels TCPTV_MIN, TCPTV_REXMTMAX); 205*44378Skarels tp->snd_cwnd = TCP_MAXWIN; 206*44378Skarels tp->snd_ssthresh = TCP_MAXWIN; 207*44378Skarels inp->inp_ip.ip_ttl = tcp_ttl; 2085075Swnj inp->inp_ppcb = (caddr_t)tp; 2095075Swnj return (tp); 2105075Swnj } 2115075Swnj 2125089Swnj /* 2135089Swnj * Drop a TCP connection, reporting 2145089Swnj * the specified error. If connection is synchronized, 2155089Swnj * then send a RST to peer. 2165089Swnj */ 21710395Ssam struct tcpcb * 2185075Swnj tcp_drop(tp, errno) 21910395Ssam register struct tcpcb *tp; 2205075Swnj int errno; 2215075Swnj { 2225075Swnj struct socket *so = tp->t_inpcb->inp_socket; 2235075Swnj 2245286Sroot if (TCPS_HAVERCVDSYN(tp->t_state)) { 2255075Swnj tp->t_state = TCPS_CLOSED; 2268776Sroot (void) tcp_output(tp); 22730524Skarels tcpstat.tcps_drops++; 22830524Skarels } else 22930524Skarels tcpstat.tcps_conndrops++; 230*44378Skarels if (errno == ETIMEDOUT && tp->t_softerror) 231*44378Skarels errno = tp->t_softerror; 2325075Swnj so->so_error = errno; 23310395Ssam return (tcp_close(tp)); 2345075Swnj } 2355075Swnj 2365089Swnj /* 2375089Swnj * Close a TCP control block: 2385089Swnj * discard all space held by the tcp 2395089Swnj * discard internet protocol block 2405089Swnj * wake up any sleepers 2415089Swnj */ 24210395Ssam struct tcpcb * 2435075Swnj tcp_close(tp) 2445075Swnj register struct tcpcb *tp; 2455075Swnj { 2465075Swnj register struct tcpiphdr *t; 2475261Swnj struct inpcb *inp = tp->t_inpcb; 2485261Swnj struct socket *so = inp->inp_socket; 24912422Ssam register struct mbuf *m; 250*44378Skarels #ifdef RTV_RTT 251*44378Skarels register struct rtentry *rt; 2525075Swnj 253*44378Skarels /* 254*44378Skarels * If we sent enough data to get some meaningful characteristics, 255*44378Skarels * save them in the routing entry. 'Enough' is arbitrarily 256*44378Skarels * defined as 4K (default tcp_sendspace) * 16. This would 257*44378Skarels * give us 16 rtt samples assuming we only get one sample per 258*44378Skarels * window (the usual case on a long haul net). 16 samples is 259*44378Skarels * enough for the srtt filter to converge to within 5% of the correct 260*44378Skarels * value; fewer samples and we could save a very bogus rtt. 261*44378Skarels * 262*44378Skarels * Don't update the default route's characteristics and don't 263*44378Skarels * update anything that the user "locked". 264*44378Skarels */ 265*44378Skarels if (SEQ_LT(tp->iss+(4096*16), tp->snd_max) && 266*44378Skarels (rt = inp->inp_route.ro_rt) && 267*44378Skarels ((struct sockaddr_in *) rt_key(rt))->sin_addr.s_addr != 268*44378Skarels INADDR_ANY) { 269*44378Skarels register u_long i; 270*44378Skarels 271*44378Skarels if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { 272*44378Skarels i = tp->t_srtt * 273*44378Skarels (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); 274*44378Skarels if (rt->rt_rmx.rmx_rtt && i) 275*44378Skarels /* 276*44378Skarels * filter this update to half the old & half 277*44378Skarels * the new values, converting scale. 278*44378Skarels * See route.h and tcp_var.h for a 279*44378Skarels * description of the scaling constants. 280*44378Skarels */ 281*44378Skarels rt->rt_rmx.rmx_rtt = 282*44378Skarels (rt->rt_rmx.rmx_rtt + i) / 2; 283*44378Skarels else 284*44378Skarels rt->rt_rmx.rmx_rtt = i; 285*44378Skarels } 286*44378Skarels if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { 287*44378Skarels i = tp->t_rttvar * 288*44378Skarels (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); 289*44378Skarels if (rt->rt_rmx.rmx_rttvar && i) 290*44378Skarels rt->rt_rmx.rmx_rttvar = 291*44378Skarels (rt->rt_rmx.rmx_rttvar + i) / 2; 292*44378Skarels else 293*44378Skarels rt->rt_rmx.rmx_rttvar = i; 294*44378Skarels } 295*44378Skarels /* 296*44378Skarels * update the pipelimit (ssthresh) if it has been updated 297*44378Skarels * already or if a pipesize was specified & the threshhold 298*44378Skarels * got below half the pipesize. I.e., wait for bad news 299*44378Skarels * before we start updating, then update on both good 300*44378Skarels * and bad news. 301*44378Skarels */ 302*44378Skarels if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && 303*44378Skarels (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh || 304*44378Skarels i < (rt->rt_rmx.rmx_sendpipe / 2)) { 305*44378Skarels /* 306*44378Skarels * convert the limit from user data bytes to 307*44378Skarels * packets then to packet data bytes. 308*44378Skarels */ 309*44378Skarels i = (i + tp->t_maxseg / 2) / tp->t_maxseg; 310*44378Skarels if (i < 2) 311*44378Skarels i = 2; 312*44378Skarels i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr)); 313*44378Skarels if (rt->rt_rmx.rmx_ssthresh) 314*44378Skarels rt->rt_rmx.rmx_ssthresh = 315*44378Skarels (rt->rt_rmx.rmx_ssthresh + i) / 2; 316*44378Skarels else 317*44378Skarels rt->rt_rmx.rmx_ssthresh = i; 318*44378Skarels } 319*44378Skarels } 320*44378Skarels #endif RTV_RTT 321*44378Skarels /* free the reassembly queue, if any */ 3225075Swnj t = tp->seg_next; 32312422Ssam while (t != (struct tcpiphdr *)tp) { 32412422Ssam t = (struct tcpiphdr *)t->ti_next; 325*44378Skarels m = REASS_MBUF((struct tcpiphdr *)t->ti_prev); 32612422Ssam remque(t->ti_prev); 32712422Ssam m_freem(m); 32812422Ssam } 3295089Swnj if (tp->t_template) 3305075Swnj (void) m_free(dtom(tp->t_template)); 3315075Swnj (void) m_free(dtom(tp)); 3325261Swnj inp->inp_ppcb = 0; 3336472Sroot soisdisconnected(so); 334*44378Skarels /* clobber input pcb cache if we're closing the cached connection */ 335*44378Skarels if (inp == tcp_last_inpcb) 336*44378Skarels tcp_last_inpcb = &tcb; 3375269Sroot in_pcbdetach(inp); 33830524Skarels tcpstat.tcps_closed++; 33910395Ssam return ((struct tcpcb *)0); 3405075Swnj } 3415075Swnj 3425075Swnj tcp_drain() 3435075Swnj { 3445075Swnj 3455075Swnj } 3465075Swnj 34730233Skarels /* 34830233Skarels * Notify a tcp user of an asynchronous error; 349*44378Skarels * store error as soft error, but wake up user 350*44378Skarels * (for now, won't do anything until can select for soft error). 35130233Skarels */ 352*44378Skarels tcp_notify(inp, error) 35330233Skarels register struct inpcb *inp; 354*44378Skarels int error; 35530233Skarels { 35630233Skarels 357*44378Skarels ((struct tcpcb *)inp->inp_ppcb)->t_softerror = error; 35830233Skarels wakeup((caddr_t) &inp->inp_socket->so_timeo); 35930233Skarels sorwakeup(inp->inp_socket); 36030233Skarels sowwakeup(inp->inp_socket); 36130233Skarels } 36240691Skarels 36340691Skarels tcp_ctlinput(cmd, sa, ip) 3646584Ssam int cmd; 36524818Skarels struct sockaddr *sa; 36640691Skarels register struct ip *ip; 3675075Swnj { 36840691Skarels register struct tcphdr *th; 36940691Skarels extern struct in_addr zeroin_addr; 3706591Ssam extern u_char inetctlerrmap[]; 37140691Skarels int (*notify)() = tcp_notify, tcp_quench(); 3726591Ssam 37340691Skarels if (cmd == PRC_QUENCH) 37440691Skarels notify = tcp_quench; 37540691Skarels else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) 3766591Ssam return; 37740691Skarels if (ip) { 37840691Skarels th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 37940691Skarels in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, 38040691Skarels cmd, notify); 38140691Skarels } else 38240691Skarels in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); 3835075Swnj } 38417359Skarels 38517359Skarels /* 38617359Skarels * When a source quench is received, close congestion window 38731442Skarels * to one segment. We will gradually open it again as we proceed. 38817359Skarels */ 38917359Skarels tcp_quench(inp) 39017359Skarels struct inpcb *inp; 39117359Skarels { 39217359Skarels struct tcpcb *tp = intotcpcb(inp); 39317359Skarels 39424818Skarels if (tp) 39531442Skarels tp->snd_cwnd = tp->t_maxseg; 39617359Skarels } 397