123196Smckusick /* 2*69642Skarels * Copyright (c) 1982, 1986, 1988, 1993, 1995 363218Sbostic * The Regents of the University of California. All rights reserved. 423196Smckusick * 544491Sbostic * %sccs.include.redist.c% 632789Sbostic * 7*69642Skarels * @(#)tcp_usrreq.c 8.4 (Berkeley) 05/24/95 823196Smckusick */ 94567Swnj 1056531Sbostic #include <sys/param.h> 1156531Sbostic #include <sys/systm.h> 1256531Sbostic #include <sys/malloc.h> 1356531Sbostic #include <sys/mbuf.h> 1456531Sbostic #include <sys/socket.h> 1556531Sbostic #include <sys/socketvar.h> 1656531Sbostic #include <sys/protosw.h> 1756531Sbostic #include <sys/errno.h> 1856531Sbostic #include <sys/stat.h> 198697Sroot 2056531Sbostic #include <net/if.h> 2156531Sbostic #include <net/route.h> 2210896Ssam 2356531Sbostic #include <netinet/in.h> 2456531Sbostic #include <netinet/in_systm.h> 2556531Sbostic #include <netinet/ip.h> 2656531Sbostic #include <netinet/in_pcb.h> 2756531Sbostic #include <netinet/ip_var.h> 2856531Sbostic #include <netinet/tcp.h> 2956531Sbostic #include <netinet/tcp_fsm.h> 3056531Sbostic #include <netinet/tcp_seq.h> 3156531Sbostic #include <netinet/tcp_timer.h> 3256531Sbostic #include <netinet/tcp_var.h> 3356531Sbostic #include <netinet/tcpip.h> 3456531Sbostic #include <netinet/tcp_debug.h> 354497Swnj 365280Sroot /* 375280Sroot * TCP protocol interface to socket abstraction. 385280Sroot */ 395280Sroot extern char *tcpstates[]; 405280Sroot 414734Swnj /* 425280Sroot * Process a TCP user request for TCP tb. If this is a send request 434731Swnj * then m is the mbuf chain of send data. If this is a timer expiration 444731Swnj * (called from the software clock routine), then timertype tells which timer. 454731Swnj */ 468601Sroot /*ARGSUSED*/ 4761335Sbostic int 4842184Skarels tcp_usrreq(so, req, m, nam, control) 494809Swnj struct socket *so; 504809Swnj int req; 5142184Skarels struct mbuf *m, *nam, *control; 524497Swnj { 5330909Skarels register struct inpcb *inp; 544911Swnj register struct tcpcb *tp; 5530909Skarels int s; 564809Swnj int error = 0; 575270Sroot int ostate; 584497Swnj 5930909Skarels if (req == PRU_CONTROL) 6068170Scgd return (in_control(so, (u_long)m, (caddr_t)nam, 6142184Skarels (struct ifnet *)control)); 6242184Skarels if (control && control->m_len) { 6342184Skarels m_freem(control); 6442184Skarels if (m) 6542184Skarels m_freem(m); 6612766Ssam return (EINVAL); 6742184Skarels } 6830909Skarels 6930909Skarels s = splnet(); 7030909Skarels inp = sotoinpcb(so); 714886Swnj /* 725280Sroot * When a TCP is attached to a socket, then there will be 735280Sroot * a (struct inpcb) pointed at by the socket, and this 745280Sroot * structure will point at a subsidary (struct tcpcb). 754886Swnj */ 765089Swnj if (inp == 0 && req != PRU_ATTACH) { 775075Swnj splx(s); 78*69642Skarels #if 0 79*69642Skarels /* 80*69642Skarels * The following corrects an mbuf leak under rare 81*69642Skarels * circumstances, but has not been fully tested. 82*69642Skarels */ 83*69642Skarels if (m && req != PRU_SENSE) 84*69642Skarels m_freem(m); 85*69642Skarels #else 86*69642Skarels /* safer version of fix for mbuf leak */ 87*69642Skarels if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 88*69642Skarels m_freem(m); 89*69642Skarels #endif 905280Sroot return (EINVAL); /* XXX */ 915075Swnj } 925075Swnj if (inp) { 934911Swnj tp = intotcpcb(inp); 948272Sroot /* WHAT IF TP IS 0? */ 954731Swnj #ifdef KPROF 965075Swnj tcp_acounts[tp->t_state][req]++; 974731Swnj #endif 985270Sroot ostate = tp->t_state; 997511Sroot } else 1007511Sroot ostate = 0; 1014809Swnj switch (req) { 1024497Swnj 1035280Sroot /* 1045280Sroot * TCP attaches to socket via PRU_ATTACH, reserving space, 1058272Sroot * and an internet control block. 1065280Sroot */ 1074809Swnj case PRU_ATTACH: 1084954Swnj if (inp) { 1094809Swnj error = EISCONN; 1104911Swnj break; 1114886Swnj } 1128640Sroot error = tcp_attach(so); 1135075Swnj if (error) 1144954Swnj break; 11510397Ssam if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1165392Swnj so->so_linger = TCP_LINGERTIME; 1175280Sroot tp = sototcpcb(so); 1184567Swnj break; 1194497Swnj 1205280Sroot /* 1215280Sroot * PRU_DETACH detaches the TCP protocol from the socket. 1225280Sroot * If the protocol state is non-embryonic, then can't 1235280Sroot * do this directly: have to initiate a PRU_DISCONNECT, 1245280Sroot * which may finish later; embryonic TCB's can just 1255280Sroot * be discarded here. 1265280Sroot */ 1274809Swnj case PRU_DETACH: 1285280Sroot if (tp->t_state > TCPS_LISTEN) 12910397Ssam tp = tcp_disconnect(tp); 13010397Ssam else 13110397Ssam tp = tcp_close(tp); 1324809Swnj break; 1334809Swnj 1345280Sroot /* 1358272Sroot * Give the socket an address. 1368272Sroot */ 1378272Sroot case PRU_BIND: 1388272Sroot error = in_pcbbind(inp, nam); 1398272Sroot if (error) 1408272Sroot break; 1418272Sroot break; 1428272Sroot 1438272Sroot /* 1448272Sroot * Prepare to accept connections. 1458272Sroot */ 1468272Sroot case PRU_LISTEN: 1478272Sroot if (inp->inp_lport == 0) 1488272Sroot error = in_pcbbind(inp, (struct mbuf *)0); 1498272Sroot if (error == 0) 1508272Sroot tp->t_state = TCPS_LISTEN; 1518272Sroot break; 1528272Sroot 1538272Sroot /* 1545280Sroot * Initiate connection to peer. 1555280Sroot * Create a template for use in transmissions on this connection. 1565280Sroot * Enter SYN_SENT state, and mark socket as connecting. 1575280Sroot * Start keep-alive timer, and seed output sequence space. 1585280Sroot * Send initial segment on connection. 1595280Sroot */ 1604809Swnj case PRU_CONNECT: 1618272Sroot if (inp->inp_lport == 0) { 1628272Sroot error = in_pcbbind(inp, (struct mbuf *)0); 1638272Sroot if (error) 1648272Sroot break; 1658272Sroot } 1668272Sroot error = in_pcbconnect(inp, nam); 1674954Swnj if (error) 1684886Swnj break; 1695174Swnj tp->t_template = tcp_template(tp); 1705280Sroot if (tp->t_template == 0) { 1715280Sroot in_pcbdisconnect(inp); 1725280Sroot error = ENOBUFS; 1735280Sroot break; 1745280Sroot } 17557433Sandrew /* Compute window scaling to request. */ 17657433Sandrew while (tp->request_r_scale < TCP_MAX_WINSHIFT && 17757433Sandrew (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 17857433Sandrew tp->request_r_scale++; 1794886Swnj soisconnecting(so); 18030527Skarels tcpstat.tcps_connattempt++; 1815075Swnj tp->t_state = TCPS_SYN_SENT; 18233747Skarels tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 1835245Sroot tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 1845245Sroot tcp_sendseqinit(tp); 1856506Ssam error = tcp_output(tp); 1864567Swnj break; 1874497Swnj 1885280Sroot /* 18913117Ssam * Create a TCP connection between two sockets. 19013117Ssam */ 19113117Ssam case PRU_CONNECT2: 19213117Ssam error = EOPNOTSUPP; 19313117Ssam break; 19413117Ssam 19513117Ssam /* 1965280Sroot * Initiate disconnect from peer. 1975280Sroot * If connection never passed embryonic stage, just drop; 1985280Sroot * else if don't need to let data drain, then can just drop anyways, 1995280Sroot * else have to begin TCP shutdown process: mark socket disconnecting, 2005280Sroot * drain unread data, state switch to reflect user close, and 2015280Sroot * send segment (e.g. FIN) to peer. Socket will be really disconnected 2025280Sroot * when peer sends FIN and acks ours. 2035280Sroot * 2045280Sroot * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 2055280Sroot */ 2065280Sroot case PRU_DISCONNECT: 20710397Ssam tp = tcp_disconnect(tp); 2085245Sroot break; 2095245Sroot 2105280Sroot /* 2115280Sroot * Accept a connection. Essentially all the work is 2125280Sroot * done at higher levels; just return the address 2135280Sroot * of the peer, storing through addr. 2145280Sroot */ 21558551Storek case PRU_ACCEPT: 21658551Storek in_setpeeraddr(inp, nam); 2178272Sroot break; 2184925Swnj 2195280Sroot /* 2205280Sroot * Mark the connection as being incapable of further output. 2215280Sroot */ 2224809Swnj case PRU_SHUTDOWN: 2235089Swnj socantsendmore(so); 22410397Ssam tp = tcp_usrclosed(tp); 22510397Ssam if (tp) 22610397Ssam error = tcp_output(tp); 2274567Swnj break; 2284497Swnj 2295280Sroot /* 2305280Sroot * After a receive, possibly send window update to peer. 2315280Sroot */ 2324809Swnj case PRU_RCVD: 2335113Swnj (void) tcp_output(tp); 2344567Swnj break; 2354497Swnj 2365280Sroot /* 2375280Sroot * Do a send by putting data in output queue and updating urgent 2385280Sroot * marker if URG set. Possibly send more data. 2395280Sroot */ 2404809Swnj case PRU_SEND: 2415075Swnj sbappend(&so->so_snd, m); 2426506Ssam error = tcp_output(tp); 2434567Swnj break; 2444567Swnj 2455280Sroot /* 2465280Sroot * Abort the TCP. 2475280Sroot */ 2484809Swnj case PRU_ABORT: 24910397Ssam tp = tcp_drop(tp, ECONNABORTED); 2504567Swnj break; 2514567Swnj 2525113Swnj case PRU_SENSE: 25316989Skarels ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 25430871Smckusick (void) splx(s); 25516989Skarels return (0); 2565113Swnj 2575113Swnj case PRU_RCVOOB: 25824821Skarels if ((so->so_oobmark == 0 && 25924821Skarels (so->so_state & SS_RCVATMARK) == 0) || 26027195Skarels so->so_options & SO_OOBINLINE || 26124821Skarels tp->t_oobflags & TCPOOB_HADDATA) { 2625417Swnj error = EINVAL; 2635417Swnj break; 2645417Swnj } 2655549Swnj if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 2665442Swnj error = EWOULDBLOCK; 2675549Swnj break; 2685442Swnj } 2698310Sroot m->m_len = 1; 2705549Swnj *mtod(m, caddr_t) = tp->t_iobc; 27124821Skarels if (((int)nam & MSG_PEEK) == 0) 27224821Skarels tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 2735113Swnj break; 2745113Swnj 2755113Swnj case PRU_SENDOOB: 2765442Swnj if (sbspace(&so->so_snd) < -512) { 27711229Ssam m_freem(m); 2785442Swnj error = ENOBUFS; 2795442Swnj break; 2805442Swnj } 28127195Skarels /* 28227195Skarels * According to RFC961 (Assigned Protocols), 28327195Skarels * the urgent pointer points to the last octet 28427195Skarels * of urgent data. We continue, however, 28527195Skarels * to consider it to indicate the first octet 28627195Skarels * of data past the urgent section. 28727195Skarels * Otherwise, snd_up should be one lower. 28827195Skarels */ 2895417Swnj sbappend(&so->so_snd, m); 29027195Skarels tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 2915549Swnj tp->t_force = 1; 2926506Ssam error = tcp_output(tp); 2935549Swnj tp->t_force = 0; 2945113Swnj break; 2955113Swnj 2966510Ssam case PRU_SOCKADDR: 2978272Sroot in_setsockaddr(inp, nam); 2986510Ssam break; 2996510Ssam 30014123Ssam case PRU_PEERADDR: 30114123Ssam in_setpeeraddr(inp, nam); 30214123Ssam break; 30314123Ssam 3045280Sroot /* 3055280Sroot * TCP slow timer went off; going through this 3065280Sroot * routine for tracing's sake. 3075280Sroot */ 3084809Swnj case PRU_SLOWTIMO: 30910397Ssam tp = tcp_timers(tp, (int)nam); 3108272Sroot req |= (int)nam << 8; /* for debug's sake */ 3114809Swnj break; 3124809Swnj 3134731Swnj default: 3144731Swnj panic("tcp_usrreq"); 3154567Swnj } 3165270Sroot if (tp && (so->so_options & SO_DEBUG)) 3175270Sroot tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); 3184567Swnj splx(s); 3194886Swnj return (error); 3204497Swnj } 3215245Sroot 32261335Sbostic int 32325896Skarels tcp_ctloutput(op, so, level, optname, mp) 32424821Skarels int op; 32524821Skarels struct socket *so; 32624821Skarels int level, optname; 32725896Skarels struct mbuf **mp; 32824821Skarels { 32957433Sandrew int error = 0, s; 33057433Sandrew struct inpcb *inp; 33157433Sandrew register struct tcpcb *tp; 33225896Skarels register struct mbuf *m; 33355287Smckusick register int i; 33425896Skarels 33557433Sandrew s = splnet(); 33657433Sandrew inp = sotoinpcb(so); 33757433Sandrew if (inp == NULL) { 33857433Sandrew splx(s); 33965368Sbostic if (op == PRCO_SETOPT && *mp) 34065368Sbostic (void) m_free(*mp); 34157433Sandrew return (ECONNRESET); 34257433Sandrew } 34357433Sandrew if (level != IPPROTO_TCP) { 34457433Sandrew error = ip_ctloutput(op, so, level, optname, mp); 34557433Sandrew splx(s); 34657433Sandrew return (error); 34757433Sandrew } 34857433Sandrew tp = intotcpcb(inp); 34925896Skarels 35025896Skarels switch (op) { 35125896Skarels 35225896Skarels case PRCO_SETOPT: 35325896Skarels m = *mp; 35425896Skarels switch (optname) { 35525896Skarels 35625896Skarels case TCP_NODELAY: 35725896Skarels if (m == NULL || m->m_len < sizeof (int)) 35825896Skarels error = EINVAL; 35925896Skarels else if (*mtod(m, int *)) 36025896Skarels tp->t_flags |= TF_NODELAY; 36125896Skarels else 36225896Skarels tp->t_flags &= ~TF_NODELAY; 36325896Skarels break; 36425896Skarels 36555287Smckusick case TCP_MAXSEG: 36655287Smckusick if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg) 36755287Smckusick tp->t_maxseg = i; 36855287Smckusick else 36955287Smckusick error = EINVAL; 37055287Smckusick break; 37155287Smckusick 37225896Skarels default: 37365368Sbostic error = ENOPROTOOPT; 37425896Skarels break; 37525896Skarels } 37631041Ssam if (m) 37731041Ssam (void) m_free(m); 37825896Skarels break; 37925896Skarels 38025896Skarels case PRCO_GETOPT: 38125896Skarels *mp = m = m_get(M_WAIT, MT_SOOPTS); 38225896Skarels m->m_len = sizeof(int); 38325896Skarels 38425896Skarels switch (optname) { 38525896Skarels case TCP_NODELAY: 38625896Skarels *mtod(m, int *) = tp->t_flags & TF_NODELAY; 38725896Skarels break; 38825896Skarels case TCP_MAXSEG: 38925896Skarels *mtod(m, int *) = tp->t_maxseg; 39025896Skarels break; 39125896Skarels default: 39265368Sbostic error = ENOPROTOOPT; 39325896Skarels break; 39425896Skarels } 39525896Skarels break; 39625896Skarels } 39757433Sandrew splx(s); 39825896Skarels return (error); 39924821Skarels } 40024821Skarels 40155287Smckusick u_long tcp_sendspace = 1024*8; 40255287Smckusick u_long tcp_recvspace = 1024*8; 40337323Skarels 4045280Sroot /* 4055280Sroot * Attach TCP protocol to socket, allocating 4065280Sroot * internet protocol control block, tcp control block, 4075280Sroot * bufer space, and entering LISTEN state if to accept connections. 4085280Sroot */ 40961335Sbostic int 4108272Sroot tcp_attach(so) 4115280Sroot struct socket *so; 4125280Sroot { 4135280Sroot register struct tcpcb *tp; 4145280Sroot struct inpcb *inp; 4155280Sroot int error; 4165280Sroot 41734485Skarels if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 41834485Skarels error = soreserve(so, tcp_sendspace, tcp_recvspace); 41934485Skarels if (error) 42034485Skarels return (error); 42134485Skarels } 4227511Sroot error = in_pcballoc(so, &tcb); 4237511Sroot if (error) 42417047Skarels return (error); 4258272Sroot inp = sotoinpcb(so); 4265280Sroot tp = tcp_newtcpcb(inp); 4277511Sroot if (tp == 0) { 42817047Skarels int nofd = so->so_state & SS_NOFDREF; /* XXX */ 42917047Skarels 43017047Skarels so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 43117047Skarels in_pcbdetach(inp); 43217047Skarels so->so_state |= nofd; 43317047Skarels return (ENOBUFS); 4347511Sroot } 4358272Sroot tp->t_state = TCPS_CLOSED; 4365280Sroot return (0); 4375280Sroot } 4385280Sroot 4395280Sroot /* 4405280Sroot * Initiate (or continue) disconnect. 4415280Sroot * If embryonic state, just send reset (once). 44213221Ssam * If in ``let data drain'' option and linger null, just drop. 4435280Sroot * Otherwise (hard), mark socket disconnecting and drop 4445280Sroot * current input data; switch states based on user close, and 4455280Sroot * send segment to peer (with FIN). 4465280Sroot */ 44710397Ssam struct tcpcb * 4485280Sroot tcp_disconnect(tp) 44910397Ssam register struct tcpcb *tp; 4505280Sroot { 4515280Sroot struct socket *so = tp->t_inpcb->inp_socket; 4525280Sroot 4535280Sroot if (tp->t_state < TCPS_ESTABLISHED) 45410397Ssam tp = tcp_close(tp); 45513221Ssam else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 45610397Ssam tp = tcp_drop(tp, 0); 4575280Sroot else { 4585280Sroot soisdisconnecting(so); 4595280Sroot sbflush(&so->so_rcv); 46010397Ssam tp = tcp_usrclosed(tp); 46110397Ssam if (tp) 46210397Ssam (void) tcp_output(tp); 4635280Sroot } 46410397Ssam return (tp); 4655280Sroot } 4665280Sroot 4675280Sroot /* 4685280Sroot * User issued close, and wish to trail through shutdown states: 4695280Sroot * if never received SYN, just forget it. If got a SYN from peer, 4705280Sroot * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 4715280Sroot * If already got a FIN from peer, then almost done; go to LAST_ACK 4725280Sroot * state. In all other cases, have already sent FIN to peer (e.g. 4735280Sroot * after PRU_SHUTDOWN), and just have to play tedious game waiting 4745280Sroot * for peer to send FIN or not respond to keep-alives, etc. 4755897Swnj * We can let the user exit from the close as soon as the FIN is acked. 4765280Sroot */ 47710397Ssam struct tcpcb * 4785245Sroot tcp_usrclosed(tp) 47910397Ssam register struct tcpcb *tp; 4805245Sroot { 4815245Sroot 4825245Sroot switch (tp->t_state) { 4835245Sroot 48412438Ssam case TCPS_CLOSED: 4855245Sroot case TCPS_LISTEN: 4865245Sroot case TCPS_SYN_SENT: 4875245Sroot tp->t_state = TCPS_CLOSED; 48810397Ssam tp = tcp_close(tp); 4895245Sroot break; 4905245Sroot 4915245Sroot case TCPS_SYN_RECEIVED: 4925245Sroot case TCPS_ESTABLISHED: 4935245Sroot tp->t_state = TCPS_FIN_WAIT_1; 4945245Sroot break; 4955245Sroot 4965245Sroot case TCPS_CLOSE_WAIT: 4975245Sroot tp->t_state = TCPS_LAST_ACK; 4985245Sroot break; 4995245Sroot } 50010397Ssam if (tp && tp->t_state >= TCPS_FIN_WAIT_2) 5015897Swnj soisdisconnected(tp->t_inpcb->inp_socket); 50210397Ssam return (tp); 5035245Sroot } 504