123196Smckusick /* 263218Sbostic * Copyright (c) 1982, 1986, 1988, 1993 363218Sbostic * The Regents of the University of California. All rights reserved. 423196Smckusick * 544491Sbostic * %sccs.include.redist.c% 632789Sbostic * 7*68170Scgd * @(#)tcp_usrreq.c 8.3 (Berkeley) 01/09/95 823196Smckusick */ 94567Swnj 1056531Sbostic #include <sys/param.h> 1156531Sbostic #include <sys/systm.h> 1256531Sbostic #include <sys/malloc.h> 1356531Sbostic #include <sys/mbuf.h> 1456531Sbostic #include <sys/socket.h> 1556531Sbostic #include <sys/socketvar.h> 1656531Sbostic #include <sys/protosw.h> 1756531Sbostic #include <sys/errno.h> 1856531Sbostic #include <sys/stat.h> 198697Sroot 2056531Sbostic #include <net/if.h> 2156531Sbostic #include <net/route.h> 2210896Ssam 2356531Sbostic #include <netinet/in.h> 2456531Sbostic #include <netinet/in_systm.h> 2556531Sbostic #include <netinet/ip.h> 2656531Sbostic #include <netinet/in_pcb.h> 2756531Sbostic #include <netinet/ip_var.h> 2856531Sbostic #include <netinet/tcp.h> 2956531Sbostic #include <netinet/tcp_fsm.h> 3056531Sbostic #include <netinet/tcp_seq.h> 3156531Sbostic #include <netinet/tcp_timer.h> 3256531Sbostic #include <netinet/tcp_var.h> 3356531Sbostic #include <netinet/tcpip.h> 3456531Sbostic #include <netinet/tcp_debug.h> 354497Swnj 365280Sroot /* 375280Sroot * TCP protocol interface to socket abstraction. 385280Sroot */ 395280Sroot extern char *tcpstates[]; 405280Sroot 414734Swnj /* 425280Sroot * Process a TCP user request for TCP tb. If this is a send request 434731Swnj * then m is the mbuf chain of send data. If this is a timer expiration 444731Swnj * (called from the software clock routine), then timertype tells which timer. 454731Swnj */ 468601Sroot /*ARGSUSED*/ 4761335Sbostic int 4842184Skarels tcp_usrreq(so, req, m, nam, control) 494809Swnj struct socket *so; 504809Swnj int req; 5142184Skarels struct mbuf *m, *nam, *control; 524497Swnj { 5330909Skarels register struct inpcb *inp; 544911Swnj register struct tcpcb *tp; 5530909Skarels int s; 564809Swnj int error = 0; 575270Sroot int ostate; 584497Swnj 5930909Skarels if (req == PRU_CONTROL) 60*68170Scgd return (in_control(so, (u_long)m, (caddr_t)nam, 6142184Skarels (struct ifnet *)control)); 6242184Skarels if (control && control->m_len) { 6342184Skarels m_freem(control); 6442184Skarels if (m) 6542184Skarels m_freem(m); 6612766Ssam return (EINVAL); 6742184Skarels } 6830909Skarels 6930909Skarels s = splnet(); 7030909Skarels inp = sotoinpcb(so); 714886Swnj /* 725280Sroot * When a TCP is attached to a socket, then there will be 735280Sroot * a (struct inpcb) pointed at by the socket, and this 745280Sroot * structure will point at a subsidary (struct tcpcb). 754886Swnj */ 765089Swnj if (inp == 0 && req != PRU_ATTACH) { 775075Swnj splx(s); 785280Sroot return (EINVAL); /* XXX */ 795075Swnj } 805075Swnj if (inp) { 814911Swnj tp = intotcpcb(inp); 828272Sroot /* WHAT IF TP IS 0? */ 834731Swnj #ifdef KPROF 845075Swnj tcp_acounts[tp->t_state][req]++; 854731Swnj #endif 865270Sroot ostate = tp->t_state; 877511Sroot } else 887511Sroot ostate = 0; 894809Swnj switch (req) { 904497Swnj 915280Sroot /* 925280Sroot * TCP attaches to socket via PRU_ATTACH, reserving space, 938272Sroot * and an internet control block. 945280Sroot */ 954809Swnj case PRU_ATTACH: 964954Swnj if (inp) { 974809Swnj error = EISCONN; 984911Swnj break; 994886Swnj } 1008640Sroot error = tcp_attach(so); 1015075Swnj if (error) 1024954Swnj break; 10310397Ssam if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1045392Swnj so->so_linger = TCP_LINGERTIME; 1055280Sroot tp = sototcpcb(so); 1064567Swnj break; 1074497Swnj 1085280Sroot /* 1095280Sroot * PRU_DETACH detaches the TCP protocol from the socket. 1105280Sroot * If the protocol state is non-embryonic, then can't 1115280Sroot * do this directly: have to initiate a PRU_DISCONNECT, 1125280Sroot * which may finish later; embryonic TCB's can just 1135280Sroot * be discarded here. 1145280Sroot */ 1154809Swnj case PRU_DETACH: 1165280Sroot if (tp->t_state > TCPS_LISTEN) 11710397Ssam tp = tcp_disconnect(tp); 11810397Ssam else 11910397Ssam tp = tcp_close(tp); 1204809Swnj break; 1214809Swnj 1225280Sroot /* 1238272Sroot * Give the socket an address. 1248272Sroot */ 1258272Sroot case PRU_BIND: 1268272Sroot error = in_pcbbind(inp, nam); 1278272Sroot if (error) 1288272Sroot break; 1298272Sroot break; 1308272Sroot 1318272Sroot /* 1328272Sroot * Prepare to accept connections. 1338272Sroot */ 1348272Sroot case PRU_LISTEN: 1358272Sroot if (inp->inp_lport == 0) 1368272Sroot error = in_pcbbind(inp, (struct mbuf *)0); 1378272Sroot if (error == 0) 1388272Sroot tp->t_state = TCPS_LISTEN; 1398272Sroot break; 1408272Sroot 1418272Sroot /* 1425280Sroot * Initiate connection to peer. 1435280Sroot * Create a template for use in transmissions on this connection. 1445280Sroot * Enter SYN_SENT state, and mark socket as connecting. 1455280Sroot * Start keep-alive timer, and seed output sequence space. 1465280Sroot * Send initial segment on connection. 1475280Sroot */ 1484809Swnj case PRU_CONNECT: 1498272Sroot if (inp->inp_lport == 0) { 1508272Sroot error = in_pcbbind(inp, (struct mbuf *)0); 1518272Sroot if (error) 1528272Sroot break; 1538272Sroot } 1548272Sroot error = in_pcbconnect(inp, nam); 1554954Swnj if (error) 1564886Swnj break; 1575174Swnj tp->t_template = tcp_template(tp); 1585280Sroot if (tp->t_template == 0) { 1595280Sroot in_pcbdisconnect(inp); 1605280Sroot error = ENOBUFS; 1615280Sroot break; 1625280Sroot } 16357433Sandrew /* Compute window scaling to request. */ 16457433Sandrew while (tp->request_r_scale < TCP_MAX_WINSHIFT && 16557433Sandrew (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 16657433Sandrew tp->request_r_scale++; 1674886Swnj soisconnecting(so); 16830527Skarels tcpstat.tcps_connattempt++; 1695075Swnj tp->t_state = TCPS_SYN_SENT; 17033747Skarels tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 1715245Sroot tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 1725245Sroot tcp_sendseqinit(tp); 1736506Ssam error = tcp_output(tp); 1744567Swnj break; 1754497Swnj 1765280Sroot /* 17713117Ssam * Create a TCP connection between two sockets. 17813117Ssam */ 17913117Ssam case PRU_CONNECT2: 18013117Ssam error = EOPNOTSUPP; 18113117Ssam break; 18213117Ssam 18313117Ssam /* 1845280Sroot * Initiate disconnect from peer. 1855280Sroot * If connection never passed embryonic stage, just drop; 1865280Sroot * else if don't need to let data drain, then can just drop anyways, 1875280Sroot * else have to begin TCP shutdown process: mark socket disconnecting, 1885280Sroot * drain unread data, state switch to reflect user close, and 1895280Sroot * send segment (e.g. FIN) to peer. Socket will be really disconnected 1905280Sroot * when peer sends FIN and acks ours. 1915280Sroot * 1925280Sroot * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 1935280Sroot */ 1945280Sroot case PRU_DISCONNECT: 19510397Ssam tp = tcp_disconnect(tp); 1965245Sroot break; 1975245Sroot 1985280Sroot /* 1995280Sroot * Accept a connection. Essentially all the work is 2005280Sroot * done at higher levels; just return the address 2015280Sroot * of the peer, storing through addr. 2025280Sroot */ 20358551Storek case PRU_ACCEPT: 20458551Storek in_setpeeraddr(inp, nam); 2058272Sroot break; 2064925Swnj 2075280Sroot /* 2085280Sroot * Mark the connection as being incapable of further output. 2095280Sroot */ 2104809Swnj case PRU_SHUTDOWN: 2115089Swnj socantsendmore(so); 21210397Ssam tp = tcp_usrclosed(tp); 21310397Ssam if (tp) 21410397Ssam error = tcp_output(tp); 2154567Swnj break; 2164497Swnj 2175280Sroot /* 2185280Sroot * After a receive, possibly send window update to peer. 2195280Sroot */ 2204809Swnj case PRU_RCVD: 2215113Swnj (void) tcp_output(tp); 2224567Swnj break; 2234497Swnj 2245280Sroot /* 2255280Sroot * Do a send by putting data in output queue and updating urgent 2265280Sroot * marker if URG set. Possibly send more data. 2275280Sroot */ 2284809Swnj case PRU_SEND: 2295075Swnj sbappend(&so->so_snd, m); 2306506Ssam error = tcp_output(tp); 2314567Swnj break; 2324567Swnj 2335280Sroot /* 2345280Sroot * Abort the TCP. 2355280Sroot */ 2364809Swnj case PRU_ABORT: 23710397Ssam tp = tcp_drop(tp, ECONNABORTED); 2384567Swnj break; 2394567Swnj 2405113Swnj case PRU_SENSE: 24116989Skarels ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 24230871Smckusick (void) splx(s); 24316989Skarels return (0); 2445113Swnj 2455113Swnj case PRU_RCVOOB: 24624821Skarels if ((so->so_oobmark == 0 && 24724821Skarels (so->so_state & SS_RCVATMARK) == 0) || 24827195Skarels so->so_options & SO_OOBINLINE || 24924821Skarels tp->t_oobflags & TCPOOB_HADDATA) { 2505417Swnj error = EINVAL; 2515417Swnj break; 2525417Swnj } 2535549Swnj if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 2545442Swnj error = EWOULDBLOCK; 2555549Swnj break; 2565442Swnj } 2578310Sroot m->m_len = 1; 2585549Swnj *mtod(m, caddr_t) = tp->t_iobc; 25924821Skarels if (((int)nam & MSG_PEEK) == 0) 26024821Skarels tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 2615113Swnj break; 2625113Swnj 2635113Swnj case PRU_SENDOOB: 2645442Swnj if (sbspace(&so->so_snd) < -512) { 26511229Ssam m_freem(m); 2665442Swnj error = ENOBUFS; 2675442Swnj break; 2685442Swnj } 26927195Skarels /* 27027195Skarels * According to RFC961 (Assigned Protocols), 27127195Skarels * the urgent pointer points to the last octet 27227195Skarels * of urgent data. We continue, however, 27327195Skarels * to consider it to indicate the first octet 27427195Skarels * of data past the urgent section. 27527195Skarels * Otherwise, snd_up should be one lower. 27627195Skarels */ 2775417Swnj sbappend(&so->so_snd, m); 27827195Skarels tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 2795549Swnj tp->t_force = 1; 2806506Ssam error = tcp_output(tp); 2815549Swnj tp->t_force = 0; 2825113Swnj break; 2835113Swnj 2846510Ssam case PRU_SOCKADDR: 2858272Sroot in_setsockaddr(inp, nam); 2866510Ssam break; 2876510Ssam 28814123Ssam case PRU_PEERADDR: 28914123Ssam in_setpeeraddr(inp, nam); 29014123Ssam break; 29114123Ssam 2925280Sroot /* 2935280Sroot * TCP slow timer went off; going through this 2945280Sroot * routine for tracing's sake. 2955280Sroot */ 2964809Swnj case PRU_SLOWTIMO: 29710397Ssam tp = tcp_timers(tp, (int)nam); 2988272Sroot req |= (int)nam << 8; /* for debug's sake */ 2994809Swnj break; 3004809Swnj 3014731Swnj default: 3024731Swnj panic("tcp_usrreq"); 3034567Swnj } 3045270Sroot if (tp && (so->so_options & SO_DEBUG)) 3055270Sroot tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); 3064567Swnj splx(s); 3074886Swnj return (error); 3084497Swnj } 3095245Sroot 31061335Sbostic int 31125896Skarels tcp_ctloutput(op, so, level, optname, mp) 31224821Skarels int op; 31324821Skarels struct socket *so; 31424821Skarels int level, optname; 31525896Skarels struct mbuf **mp; 31624821Skarels { 31757433Sandrew int error = 0, s; 31857433Sandrew struct inpcb *inp; 31957433Sandrew register struct tcpcb *tp; 32025896Skarels register struct mbuf *m; 32155287Smckusick register int i; 32225896Skarels 32357433Sandrew s = splnet(); 32457433Sandrew inp = sotoinpcb(so); 32557433Sandrew if (inp == NULL) { 32657433Sandrew splx(s); 32765368Sbostic if (op == PRCO_SETOPT && *mp) 32865368Sbostic (void) m_free(*mp); 32957433Sandrew return (ECONNRESET); 33057433Sandrew } 33157433Sandrew if (level != IPPROTO_TCP) { 33257433Sandrew error = ip_ctloutput(op, so, level, optname, mp); 33357433Sandrew splx(s); 33457433Sandrew return (error); 33557433Sandrew } 33657433Sandrew tp = intotcpcb(inp); 33725896Skarels 33825896Skarels switch (op) { 33925896Skarels 34025896Skarels case PRCO_SETOPT: 34125896Skarels m = *mp; 34225896Skarels switch (optname) { 34325896Skarels 34425896Skarels case TCP_NODELAY: 34525896Skarels if (m == NULL || m->m_len < sizeof (int)) 34625896Skarels error = EINVAL; 34725896Skarels else if (*mtod(m, int *)) 34825896Skarels tp->t_flags |= TF_NODELAY; 34925896Skarels else 35025896Skarels tp->t_flags &= ~TF_NODELAY; 35125896Skarels break; 35225896Skarels 35355287Smckusick case TCP_MAXSEG: 35455287Smckusick if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg) 35555287Smckusick tp->t_maxseg = i; 35655287Smckusick else 35755287Smckusick error = EINVAL; 35855287Smckusick break; 35955287Smckusick 36025896Skarels default: 36165368Sbostic error = ENOPROTOOPT; 36225896Skarels break; 36325896Skarels } 36431041Ssam if (m) 36531041Ssam (void) m_free(m); 36625896Skarels break; 36725896Skarels 36825896Skarels case PRCO_GETOPT: 36925896Skarels *mp = m = m_get(M_WAIT, MT_SOOPTS); 37025896Skarels m->m_len = sizeof(int); 37125896Skarels 37225896Skarels switch (optname) { 37325896Skarels case TCP_NODELAY: 37425896Skarels *mtod(m, int *) = tp->t_flags & TF_NODELAY; 37525896Skarels break; 37625896Skarels case TCP_MAXSEG: 37725896Skarels *mtod(m, int *) = tp->t_maxseg; 37825896Skarels break; 37925896Skarels default: 38065368Sbostic error = ENOPROTOOPT; 38125896Skarels break; 38225896Skarels } 38325896Skarels break; 38425896Skarels } 38557433Sandrew splx(s); 38625896Skarels return (error); 38724821Skarels } 38824821Skarels 38955287Smckusick u_long tcp_sendspace = 1024*8; 39055287Smckusick u_long tcp_recvspace = 1024*8; 39137323Skarels 3925280Sroot /* 3935280Sroot * Attach TCP protocol to socket, allocating 3945280Sroot * internet protocol control block, tcp control block, 3955280Sroot * bufer space, and entering LISTEN state if to accept connections. 3965280Sroot */ 39761335Sbostic int 3988272Sroot tcp_attach(so) 3995280Sroot struct socket *so; 4005280Sroot { 4015280Sroot register struct tcpcb *tp; 4025280Sroot struct inpcb *inp; 4035280Sroot int error; 4045280Sroot 40534485Skarels if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 40634485Skarels error = soreserve(so, tcp_sendspace, tcp_recvspace); 40734485Skarels if (error) 40834485Skarels return (error); 40934485Skarels } 4107511Sroot error = in_pcballoc(so, &tcb); 4117511Sroot if (error) 41217047Skarels return (error); 4138272Sroot inp = sotoinpcb(so); 4145280Sroot tp = tcp_newtcpcb(inp); 4157511Sroot if (tp == 0) { 41617047Skarels int nofd = so->so_state & SS_NOFDREF; /* XXX */ 41717047Skarels 41817047Skarels so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 41917047Skarels in_pcbdetach(inp); 42017047Skarels so->so_state |= nofd; 42117047Skarels return (ENOBUFS); 4227511Sroot } 4238272Sroot tp->t_state = TCPS_CLOSED; 4245280Sroot return (0); 4255280Sroot } 4265280Sroot 4275280Sroot /* 4285280Sroot * Initiate (or continue) disconnect. 4295280Sroot * If embryonic state, just send reset (once). 43013221Ssam * If in ``let data drain'' option and linger null, just drop. 4315280Sroot * Otherwise (hard), mark socket disconnecting and drop 4325280Sroot * current input data; switch states based on user close, and 4335280Sroot * send segment to peer (with FIN). 4345280Sroot */ 43510397Ssam struct tcpcb * 4365280Sroot tcp_disconnect(tp) 43710397Ssam register struct tcpcb *tp; 4385280Sroot { 4395280Sroot struct socket *so = tp->t_inpcb->inp_socket; 4405280Sroot 4415280Sroot if (tp->t_state < TCPS_ESTABLISHED) 44210397Ssam tp = tcp_close(tp); 44313221Ssam else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 44410397Ssam tp = tcp_drop(tp, 0); 4455280Sroot else { 4465280Sroot soisdisconnecting(so); 4475280Sroot sbflush(&so->so_rcv); 44810397Ssam tp = tcp_usrclosed(tp); 44910397Ssam if (tp) 45010397Ssam (void) tcp_output(tp); 4515280Sroot } 45210397Ssam return (tp); 4535280Sroot } 4545280Sroot 4555280Sroot /* 4565280Sroot * User issued close, and wish to trail through shutdown states: 4575280Sroot * if never received SYN, just forget it. If got a SYN from peer, 4585280Sroot * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 4595280Sroot * If already got a FIN from peer, then almost done; go to LAST_ACK 4605280Sroot * state. In all other cases, have already sent FIN to peer (e.g. 4615280Sroot * after PRU_SHUTDOWN), and just have to play tedious game waiting 4625280Sroot * for peer to send FIN or not respond to keep-alives, etc. 4635897Swnj * We can let the user exit from the close as soon as the FIN is acked. 4645280Sroot */ 46510397Ssam struct tcpcb * 4665245Sroot tcp_usrclosed(tp) 46710397Ssam register struct tcpcb *tp; 4685245Sroot { 4695245Sroot 4705245Sroot switch (tp->t_state) { 4715245Sroot 47212438Ssam case TCPS_CLOSED: 4735245Sroot case TCPS_LISTEN: 4745245Sroot case TCPS_SYN_SENT: 4755245Sroot tp->t_state = TCPS_CLOSED; 47610397Ssam tp = tcp_close(tp); 4775245Sroot break; 4785245Sroot 4795245Sroot case TCPS_SYN_RECEIVED: 4805245Sroot case TCPS_ESTABLISHED: 4815245Sroot tp->t_state = TCPS_FIN_WAIT_1; 4825245Sroot break; 4835245Sroot 4845245Sroot case TCPS_CLOSE_WAIT: 4855245Sroot tp->t_state = TCPS_LAST_ACK; 4865245Sroot break; 4875245Sroot } 48810397Ssam if (tp && tp->t_state >= TCPS_FIN_WAIT_2) 4895897Swnj soisdisconnected(tp->t_inpcb->inp_socket); 49010397Ssam return (tp); 4915245Sroot } 492