123191Smckusick /* 223191Smckusick * Copyright (c) 1982 Regents of the University of California. 323191Smckusick * All rights reserved. The Berkeley software License Agreement 423191Smckusick * specifies the terms and conditions for redistribution. 523191Smckusick * 6*25261Skarels * @(#)tcp_output.c 6.11 (Berkeley) 10/23/85 723191Smckusick */ 84677Swnj 917063Sbloom #include "param.h" 1017063Sbloom #include "systm.h" 1117063Sbloom #include "mbuf.h" 1217063Sbloom #include "protosw.h" 1317063Sbloom #include "socket.h" 1417063Sbloom #include "socketvar.h" 1517063Sbloom #include "errno.h" 1610895Ssam 1710895Ssam #include "../net/route.h" 1810895Ssam 1917063Sbloom #include "in.h" 2017063Sbloom #include "in_pcb.h" 2117063Sbloom #include "in_systm.h" 2217063Sbloom #include "ip.h" 2317063Sbloom #include "ip_var.h" 2417063Sbloom #include "tcp.h" 255088Swnj #define TCPOUTFLAGS 2617063Sbloom #include "tcp_fsm.h" 2717063Sbloom #include "tcp_seq.h" 2817063Sbloom #include "tcp_timer.h" 2917063Sbloom #include "tcp_var.h" 3017063Sbloom #include "tcpip.h" 3117063Sbloom #include "tcp_debug.h" 324677Swnj 334678Swnj /* 348314Sroot * Initial options. 355441Swnj */ 365441Swnj u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 375441Swnj 385441Swnj /* 395245Sroot * Tcp output routine: figure out what should be sent and send it. 404678Swnj */ 415075Swnj tcp_output(tp) 425075Swnj register struct tcpcb *tp; 434678Swnj { 445075Swnj register struct socket *so = tp->t_inpcb->inp_socket; 455075Swnj register int len; 465075Swnj struct mbuf *m0; 476505Ssam int off, flags, win, error; 485075Swnj register struct mbuf *m; 495075Swnj register struct tcpiphdr *ti; 505441Swnj u_char *opt; 515441Swnj unsigned optlen = 0; 527125Swnj int sendalot; 534678Swnj 545075Swnj /* 556279Swnj * Determine length of data that should be transmitted, 565088Swnj * and flags that will be used. 575088Swnj * If there is some data or critical controls (SYN, RST) 585088Swnj * to send, then transmit; otherwise, investigate further. 595075Swnj */ 607125Swnj again: 617125Swnj sendalot = 0; 625075Swnj off = tp->snd_nxt - tp->snd_una; 6321116Skarels win = MIN(tp->snd_wnd, tp->snd_cwnd); 6421116Skarels /* 6521116Skarels * If in persist timeout with window of 0, send 1 byte. 6621116Skarels * Otherwise, window is small but nonzero 6721116Skarels * and timer expired, go to transmit state. 6821116Skarels */ 6921116Skarels if (tp->t_force) { 7021116Skarels if (win == 0) 7121116Skarels win = 1; 7221116Skarels else { 7321116Skarels tp->t_timer[TCPT_PERSIST] = 0; 7421116Skarels tp->t_rxtshift = 0; 7521116Skarels } 7621116Skarels } 7717361Skarels len = MIN(so->so_snd.sb_cc, win) - off; 785285Sroot if (len < 0) 796505Ssam return (0); /* ??? */ /* past FIN */ 807125Swnj if (len > tp->t_maxseg) { 815088Swnj len = tp->t_maxseg; 8217318Skarels /* 8321116Skarels * Don't send more than one segment if retransmitting 8421116Skarels * (or persisting, but then we shouldn't be here). 8517318Skarels */ 8617361Skarels if (tp->t_rxtshift == 0) 8717318Skarels sendalot = 1; 887125Swnj } 896279Swnj 9021116Skarels win = sbspace(&so->so_rcv); 915088Swnj flags = tcp_outflags[tp->t_state]; 925299Sroot if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) 935163Swnj flags &= ~TH_FIN; 946279Swnj if (flags & (TH_SYN|TH_RST|TH_FIN)) 955075Swnj goto send; 966279Swnj if (SEQ_GT(tp->snd_up, tp->snd_una)) 976279Swnj goto send; 984678Swnj 995075Swnj /* 10017318Skarels * Sender silly window avoidance. If connection is idle 10117318Skarels * and can send all data, a maximum segment, 10217318Skarels * at least a maximum default-size segment do it, 1036279Swnj * or are forced, do it; otherwise don't bother. 104*25261Skarels * If peer's buffer is tiny, then send 105*25261Skarels * when window is at least half open. 10621116Skarels * If retransmitting (possibly after persist timer forced us 10721116Skarels * to send into a small window), then must resend. 1086279Swnj */ 1096279Swnj if (len) { 11017318Skarels if (len == tp->t_maxseg || len >= so->so_snd.sb_cc) /* off = 0*/ 1116279Swnj goto send; 11217318Skarels if (len >= TCP_MSS) /* a lot */ 1136279Swnj goto send; 1146279Swnj if (tp->t_force) 1156279Swnj goto send; 116*25261Skarels if (len >= tp->max_sndwnd / 2) 117*25261Skarels goto send; 11821116Skarels if (SEQ_LT(tp->snd_nxt, tp->snd_max)) 11921116Skarels goto send; 1206279Swnj } 1216279Swnj 1226279Swnj /* 1235285Sroot * Send if we owe peer an ACK. 1245075Swnj */ 1255441Swnj if (tp->t_flags&TF_ACKNOW) 1265075Swnj goto send; 1274678Swnj 1285441Swnj 1295441Swnj /* 13017361Skarels * Calculate available window, and also amount 1315075Swnj * of window known to peer (as advertised window less 13217361Skarels * next expected input.) If the difference is 35% or more of the 13317361Skarels * maximum possible window, then want to send a window update to peer. 1345075Swnj */ 1355088Swnj win = sbspace(&so->so_rcv); 1365088Swnj if (win > 0 && 1375088Swnj ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) 1385075Swnj goto send; 1394678Swnj 1405075Swnj /* 1417125Swnj * TCP window updates are not reliable, rather a polling protocol 1427125Swnj * using ``persist'' packets is used to insure receipt of window 1437125Swnj * updates. The three ``states'' for the output side are: 1447125Swnj * idle not doing retransmits or persists 1457125Swnj * persisting to move a zero window 1467125Swnj * (re)transmitting and thereby not persisting 1477125Swnj * 1487125Swnj * tp->t_timer[TCPT_PERSIST] 1497125Swnj * is set when we are in persist state. 1507125Swnj * tp->t_force 1517125Swnj * is set when we are called to send a persist packet. 1527125Swnj * tp->t_timer[TCPT_REXMT] 1537125Swnj * is set when we are retransmitting 1547125Swnj * The output side is idle when both timers are zero. 1557125Swnj * 15621116Skarels * If send window is too small, there is data to transmit, and no 15721116Skarels * retransmit or persist is pending, then go to persist state. 15821116Skarels * If nothing happens soon, send when timer expires: 15921116Skarels * if window is nonzero, transmit what we can, 16021116Skarels * otherwise force out a byte. 1617125Swnj */ 16221116Skarels if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && 16321116Skarels tp->t_timer[TCPT_PERSIST] == 0) { 1647125Swnj tp->t_rxtshift = 0; 1657125Swnj tcp_setpersist(tp); 1667125Swnj } 1677125Swnj 1687125Swnj /* 1695075Swnj * No reason to send a segment, just return. 1705075Swnj */ 1715110Swnj return (0); 1724678Swnj 1735075Swnj send: 1745075Swnj /* 1755075Swnj * Grab a header mbuf, attaching a copy of data to 1765075Swnj * be transmitted, and initialize the header from 1775075Swnj * the template for sends on this connection. 1785075Swnj */ 17911720Ssam MGET(m, M_DONTWAIT, MT_HEADER); 18011720Ssam if (m == NULL) 1816505Ssam return (ENOBUFS); 1825245Sroot m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 1834885Swnj m->m_len = sizeof (struct tcpiphdr); 1845075Swnj if (len) { 1855075Swnj m->m_next = m_copy(so->so_snd.sb_mb, off, len); 1865075Swnj if (m->m_next == 0) 1875075Swnj len = 0; 1885075Swnj } 1895075Swnj ti = mtod(m, struct tcpiphdr *); 1905075Swnj if (tp->t_template == 0) 1915075Swnj panic("tcp_output"); 1925110Swnj bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 1935075Swnj 1945075Swnj /* 1955075Swnj * Fill in fields, remembering maximum advertised 1965075Swnj * window for use in delaying messages about window sizes. 1975075Swnj */ 1985245Sroot ti->ti_seq = tp->snd_nxt; 1995245Sroot ti->ti_ack = tp->rcv_nxt; 2005245Sroot ti->ti_seq = htonl(ti->ti_seq); 2015245Sroot ti->ti_ack = htonl(ti->ti_ack); 2025441Swnj /* 2035441Swnj * Before ESTABLISHED, force sending of initial options 2045441Swnj * unless TCP set to not do any options. 2055441Swnj */ 2065441Swnj if (tp->t_state < TCPS_ESTABLISHED) { 20717273Skarels int mss; 20817273Skarels 2095441Swnj if (tp->t_flags&TF_NOOPT) 2105441Swnj goto noopt; 21117273Skarels mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp)); 21217273Skarels if (mss <= IP_MSS - sizeof(struct tcpiphdr)) 21317273Skarels goto noopt; 2145441Swnj opt = tcp_initopt; 2155441Swnj optlen = sizeof (tcp_initopt); 21617273Skarels *(u_short *)(opt + 2) = htons(mss); 2175441Swnj } else { 2185441Swnj if (tp->t_tcpopt == 0) 2195441Swnj goto noopt; 2205441Swnj opt = mtod(tp->t_tcpopt, u_char *); 2215441Swnj optlen = tp->t_tcpopt->m_len; 2225441Swnj } 2238314Sroot if (opt) { 2245110Swnj m0 = m->m_next; 2259643Ssam m->m_next = m_get(M_DONTWAIT, MT_DATA); 2265088Swnj if (m->m_next == 0) { 2275088Swnj (void) m_free(m); 2285441Swnj m_freem(m0); 2296505Ssam return (ENOBUFS); 2305088Swnj } 2315088Swnj m->m_next->m_next = m0; 2325441Swnj m0 = m->m_next; 2335441Swnj m0->m_len = optlen; 2346162Ssam bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 2355441Swnj opt = (u_char *)(mtod(m0, caddr_t) + optlen); 2365441Swnj while (m0->m_len & 0x3) { 2375441Swnj *opt++ = TCPOPT_EOL; 2385441Swnj m0->m_len++; 2395441Swnj } 2405441Swnj optlen = m0->m_len; 2415441Swnj ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 2425088Swnj } 2435441Swnj noopt: 2445088Swnj ti->ti_flags = flags; 24521116Skarels if (win >= so->so_rcv.sb_hiwat / 4) /* avoid silly window */ 2465110Swnj ti->ti_win = htons((u_short)win); 2475088Swnj if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 2485420Swnj ti->ti_urp = tp->snd_up - tp->snd_nxt; 2495420Swnj ti->ti_urp = htons(ti->ti_urp); 2505075Swnj ti->ti_flags |= TH_URG; 2515075Swnj } else 2525075Swnj /* 2535075Swnj * If no urgent pointer to send, then we pull 2545075Swnj * the urgent pointer to the left edge of the send window 2555075Swnj * so that it doesn't drift into the send window on sequence 2565075Swnj * number wraparound. 2575075Swnj */ 2585088Swnj tp->snd_up = tp->snd_una; /* drag it along */ 2597644Sroot /* 2607644Sroot * If anything to send and we can send it all, set PUSH. 2617644Sroot * (This will keep happy those implementations which only 26210143Ssam * give data to the user when a buffer fills or a PUSH comes in.) 2637644Sroot */ 2647644Sroot if (len && off+len == so->so_snd.sb_cc) 2657644Sroot ti->ti_flags |= TH_PUSH; 2665075Swnj 2675075Swnj /* 2685075Swnj * Put TCP length in extended header, and then 2695075Swnj * checksum extended header and data. 2705075Swnj */ 2715441Swnj if (len + optlen) { 2725441Swnj ti->ti_len = sizeof (struct tcphdr) + optlen + len; 2735441Swnj ti->ti_len = htons((u_short)ti->ti_len); 2745441Swnj } 2756162Ssam ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 2765075Swnj 2775075Swnj /* 2787125Swnj * In transmit state, time the transmission and arrange for 27921116Skarels * the retransmit. In persist state, just set snd_max. 2805088Swnj */ 28121116Skarels if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { 2827125Swnj /* 2837146Swnj * Advance snd_nxt over sequence space of this segment. 2847125Swnj */ 2857125Swnj if (flags & (TH_SYN|TH_FIN)) 2867125Swnj tp->snd_nxt++; 2877125Swnj tp->snd_nxt += len; 28815385Ssam if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 2897149Swnj tp->snd_max = tp->snd_nxt; 29015385Ssam /* 29115385Ssam * Time this transmission if not a retransmission and 29215385Ssam * not currently timing anything. 29315385Ssam */ 29415385Ssam if (tp->t_rtt == 0) { 29515385Ssam tp->t_rtt = 1; 29615385Ssam tp->t_rtseq = tp->snd_nxt - len; 29715385Ssam } 2987125Swnj } 2995088Swnj 3007125Swnj /* 30121116Skarels * Set retransmit timer if not currently set, 30221116Skarels * and not doing a keep-alive probe. 3037125Swnj * Initial value for retransmit timer to tcp_beta*tp->t_srtt. 3047125Swnj * Initialize shift counter which is used for exponential 3057125Swnj * backoff of retransmit time. 3067125Swnj */ 3077125Swnj if (tp->t_timer[TCPT_REXMT] == 0 && 3087125Swnj tp->snd_nxt != tp->snd_una) { 3097125Swnj TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 3107125Swnj tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 3117125Swnj tp->t_rxtshift = 0; 3127125Swnj } 3137125Swnj tp->t_timer[TCPT_PERSIST] = 0; 3147149Swnj } else { 3157149Swnj if (SEQ_GT(tp->snd_una+1, tp->snd_max)) 3167149Swnj tp->snd_max = tp->snd_una+1; 3177146Swnj } 3185163Swnj 3195163Swnj /* 3205268Sroot * Trace. 3215268Sroot */ 3227146Swnj if (so->so_options & SO_DEBUG) 3235268Sroot tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 3245268Sroot 3255268Sroot /* 3265075Swnj * Fill in IP length and desired time to live and 3275075Swnj * send to IP level. 3285075Swnj */ 3295441Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 3305075Swnj ((struct ip *)ti)->ip_ttl = TCP_TTL; 33112418Ssam if (so->so_options & SO_DONTROUTE) 33212765Ssam error = 33324817Skarels ip_output(m, tp->t_inpcb->inp_options, (struct route *)0, 33424817Skarels IP_ROUTETOIF); 33512418Ssam else 33624817Skarels error = ip_output(m, tp->t_inpcb->inp_options, 33724817Skarels &tp->t_inpcb->inp_route, 0); 33812418Ssam if (error) 3396505Ssam return (error); 3405075Swnj 3415075Swnj /* 3425075Swnj * Data sent (as far as we can tell). 3435075Swnj * If this advertises a larger window than any other segment, 3445245Sroot * then remember the size of the advertised window. 3455088Swnj * Drop send for purpose of ACK requirements. 3465075Swnj */ 3475252Sroot if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 3485075Swnj tp->rcv_adv = tp->rcv_nxt + win; 3495088Swnj tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 3507125Swnj if (sendalot && tp->t_force == 0) 3517125Swnj goto again; 3526505Ssam return (0); 3534677Swnj } 3547125Swnj 3557125Swnj tcp_setpersist(tp) 3567125Swnj register struct tcpcb *tp; 3577125Swnj { 3587125Swnj 3597125Swnj if (tp->t_timer[TCPT_REXMT]) 3607125Swnj panic("tcp_output REXMT"); 3617125Swnj /* 3627125Swnj * Start/restart persistance timer. 3637125Swnj */ 3647125Swnj TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 3657125Swnj ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, 3667125Swnj TCPTV_PERSMIN, TCPTV_MAX); 3677125Swnj tp->t_rxtshift++; 3687125Swnj if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) 3697125Swnj tp->t_rxtshift = 0; 3707125Swnj } 371