1*21116Skarels /* tcp_output.c 6.8 85/05/27 */ 24677Swnj 317063Sbloom #include "param.h" 417063Sbloom #include "systm.h" 517063Sbloom #include "mbuf.h" 617063Sbloom #include "protosw.h" 717063Sbloom #include "socket.h" 817063Sbloom #include "socketvar.h" 917063Sbloom #include "errno.h" 1010895Ssam 1110895Ssam #include "../net/route.h" 1210895Ssam 1317063Sbloom #include "in.h" 1417063Sbloom #include "in_pcb.h" 1517063Sbloom #include "in_systm.h" 1617063Sbloom #include "ip.h" 1717063Sbloom #include "ip_var.h" 1817063Sbloom #include "tcp.h" 195088Swnj #define TCPOUTFLAGS 2017063Sbloom #include "tcp_fsm.h" 2117063Sbloom #include "tcp_seq.h" 2217063Sbloom #include "tcp_timer.h" 2317063Sbloom #include "tcp_var.h" 2417063Sbloom #include "tcpip.h" 2517063Sbloom #include "tcp_debug.h" 264677Swnj 274678Swnj /* 288314Sroot * Initial options. 295441Swnj */ 305441Swnj u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 315441Swnj 325441Swnj /* 335245Sroot * Tcp output routine: figure out what should be sent and send it. 344678Swnj */ 355075Swnj tcp_output(tp) 365075Swnj register struct tcpcb *tp; 374678Swnj { 385075Swnj register struct socket *so = tp->t_inpcb->inp_socket; 395075Swnj register int len; 405075Swnj struct mbuf *m0; 416505Ssam int off, flags, win, error; 425075Swnj register struct mbuf *m; 435075Swnj register struct tcpiphdr *ti; 445441Swnj u_char *opt; 455441Swnj unsigned optlen = 0; 467125Swnj int sendalot; 474678Swnj 485075Swnj /* 496279Swnj * Determine length of data that should be transmitted, 505088Swnj * and flags that will be used. 515088Swnj * If there is some data or critical controls (SYN, RST) 525088Swnj * to send, then transmit; otherwise, investigate further. 535075Swnj */ 547125Swnj again: 557125Swnj sendalot = 0; 565075Swnj off = tp->snd_nxt - tp->snd_una; 57*21116Skarels win = MIN(tp->snd_wnd, tp->snd_cwnd); 58*21116Skarels /* 59*21116Skarels * If in persist timeout with window of 0, send 1 byte. 60*21116Skarels * Otherwise, window is small but nonzero 61*21116Skarels * and timer expired, go to transmit state. 62*21116Skarels */ 63*21116Skarels if (tp->t_force) { 64*21116Skarels if (win == 0) 65*21116Skarels win = 1; 66*21116Skarels else { 67*21116Skarels tp->t_timer[TCPT_PERSIST] = 0; 68*21116Skarels tp->t_rxtshift = 0; 69*21116Skarels } 70*21116Skarels } 7117361Skarels len = MIN(so->so_snd.sb_cc, win) - off; 725285Sroot if (len < 0) 736505Ssam return (0); /* ??? */ /* past FIN */ 747125Swnj if (len > tp->t_maxseg) { 755088Swnj len = tp->t_maxseg; 7617318Skarels /* 77*21116Skarels * Don't send more than one segment if retransmitting 78*21116Skarels * (or persisting, but then we shouldn't be here). 7917318Skarels */ 8017361Skarels if (tp->t_rxtshift == 0) 8117318Skarels sendalot = 1; 827125Swnj } 836279Swnj 84*21116Skarels win = sbspace(&so->so_rcv); 855088Swnj flags = tcp_outflags[tp->t_state]; 865299Sroot if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) 875163Swnj flags &= ~TH_FIN; 886279Swnj if (flags & (TH_SYN|TH_RST|TH_FIN)) 895075Swnj goto send; 906279Swnj if (SEQ_GT(tp->snd_up, tp->snd_una)) 916279Swnj goto send; 924678Swnj 935075Swnj /* 9417318Skarels * Sender silly window avoidance. If connection is idle 9517318Skarels * and can send all data, a maximum segment, 9617318Skarels * at least a maximum default-size segment do it, 976279Swnj * or are forced, do it; otherwise don't bother. 98*21116Skarels * If retransmitting (possibly after persist timer forced us 99*21116Skarels * to send into a small window), then must resend. 1006279Swnj */ 1016279Swnj if (len) { 10217318Skarels if (len == tp->t_maxseg || len >= so->so_snd.sb_cc) /* off = 0*/ 1036279Swnj goto send; 10417318Skarels if (len >= TCP_MSS) /* a lot */ 1056279Swnj goto send; 1066279Swnj if (tp->t_force) 1076279Swnj goto send; 108*21116Skarels if (SEQ_LT(tp->snd_nxt, tp->snd_max)) 109*21116Skarels goto send; 1106279Swnj } 1116279Swnj 1126279Swnj /* 1135285Sroot * Send if we owe peer an ACK. 1145075Swnj */ 1155441Swnj if (tp->t_flags&TF_ACKNOW) 1165075Swnj goto send; 1174678Swnj 1185441Swnj 1195441Swnj /* 12017361Skarels * Calculate available window, and also amount 1215075Swnj * of window known to peer (as advertised window less 12217361Skarels * next expected input.) If the difference is 35% or more of the 12317361Skarels * maximum possible window, then want to send a window update to peer. 1245075Swnj */ 1255088Swnj win = sbspace(&so->so_rcv); 1265088Swnj if (win > 0 && 1275088Swnj ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) 1285075Swnj goto send; 1294678Swnj 1305075Swnj /* 1317125Swnj * TCP window updates are not reliable, rather a polling protocol 1327125Swnj * using ``persist'' packets is used to insure receipt of window 1337125Swnj * updates. The three ``states'' for the output side are: 1347125Swnj * idle not doing retransmits or persists 1357125Swnj * persisting to move a zero window 1367125Swnj * (re)transmitting and thereby not persisting 1377125Swnj * 1387125Swnj * tp->t_timer[TCPT_PERSIST] 1397125Swnj * is set when we are in persist state. 1407125Swnj * tp->t_force 1417125Swnj * is set when we are called to send a persist packet. 1427125Swnj * tp->t_timer[TCPT_REXMT] 1437125Swnj * is set when we are retransmitting 1447125Swnj * The output side is idle when both timers are zero. 1457125Swnj * 146*21116Skarels * If send window is too small, there is data to transmit, and no 147*21116Skarels * retransmit or persist is pending, then go to persist state. 148*21116Skarels * If nothing happens soon, send when timer expires: 149*21116Skarels * if window is nonzero, transmit what we can, 150*21116Skarels * otherwise force out a byte. 1517125Swnj */ 152*21116Skarels if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && 153*21116Skarels tp->t_timer[TCPT_PERSIST] == 0) { 1547125Swnj tp->t_rxtshift = 0; 1557125Swnj tcp_setpersist(tp); 1567125Swnj } 1577125Swnj 1587125Swnj /* 1595075Swnj * No reason to send a segment, just return. 1605075Swnj */ 1615110Swnj return (0); 1624678Swnj 1635075Swnj send: 1645075Swnj /* 1655075Swnj * Grab a header mbuf, attaching a copy of data to 1665075Swnj * be transmitted, and initialize the header from 1675075Swnj * the template for sends on this connection. 1685075Swnj */ 16911720Ssam MGET(m, M_DONTWAIT, MT_HEADER); 17011720Ssam if (m == NULL) 1716505Ssam return (ENOBUFS); 1725245Sroot m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 1734885Swnj m->m_len = sizeof (struct tcpiphdr); 1745075Swnj if (len) { 1755075Swnj m->m_next = m_copy(so->so_snd.sb_mb, off, len); 1765075Swnj if (m->m_next == 0) 1775075Swnj len = 0; 1785075Swnj } 1795075Swnj ti = mtod(m, struct tcpiphdr *); 1805075Swnj if (tp->t_template == 0) 1815075Swnj panic("tcp_output"); 1825110Swnj bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 1835075Swnj 1845075Swnj /* 1855075Swnj * Fill in fields, remembering maximum advertised 1865075Swnj * window for use in delaying messages about window sizes. 1875075Swnj */ 1885245Sroot ti->ti_seq = tp->snd_nxt; 1895245Sroot ti->ti_ack = tp->rcv_nxt; 1905245Sroot ti->ti_seq = htonl(ti->ti_seq); 1915245Sroot ti->ti_ack = htonl(ti->ti_ack); 1925441Swnj /* 1935441Swnj * Before ESTABLISHED, force sending of initial options 1945441Swnj * unless TCP set to not do any options. 1955441Swnj */ 1965441Swnj if (tp->t_state < TCPS_ESTABLISHED) { 19717273Skarels int mss; 19817273Skarels 1995441Swnj if (tp->t_flags&TF_NOOPT) 2005441Swnj goto noopt; 20117273Skarels mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp)); 20217273Skarels if (mss <= IP_MSS - sizeof(struct tcpiphdr)) 20317273Skarels goto noopt; 2045441Swnj opt = tcp_initopt; 2055441Swnj optlen = sizeof (tcp_initopt); 20617273Skarels *(u_short *)(opt + 2) = htons(mss); 2075441Swnj } else { 2085441Swnj if (tp->t_tcpopt == 0) 2095441Swnj goto noopt; 2105441Swnj opt = mtod(tp->t_tcpopt, u_char *); 2115441Swnj optlen = tp->t_tcpopt->m_len; 2125441Swnj } 2138314Sroot if (opt) { 2145110Swnj m0 = m->m_next; 2159643Ssam m->m_next = m_get(M_DONTWAIT, MT_DATA); 2165088Swnj if (m->m_next == 0) { 2175088Swnj (void) m_free(m); 2185441Swnj m_freem(m0); 2196505Ssam return (ENOBUFS); 2205088Swnj } 2215088Swnj m->m_next->m_next = m0; 2225441Swnj m0 = m->m_next; 2235441Swnj m0->m_len = optlen; 2246162Ssam bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 2255441Swnj opt = (u_char *)(mtod(m0, caddr_t) + optlen); 2265441Swnj while (m0->m_len & 0x3) { 2275441Swnj *opt++ = TCPOPT_EOL; 2285441Swnj m0->m_len++; 2295441Swnj } 2305441Swnj optlen = m0->m_len; 2315441Swnj ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 2325088Swnj } 2335441Swnj noopt: 2345088Swnj ti->ti_flags = flags; 235*21116Skarels if (win >= so->so_rcv.sb_hiwat / 4) /* avoid silly window */ 2365110Swnj ti->ti_win = htons((u_short)win); 2375088Swnj if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 2385420Swnj ti->ti_urp = tp->snd_up - tp->snd_nxt; 2395420Swnj ti->ti_urp = htons(ti->ti_urp); 2405075Swnj ti->ti_flags |= TH_URG; 2415075Swnj } else 2425075Swnj /* 2435075Swnj * If no urgent pointer to send, then we pull 2445075Swnj * the urgent pointer to the left edge of the send window 2455075Swnj * so that it doesn't drift into the send window on sequence 2465075Swnj * number wraparound. 2475075Swnj */ 2485088Swnj tp->snd_up = tp->snd_una; /* drag it along */ 2497644Sroot /* 2507644Sroot * If anything to send and we can send it all, set PUSH. 2517644Sroot * (This will keep happy those implementations which only 25210143Ssam * give data to the user when a buffer fills or a PUSH comes in.) 2537644Sroot */ 2547644Sroot if (len && off+len == so->so_snd.sb_cc) 2557644Sroot ti->ti_flags |= TH_PUSH; 2565075Swnj 2575075Swnj /* 2585075Swnj * Put TCP length in extended header, and then 2595075Swnj * checksum extended header and data. 2605075Swnj */ 2615441Swnj if (len + optlen) { 2625441Swnj ti->ti_len = sizeof (struct tcphdr) + optlen + len; 2635441Swnj ti->ti_len = htons((u_short)ti->ti_len); 2645441Swnj } 2656162Ssam ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 2665075Swnj 2675075Swnj /* 2687125Swnj * In transmit state, time the transmission and arrange for 269*21116Skarels * the retransmit. In persist state, just set snd_max. 2705088Swnj */ 271*21116Skarels if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { 2727125Swnj /* 2737146Swnj * Advance snd_nxt over sequence space of this segment. 2747125Swnj */ 2757125Swnj if (flags & (TH_SYN|TH_FIN)) 2767125Swnj tp->snd_nxt++; 2777125Swnj tp->snd_nxt += len; 27815385Ssam if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 2797149Swnj tp->snd_max = tp->snd_nxt; 28015385Ssam /* 28115385Ssam * Time this transmission if not a retransmission and 28215385Ssam * not currently timing anything. 28315385Ssam */ 28415385Ssam if (tp->t_rtt == 0) { 28515385Ssam tp->t_rtt = 1; 28615385Ssam tp->t_rtseq = tp->snd_nxt - len; 28715385Ssam } 2887125Swnj } 2895088Swnj 2907125Swnj /* 291*21116Skarels * Set retransmit timer if not currently set, 292*21116Skarels * and not doing a keep-alive probe. 2937125Swnj * Initial value for retransmit timer to tcp_beta*tp->t_srtt. 2947125Swnj * Initialize shift counter which is used for exponential 2957125Swnj * backoff of retransmit time. 2967125Swnj */ 2977125Swnj if (tp->t_timer[TCPT_REXMT] == 0 && 2987125Swnj tp->snd_nxt != tp->snd_una) { 2997125Swnj TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 3007125Swnj tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 3017125Swnj tp->t_rxtshift = 0; 3027125Swnj } 3037125Swnj tp->t_timer[TCPT_PERSIST] = 0; 3047149Swnj } else { 3057149Swnj if (SEQ_GT(tp->snd_una+1, tp->snd_max)) 3067149Swnj tp->snd_max = tp->snd_una+1; 3077146Swnj } 3085163Swnj 3095163Swnj /* 3105268Sroot * Trace. 3115268Sroot */ 3127146Swnj if (so->so_options & SO_DEBUG) 3135268Sroot tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 3145268Sroot 3155268Sroot /* 3165075Swnj * Fill in IP length and desired time to live and 3175075Swnj * send to IP level. 3185075Swnj */ 3195441Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 3205075Swnj ((struct ip *)ti)->ip_ttl = TCP_TTL; 32112418Ssam if (so->so_options & SO_DONTROUTE) 32212765Ssam error = 32312765Ssam ip_output(m, tp->t_ipopt, (struct route *)0, IP_ROUTETOIF); 32412418Ssam else 32512418Ssam error = ip_output(m, tp->t_ipopt, &tp->t_inpcb->inp_route, 0); 32612418Ssam if (error) 3276505Ssam return (error); 3285075Swnj 3295075Swnj /* 3305075Swnj * Data sent (as far as we can tell). 3315075Swnj * If this advertises a larger window than any other segment, 3325245Sroot * then remember the size of the advertised window. 3335088Swnj * Drop send for purpose of ACK requirements. 3345075Swnj */ 3355252Sroot if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 3365075Swnj tp->rcv_adv = tp->rcv_nxt + win; 3375088Swnj tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 3387125Swnj if (sendalot && tp->t_force == 0) 3397125Swnj goto again; 3406505Ssam return (0); 3414677Swnj } 3427125Swnj 3437125Swnj tcp_setpersist(tp) 3447125Swnj register struct tcpcb *tp; 3457125Swnj { 3467125Swnj 3477125Swnj if (tp->t_timer[TCPT_REXMT]) 3487125Swnj panic("tcp_output REXMT"); 3497125Swnj /* 3507125Swnj * Start/restart persistance timer. 3517125Swnj */ 3527125Swnj TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 3537125Swnj ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, 3547125Swnj TCPTV_PERSMIN, TCPTV_MAX); 3557125Swnj tp->t_rxtshift++; 3567125Swnj if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) 3577125Swnj tp->t_rxtshift = 0; 3587125Swnj } 359