1*17318Skarels /* tcp_output.c 6.6 84/11/01 */ 24677Swnj 317063Sbloom #include "param.h" 417063Sbloom #include "systm.h" 517063Sbloom #include "mbuf.h" 617063Sbloom #include "protosw.h" 717063Sbloom #include "socket.h" 817063Sbloom #include "socketvar.h" 917063Sbloom #include "errno.h" 1010895Ssam 1110895Ssam #include "../net/route.h" 1210895Ssam 1317063Sbloom #include "in.h" 1417063Sbloom #include "in_pcb.h" 1517063Sbloom #include "in_systm.h" 1617063Sbloom #include "ip.h" 1717063Sbloom #include "ip_var.h" 1817063Sbloom #include "tcp.h" 195088Swnj #define TCPOUTFLAGS 2017063Sbloom #include "tcp_fsm.h" 2117063Sbloom #include "tcp_seq.h" 2217063Sbloom #include "tcp_timer.h" 2317063Sbloom #include "tcp_var.h" 2417063Sbloom #include "tcpip.h" 2517063Sbloom #include "tcp_debug.h" 264677Swnj 274678Swnj /* 288314Sroot * Initial options. 295441Swnj */ 305441Swnj u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 315441Swnj 325441Swnj /* 335245Sroot * Tcp output routine: figure out what should be sent and send it. 344678Swnj */ 355075Swnj tcp_output(tp) 365075Swnj register struct tcpcb *tp; 374678Swnj { 385075Swnj register struct socket *so = tp->t_inpcb->inp_socket; 395075Swnj register int len; 405075Swnj struct mbuf *m0; 416505Ssam int off, flags, win, error; 425075Swnj register struct mbuf *m; 435075Swnj register struct tcpiphdr *ti; 445441Swnj u_char *opt; 455441Swnj unsigned optlen = 0; 467125Swnj int sendalot; 474678Swnj 485075Swnj /* 496279Swnj * Determine length of data that should be transmitted, 505088Swnj * and flags that will be used. 515088Swnj * If there is some data or critical controls (SYN, RST) 525088Swnj * to send, then transmit; otherwise, investigate further. 535075Swnj */ 547125Swnj again: 557125Swnj sendalot = 0; 565075Swnj off = tp->snd_nxt - tp->snd_una; 575163Swnj len = MIN(so->so_snd.sb_cc, tp->snd_wnd+tp->t_force) - off; 585285Sroot if (len < 0) 596505Ssam return (0); /* ??? */ /* past FIN */ 607125Swnj if (len > tp->t_maxseg) { 615088Swnj len = tp->t_maxseg; 62*17318Skarels /* 63*17318Skarels * Don't send more than one segment if retransmitting. 64*17318Skarels */ 65*17318Skarels if (SEQ_GT(tp->snd_nxt, tp->snd_max)) 66*17318Skarels sendalot = 1; 677125Swnj } 686279Swnj 695088Swnj flags = tcp_outflags[tp->t_state]; 705299Sroot if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) 715163Swnj flags &= ~TH_FIN; 726279Swnj if (flags & (TH_SYN|TH_RST|TH_FIN)) 735075Swnj goto send; 746279Swnj if (SEQ_GT(tp->snd_up, tp->snd_una)) 756279Swnj goto send; 764678Swnj 775075Swnj /* 78*17318Skarels * Sender silly window avoidance. If connection is idle 79*17318Skarels * and can send all data, a maximum segment, 80*17318Skarels * at least a maximum default-size segment do it, 816279Swnj * or are forced, do it; otherwise don't bother. 826279Swnj */ 836279Swnj if (len) { 84*17318Skarels if (len == tp->t_maxseg || len >= so->so_snd.sb_cc) /* off = 0*/ 856279Swnj goto send; 86*17318Skarels if (len >= TCP_MSS) /* a lot */ 876279Swnj goto send; 886279Swnj if (tp->t_force) 896279Swnj goto send; 906279Swnj } 916279Swnj 926279Swnj /* 935285Sroot * Send if we owe peer an ACK. 945075Swnj */ 955441Swnj if (tp->t_flags&TF_ACKNOW) 965075Swnj goto send; 974678Swnj 985441Swnj 995441Swnj /* 1005075Swnj * Calculate available window in i, and also amount 1015075Swnj * of window known to peer (as advertised window less 1025075Swnj * next expected input.) If this is 35% or more of the 1035075Swnj * maximum possible window, then want to send a segment to peer. 1045075Swnj */ 1055088Swnj win = sbspace(&so->so_rcv); 1065088Swnj if (win > 0 && 1075088Swnj ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) 1085075Swnj goto send; 1094678Swnj 1105075Swnj /* 1117125Swnj * TCP window updates are not reliable, rather a polling protocol 1127125Swnj * using ``persist'' packets is used to insure receipt of window 1137125Swnj * updates. The three ``states'' for the output side are: 1147125Swnj * idle not doing retransmits or persists 1157125Swnj * persisting to move a zero window 1167125Swnj * (re)transmitting and thereby not persisting 1177125Swnj * 1187125Swnj * tp->t_timer[TCPT_PERSIST] 1197125Swnj * is set when we are in persist state. 1207125Swnj * tp->t_force 1217125Swnj * is set when we are called to send a persist packet. 1227125Swnj * tp->t_timer[TCPT_REXMT] 1237125Swnj * is set when we are retransmitting 1247125Swnj * The output side is idle when both timers are zero. 1257125Swnj * 1267125Swnj * If send window is closed, there is data to transmit, and no 1277125Swnj * retransmit or persist is pending, then go to persist state, 1287125Swnj * arranging to force out a byte to get more current window information 1297125Swnj * if nothing happens soon. 1307125Swnj */ 1317125Swnj if (tp->snd_wnd == 0 && so->so_snd.sb_cc && 1327125Swnj tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { 1337125Swnj tp->t_rxtshift = 0; 1347125Swnj tcp_setpersist(tp); 1357125Swnj } 1367125Swnj 1377125Swnj /* 1385075Swnj * No reason to send a segment, just return. 1395075Swnj */ 1405110Swnj return (0); 1414678Swnj 1425075Swnj send: 1435075Swnj /* 1445075Swnj * Grab a header mbuf, attaching a copy of data to 1455075Swnj * be transmitted, and initialize the header from 1465075Swnj * the template for sends on this connection. 1475075Swnj */ 14811720Ssam MGET(m, M_DONTWAIT, MT_HEADER); 14911720Ssam if (m == NULL) 1506505Ssam return (ENOBUFS); 1515245Sroot m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 1524885Swnj m->m_len = sizeof (struct tcpiphdr); 1535075Swnj if (len) { 1545075Swnj m->m_next = m_copy(so->so_snd.sb_mb, off, len); 1555075Swnj if (m->m_next == 0) 1565075Swnj len = 0; 1575075Swnj } 1585075Swnj ti = mtod(m, struct tcpiphdr *); 1595075Swnj if (tp->t_template == 0) 1605075Swnj panic("tcp_output"); 1615110Swnj bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 1625075Swnj 1635075Swnj /* 1645075Swnj * Fill in fields, remembering maximum advertised 1655075Swnj * window for use in delaying messages about window sizes. 1665075Swnj */ 1675245Sroot ti->ti_seq = tp->snd_nxt; 1685245Sroot ti->ti_ack = tp->rcv_nxt; 1695245Sroot ti->ti_seq = htonl(ti->ti_seq); 1705245Sroot ti->ti_ack = htonl(ti->ti_ack); 1715441Swnj /* 1725441Swnj * Before ESTABLISHED, force sending of initial options 1735441Swnj * unless TCP set to not do any options. 1745441Swnj */ 1755441Swnj if (tp->t_state < TCPS_ESTABLISHED) { 17617273Skarels int mss; 17717273Skarels 1785441Swnj if (tp->t_flags&TF_NOOPT) 1795441Swnj goto noopt; 18017273Skarels mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp)); 18117273Skarels if (mss <= IP_MSS - sizeof(struct tcpiphdr)) 18217273Skarels goto noopt; 1835441Swnj opt = tcp_initopt; 1845441Swnj optlen = sizeof (tcp_initopt); 18517273Skarels *(u_short *)(opt + 2) = htons(mss); 1865441Swnj } else { 1875441Swnj if (tp->t_tcpopt == 0) 1885441Swnj goto noopt; 1895441Swnj opt = mtod(tp->t_tcpopt, u_char *); 1905441Swnj optlen = tp->t_tcpopt->m_len; 1915441Swnj } 1928314Sroot if (opt) { 1935110Swnj m0 = m->m_next; 1949643Ssam m->m_next = m_get(M_DONTWAIT, MT_DATA); 1955088Swnj if (m->m_next == 0) { 1965088Swnj (void) m_free(m); 1975441Swnj m_freem(m0); 1986505Ssam return (ENOBUFS); 1995088Swnj } 2005088Swnj m->m_next->m_next = m0; 2015441Swnj m0 = m->m_next; 2025441Swnj m0->m_len = optlen; 2036162Ssam bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 2045441Swnj opt = (u_char *)(mtod(m0, caddr_t) + optlen); 2055441Swnj while (m0->m_len & 0x3) { 2065441Swnj *opt++ = TCPOPT_EOL; 2075441Swnj m0->m_len++; 2085441Swnj } 2095441Swnj optlen = m0->m_len; 2105441Swnj ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 2115088Swnj } 2125441Swnj noopt: 2135088Swnj ti->ti_flags = flags; 2145075Swnj win = sbspace(&so->so_rcv); 2156279Swnj if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */ 2166279Swnj win = 0; 2175075Swnj if (win > 0) 2185110Swnj ti->ti_win = htons((u_short)win); 2195088Swnj if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 2205420Swnj ti->ti_urp = tp->snd_up - tp->snd_nxt; 2215420Swnj ti->ti_urp = htons(ti->ti_urp); 2225075Swnj ti->ti_flags |= TH_URG; 2235075Swnj } else 2245075Swnj /* 2255075Swnj * If no urgent pointer to send, then we pull 2265075Swnj * the urgent pointer to the left edge of the send window 2275075Swnj * so that it doesn't drift into the send window on sequence 2285075Swnj * number wraparound. 2295075Swnj */ 2305088Swnj tp->snd_up = tp->snd_una; /* drag it along */ 2317644Sroot /* 2327644Sroot * If anything to send and we can send it all, set PUSH. 2337644Sroot * (This will keep happy those implementations which only 23410143Ssam * give data to the user when a buffer fills or a PUSH comes in.) 2357644Sroot */ 2367644Sroot if (len && off+len == so->so_snd.sb_cc) 2377644Sroot ti->ti_flags |= TH_PUSH; 2385075Swnj 2395075Swnj /* 2405075Swnj * Put TCP length in extended header, and then 2415075Swnj * checksum extended header and data. 2425075Swnj */ 2435441Swnj if (len + optlen) { 2445441Swnj ti->ti_len = sizeof (struct tcphdr) + optlen + len; 2455441Swnj ti->ti_len = htons((u_short)ti->ti_len); 2465441Swnj } 2476162Ssam ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 2485075Swnj 2495075Swnj /* 2507125Swnj * In transmit state, time the transmission and arrange for 2517125Swnj * the retransmit. In persist state, reset persist time for 2527125Swnj * next persist. 2535088Swnj */ 2547125Swnj if (tp->t_force == 0) { 2557125Swnj /* 2567146Swnj * Advance snd_nxt over sequence space of this segment. 2577125Swnj */ 2587125Swnj if (flags & (TH_SYN|TH_FIN)) 2597125Swnj tp->snd_nxt++; 2607125Swnj tp->snd_nxt += len; 26115385Ssam if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 2627149Swnj tp->snd_max = tp->snd_nxt; 26315385Ssam /* 26415385Ssam * Time this transmission if not a retransmission and 26515385Ssam * not currently timing anything. 26615385Ssam */ 26715385Ssam if (tp->t_rtt == 0) { 26815385Ssam tp->t_rtt = 1; 26915385Ssam tp->t_rtseq = tp->snd_nxt - len; 27015385Ssam } 2717125Swnj } 2725088Swnj 2737125Swnj /* 2747125Swnj * Set retransmit timer if not currently set. 2757125Swnj * Initial value for retransmit timer to tcp_beta*tp->t_srtt. 2767125Swnj * Initialize shift counter which is used for exponential 2777125Swnj * backoff of retransmit time. 2787125Swnj */ 2797125Swnj if (tp->t_timer[TCPT_REXMT] == 0 && 2807125Swnj tp->snd_nxt != tp->snd_una) { 2817125Swnj TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 2827125Swnj tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 2837125Swnj tp->t_rxtshift = 0; 2847125Swnj } 2857125Swnj tp->t_timer[TCPT_PERSIST] = 0; 2867149Swnj } else { 2877149Swnj if (SEQ_GT(tp->snd_una+1, tp->snd_max)) 2887149Swnj tp->snd_max = tp->snd_una+1; 2897146Swnj } 2905163Swnj 2915163Swnj /* 2925268Sroot * Trace. 2935268Sroot */ 2947146Swnj if (so->so_options & SO_DEBUG) 2955268Sroot tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 2965268Sroot 2975268Sroot /* 2985075Swnj * Fill in IP length and desired time to live and 2995075Swnj * send to IP level. 3005075Swnj */ 3015441Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 3025075Swnj ((struct ip *)ti)->ip_ttl = TCP_TTL; 30312418Ssam if (so->so_options & SO_DONTROUTE) 30412765Ssam error = 30512765Ssam ip_output(m, tp->t_ipopt, (struct route *)0, IP_ROUTETOIF); 30612418Ssam else 30712418Ssam error = ip_output(m, tp->t_ipopt, &tp->t_inpcb->inp_route, 0); 30812418Ssam if (error) 3096505Ssam return (error); 3105075Swnj 3115075Swnj /* 3125075Swnj * Data sent (as far as we can tell). 3135075Swnj * If this advertises a larger window than any other segment, 3145245Sroot * then remember the size of the advertised window. 3155088Swnj * Drop send for purpose of ACK requirements. 3165075Swnj */ 3175252Sroot if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 3185075Swnj tp->rcv_adv = tp->rcv_nxt + win; 3195088Swnj tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 3207125Swnj if (sendalot && tp->t_force == 0) 3217125Swnj goto again; 3226505Ssam return (0); 3234677Swnj } 3247125Swnj 3257125Swnj tcp_setpersist(tp) 3267125Swnj register struct tcpcb *tp; 3277125Swnj { 3287125Swnj 3297125Swnj if (tp->t_timer[TCPT_REXMT]) 3307125Swnj panic("tcp_output REXMT"); 3317125Swnj /* 3327125Swnj * Start/restart persistance timer. 3337125Swnj */ 3347125Swnj TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 3357125Swnj ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, 3367125Swnj TCPTV_PERSMIN, TCPTV_MAX); 3377125Swnj tp->t_rxtshift++; 3387125Swnj if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) 3397125Swnj tp->t_rxtshift = 0; 3407125Swnj } 341