1*12418Ssam /* tcp_output.c 4.53 83/05/12 */ 24677Swnj 34677Swnj #include "../h/param.h" 44677Swnj #include "../h/systm.h" 54677Swnj #include "../h/mbuf.h" 65163Swnj #include "../h/protosw.h" 74677Swnj #include "../h/socket.h" 84804Swnj #include "../h/socketvar.h" 910895Ssam #include "../h/errno.h" 1010895Ssam 1110895Ssam #include "../net/route.h" 1210895Ssam 138402Swnj #include "../netinet/in.h" 148402Swnj #include "../netinet/in_pcb.h" 158402Swnj #include "../netinet/in_systm.h" 168402Swnj #include "../netinet/ip.h" 178402Swnj #include "../netinet/ip_var.h" 188402Swnj #include "../netinet/tcp.h" 195088Swnj #define TCPOUTFLAGS 208402Swnj #include "../netinet/tcp_fsm.h" 218402Swnj #include "../netinet/tcp_seq.h" 228402Swnj #include "../netinet/tcp_timer.h" 238402Swnj #include "../netinet/tcp_var.h" 248402Swnj #include "../netinet/tcpip.h" 258402Swnj #include "../netinet/tcp_debug.h" 264677Swnj 274678Swnj /* 288314Sroot * Initial options. 295441Swnj */ 305441Swnj u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 315441Swnj 325441Swnj /* 335245Sroot * Tcp output routine: figure out what should be sent and send it. 344678Swnj */ 355075Swnj tcp_output(tp) 365075Swnj register struct tcpcb *tp; 374678Swnj { 385075Swnj register struct socket *so = tp->t_inpcb->inp_socket; 395075Swnj register int len; 405075Swnj struct mbuf *m0; 416505Ssam int off, flags, win, error; 425075Swnj register struct mbuf *m; 435075Swnj register struct tcpiphdr *ti; 445441Swnj u_char *opt; 455441Swnj unsigned optlen = 0; 467125Swnj int sendalot; 474678Swnj 484678Swnj 49*12418Ssam if (tp->t_state == TCPS_CLOSED) 50*12418Ssam return (EINVAL); 515075Swnj /* 526279Swnj * Determine length of data that should be transmitted, 535088Swnj * and flags that will be used. 545088Swnj * If there is some data or critical controls (SYN, RST) 555088Swnj * to send, then transmit; otherwise, investigate further. 565075Swnj */ 577125Swnj again: 587125Swnj sendalot = 0; 595075Swnj off = tp->snd_nxt - tp->snd_una; 605163Swnj len = MIN(so->so_snd.sb_cc, tp->snd_wnd+tp->t_force) - off; 615285Sroot if (len < 0) 626505Ssam return (0); /* ??? */ /* past FIN */ 637125Swnj if (len > tp->t_maxseg) { 645088Swnj len = tp->t_maxseg; 657125Swnj sendalot = 1; 667125Swnj } 676279Swnj 685088Swnj flags = tcp_outflags[tp->t_state]; 695299Sroot if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) 705163Swnj flags &= ~TH_FIN; 716279Swnj if (flags & (TH_SYN|TH_RST|TH_FIN)) 725075Swnj goto send; 736279Swnj if (SEQ_GT(tp->snd_up, tp->snd_una)) 746279Swnj goto send; 754678Swnj 765075Swnj /* 776279Swnj * Sender silly window avoidance. If can send all data, 786279Swnj * a maximum segment, at least 1/4 of window do it, 796279Swnj * or are forced, do it; otherwise don't bother. 806279Swnj */ 816279Swnj if (len) { 826279Swnj if (len == tp->t_maxseg || off+len >= so->so_snd.sb_cc) 836279Swnj goto send; 846279Swnj if (len * 4 >= tp->snd_wnd) /* a lot */ 856279Swnj goto send; 866279Swnj if (tp->t_force) 876279Swnj goto send; 886279Swnj } 896279Swnj 906279Swnj /* 915285Sroot * Send if we owe peer an ACK. 925075Swnj */ 935441Swnj if (tp->t_flags&TF_ACKNOW) 945075Swnj goto send; 954678Swnj 965441Swnj 975441Swnj /* 985075Swnj * Calculate available window in i, and also amount 995075Swnj * of window known to peer (as advertised window less 1005075Swnj * next expected input.) If this is 35% or more of the 1015075Swnj * maximum possible window, then want to send a segment to peer. 1025075Swnj */ 1035088Swnj win = sbspace(&so->so_rcv); 1045088Swnj if (win > 0 && 1055088Swnj ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) 1065075Swnj goto send; 1074678Swnj 1085075Swnj /* 1097125Swnj * TCP window updates are not reliable, rather a polling protocol 1107125Swnj * using ``persist'' packets is used to insure receipt of window 1117125Swnj * updates. The three ``states'' for the output side are: 1127125Swnj * idle not doing retransmits or persists 1137125Swnj * persisting to move a zero window 1147125Swnj * (re)transmitting and thereby not persisting 1157125Swnj * 1167125Swnj * tp->t_timer[TCPT_PERSIST] 1177125Swnj * is set when we are in persist state. 1187125Swnj * tp->t_force 1197125Swnj * is set when we are called to send a persist packet. 1207125Swnj * tp->t_timer[TCPT_REXMT] 1217125Swnj * is set when we are retransmitting 1227125Swnj * The output side is idle when both timers are zero. 1237125Swnj * 1247125Swnj * If send window is closed, there is data to transmit, and no 1257125Swnj * retransmit or persist is pending, then go to persist state, 1267125Swnj * arranging to force out a byte to get more current window information 1277125Swnj * if nothing happens soon. 1287125Swnj */ 1297125Swnj if (tp->snd_wnd == 0 && so->so_snd.sb_cc && 1307125Swnj tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { 1317125Swnj tp->t_rxtshift = 0; 1327125Swnj tcp_setpersist(tp); 1337125Swnj } 1347125Swnj 1357125Swnj /* 1365075Swnj * No reason to send a segment, just return. 1375075Swnj */ 1385110Swnj return (0); 1394678Swnj 1405075Swnj send: 1415075Swnj /* 1425075Swnj * Grab a header mbuf, attaching a copy of data to 1435075Swnj * be transmitted, and initialize the header from 1445075Swnj * the template for sends on this connection. 1455075Swnj */ 14611720Ssam MGET(m, M_DONTWAIT, MT_HEADER); 14711720Ssam if (m == NULL) 1486505Ssam return (ENOBUFS); 1495245Sroot m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 1504885Swnj m->m_len = sizeof (struct tcpiphdr); 1515075Swnj if (len) { 1525075Swnj m->m_next = m_copy(so->so_snd.sb_mb, off, len); 1535075Swnj if (m->m_next == 0) 1545075Swnj len = 0; 1555075Swnj } 1565075Swnj ti = mtod(m, struct tcpiphdr *); 1575075Swnj if (tp->t_template == 0) 1585075Swnj panic("tcp_output"); 1595110Swnj bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 1605075Swnj 1615075Swnj /* 1625075Swnj * Fill in fields, remembering maximum advertised 1635075Swnj * window for use in delaying messages about window sizes. 1645075Swnj */ 1655245Sroot ti->ti_seq = tp->snd_nxt; 1665245Sroot ti->ti_ack = tp->rcv_nxt; 1675245Sroot ti->ti_seq = htonl(ti->ti_seq); 1685245Sroot ti->ti_ack = htonl(ti->ti_ack); 1695441Swnj /* 1705441Swnj * Before ESTABLISHED, force sending of initial options 1715441Swnj * unless TCP set to not do any options. 1725441Swnj */ 1735441Swnj if (tp->t_state < TCPS_ESTABLISHED) { 1745441Swnj if (tp->t_flags&TF_NOOPT) 1755441Swnj goto noopt; 1765441Swnj opt = tcp_initopt; 1775441Swnj optlen = sizeof (tcp_initopt); 1788314Sroot *(u_short *)(opt + 2) = MIN(so->so_rcv.sb_hiwat / 2, 1024); 1795441Swnj *(u_short *)(opt + 2) = htons(*(u_short *)(opt + 2)); 1805441Swnj } else { 1815441Swnj if (tp->t_tcpopt == 0) 1825441Swnj goto noopt; 1835441Swnj opt = mtod(tp->t_tcpopt, u_char *); 1845441Swnj optlen = tp->t_tcpopt->m_len; 1855441Swnj } 1868314Sroot if (opt) { 1875110Swnj m0 = m->m_next; 1889643Ssam m->m_next = m_get(M_DONTWAIT, MT_DATA); 1895088Swnj if (m->m_next == 0) { 1905088Swnj (void) m_free(m); 1915441Swnj m_freem(m0); 1926505Ssam return (ENOBUFS); 1935088Swnj } 1945088Swnj m->m_next->m_next = m0; 1955441Swnj m0 = m->m_next; 1965441Swnj m0->m_len = optlen; 1976162Ssam bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 1985441Swnj opt = (u_char *)(mtod(m0, caddr_t) + optlen); 1995441Swnj while (m0->m_len & 0x3) { 2005441Swnj *opt++ = TCPOPT_EOL; 2015441Swnj m0->m_len++; 2025441Swnj } 2035441Swnj optlen = m0->m_len; 2045441Swnj ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 2055088Swnj } 2065441Swnj noopt: 2075088Swnj ti->ti_flags = flags; 2085075Swnj win = sbspace(&so->so_rcv); 2096279Swnj if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */ 2106279Swnj win = 0; 2115075Swnj if (win > 0) 2125110Swnj ti->ti_win = htons((u_short)win); 2135088Swnj if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 2145420Swnj ti->ti_urp = tp->snd_up - tp->snd_nxt; 2155420Swnj ti->ti_urp = htons(ti->ti_urp); 2165075Swnj ti->ti_flags |= TH_URG; 2175075Swnj } else 2185075Swnj /* 2195075Swnj * If no urgent pointer to send, then we pull 2205075Swnj * the urgent pointer to the left edge of the send window 2215075Swnj * so that it doesn't drift into the send window on sequence 2225075Swnj * number wraparound. 2235075Swnj */ 2245088Swnj tp->snd_up = tp->snd_una; /* drag it along */ 2257644Sroot /* 2267644Sroot * If anything to send and we can send it all, set PUSH. 2277644Sroot * (This will keep happy those implementations which only 22810143Ssam * give data to the user when a buffer fills or a PUSH comes in.) 2297644Sroot */ 2307644Sroot if (len && off+len == so->so_snd.sb_cc) 2317644Sroot ti->ti_flags |= TH_PUSH; 2325075Swnj 2335075Swnj /* 2345075Swnj * Put TCP length in extended header, and then 2355075Swnj * checksum extended header and data. 2365075Swnj */ 2375441Swnj if (len + optlen) { 2385441Swnj ti->ti_len = sizeof (struct tcphdr) + optlen + len; 2395441Swnj ti->ti_len = htons((u_short)ti->ti_len); 2405441Swnj } 2416162Ssam ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 2425075Swnj 2435075Swnj /* 2447125Swnj * In transmit state, time the transmission and arrange for 2457125Swnj * the retransmit. In persist state, reset persist time for 2467125Swnj * next persist. 2475088Swnj */ 2487125Swnj if (tp->t_force == 0) { 2497125Swnj /* 2507146Swnj * Advance snd_nxt over sequence space of this segment. 2517125Swnj */ 2527125Swnj if (flags & (TH_SYN|TH_FIN)) 2537125Swnj tp->snd_nxt++; 2547125Swnj tp->snd_nxt += len; 2557149Swnj if (SEQ_GT(tp->snd_nxt, tp->snd_max)) 2567149Swnj tp->snd_max = tp->snd_nxt; 2575088Swnj 2587125Swnj /* 2597125Swnj * Time this transmission if not a retransmission and 2607125Swnj * not currently timing anything. 2617125Swnj */ 2627125Swnj if (SEQ_GT(tp->snd_nxt, tp->snd_max) && tp->t_rtt == 0) { 2637125Swnj tp->t_rtt = 1; 2647125Swnj tp->t_rtseq = tp->snd_nxt - len; 2657125Swnj } 2665088Swnj 2677125Swnj /* 2687125Swnj * Set retransmit timer if not currently set. 2697125Swnj * Initial value for retransmit timer to tcp_beta*tp->t_srtt. 2707125Swnj * Initialize shift counter which is used for exponential 2717125Swnj * backoff of retransmit time. 2727125Swnj */ 2737125Swnj if (tp->t_timer[TCPT_REXMT] == 0 && 2747125Swnj tp->snd_nxt != tp->snd_una) { 2757125Swnj TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 2767125Swnj tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 2777125Swnj tp->t_rtt = 0; 2787125Swnj tp->t_rxtshift = 0; 2797125Swnj } 2807125Swnj tp->t_timer[TCPT_PERSIST] = 0; 2817149Swnj } else { 2827149Swnj if (SEQ_GT(tp->snd_una+1, tp->snd_max)) 2837149Swnj tp->snd_max = tp->snd_una+1; 2847146Swnj } 2855163Swnj 2865163Swnj /* 2875268Sroot * Trace. 2885268Sroot */ 2897146Swnj if (so->so_options & SO_DEBUG) 2905268Sroot tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 2915268Sroot 2925268Sroot /* 2935075Swnj * Fill in IP length and desired time to live and 2945075Swnj * send to IP level. 2955075Swnj */ 2965441Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 2975075Swnj ((struct ip *)ti)->ip_ttl = TCP_TTL; 298*12418Ssam if (so->so_options & SO_DONTROUTE) 299*12418Ssam error = ip_output(m, tp->t_ipopt, 0, IP_ROUTETOIF); 300*12418Ssam else 301*12418Ssam error = ip_output(m, tp->t_ipopt, &tp->t_inpcb->inp_route, 0); 302*12418Ssam if (error) 3036505Ssam return (error); 3045075Swnj 3055075Swnj /* 3065075Swnj * Data sent (as far as we can tell). 3075075Swnj * If this advertises a larger window than any other segment, 3085245Sroot * then remember the size of the advertised window. 3095088Swnj * Drop send for purpose of ACK requirements. 3105075Swnj */ 3115252Sroot if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 3125075Swnj tp->rcv_adv = tp->rcv_nxt + win; 3135088Swnj tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 3147125Swnj if (sendalot && tp->t_force == 0) 3157125Swnj goto again; 3166505Ssam return (0); 3174677Swnj } 3187125Swnj 3197125Swnj tcp_setpersist(tp) 3207125Swnj register struct tcpcb *tp; 3217125Swnj { 3227125Swnj 3237125Swnj if (tp->t_timer[TCPT_REXMT]) 3247125Swnj panic("tcp_output REXMT"); 3257125Swnj /* 3267125Swnj * Start/restart persistance timer. 3277125Swnj */ 3287125Swnj TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 3297125Swnj ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, 3307125Swnj TCPTV_PERSMIN, TCPTV_MAX); 3317125Swnj tp->t_rxtshift++; 3327125Swnj if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) 3337125Swnj tp->t_rxtshift = 0; 3347125Swnj } 335