1*10143Ssam /* tcp_output.c 4.50 83/01/04 */ 24677Swnj 34677Swnj #include "../h/param.h" 44677Swnj #include "../h/systm.h" 54677Swnj #include "../h/mbuf.h" 65163Swnj #include "../h/protosw.h" 74677Swnj #include "../h/socket.h" 84804Swnj #include "../h/socketvar.h" 98402Swnj #include "../netinet/in.h" 106352Ssam #include "../net/route.h" 118402Swnj #include "../netinet/in_pcb.h" 128402Swnj #include "../netinet/in_systm.h" 138402Swnj #include "../netinet/ip.h" 148402Swnj #include "../netinet/ip_var.h" 158402Swnj #include "../netinet/tcp.h" 165088Swnj #define TCPOUTFLAGS 178402Swnj #include "../netinet/tcp_fsm.h" 188402Swnj #include "../netinet/tcp_seq.h" 198402Swnj #include "../netinet/tcp_timer.h" 208402Swnj #include "../netinet/tcp_var.h" 218402Swnj #include "../netinet/tcpip.h" 228402Swnj #include "../netinet/tcp_debug.h" 236505Ssam #include <errno.h> 244677Swnj 254678Swnj /* 268314Sroot * Initial options. 275441Swnj */ 285441Swnj u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 295441Swnj 305441Swnj /* 315245Sroot * Tcp output routine: figure out what should be sent and send it. 324678Swnj */ 335075Swnj tcp_output(tp) 345075Swnj register struct tcpcb *tp; 354678Swnj { 365075Swnj register struct socket *so = tp->t_inpcb->inp_socket; 375075Swnj register int len; 385075Swnj struct mbuf *m0; 396505Ssam int off, flags, win, error; 405075Swnj register struct mbuf *m; 415075Swnj register struct tcpiphdr *ti; 425441Swnj u_char *opt; 435441Swnj unsigned optlen = 0; 447125Swnj int sendalot; 454678Swnj 464678Swnj 475075Swnj /* 486279Swnj * Determine length of data that should be transmitted, 495088Swnj * and flags that will be used. 505088Swnj * If there is some data or critical controls (SYN, RST) 515088Swnj * to send, then transmit; otherwise, investigate further. 525075Swnj */ 537125Swnj again: 547125Swnj sendalot = 0; 555075Swnj off = tp->snd_nxt - tp->snd_una; 565163Swnj len = MIN(so->so_snd.sb_cc, tp->snd_wnd+tp->t_force) - off; 575285Sroot if (len < 0) 586505Ssam return (0); /* ??? */ /* past FIN */ 597125Swnj if (len > tp->t_maxseg) { 605088Swnj len = tp->t_maxseg; 617125Swnj sendalot = 1; 627125Swnj } 636279Swnj 645088Swnj flags = tcp_outflags[tp->t_state]; 655299Sroot if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) 665163Swnj flags &= ~TH_FIN; 676279Swnj if (flags & (TH_SYN|TH_RST|TH_FIN)) 685075Swnj goto send; 696279Swnj if (SEQ_GT(tp->snd_up, tp->snd_una)) 706279Swnj goto send; 714678Swnj 725075Swnj /* 736279Swnj * Sender silly window avoidance. If can send all data, 746279Swnj * a maximum segment, at least 1/4 of window do it, 756279Swnj * or are forced, do it; otherwise don't bother. 766279Swnj */ 776279Swnj if (len) { 786279Swnj if (len == tp->t_maxseg || off+len >= so->so_snd.sb_cc) 796279Swnj goto send; 806279Swnj if (len * 4 >= tp->snd_wnd) /* a lot */ 816279Swnj goto send; 826279Swnj if (tp->t_force) 836279Swnj goto send; 846279Swnj } 856279Swnj 866279Swnj /* 875285Sroot * Send if we owe peer an ACK. 885075Swnj */ 895441Swnj if (tp->t_flags&TF_ACKNOW) 905075Swnj goto send; 914678Swnj 925441Swnj 935441Swnj /* 945075Swnj * Calculate available window in i, and also amount 955075Swnj * of window known to peer (as advertised window less 965075Swnj * next expected input.) If this is 35% or more of the 975075Swnj * maximum possible window, then want to send a segment to peer. 985075Swnj */ 995088Swnj win = sbspace(&so->so_rcv); 1005088Swnj if (win > 0 && 1015088Swnj ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) 1025075Swnj goto send; 1034678Swnj 1045075Swnj /* 1057125Swnj * TCP window updates are not reliable, rather a polling protocol 1067125Swnj * using ``persist'' packets is used to insure receipt of window 1077125Swnj * updates. The three ``states'' for the output side are: 1087125Swnj * idle not doing retransmits or persists 1097125Swnj * persisting to move a zero window 1107125Swnj * (re)transmitting and thereby not persisting 1117125Swnj * 1127125Swnj * tp->t_timer[TCPT_PERSIST] 1137125Swnj * is set when we are in persist state. 1147125Swnj * tp->t_force 1157125Swnj * is set when we are called to send a persist packet. 1167125Swnj * tp->t_timer[TCPT_REXMT] 1177125Swnj * is set when we are retransmitting 1187125Swnj * The output side is idle when both timers are zero. 1197125Swnj * 1207125Swnj * If send window is closed, there is data to transmit, and no 1217125Swnj * retransmit or persist is pending, then go to persist state, 1227125Swnj * arranging to force out a byte to get more current window information 1237125Swnj * if nothing happens soon. 1247125Swnj */ 1257125Swnj if (tp->snd_wnd == 0 && so->so_snd.sb_cc && 1267125Swnj tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { 1277125Swnj tp->t_rxtshift = 0; 1287125Swnj tcp_setpersist(tp); 1297125Swnj } 1307125Swnj 1317125Swnj /* 1325075Swnj * No reason to send a segment, just return. 1335075Swnj */ 1345110Swnj return (0); 1354678Swnj 1365075Swnj send: 1375075Swnj /* 1385075Swnj * Grab a header mbuf, attaching a copy of data to 1395075Swnj * be transmitted, and initialize the header from 1405075Swnj * the template for sends on this connection. 1415075Swnj */ 1429643Ssam MGET(m, M_DONTWAIT, MT_DATA); 143*10143Ssam if (m == INADDR_ANY) 1446505Ssam return (ENOBUFS); 1455245Sroot m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 1464885Swnj m->m_len = sizeof (struct tcpiphdr); 1475075Swnj if (len) { 1485075Swnj m->m_next = m_copy(so->so_snd.sb_mb, off, len); 1495075Swnj if (m->m_next == 0) 1505075Swnj len = 0; 1515075Swnj } 1525075Swnj ti = mtod(m, struct tcpiphdr *); 1535075Swnj if (tp->t_template == 0) 1545075Swnj panic("tcp_output"); 1555110Swnj bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 1565075Swnj 1575075Swnj /* 1585075Swnj * Fill in fields, remembering maximum advertised 1595075Swnj * window for use in delaying messages about window sizes. 1605075Swnj */ 1615245Sroot ti->ti_seq = tp->snd_nxt; 1625245Sroot ti->ti_ack = tp->rcv_nxt; 1635245Sroot ti->ti_seq = htonl(ti->ti_seq); 1645245Sroot ti->ti_ack = htonl(ti->ti_ack); 1655441Swnj /* 1665441Swnj * Before ESTABLISHED, force sending of initial options 1675441Swnj * unless TCP set to not do any options. 1685441Swnj */ 1695441Swnj if (tp->t_state < TCPS_ESTABLISHED) { 1705441Swnj if (tp->t_flags&TF_NOOPT) 1715441Swnj goto noopt; 1725441Swnj opt = tcp_initopt; 1735441Swnj optlen = sizeof (tcp_initopt); 1748314Sroot *(u_short *)(opt + 2) = MIN(so->so_rcv.sb_hiwat / 2, 1024); 1755441Swnj *(u_short *)(opt + 2) = htons(*(u_short *)(opt + 2)); 1765441Swnj } else { 1775441Swnj if (tp->t_tcpopt == 0) 1785441Swnj goto noopt; 1795441Swnj opt = mtod(tp->t_tcpopt, u_char *); 1805441Swnj optlen = tp->t_tcpopt->m_len; 1815441Swnj } 1828314Sroot if (opt) { 1835110Swnj m0 = m->m_next; 1849643Ssam m->m_next = m_get(M_DONTWAIT, MT_DATA); 1855088Swnj if (m->m_next == 0) { 1865088Swnj (void) m_free(m); 1875441Swnj m_freem(m0); 1886505Ssam return (ENOBUFS); 1895088Swnj } 1905088Swnj m->m_next->m_next = m0; 1915441Swnj m0 = m->m_next; 1925441Swnj m0->m_len = optlen; 1936162Ssam bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 1945441Swnj opt = (u_char *)(mtod(m0, caddr_t) + optlen); 1955441Swnj while (m0->m_len & 0x3) { 1965441Swnj *opt++ = TCPOPT_EOL; 1975441Swnj m0->m_len++; 1985441Swnj } 1995441Swnj optlen = m0->m_len; 2005441Swnj ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 2015088Swnj } 2025441Swnj noopt: 2035088Swnj ti->ti_flags = flags; 2045075Swnj win = sbspace(&so->so_rcv); 2056279Swnj if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */ 2066279Swnj win = 0; 2075075Swnj if (win > 0) 2085110Swnj ti->ti_win = htons((u_short)win); 2095088Swnj if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 2105420Swnj ti->ti_urp = tp->snd_up - tp->snd_nxt; 2115420Swnj ti->ti_urp = htons(ti->ti_urp); 2125075Swnj ti->ti_flags |= TH_URG; 2135075Swnj } else 2145075Swnj /* 2155075Swnj * If no urgent pointer to send, then we pull 2165075Swnj * the urgent pointer to the left edge of the send window 2175075Swnj * so that it doesn't drift into the send window on sequence 2185075Swnj * number wraparound. 2195075Swnj */ 2205088Swnj tp->snd_up = tp->snd_una; /* drag it along */ 2217644Sroot /* 2227644Sroot * If anything to send and we can send it all, set PUSH. 2237644Sroot * (This will keep happy those implementations which only 224*10143Ssam * give data to the user when a buffer fills or a PUSH comes in.) 2257644Sroot */ 2267644Sroot if (len && off+len == so->so_snd.sb_cc) 2277644Sroot ti->ti_flags |= TH_PUSH; 2285075Swnj 2295075Swnj /* 2305075Swnj * Put TCP length in extended header, and then 2315075Swnj * checksum extended header and data. 2325075Swnj */ 2335441Swnj if (len + optlen) { 2345441Swnj ti->ti_len = sizeof (struct tcphdr) + optlen + len; 2355441Swnj ti->ti_len = htons((u_short)ti->ti_len); 2365441Swnj } 2376162Ssam ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 2385075Swnj 2395075Swnj /* 2407125Swnj * In transmit state, time the transmission and arrange for 2417125Swnj * the retransmit. In persist state, reset persist time for 2427125Swnj * next persist. 2435088Swnj */ 2447125Swnj if (tp->t_force == 0) { 2457125Swnj /* 2467146Swnj * Advance snd_nxt over sequence space of this segment. 2477125Swnj */ 2487125Swnj if (flags & (TH_SYN|TH_FIN)) 2497125Swnj tp->snd_nxt++; 2507125Swnj tp->snd_nxt += len; 2517149Swnj if (SEQ_GT(tp->snd_nxt, tp->snd_max)) 2527149Swnj tp->snd_max = tp->snd_nxt; 2535088Swnj 2547125Swnj /* 2557125Swnj * Time this transmission if not a retransmission and 2567125Swnj * not currently timing anything. 2577125Swnj */ 2587125Swnj if (SEQ_GT(tp->snd_nxt, tp->snd_max) && tp->t_rtt == 0) { 2597125Swnj tp->t_rtt = 1; 2607125Swnj tp->t_rtseq = tp->snd_nxt - len; 2617125Swnj } 2625088Swnj 2637125Swnj /* 2647125Swnj * Set retransmit timer if not currently set. 2657125Swnj * Initial value for retransmit timer to tcp_beta*tp->t_srtt. 2667125Swnj * Initialize shift counter which is used for exponential 2677125Swnj * backoff of retransmit time. 2687125Swnj */ 2697125Swnj if (tp->t_timer[TCPT_REXMT] == 0 && 2707125Swnj tp->snd_nxt != tp->snd_una) { 2717125Swnj TCPT_RANGESET(tp->t_timer[TCPT_REXMT], 2727125Swnj tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); 2737125Swnj tp->t_rtt = 0; 2747125Swnj tp->t_rxtshift = 0; 2757125Swnj } 2767125Swnj tp->t_timer[TCPT_PERSIST] = 0; 2777149Swnj } else { 2787149Swnj if (SEQ_GT(tp->snd_una+1, tp->snd_max)) 2797149Swnj tp->snd_max = tp->snd_una+1; 2807146Swnj } 2815163Swnj 2825163Swnj /* 2835268Sroot * Trace. 2845268Sroot */ 2857146Swnj if (so->so_options & SO_DEBUG) 2865268Sroot tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 2875268Sroot 2885268Sroot /* 2895075Swnj * Fill in IP length and desired time to live and 2905075Swnj * send to IP level. 2915075Swnj */ 2925441Swnj ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 2935075Swnj ((struct ip *)ti)->ip_ttl = TCP_TTL; 2947146Swnj if (error = ip_output(m, tp->t_ipopt, (so->so_options & SO_DONTROUTE) ? 2957146Swnj &routetoif : &tp->t_inpcb->inp_route, 0)) 2966505Ssam return (error); 2975075Swnj 2985075Swnj /* 2995075Swnj * Data sent (as far as we can tell). 3005075Swnj * If this advertises a larger window than any other segment, 3015245Sroot * then remember the size of the advertised window. 3025088Swnj * Drop send for purpose of ACK requirements. 3035075Swnj */ 3045252Sroot if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 3055075Swnj tp->rcv_adv = tp->rcv_nxt + win; 3065088Swnj tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 3077125Swnj if (sendalot && tp->t_force == 0) 3087125Swnj goto again; 3096505Ssam return (0); 3104677Swnj } 3117125Swnj 3127125Swnj tcp_setpersist(tp) 3137125Swnj register struct tcpcb *tp; 3147125Swnj { 3157125Swnj 3167125Swnj if (tp->t_timer[TCPT_REXMT]) 3177125Swnj panic("tcp_output REXMT"); 3187125Swnj /* 3197125Swnj * Start/restart persistance timer. 3207125Swnj */ 3217125Swnj TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 3227125Swnj ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, 3237125Swnj TCPTV_PERSMIN, TCPTV_MAX); 3247125Swnj tp->t_rxtshift++; 3257125Swnj if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) 3267125Swnj tp->t_rxtshift = 0; 3277125Swnj } 328