xref: /csrg-svn/sys/netinet/tcp_output.c (revision 24817)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)tcp_output.c	6.10 (Berkeley) 09/16/85
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mbuf.h"
12 #include "protosw.h"
13 #include "socket.h"
14 #include "socketvar.h"
15 #include "errno.h"
16 
17 #include "../net/route.h"
18 
19 #include "in.h"
20 #include "in_pcb.h"
21 #include "in_systm.h"
22 #include "ip.h"
23 #include "ip_var.h"
24 #include "tcp.h"
25 #define	TCPOUTFLAGS
26 #include "tcp_fsm.h"
27 #include "tcp_seq.h"
28 #include "tcp_timer.h"
29 #include "tcp_var.h"
30 #include "tcpip.h"
31 #include "tcp_debug.h"
32 
33 /*
34  * Initial options.
35  */
36 u_char	tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
37 
38 /*
39  * Tcp output routine: figure out what should be sent and send it.
40  */
41 tcp_output(tp)
42 	register struct tcpcb *tp;
43 {
44 	register struct socket *so = tp->t_inpcb->inp_socket;
45 	register int len;
46 	struct mbuf *m0;
47 	int off, flags, win, error;
48 	register struct mbuf *m;
49 	register struct tcpiphdr *ti;
50 	u_char *opt;
51 	unsigned optlen = 0;
52 	int sendalot;
53 
54 	/*
55 	 * Determine length of data that should be transmitted,
56 	 * and flags that will be used.
57 	 * If there is some data or critical controls (SYN, RST)
58 	 * to send, then transmit; otherwise, investigate further.
59 	 */
60 again:
61 	sendalot = 0;
62 	off = tp->snd_nxt - tp->snd_una;
63 	win = MIN(tp->snd_wnd, tp->snd_cwnd);
64 	/*
65 	 * If in persist timeout with window of 0, send 1 byte.
66 	 * Otherwise, window is small but nonzero
67 	 * and timer expired, go to transmit state.
68 	 */
69 	if (tp->t_force) {
70 		if (win == 0)
71 			win = 1;
72 		else {
73 			tp->t_timer[TCPT_PERSIST] = 0;
74 			tp->t_rxtshift = 0;
75 		}
76 	}
77 	len = MIN(so->so_snd.sb_cc, win) - off;
78 	if (len < 0)
79 		return (0);	/* ??? */	/* past FIN */
80 	if (len > tp->t_maxseg) {
81 		len = tp->t_maxseg;
82 		/*
83 		 * Don't send more than one segment if retransmitting
84 		 * (or persisting, but then we shouldn't be here).
85 		 */
86 		if (tp->t_rxtshift == 0)
87 			sendalot = 1;
88 	}
89 
90 	win = sbspace(&so->so_rcv);
91 	flags = tcp_outflags[tp->t_state];
92 	if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc)
93 		flags &= ~TH_FIN;
94 	if (flags & (TH_SYN|TH_RST|TH_FIN))
95 		goto send;
96 	if (SEQ_GT(tp->snd_up, tp->snd_una))
97 		goto send;
98 
99 	/*
100 	 * Sender silly window avoidance.  If connection is idle
101 	 * and can send all data, a maximum segment,
102 	 * at least a maximum default-size segment do it,
103 	 * or are forced, do it; otherwise don't bother.
104 	 * If retransmitting (possibly after persist timer forced us
105 	 * to send into a small window), then must resend.
106 	 */
107 	if (len) {
108 		if (len == tp->t_maxseg || len >= so->so_snd.sb_cc) /* off = 0*/
109 			goto send;
110 		if (len >= TCP_MSS)	/* a lot */
111 			goto send;
112 		if (tp->t_force)
113 			goto send;
114 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
115 			goto send;
116 	}
117 
118 	/*
119 	 * Send if we owe peer an ACK.
120 	 */
121 	if (tp->t_flags&TF_ACKNOW)
122 		goto send;
123 
124 
125 	/*
126 	 * Calculate available window, and also amount
127 	 * of window known to peer (as advertised window less
128 	 * next expected input.)  If the difference is 35% or more of the
129 	 * maximum possible window, then want to send a window update to peer.
130 	 */
131 	win = sbspace(&so->so_rcv);
132 	if (win > 0 &&
133 	    ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
134 		goto send;
135 
136 	/*
137 	 * TCP window updates are not reliable, rather a polling protocol
138 	 * using ``persist'' packets is used to insure receipt of window
139 	 * updates.  The three ``states'' for the output side are:
140 	 *	idle			not doing retransmits or persists
141 	 *	persisting		to move a zero window
142 	 *	(re)transmitting	and thereby not persisting
143 	 *
144 	 * tp->t_timer[TCPT_PERSIST]
145 	 *	is set when we are in persist state.
146 	 * tp->t_force
147 	 *	is set when we are called to send a persist packet.
148 	 * tp->t_timer[TCPT_REXMT]
149 	 *	is set when we are retransmitting
150 	 * The output side is idle when both timers are zero.
151 	 *
152 	 * If send window is too small, there is data to transmit, and no
153 	 * retransmit or persist is pending, then go to persist state.
154 	 * If nothing happens soon, send when timer expires:
155 	 * if window is nonzero, transmit what we can,
156 	 * otherwise force out a byte.
157 	 */
158 	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
159 	    tp->t_timer[TCPT_PERSIST] == 0) {
160 		tp->t_rxtshift = 0;
161 		tcp_setpersist(tp);
162 	}
163 
164 	/*
165 	 * No reason to send a segment, just return.
166 	 */
167 	return (0);
168 
169 send:
170 	/*
171 	 * Grab a header mbuf, attaching a copy of data to
172 	 * be transmitted, and initialize the header from
173 	 * the template for sends on this connection.
174 	 */
175 	MGET(m, M_DONTWAIT, MT_HEADER);
176 	if (m == NULL)
177 		return (ENOBUFS);
178 	m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
179 	m->m_len = sizeof (struct tcpiphdr);
180 	if (len) {
181 		m->m_next = m_copy(so->so_snd.sb_mb, off, len);
182 		if (m->m_next == 0)
183 			len = 0;
184 	}
185 	ti = mtod(m, struct tcpiphdr *);
186 	if (tp->t_template == 0)
187 		panic("tcp_output");
188 	bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
189 
190 	/*
191 	 * Fill in fields, remembering maximum advertised
192 	 * window for use in delaying messages about window sizes.
193 	 */
194 	ti->ti_seq = tp->snd_nxt;
195 	ti->ti_ack = tp->rcv_nxt;
196 	ti->ti_seq = htonl(ti->ti_seq);
197 	ti->ti_ack = htonl(ti->ti_ack);
198 	/*
199 	 * Before ESTABLISHED, force sending of initial options
200 	 * unless TCP set to not do any options.
201 	 */
202 	if (tp->t_state < TCPS_ESTABLISHED) {
203 		int mss;
204 
205 		if (tp->t_flags&TF_NOOPT)
206 			goto noopt;
207 		mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
208 		if (mss <= IP_MSS - sizeof(struct tcpiphdr))
209 			goto noopt;
210 		opt = tcp_initopt;
211 		optlen = sizeof (tcp_initopt);
212 		*(u_short *)(opt + 2) = htons(mss);
213 	} else {
214 		if (tp->t_tcpopt == 0)
215 			goto noopt;
216 		opt = mtod(tp->t_tcpopt, u_char *);
217 		optlen = tp->t_tcpopt->m_len;
218 	}
219 	if (opt) {
220 		m0 = m->m_next;
221 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
222 		if (m->m_next == 0) {
223 			(void) m_free(m);
224 			m_freem(m0);
225 			return (ENOBUFS);
226 		}
227 		m->m_next->m_next = m0;
228 		m0 = m->m_next;
229 		m0->m_len = optlen;
230 		bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
231 		opt = (u_char *)(mtod(m0, caddr_t) + optlen);
232 		while (m0->m_len & 0x3) {
233 			*opt++ = TCPOPT_EOL;
234 			m0->m_len++;
235 		}
236 		optlen = m0->m_len;
237 		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
238 	}
239 noopt:
240 	ti->ti_flags = flags;
241 	if (win >= so->so_rcv.sb_hiwat / 4)	/* avoid silly window */
242 		ti->ti_win = htons((u_short)win);
243 	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
244 		ti->ti_urp = tp->snd_up - tp->snd_nxt;
245 		ti->ti_urp = htons(ti->ti_urp);
246 		ti->ti_flags |= TH_URG;
247 	} else
248 		/*
249 		 * If no urgent pointer to send, then we pull
250 		 * the urgent pointer to the left edge of the send window
251 		 * so that it doesn't drift into the send window on sequence
252 		 * number wraparound.
253 		 */
254 		tp->snd_up = tp->snd_una;		/* drag it along */
255 	/*
256 	 * If anything to send and we can send it all, set PUSH.
257 	 * (This will keep happy those implementations which only
258 	 * give data to the user when a buffer fills or a PUSH comes in.)
259 	 */
260 	if (len && off+len == so->so_snd.sb_cc)
261 		ti->ti_flags |= TH_PUSH;
262 
263 	/*
264 	 * Put TCP length in extended header, and then
265 	 * checksum extended header and data.
266 	 */
267 	if (len + optlen) {
268 		ti->ti_len = sizeof (struct tcphdr) + optlen + len;
269 		ti->ti_len = htons((u_short)ti->ti_len);
270 	}
271 	ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
272 
273 	/*
274 	 * In transmit state, time the transmission and arrange for
275 	 * the retransmit.  In persist state, just set snd_max.
276 	 */
277 	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
278 		/*
279 		 * Advance snd_nxt over sequence space of this segment.
280 		 */
281 		if (flags & (TH_SYN|TH_FIN))
282 			tp->snd_nxt++;
283 		tp->snd_nxt += len;
284 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
285 			tp->snd_max = tp->snd_nxt;
286 			/*
287 			 * Time this transmission if not a retransmission and
288 			 * not currently timing anything.
289 			 */
290 			if (tp->t_rtt == 0) {
291 				tp->t_rtt = 1;
292 				tp->t_rtseq = tp->snd_nxt - len;
293 			}
294 		}
295 
296 		/*
297 		 * Set retransmit timer if not currently set,
298 		 * and not doing a keep-alive probe.
299 		 * Initial value for retransmit timer to tcp_beta*tp->t_srtt.
300 		 * Initialize shift counter which is used for exponential
301 		 * backoff of retransmit time.
302 		 */
303 		if (tp->t_timer[TCPT_REXMT] == 0 &&
304 		    tp->snd_nxt != tp->snd_una) {
305 			TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
306 			    tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
307 			tp->t_rxtshift = 0;
308 		}
309 		tp->t_timer[TCPT_PERSIST] = 0;
310 	} else {
311 		if (SEQ_GT(tp->snd_una+1, tp->snd_max))
312 			tp->snd_max = tp->snd_una+1;
313 	}
314 
315 	/*
316 	 * Trace.
317 	 */
318 	if (so->so_options & SO_DEBUG)
319 		tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
320 
321 	/*
322 	 * Fill in IP length and desired time to live and
323 	 * send to IP level.
324 	 */
325 	((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
326 	((struct ip *)ti)->ip_ttl = TCP_TTL;
327 	if (so->so_options & SO_DONTROUTE)
328 		error =
329 		   ip_output(m, tp->t_inpcb->inp_options, (struct route *)0,
330 			IP_ROUTETOIF);
331 	else
332 		error = ip_output(m, tp->t_inpcb->inp_options,
333 		    &tp->t_inpcb->inp_route, 0);
334 	if (error)
335 		return (error);
336 
337 	/*
338 	 * Data sent (as far as we can tell).
339 	 * If this advertises a larger window than any other segment,
340 	 * then remember the size of the advertised window.
341 	 * Drop send for purpose of ACK requirements.
342 	 */
343 	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
344 		tp->rcv_adv = tp->rcv_nxt + win;
345 	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
346 	if (sendalot && tp->t_force == 0)
347 		goto again;
348 	return (0);
349 }
350 
351 tcp_setpersist(tp)
352 	register struct tcpcb *tp;
353 {
354 
355 	if (tp->t_timer[TCPT_REXMT])
356 		panic("tcp_output REXMT");
357 	/*
358 	 * Start/restart persistance timer.
359 	 */
360 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
361 	    ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
362 	    TCPTV_PERSMIN, TCPTV_MAX);
363 	tp->t_rxtshift++;
364 	if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
365 		tp->t_rxtshift = 0;
366 }
367