xref: /csrg-svn/sys/netinet/tcp_input.c (revision 5061)
1 /* tcp_input.c 1.29 81/11/24 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/socket.h"
7 #include "../h/socketvar.h"
8 #include "../net/inet.h"
9 #include "../net/inet_pcb.h"
10 #include "../net/inet_systm.h"
11 #include "../net/imp.h"
12 #include "../net/ip.h"
13 #include "../net/ip_var.h"
14 #include "../net/tcp.h"
15 #include "../net/tcp_fsm.h"
16 #include "../net/tcp_var.h"
17 #include "/usr/include/errno.h"
18 
19 int	tcpcksum = 1;		/* nonzero: tcp_input verifies the checksum of each incoming segment */
20 
21 tcp_drain()
22 {
23 	register struct inpcb *ip;
24 
25 COUNT(TCP_DRAIN);
26 	for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next)
27 		tcp_drainunack(intotcpcb(ip));
28 }
29 
30 tcp_drainunack(tp)
31 	register struct tcpcb *tp;
32 {
33 	register struct mbuf *m;
34 
35 COUNT(TCP_DRAINUNACK);
36 	for (m = tp->seg_unack; m; m = m->m_act)
37 		m_freem(m);
38 	tp->seg_unack = 0;
39 }
40 
41 tcp_ctlinput(m)
42 	struct mbuf *m;
43 {
44 
45 COUNT(TCP_CTLINPUT);
46 	m_freem(m);
47 }
48 
49 struct	sockaddr_in tcp_sockaddr = { AF_INET };	/* scratch address: tcp_input fills in sin_addr/sin_port and passes it to in_pcbsetpeer */
50 
51 tcp_input(m0)
52 	struct mbuf *m0;
53 {
54 	register struct tcpiphdr *ti;
55 	struct inpcb *inp;
56 	register struct mbuf *m;
57 	int len, tlen, off;
58 
59 	register struct tcpcb *tp;
60 	register int j;
61 	register int tiflags;
62 	int nstate;
63 	struct socket *so;
64 #ifdef TCPDEBUG
65 	struct tcp_debug tdb;
66 #endif
67 
68 COUNT(TCP_INPUT);
69 	/*
70 	 * Get ip and tcp header together in first mbuf.
71 	 */
72 	m = m0;
73 	if (m->m_len < sizeof (struct tcpiphdr) &&
74 	    m_pullup(m, sizeof (struct tcpiphdr)) == 0) {
75 		tcpstat.tcps_hdrops++;
76 		goto bad;
77 	}
78 	ti = mtod(m, struct tcpiphdr *);
79 	if (ti->ti_len > sizeof (struct ip))
80 		ip_stripoptions((struct ip *)ti, (char *)0);
81 
82 	/*
83 	 * Checksum extended tcp header and data.
84 	 */
85 	tlen = ((struct ip *)ti)->ip_len;
86 	len = sizeof (struct ip) + tlen;
87 	if (tcpcksum) {
88 		ti->ti_next = ti->ti_prev = 0;
89 		ti->ti_x1 = 0;
90 		ti->ti_len = htons((u_short)tlen);
91 		if ((ti->ti_sum = inet_cksum(m, len)) != 0xffff) {
92 			tcpstat.tcps_badsum++;
93 			printf("tcp cksum %x\ti", ti->ti_sum);
94 			goto bad;
95 		}
96 	}
97 
98 	/*
99 	 * Check that tcp offset makes sense,
100 	 * process tcp options and adjust length.
101 	 */
102 	off = ti->ti_off << 2;
103 	if (off < sizeof (struct tcphdr) || off > ti->ti_len) {
104 		tcpstat.tcps_badoff++;
105 		goto bad;
106 	}
107 	ti->ti_len = tlen - off;
108 	/* PROCESS OPTIONS */
109 
110 	/*
111 	 * Locate pcb for segment.
112 	 */
113 	inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport);
114 	if (inp == 0)
115 		goto notwanted;
116 	tp = intotcpcb(inp);		/* ??? */
117 	if (tp == 0)			/* ??? */
118 		goto notwanted;		/* ??? */
119 
120 	/*
121 	 * Convert tcp protocol specific fields to host format.
122 	 */
123 	ti->ti_seq = ntohl(ti->ti_seq);
124 	ti->ti_ackno = ntohl((n_long)ti->ti_ackno);
125 	ti->ti_win = ntohs(ti->ti_win);
126 	ti->ti_urp = ntohs(ti->ti_urp);
127 
128 	/*
129 	 * Check segment seq # and do rst processing
130 	 */
131 	tiflags = ti->ti_flags;
132 	switch (tp->t_state) {
133 
134 	case LISTEN:
135 		if ((tiflags&TH_ACK) || (tiflags&TH_SYN) == 0) {
136 			tcp_sndrst(tp, ti);
137 			goto bad;
138 		}
139 		if (tiflags&TH_RST)
140 			goto bad;
141 		goto good;
142 
143 	case SYN_SENT:
144 		if (!ack_ok(tp, ti) || (tiflags&TH_SYN) == 0) {
145 			tcp_sndrst(tp, ti);			/* 71,72,75 */
146 			goto bad;
147 		}
148 		if (tiflags&TH_RST) {
149 			tcp_drop(tp, ENETRESET);
150 			goto bad;
151 		}
152 		goto good;
153 
154 	default:
155         	if ((tiflags&TH_RST) == 0)
156 			goto common;
157 		if (ti->ti_seq < tp->rcv_nxt)		/* bad rst */
158 			goto bad;				/* 69 */
159 		switch (tp->t_state) {
160 
161 		case L_SYN_RCVD:
162 			if (ack_ok(tp, ti) == 0)
163 				goto bad;			/* 69 */
164 			tp->t_rexmt = 0;
165 			tp->t_rexmttl = 0;
166 			tp->t_persist = 0;
167 			inp->inp_faddr.s_addr = 0;
168 			tp->t_state = LISTEN;
169 			goto bad;
170 
171 		default:
172 			tcp_drop(tp, ENETRESET);
173 			goto bad;
174 		}
175 		/*NOTREACHED*/
176 
177 	case SYN_RCVD:
178 common:
179 		if (ack_ok(tp, ti) == 0) {
180 			tcp_sndrst(tp, ti);			/* 74 */
181 			goto bad;
182 		}
183 		if ((tiflags&TH_SYN) == 0 && ti->ti_seq != tp->irs) {
184 			tcp_sndnull(tp);			/* 74 */
185 			goto bad;
186 		}
187 		goto good;
188 	}
189 bad:
190 	m_freem(m);
191 	return;
192 
193 good:
194 	/*
195 	 * Defer processing if no buffer space for this connection.
196 	 */
197 	so = inp->inp_socket;
198 	if (so->so_rcv.sb_cc >= so->so_rcv.sb_hiwat &&
199 	     ti->ti_len != 0 && mbstat.m_bufs < mbstat.m_lowat) {
200 /*
201 		m->m_act = (struct mbuf *)0;
202 		if ((m = tp->seg_unack) != NULL) {
203 			while (m->m_act != NULL)
204 				m = m->m_act;
205 			m->m_act = m0;
206 		} else
207 			tp->seg_unack = m0;
208 */
209 		m_freem(m0);
210 		return;
211 	}
212 
213 	/*
214 	 * Discard ip header, and do tcp input processing.
215 	 */
216 	off += sizeof (struct ip);
217 	m->m_off += off;
218 	m->m_len -= off;
219 	nstate = tp->t_state;
220 	tp->tc_flags &= ~TC_NET_KEEP;
221 #ifdef KPROF
222 	acounts[tp->t_state][INRECV]++;
223 #endif
224 #ifdef TCPDEBUG
225 	if ((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug) {
226 		tdb_setup(tp, ti, INRECV, &tdb);
227 	} else
228 		tdb.td_tod = 0;
229 #endif
230 	switch (tp->t_state) {
231 
232 	case LISTEN:
233 		tcp_sockaddr.sin_addr = ti->ti_src;
234 		tcp_sockaddr.sin_port = ti->ti_sport;
235 		if ((tiflags&TH_SYN) == 0 || in_pcbsetpeer(inp, &tcp_sockaddr)) {
236 			nstate = EFAILEC;
237 			goto done;
238 		}
239 		tp->t_template = tcp_template(tp);
240 		tcp_ctldat(tp, ti, 1);
241 		if (tp->tc_flags&TC_FIN_RCVD) {
242 			tp->t_finack = T_2ML;			/* 3 */
243 			nstate = CLOSE_WAIT;
244 		} else {
245 			tp->t_init = T_INIT / 2;		/* 4 */
246 			nstate = L_SYN_RCVD;
247 		}
248 		goto done;
249 
250 	case SYN_SENT:
251 		if (!syn_ok(tp, ti)) {
252 			nstate = EFAILEC;
253 			goto done;
254 		}
255 		tcp_ctldat(tp, ti, 1);
256 		if (tp->tc_flags&TC_FIN_RCVD) {
257 			if ((tiflags&TH_ACK) == 0)
258 				tp->t_finack = T_2ML;		/* 9 */
259 			nstate = CLOSE_WAIT;
260 			goto done;
261 		}
262 		nstate = (tiflags&TH_ACK) ? ESTAB : SYN_RCVD; /* 11:8 */
263 		goto done;
264 
265 	case SYN_RCVD:
266 	case L_SYN_RCVD:
267 		if ((tiflags&TH_ACK) == 0 ||
268 		    (tiflags&TH_ACK) && ti->ti_ackno <= tp->iss) {
269 			nstate = EFAILEC;
270 			goto done;
271 		}
272 		goto input;
273 
274 	case ESTAB:
275 	case FIN_W1:
276 	case FIN_W2:
277 	case TIME_WAIT:
278 input:
279 		tcp_ctldat(tp, ti, 1);				/* 39 */
280 		switch (tp->t_state) {
281 
282 		case ESTAB:
283 			if (tp->tc_flags&TC_FIN_RCVD)
284 				nstate = CLOSE_WAIT;
285 			break;
286 
287 		case SYN_RCVD:
288 		case L_SYN_RCVD:
289 			nstate = (tp->tc_flags&TC_FIN_RCVD) ?
290 			    CLOSE_WAIT : ESTAB;			 /* 33:5 */
291 			break;
292 
293 		case FIN_W1:
294 			j = ack_fin(tp, ti);
295 			if ((tp->tc_flags & TC_FIN_RCVD) == 0) {
296 				if (j)
297 					nstate = FIN_W2;	/* 27 */
298 				break;
299 			}
300 			tp->t_finack = T_2ML;
301 			nstate = j ? TIME_WAIT : CLOSING;	/* 28:26 */
302 			break;
303 
304 		case FIN_W2:
305 			if (tp->tc_flags&TC_FIN_RCVD) {
306 				tp->t_finack = T_2ML;		/* 29 */
307 				nstate = TIME_WAIT;
308 				break;
309 			}
310 			break;
311 		}
312 		goto done;
313 
314 	case CLOSE_WAIT:
315 		if (tiflags&TH_FIN) {
316 			if ((tiflags&TH_ACK) &&
317 			    ti->ti_ackno <= tp->seq_fin) {
318 				tcp_ctldat(tp, ti, 0);		/* 30 */
319 				tp->t_finack = T_2ML;
320 			} else
321 				(void) tcp_sndctl(tp);		/* 31 */
322 			goto done;
323 		}
324 		goto input;
325 
326 	case CLOSING:
327 		j = ack_fin(tp, ti);
328 		if (tiflags&TH_FIN) {
329 			tcp_ctldat(tp, ti, 0);
330 			tp->t_finack = T_2ML;
331 			if (j)
332 				nstate = TIME_WAIT;		/* 23 */
333 			goto done;
334 		}
335 		if (j) {
336 			if (tp->t_finack == 0)
337 				if (rcv_empty(tp)) {
338 					sorwakeup(inp->inp_socket);
339 					nstate = CLOSED;	/* 15 */
340 				} else
341 					nstate = RCV_WAIT;	/* 18 */
342 			else
343 				nstate = TIME_WAIT;
344 			goto done;
345 		}
346 		goto input;
347 
348 	case LAST_ACK:
349 		if (ack_fin(tp, ti)) {
350 			if (rcv_empty(tp)) {		/* 16 */
351 				sorwakeup(inp->inp_socket);
352 				nstate = CLOSED;
353 			} else
354 				nstate = RCV_WAIT;		/* 19 */
355 			goto done;
356 		}
357 		if (tiflags&TH_FIN) {
358 			(void) tcp_sndctl(tp);			/* 31 */
359 			goto done;
360 		}
361 		goto input;
362 
363 	case RCV_WAIT:
364 		if ((tiflags&TH_FIN) && (tiflags&TH_ACK) &&
365 		    ti->ti_ackno <= tp->seq_fin) {
366 			tcp_ctldat(tp, ti, 0);
367 			tp->t_finack = T_2ML;			/* 30 */
368 		}
369 		goto done;
370 	}
371 	panic("tcp_input");
372 done:
373 
374 	/*
375 	 * Done with state*input specific processing.
376 	 * Form trace records, free input if not needed,
377 	 * and enter new state.
378 	 */
379 #ifdef TCPDEBUG
380 	if (tdb.td_tod)
381 		tdb_stuff(&tdb, nstate);
382 #endif
383 	switch (nstate) {
384 
385 	case EFAILEC:
386 		m_freem(m);
387 		return;
388 
389 	default:
390 		tp->t_state = nstate;
391 		/* fall into ... */
392 
393 	case CLOSED:
394 		/* IF CLOSED CANT LOOK AT tc_flags */
395 		if ((tp->tc_flags&TC_NET_KEEP) == 0) {
396 			register struct mbuf *n;
397 			/* inline expansion of m_freem */
398 			while (m) {
399 				MFREE(m, n);
400 				m = n;
401 			}
402 		}
403 		return;
404 	}
405 	/* NOTREACHED */
406 
407 	/*
408 	 * Unwanted packed; free everything
409 	 * but the header and return an rst.
410 	 */
411 notwanted:
412 	m_freem(m->m_next);
413 	m->m_next = NULL;
414 	m->m_len = sizeof(struct tcpiphdr);
415 #define xchg(a,b) j=a; a=b; b=j
416 	xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr);
417 	xchg(ti->ti_dport, ti->ti_sport);
418 #undef xchg
419 	if (tiflags&TH_ACK)
420 		ti->ti_seq = ti->ti_ackno;
421 	else {
422 		ti->ti_ackno = htonl((unsigned)(ntohl(ti->ti_seq) + ti->ti_len));
423 		ti->ti_seq = 0;
424 	}
425 	ti->ti_flags = ((tiflags & TH_ACK) ? 0 : TH_ACK) | TH_RST;
426 	ti->ti_len = htons(TCPSIZE);
427 	ti->ti_off = 5;
428 	ti->ti_sum = inet_cksum(m, sizeof(struct tcpiphdr));
429 	((struct ip *)ti)->ip_len = sizeof(struct tcpiphdr);
430 	((struct ip *)ti)->ip_ttl = MAXTTL;
431 	ip_output(m);
432 	tcpstat.tcps_badsegs++;
433 }
434 
435 tcp_ctldat(tp, n0, dataok)
436 	register struct tcpcb *tp;
437 	struct tcpiphdr *n0;
438 	int dataok;
439 {
440 	register struct tcpiphdr *ti = n0;
441 	register int tiflags = ti->ti_flags;
442 	struct socket *so = tp->t_inpcb->inp_socket;
443 	seq_t past = ti->ti_seq + ti->ti_len;
444 	seq_t urgent;
445 	int sent;
446 COUNT(TCP_CTLDAT);
447 
448 	if (tiflags & TH_URG)
449 		urgent = ti->ti_seq + ti->ti_urp;
450 	tp->tc_flags &= ~(TC_ACK_DUE|TC_NEW_WINDOW);
451 /* syn */
452 	if ((tp->tc_flags&TC_SYN_RCVD) == 0 && (tiflags&TH_SYN)) {
453 		tp->irs = ti->ti_seq;
454 		tp->rcv_nxt = ti->ti_seq + 1;
455 		tp->snd_wl = tp->rcv_urp = tp->irs;
456 		tp->tc_flags |= (TC_SYN_RCVD|TC_ACK_DUE);
457 	}
458 /* ack */
459 	if ((tiflags&TH_ACK) && (tp->tc_flags&TC_SYN_RCVD) &&
460 	    ti->ti_ackno > tp->snd_una) {
461 		/*
462 		 * Reflect newly acknowledged data.
463 		 */
464 		tp->snd_una = ti->ti_ackno;
465 		if (tp->snd_una > tp->snd_nxt)
466 			tp->snd_nxt = tp->snd_una;
467 
468 		/*
469 		 * If timed msg acked, update retransmit time value.
470 		 */
471 		if ((tp->tc_flags&TC_SYN_ACKED) &&
472 		    tp->snd_una > tp->t_xmt_val) {
473 			/* NEED SMOOTHING HERE */
474 			tp->t_xmtime = (tp->t_xmt != 0 ? tp->t_xmt : T_REXMT);
475 			if (tp->t_xmtime > T_REMAX)
476 				tp->t_xmtime = T_REMAX;
477 		}
478 
479 		/*
480 		 * Remove acked data from send buf
481 		 */
482 		sbdrop(&so->so_snd, (int)(tp->snd_una - tp->snd_off));
483 		tp->snd_off = tp->snd_una;
484 		if ((tp->tc_flags&TC_SYN_ACKED) == 0 &&
485 		    (tp->snd_una > tp->iss)) {
486 			tp->tc_flags |= TC_SYN_ACKED;
487 			tp->t_init = 0;
488 		}
489 		if (tp->seq_fin != tp->iss && tp->snd_una > tp->seq_fin)
490 			tp->tc_flags &= ~TC_SND_FIN;
491 		tp->t_rexmt = 0;
492 		tp->t_rexmttl = 0;
493 		tp->tc_flags |= TC_CANCELLED;
494 		sowwakeup(tp->t_inpcb->inp_socket);
495 	}
496 /* win */
497 	if ((tp->tc_flags & TC_SYN_RCVD) && ti->ti_seq >= tp->snd_wl) {
498 		tp->snd_wl = ti->ti_seq;
499 		tp->snd_wnd = ti->ti_win;
500 		tp->tc_flags |= TC_NEW_WINDOW;
501 		tp->t_persist = 0;
502 	}
503 /* text */
504 	if (dataok && ti->ti_len) {
505 		register struct tcpiphdr *q;
506 		int overage;
507 
508 /* eol */
509 		if ((tiflags&TH_EOL)) {
510 			register struct mbuf *m;
511 			for (m = dtom(ti); m->m_next; m = m->m_next)
512 				;
513 			m->m_act = (struct mbuf *)(mtod(m, caddr_t) - 1);
514 		}
515 
516 /* text */
517 		/*
518 		 * Discard duplicate data already passed to user.
519 		 */
520 		if (SEQ_LT(ti->ti_seq, tp->rcv_nxt)) {
521 			register int i = tp->rcv_nxt - ti->ti_seq;
522 			if (i >= ti->ti_len)
523 				goto notext;
524 			ti->ti_seq += i;
525 			ti->ti_len -= i;
526 			m_adj(dtom(ti), i);
527 		}
528 
529 		/*
530 		 * Find a segment which begins after this one does.
531 		 */
532 		for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
533 		    q = (struct tcpiphdr *)q->ti_next)
534 			if (SEQ_GT(q->ti_seq, ti->ti_seq))
535 				break;
536 
537 		/*
538 		 * If there is a preceding segment, it may provide some of
539 		 * our data already.  If so, drop the data from the incoming
540 		 * segment.  If it provides all of our data, drop us.
541 		 */
542 		if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
543 			register int i;
544 			q = (struct tcpiphdr *)(q->ti_prev);
545 			/* conversion to int (in i) handles seq wraparound */
546 			i = q->ti_seq + q->ti_len - ti->ti_seq;
547 			if (i > 0) {
548 				if (i >= ti->ti_len)
549 					goto notext;
550 						/* w/o setting TC_NET_KEEP */
551 				m_adj(dtom(tp), i);
552 				ti->ti_len -= i;
553 				ti->ti_seq += i;
554 			}
555 			q = (struct tcpiphdr *)(q->ti_next);
556 		}
557 
558 		/*
559 		 * While we overlap succeeding segments trim them or,
560 		 * if they are completely covered, dequeue them.
561 		 */
562 		while (q != (struct tcpiphdr *)tp &&
563 		    SEQ_GT(ti->ti_seq + ti->ti_len, q->ti_seq)) {
564 			register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
565 			if (i < q->ti_len) {
566 				q->ti_len -= i;
567 				m_adj(dtom(q), i);
568 				break;
569 			}
570 			q = (struct tcpiphdr *)q->ti_next;
571 			m_freem(dtom(q->ti_prev));
572 			remque(q->ti_prev);
573 		}
574 
575 		/*
576 		 * Stick new segment in its place.
577 		 */
578 		insque(ti, q->ti_prev);
579 		tp->seqcnt += ti->ti_len;
580 
581 		/*
582 		 * Calculate available space and discard segments for
583 		 * which there is too much.
584 		 */
585 		overage =
586 		    (so->so_rcv.sb_cc /*XXX+tp->rcv_seqcnt*/) - so->so_rcv.sb_hiwat;
587 		if (overage > 0) {
588 			q = tp->seg_prev;
589 			for (;;) {
590 				register int i = MIN(q->ti_len, overage);
591 				overage -= i;
592 				q->ti_len -= i;
593 				m_adj(dtom(q), -i);
594 				if (q->ti_len)
595 					break;
596 				if (q == ti)
597 					panic("tcp_text dropall");
598 				q = (struct tcpiphdr *)q->ti_prev;
599 				remque(q->ti_next);
600 			}
601 		}
602 
603 		/*
604 		 * Advance rcv_next through newly completed sequence space.
605 		 */
606 		while (ti->ti_seq == tp->rcv_nxt) {
607 			tp->rcv_nxt += ti->ti_len;
608 			ti = (struct tcpiphdr *)ti->ti_next;
609 			if (ti == (struct tcpiphdr *)tp)
610 				break;
611 		}
612 /* urg */
613 		if (tiflags&TH_URG) {
614 			/* ... */
615 			if (SEQ_GT(urgent, tp->rcv_urp))
616 				tp->rcv_urp = urgent;
617 		}
618 		tp->tc_flags |= (TC_ACK_DUE|TC_NET_KEEP);
619 	}
620 notext:
621 /* fin */
622 	if ((tiflags&TH_FIN) && past == tp->rcv_nxt) {
623 		if ((tp->tc_flags&TC_FIN_RCVD) == 0) {
624 			tp->tc_flags |= TC_FIN_RCVD;
625 			sorwakeup(so);
626 			tp->rcv_nxt++;
627 		}
628 		tp->tc_flags |= TC_ACK_DUE;
629 	}
630 /* respond */
631 	sent = 0;
632 	if (tp->tc_flags&TC_ACK_DUE)
633 		sent = tcp_sndctl(tp);
634 	else if ((tp->tc_flags&TC_NEW_WINDOW))
635 		if (tp->snd_nxt <= tp->snd_off + so->so_snd.sb_cc ||
636 		    (tp->tc_flags&TC_SND_FIN))
637 			sent = tcp_send(tp);
638 
639 /* set for retrans */
640 	if (!sent && tp->snd_una < tp->snd_nxt &&
641 	    (tp->tc_flags&TC_CANCELLED)) {
642 		tp->t_rexmt = tp->t_xmtime;
643 		tp->t_rexmttl = T_REXMTTL;
644 		tp->t_rexmt_val = tp->t_rtl_val = tp->snd_lst;
645 		tp->tc_flags &= ~TC_CANCELLED;
646 	}
647 /* present data to user */
648 	if ((tp->tc_flags&TC_SYN_ACKED) == 0)
649 		return;
650 	ti = tp->seg_next;
651 	while (ti != (struct tcpiphdr *)tp && ti->ti_seq < tp->rcv_nxt) {
652 		remque(ti);
653 		sbappend(&so->so_rcv, dtom(ti));
654 		tp->seqcnt -= ti->ti_len;
655 		if (tp->seqcnt < 0)
656 			panic("tcp_input present");
657 		ti = (struct tcpiphdr *)ti->ti_next;
658 	}
659 	sorwakeup(so);
660 }
661