xref: /csrg-svn/sys/netinet/tcp_input.c (revision 4884)
1 /* tcp_input.c 1.22 81/11/14 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/socket.h"
7 #include "../h/socketvar.h"
8 #include "../net/inet_cksum.h"
9 #include "../net/inet.h"
10 #include "../net/inet_pcb.h"
11 #include "../net/inet_systm.h"
12 #include "../net/imp.h"
13 #include "../net/inet_host.h"
14 #include "../net/ip.h"
15 #include "../net/tcp.h"
16 #include "../net/tcp_fsm.h"
17 #include "../net/tcp_var.h"
18 #include "/usr/include/errno.h"
19 
/*
 * When nonzero, tcp_input() checksums the extended header and data of
 * each arriving segment and drops (counting in netstat.t_badsum) any
 * segment whose checksum does not come out zero.  When zero the check
 * is skipped entirely.
 */
20 int	tcpcksum = 1;
21 
22 tcp_drain()
23 {
24 	register struct inpcb *ip;
25 
26 	for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next)
27 		tcp_drainunack(intotcpcb(ip));
28 }
29 
30 tcp_drainunack(tp)
31 	register struct tcpcb *tp;
32 {
33 	register struct mbuf *m;
34 
35 	for (m = tp->seg_unack; m; m = m->m_act)
36 		m_freem(m);
37 	tp->seg_unack = 0;
38 }
39 
/*
 * Control input entry for tcp.  Nothing in the message is examined;
 * the mbuf chain handed in is simply released.
 */
tcp_ctlinput(m)
	struct mbuf *m;
{
	struct mbuf *ctl = m;

	/* no interpretation is done here, just give the chain back */
	m_freem(ctl);
}
46 
47 tcp_input(mp)
48 	register struct mbuf *mp;
49 {
50 	register struct tcpiphdr *n;		/* known to be r10 */
51 	register int j;
52 	register struct tcpcb *tp;
53 	struct inpcb *inp;
54 	register int thflags;
55 	int nstate;
56 	struct mbuf *m;
57 	struct socket *so;
58 	int hlen, tlen;
59 	u_short lport, fport;
60 #ifdef TCPDEBUG
61 	struct tcp_debug tdb;
62 #endif
63 COUNT(TCP_INPUT);
64 
65 	/*
66 	 * Build extended tcp header
67 	 */
68 	n = mtod(mp, struct tcpiphdr *);
69 	thflags = n->th_flags;
70 	tlen = ((struct ip *)n)->ip_len;
71 	n->t_len = htons(tlen);
72 	n->t_next = NULL;
73 	n->t_prev = NULL;
74 	n->t_x1 = 0;
75 	lport = ntohs(n->t_dst);
76 	fport = ntohs(n->t_src);
77 
78 	/* WONT BE POSSIBLE WHEN MBUFS ARE 256 BYTES */
79 	if ((hlen = n->t_off << 2) > mp->m_len)
80 		{ printf("tcp header overflow\n"); m_freem(mp); return; }
81 
82 	if (tcpcksum) {
83 		/*
84 		 * Checksum extended header and data
85 		 */
86 		CKSUM_TCPCHK(mp, n, r10, sizeof (struct ip) + tlen);
87 		if (n->t_sum != 0) {
88 			netstat.t_badsum++;
89 			m_freem(mp);
90 			return;
91 		}
92 	}
93 
94 	/*
95 	 * Find tcb for message.
96 	 */
97 	inp = in_pcblookup(&tcb, &n->t_s, fport, &n_lhost, lport);
98 	if (inp == 0)
99 		goto notwanted;
100 
101 	/*
102 	 * Byte swap header
103 	 */
104 	n->t_len = tlen - hlen;
105 	n->t_src = fport;
106 	n->t_dst = lport;
107 	n->t_seq = ntohl(n->t_seq);
108 	n->t_ackno = ntohl(n->t_ackno);
109 	n->t_win = ntohs(n->t_win);
110 	n->t_urp = ntohs(n->t_urp);
111 
112 	/*
113 	 * Check segment seq # and do rst processing
114 	 */
115 	switch (tp->t_state) {
116 
117 	case LISTEN:
118 		if ((thflags&TH_ACK) || !syn_ok(tp, n)) {
119 			tcp_sndrst(tp, n);
120 			goto badseg;
121 		}
122 		if (thflags&TH_RST)
123 			goto badseg;
124 		goto goodseg;
125 
126 	case SYN_SENT:
127 		if (!ack_ok(tp, n) || !syn_ok(tp, n)) {
128 			tcp_sndrst(tp, n);			/* 71,72,75 */
129 			goto badseg;
130 		}
131 		if (thflags&TH_RST) {
132 			tcp_error(tp, ENETRESET);
133 			tcp_detach(tp);				/* 70 */
134 			tp->t_state = CLOSED;
135 			goto badseg;
136 		}
137 		goto goodseg;
138 
139 	default:
140         	if ((thflags&TH_RST) == 0)
141 			goto common;
142 		if (n->t_seq < tp->rcv_nxt)		/* bad rst */
143 			goto badseg;				/* 69 */
144 		switch (tp->t_state) {
145 
146 		case L_SYN_RCVD:
147 			if (ack_ok(tp, n) == 0)
148 				goto badseg;			/* 69 */
149 			tp->t_rexmt = 0;
150 			tp->t_rexmttl = 0;
151 			tp->t_persist = 0;
152 			h_free(inp->inp_fhost);
153 			inp->inp_fhost = 0;
154 			tp->t_state = LISTEN;
155 			goto badseg;
156 
157 		default:
158 			tcp_error(tp, ENETRESET);
159 			tcp_detach(tp);				/* 66 */
160 			tp->t_state = CLOSED;
161 			goto badseg;
162 		}
163 		/*NOTREACHED*/
164 
165 	case SYN_RCVD:
166 common:
167 		if (ack_ok(tp, n) == 0) {
168 			tcp_sndrst(tp, n);			/* 74 */
169 			goto badseg;
170 		}
171 		if (syn_ok(tp, n) && n->t_seq != tp->irs) {
172 			tcp_sndnull(tp);			/* 74 */
173 			goto badseg;
174 		}
175 		goto goodseg;
176 	}
177 badseg:
178 	m_freem(mp);
179 	return;
180 
181 goodseg:
182 	/*
183 	 * Defer processing if no buffer space for this connection.
184 	 */
185 	so = inp->inp_socket;
186 	if (so->so_rcv.sb_cc >= so->so_rcv.sb_hiwat &&
187 	     n->t_len != 0 && mbstat.m_bufs < mbstat.m_lowat) {
188 /*
189 		mp->m_act = (struct mbuf *)0;
190 		if ((m = tp->seg_unack) != NULL) {
191 			while (m->m_act != NULL)
192 				m = m->m_act;
193 			m->m_act = mp;
194 		} else
195 			tp->seg_unack = mp;
196 */
197 		m_freem(mp);
198 		return;
199 	}
200 
201 	/*
202 	 * Discard ip header, and do tcp input processing.
203 	 */
204 	hlen += sizeof(struct ip);
205 	mp->m_off += hlen;
206 	mp->m_len -= hlen;
207 	nstate = tp->t_state;
208 	tp->tc_flags &= ~TC_NET_KEEP;
209 #ifdef KPROF
210 	acounts[tp->t_state][INRECV]++;
211 #endif
212 #ifdef TCPDEBUG
213 	if ((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug) {
214 		tdb_setup(tp, n, INRECV, &tdb);
215 	} else
216 		tdb.td_tod = 0;
217 #endif
218 	switch (tp->t_state) {
219 
220 	case LISTEN:
221 		if (!syn_ok(tp, n) ||
222 		    ((inp->inp_lhost = in_hmake(&n->t_s)) == 0)) {
223 			nstate = EFAILEC;
224 			goto done;
225 		}
226 		inp->inp_fport = n->t_src;
227 		tp->t_template = tcp_template(tp);
228 		tcp_ctldat(tp, n, 1);
229 		if (tp->tc_flags&TC_FIN_RCVD) {
230 			tp->t_finack = T_2ML;			/* 3 */
231 			tp->tc_flags &= ~TC_WAITED_2_ML;
232 			nstate = CLOSE_WAIT;
233 		} else {
234 			tp->t_init = T_INIT / 2;		/* 4 */
235 			nstate = L_SYN_RCVD;
236 		}
237 		goto done;
238 
239 	case SYN_SENT:
240 		if (!syn_ok(tp, n)) {
241 			nstate = EFAILEC;
242 			goto done;
243 		}
244 		tcp_ctldat(tp, n, 1);
245 		if (tp->tc_flags&TC_FIN_RCVD) {
246 			if ((thflags&TH_ACK) == 0) {
247 				tp->t_finack = T_2ML;		/* 9 */
248 				tp->tc_flags &= ~TC_WAITED_2_ML;
249 			}
250 			nstate = CLOSE_WAIT;
251 			goto done;
252 		}
253 		nstate = (thflags&TH_ACK) ? ESTAB : SYN_RCVD; /* 11:8 */
254 		goto done;
255 
256 	case SYN_RCVD:
257 	case L_SYN_RCVD:
258 		if ((thflags&TH_ACK) == 0 ||
259 		    (thflags&TH_ACK) && n->t_ackno <= tp->iss) {
260 			nstate = EFAILEC;
261 			goto done;
262 		}
263 		goto input;
264 
265 	case ESTAB:
266 	case FIN_W1:
267 	case FIN_W2:
268 	case TIME_WAIT:
269 input:
270 		tcp_ctldat(tp, n, 1);				/* 39 */
271 		switch (tp->t_state) {
272 
273 		case ESTAB:
274 			if (tp->tc_flags&TC_FIN_RCVD)
275 				nstate = CLOSE_WAIT;
276 			break;
277 
278 		case SYN_RCVD:
279 		case L_SYN_RCVD:
280 			nstate = (tp->tc_flags&TC_FIN_RCVD) ?
281 			    CLOSE_WAIT : ESTAB;			 /* 33:5 */
282 			break;
283 
284 		case FIN_W1:
285 			j = ack_fin(tp, n);
286 			if ((tp->tc_flags & TC_FIN_RCVD) == 0) {
287 				if (j)
288 					nstate = FIN_W2;	/* 27 */
289 				break;
290 			}
291 			tp->t_finack = T_2ML;
292 			tp->tc_flags &= ~TC_WAITED_2_ML;
293 			nstate = j ? TIME_WAIT : CLOSING;	/* 28:26 */
294 			break;
295 
296 		case FIN_W2:
297 			if (tp->tc_flags&TC_FIN_RCVD) {
298 				tp->t_finack = T_2ML;		/* 29 */
299 				tp->tc_flags &= ~TC_WAITED_2_ML;
300 				nstate = TIME_WAIT;
301 				break;
302 			}
303 			break;
304 		}
305 		goto done;
306 
307 	case CLOSE_WAIT:
308 		if (thflags&TH_FIN) {
309 			if ((thflags&TH_ACK) &&
310 			    n->t_ackno <= tp->seq_fin) {
311 				tcp_ctldat(tp, n, 0);		/* 30 */
312 				tp->t_finack = T_2ML;
313 				tp->tc_flags &= ~TC_WAITED_2_ML;
314 			} else
315 				tcp_sndctl(tp);			/* 31 */
316 			goto done;
317 		}
318 		goto input;
319 
320 	case CLOSING:
321 		j = ack_fin(tp, n);
322 		if (thflags&TH_FIN) {
323 			tcp_ctldat(tp, n, 0);
324 			tp->t_finack = T_2ML;
325 			tp->tc_flags &= ~TC_WAITED_2_ML;
326 			if (j)
327 				nstate = TIME_WAIT;		/* 23 */
328 			goto done;
329 		}
330 		if (j) {
331 			if (tp->tc_flags&TC_WAITED_2_ML)
332 				if (rcv_empty(tp)) {
333 					sorwakeup(inp->inp_socket);
334 					nstate = CLOSED;	/* 15 */
335 				} else
336 					nstate = RCV_WAIT;	/* 18 */
337 			else
338 				nstate = TIME_WAIT;
339 			goto done;
340 		}
341 		goto input;
342 
343 	case LAST_ACK:
344 		if (ack_fin(tp, n)) {
345 			if (rcv_empty(tp)) {		/* 16 */
346 				sorwakeup(inp->inp_socket);
347 				nstate = CLOSED;
348 			} else
349 				nstate = RCV_WAIT;		/* 19 */
350 			goto done;
351 		}
352 		if (thflags&TH_FIN) {
353 			tcp_sndctl(tp);				/* 31 */
354 			goto done;
355 		}
356 		goto input;
357 
358 	case RCV_WAIT:
359 		if ((thflags&TH_FIN) && (thflags&TH_ACK) &&
360 		    n->t_ackno <= tp->seq_fin) {
361 			tcp_ctldat(tp, n, 0);
362 			tp->t_finack = T_2ML;
363 			tp->tc_flags &= ~TC_WAITED_2_ML;	/* 30 */
364 		}
365 		goto done;
366 	}
367 	panic("tcp_input");
368 done:
369 
370 	/*
371 	 * Done with state*input specific processing.
372 	 * Form trace records, free input if not needed,
373 	 * and enter new state.
374 	 */
375 #ifdef TCPDEBUG
376 	if (tdb.td_tod)
377 		tdb_stuff(&tdb, nstate);
378 #endif
379 	switch (nstate) {
380 
381 	case EFAILEC:
382 		m_freem(mp);
383 		return;
384 
385 	default:
386 		tp->t_state = nstate;
387 		/* fall into ... */
388 
389 	case CLOSED:
390 		/* IF CLOSED CANT LOOK AT tc_flags */
391 		if ((tp->tc_flags&TC_NET_KEEP) == 0)
392 			/* inline expansion of m_freem */
393 			while (mp) {
394 				MFREE(mp, m);
395 				mp = m;
396 			}
397 		return;
398 	}
399 	/* NOTREACHED */
400 
401 	/*
402 	 * Unwanted packed; free everything
403 	 * but the header and return an rst.
404 	 */
405 notwanted:
406 	m_freem(mp->m_next);
407 	mp->m_next = NULL;
408 	mp->m_len = sizeof(struct tcpiphdr);
409 #define xchg(a,b) j=a; a=b; b=j
410 	xchg(n->t_d.s_addr, n->t_s.s_addr); xchg(n->t_dst, n->t_src);
411 #undef xchg
412 	if (thflags&TH_ACK)
413 		n->t_seq = n->t_ackno;
414 	else {
415 		n->t_ackno = htonl(ntohl(n->t_seq) + tlen - hlen);
416 		n->t_seq = 0;
417 	}
418 	n->th_flags = ((thflags & TH_ACK) ? 0 : TH_ACK) | TH_RST;
419 	n->t_len = htons(TCPSIZE);
420 	n->t_off = 5;
421 	n->t_sum = inet_cksum(mp, sizeof(struct tcpiphdr));
422 	((struct ip *)n)->ip_len = sizeof(struct tcpiphdr);
423 	ip_output(mp);
424 	netstat.t_badsegs++;
425 }
426 
427 tcp_ctldat(tp, n0, dataok)
428 	register struct tcpcb *tp;
429 	struct tcpiphdr *n0;
430 	int dataok;
431 {
432 	register struct mbuf *m;
433 	register struct tcpiphdr *n = n0;
434 	register int thflags = n->th_flags;
435 	struct socket *so = tp->t_inpcb->inp_socket;
436 	seq_t past = n->t_seq + n->t_len;
437 	seq_t urgent;
438 	int sent;
439 COUNT(TCP_CTLDAT);
440 
441 	if (thflags & TH_URG)
442 		urgent = n->t_seq + n->t_urp;
443 	tp->tc_flags &= ~(TC_ACK_DUE|TC_NEW_WINDOW);
444 /* syn */
445 	if ((tp->tc_flags&TC_SYN_RCVD) == 0 && (thflags&TH_SYN)) {
446 		tp->irs = n->t_seq;
447 		tp->rcv_nxt = n->t_seq + 1;
448 		tp->snd_wl = tp->rcv_urp = tp->irs;
449 		tp->tc_flags |= (TC_SYN_RCVD|TC_ACK_DUE);
450 	}
451 /* ack */
452 	if ((thflags&TH_ACK) && (tp->tc_flags&TC_SYN_RCVD) &&
453 	    n->t_ackno > tp->snd_una) {
454 		register struct mbuf *mn;
455 
456 		/*
457 		 * Reflect newly acknowledged data.
458 		 */
459 		tp->snd_una = n->t_ackno;
460 		if (tp->snd_una > tp->snd_nxt)
461 			tp->snd_nxt = tp->snd_una;
462 
463 		/*
464 		 * If timed msg acked, update retransmit time value.
465 		 */
466 		if ((tp->tc_flags&TC_SYN_ACKED) &&
467 		    tp->snd_una > tp->t_xmt_val) {
468 			/* NEED SMOOTHING HERE */
469 			tp->t_xmtime = (tp->t_xmt != 0 ? tp->t_xmt : T_REXMT);
470 			if (tp->t_xmtime > T_REMAX)
471 				tp->t_xmtime = T_REMAX;
472 		}
473 
474 		/*
475 		 * Remove acked data from send buf
476 		 */
477 		sbdrop(&so->so_snd, tp->snd_una - tp->snd_off);
478 		tp->snd_off = tp->snd_una;
479 		if ((tp->tc_flags&TC_SYN_ACKED) == 0 &&
480 		    (tp->snd_una > tp->iss)) {
481 			tp->tc_flags |= TC_SYN_ACKED;
482 			tp->t_init = 0;
483 		}
484 		if (tp->seq_fin != tp->iss && tp->snd_una > tp->seq_fin)
485 			tp->tc_flags &= ~TC_SND_FIN;
486 		tp->t_rexmt = 0;
487 		tp->t_rexmttl = 0;
488 		tp->tc_flags |= TC_CANCELLED;
489 		sowwakeup(tp->t_inpcb->inp_socket);
490 	}
491 /* win */
492 	if ((tp->tc_flags & TC_SYN_RCVD) && n->t_seq >= tp->snd_wl) {
493 		tp->snd_wl = n->t_seq;
494 		tp->snd_wnd = n->t_win;
495 		tp->tc_flags |= TC_NEW_WINDOW;
496 		tp->t_persist = 0;
497 	}
498 /* text */
499 	if (dataok && n->t_len) {
500 		register struct tcpiphdr *p, *q;
501 		int overage;
502 
503 /* eol */
504 		if ((thflags&TH_EOL)) {
505 			register struct mbuf *m;
506 			for (m = dtom(n); m->m_next; m = m->m_next)
507 				;
508 			m->m_act = (struct mbuf *)(mtod(m, caddr_t) - 1);
509 		}
510 
511 /* text */
512 		/*
513 		 * Discard duplicate data already passed to user.
514 		 */
515 		if (SEQ_LT(n->t_seq, tp->rcv_nxt)) {
516 			register int i = tp->rcv_nxt - n->t_seq;
517 			if (i >= n->t_len)
518 				goto notext;
519 			n->t_seq += i;
520 			n->t_len -= i;
521 			m_adj(dtom(n), i);
522 		}
523 
524 		/*
525 		 * Find a segment which begins after this one does.
526 		 */
527 		for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
528 		    q = q->t_next)
529 			if (SEQ_GT(q->t_seq, n->t_seq))
530 				break;
531 
532 		/*
533 		 * If there is a preceding segment, it may provide some of
534 		 * our data already.  If so, drop the data from the incoming
535 		 * segment.  If it provides all of our data, drop us.
536 		 */
537 		if (q->t_prev != (struct tcpiphdr *)tp) {
538 			/* conversion to int (in i) handles seq wraparound */
539 			register int i =
540 			    q->t_prev->t_seq + q->t_prev->t_len - n->t_seq;
541 			if (i > 0) {
542 				if (i >= n->t_len)
543 					goto notext;
544 						/* w/o setting TC_NET_KEEP */
545 				m_adj(dtom(tp), i);
546 				n->t_len -= i;
547 				n->t_seq += i;
548 			}
549 		}
550 
551 		/*
552 		 * While we overlap succeeding segments trim them or,
553 		 * if they are completely covered, dequeue them.
554 		 */
555 		while (q != (struct tcpiphdr *)tp &&
556 		    SEQ_GT(n->t_seq + n->t_len, q->t_seq)) {
557 			register int i = (n->t_seq + n->t_len) - q->t_seq;
558 			if (i < q->t_len) {
559 				q->t_len -= i;
560 				m_adj(dtom(q), i);
561 				break;
562 			}
563 			q = q->t_next;
564 			m_freem(dtom(q->t_prev));
565 			remque(q->t_prev);
566 		}
567 
568 		/*
569 		 * Stick new segment in its place.
570 		 */
571 		insque(n, q->t_prev);
572 		tp->seqcnt += n->t_len;
573 
574 		/*
575 		 * Calculate available space and discard segments for
576 		 * which there is too much.
577 		 */
578 		overage =
579 		    (so->so_rcv.sb_cc /*XXX+tp->rcv_seqcnt*/) - so->so_rcv.sb_hiwat;
580 		if (overage > 0) {
581 			q = tp->seg_prev;
582 			for (;;) {
583 				register int i = MIN(q->t_len, overage);
584 				overage -= i;
585 				q->t_len -= i;
586 				m_adj(q, -i);
587 				if (q->t_len)
588 					break;
589 				if (q == n)
590 					panic("tcp_text dropall");
591 				q = q->t_prev;
592 				remque(q->t_next);
593 			}
594 		}
595 
596 		/*
597 		 * Advance rcv_next through newly completed sequence space.
598 		 */
599 		while (n->t_seq == tp->rcv_nxt) {
600 			tp->rcv_nxt += n->t_len;
601 			n = n->t_next;
602 			if (n == (struct tcpiphdr *)tp)
603 				break;
604 		}
605 /* urg */
606 		if (thflags&TH_URG) {
607 			/* ... */
608 			if (SEQ_GT(urgent, tp->rcv_urp))
609 				tp->rcv_urp = urgent;
610 		}
611 		tp->tc_flags |= (TC_ACK_DUE|TC_NET_KEEP);
612 	}
613 notext:
614 /* fin */
615 	if ((thflags&TH_FIN) && past == tp->rcv_nxt) {
616 		if ((tp->tc_flags&TC_FIN_RCVD) == 0) {
617 			tp->tc_flags |= TC_FIN_RCVD;
618 			sorwakeup(so);
619 			tp->rcv_nxt++;
620 		}
621 		tp->tc_flags |= TC_ACK_DUE;
622 	}
623 /* respond */
624 	sent = 0;
625 	if (tp->tc_flags&TC_ACK_DUE)
626 		sent = tcp_sndctl(tp);
627 	else if ((tp->tc_flags&TC_NEW_WINDOW))
628 		if (tp->snd_nxt <= tp->snd_off + so->so_snd.sb_cc ||
629 		    (tp->tc_flags&TC_SND_FIN))
630 			sent = tcp_send(tp);
631 
632 /* set for retrans */
633 	if (!sent && tp->snd_una < tp->snd_nxt &&
634 	    (tp->tc_flags&TC_CANCELLED)) {
635 		tp->t_rexmt = tp->t_xmtime;
636 		tp->t_rexmttl = T_REXMTTL;
637 		tp->t_rexmt_val = tp->t_rtl_val = tp->snd_lst;
638 		tp->tc_flags &= ~TC_CANCELLED;
639 	}
640 /* present data to user */
641 	if ((tp->tc_flags&TC_SYN_ACKED) == 0)
642 		return;
643 	n = tp->seg_next;
644 	while (n != (struct tcpiphdr *)tp && n->t_seq < tp->rcv_nxt) {
645 		remque(n);
646 		sbappend(so->so_rcv, dtom(n));
647 		tp->seqcnt -= n->t_len;
648 		if (tp->seqcnt < 0)
649 			panic("tcp_input present");
650 		n = n->t_next;
651 	}
652 	sorwakeup(so);
653 }
654