xref: /csrg-svn/sys/netinet/tcp_input.c (revision 4899)
1 /* tcp_input.c 1.23 81/11/15 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/socket.h"
7 #include "../h/socketvar.h"
8 #include "../net/inet_cksum.h"
9 #include "../net/inet.h"
10 #include "../net/inet_pcb.h"
11 #include "../net/inet_systm.h"
12 #include "../net/imp.h"
13 #include "../net/inet_host.h"
14 #include "../net/ip.h"
15 #include "../net/ip_var.h"
16 #include "../net/tcp.h"
17 #include "../net/tcp_fsm.h"
18 #include "../net/tcp_var.h"
19 #include "/usr/include/errno.h"
20 
/*
 * Nonzero (the default) makes tcp_input verify the checksum of
 * each incoming segment; zero skips the check.
 */
int tcpcksum = 1;
22 
23 tcp_drain()
24 {
25 	register struct inpcb *ip;
26 
27 	for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next)
28 		tcp_drainunack(intotcpcb(ip));
29 }
30 
31 tcp_drainunack(tp)
32 	register struct tcpcb *tp;
33 {
34 	register struct mbuf *m;
35 
36 	for (m = tp->seg_unack; m; m = m->m_act)
37 		m_freem(m);
38 	tp->seg_unack = 0;
39 }
40 
/*
 * Control input for tcp.  The message is not examined;
 * the mbuf chain carrying it is simply released.
 */
tcp_ctlinput(m)
	struct mbuf *m;
{
	m_freem(m);
}
47 
48 tcp_input(mp)
49 	register struct mbuf *mp;
50 {
51 	register struct tcpiphdr *n;		/* known to be r10 */
52 	register int j;
53 	register struct tcpcb *tp;
54 	struct inpcb *inp;
55 	register int thflags;
56 	int nstate;
57 	struct mbuf *m;
58 	struct socket *so;
59 	int hlen, tlen;
60 	u_short lport, fport;
61 #ifdef TCPDEBUG
62 	struct tcp_debug tdb;
63 #endif
64 COUNT(TCP_INPUT);
65 
66 	/*
67 	 * Build extended tcp header
68 	 */
69 	n = mtod(mp, struct tcpiphdr *);
70 	thflags = n->ti_flags;
71 	tlen = ((struct ip *)n)->ip_len;
72 	n->ti_len = htons(tlen);
73 	n->ti_next = NULL;
74 	n->ti_prev = NULL;
75 	n->ti_x1 = 0;
76 	lport = ntohs(n->ti_dst);
77 	fport = ntohs(n->ti_src);
78 
79 	/* WONT BE POSSIBLE WHEN MBUFS ARE 256 BYTES */
80 	if ((hlen = n->ti_off << 2) > mp->m_len)
81 		{ printf("tcp header overflow\n"); m_freem(mp); return; }
82 
83 	if (tcpcksum) {
84 		/*
85 		 * Checksum extended header and data
86 		 */
87 		CKSUM_TCPCHK(mp, n, r10, sizeof (struct ip) + tlen);
88 		if (n->ti_sum != 0) {
89 			netstat.t_badsum++;
90 			m_freem(mp);
91 			return;
92 		}
93 	}
94 
95 	/*
96 	 * Find tcb for message.
97 	 */
98 	inp = inpcb_lookup(&tcb, &n->ti_src, fport, &n_lhost, lport);
99 	if (inp == 0)
100 		goto notwanted;
101 
102 	/*
103 	 * Byte swap header
104 	 */
105 	n->ti_len = tlen - hlen;
106 	n->ti_sport = fport;
107 	n->ti_dport = lport;
108 	n->ti_seq = ntohl(n->ti_seq);
109 	n->ti_ackno = ntohl(n->ti_ackno);
110 	n->ti_win = ntohs(n->ti_win);
111 	n->ti_urp = ntohs(n->ti_urp);
112 
113 	/*
114 	 * Check segment seq # and do rst processing
115 	 */
116 	switch (tp->t_state) {
117 
118 	case LISTEN:
119 		if ((thflags&TH_ACK) || !syn_ok(tp, n)) {
120 			tcp_sndrst(tp, n);
121 			goto badseg;
122 		}
123 		if (thflags&TH_RST)
124 			goto badseg;
125 		goto goodseg;
126 
127 	case SYN_SENT:
128 		if (!ack_ok(tp, n) || !syn_ok(tp, n)) {
129 			tcp_sndrst(tp, n);			/* 71,72,75 */
130 			goto badseg;
131 		}
132 		if (thflags&TH_RST) {
133 			tcp_error(tp, ENETRESET);
134 			tcp_detach(tp);				/* 70 */
135 			tp->t_state = CLOSED;
136 			goto badseg;
137 		}
138 		goto goodseg;
139 
140 	default:
141         	if ((thflags&TH_RST) == 0)
142 			goto common;
143 		if (n->ti_seq < tp->rcv_nxt)		/* bad rst */
144 			goto badseg;				/* 69 */
145 		switch (tp->t_state) {
146 
147 		case L_SYN_RCVD:
148 			if (ack_ok(tp, n) == 0)
149 				goto badseg;			/* 69 */
150 			tp->t_rexmt = 0;
151 			tp->t_rexmttl = 0;
152 			tp->t_persist = 0;
153 			h_free(inp->inp_fhost);
154 			inp->inp_fhost = 0;
155 			tp->t_state = LISTEN;
156 			goto badseg;
157 
158 		default:
159 			tcp_error(tp, ENETRESET);
160 			tcp_detach(tp);				/* 66 */
161 			tp->t_state = CLOSED;
162 			goto badseg;
163 		}
164 		/*NOTREACHED*/
165 
166 	case SYN_RCVD:
167 common:
168 		if (ack_ok(tp, n) == 0) {
169 			tcp_sndrst(tp, n);			/* 74 */
170 			goto badseg;
171 		}
172 		if (syn_ok(tp, n) && n->ti_seq != tp->irs) {
173 			tcp_sndnull(tp);			/* 74 */
174 			goto badseg;
175 		}
176 		goto goodseg;
177 	}
178 badseg:
179 	m_freem(mp);
180 	return;
181 
182 goodseg:
183 	/*
184 	 * Defer processing if no buffer space for this connection.
185 	 */
186 	so = inp->inp_socket;
187 	if (so->so_rcv.sb_cc >= so->so_rcv.sb_hiwat &&
188 	     n->ti_len != 0 && mbstat.m_bufs < mbstat.m_lowat) {
189 /*
190 		mp->m_act = (struct mbuf *)0;
191 		if ((m = tp->seg_unack) != NULL) {
192 			while (m->m_act != NULL)
193 				m = m->m_act;
194 			m->m_act = mp;
195 		} else
196 			tp->seg_unack = mp;
197 */
198 		m_freem(mp);
199 		return;
200 	}
201 
202 	/*
203 	 * Discard ip header, and do tcp input processing.
204 	 */
205 	hlen += sizeof(struct ip);
206 	mp->m_off += hlen;
207 	mp->m_len -= hlen;
208 	nstate = tp->t_state;
209 	tp->tc_flags &= ~TC_NET_KEEP;
210 #ifdef KPROF
211 	acounts[tp->t_state][INRECV]++;
212 #endif
213 #ifdef TCPDEBUG
214 	if ((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug) {
215 		tdb_setup(tp, n, INRECV, &tdb);
216 	} else
217 		tdb.td_tod = 0;
218 #endif
219 	switch (tp->t_state) {
220 
221 	case LISTEN:
222 		if (!syn_ok(tp, n) ||
223 		    ((inp->inp_lhost = in_hmake(&n->ti_src)) == 0)) {
224 			nstate = EFAILEC;
225 			goto done;
226 		}
227 		inp->inp_fport = n->ti_sport;
228 		tp->t_template = tcp_template(tp);
229 		tcp_ctldat(tp, n, 1);
230 		if (tp->tc_flags&TC_FIN_RCVD) {
231 			tp->t_finack = T_2ML;			/* 3 */
232 			tp->tc_flags &= ~TC_WAITED_2_ML;
233 			nstate = CLOSE_WAIT;
234 		} else {
235 			tp->t_init = T_INIT / 2;		/* 4 */
236 			nstate = L_SYN_RCVD;
237 		}
238 		goto done;
239 
240 	case SYN_SENT:
241 		if (!syn_ok(tp, n)) {
242 			nstate = EFAILEC;
243 			goto done;
244 		}
245 		tcp_ctldat(tp, n, 1);
246 		if (tp->tc_flags&TC_FIN_RCVD) {
247 			if ((thflags&TH_ACK) == 0) {
248 				tp->t_finack = T_2ML;		/* 9 */
249 				tp->tc_flags &= ~TC_WAITED_2_ML;
250 			}
251 			nstate = CLOSE_WAIT;
252 			goto done;
253 		}
254 		nstate = (thflags&TH_ACK) ? ESTAB : SYN_RCVD; /* 11:8 */
255 		goto done;
256 
257 	case SYN_RCVD:
258 	case L_SYN_RCVD:
259 		if ((thflags&TH_ACK) == 0 ||
260 		    (thflags&TH_ACK) && n->ti_ackno <= tp->iss) {
261 			nstate = EFAILEC;
262 			goto done;
263 		}
264 		goto input;
265 
266 	case ESTAB:
267 	case FIN_W1:
268 	case FIN_W2:
269 	case TIME_WAIT:
270 input:
271 		tcp_ctldat(tp, n, 1);				/* 39 */
272 		switch (tp->t_state) {
273 
274 		case ESTAB:
275 			if (tp->tc_flags&TC_FIN_RCVD)
276 				nstate = CLOSE_WAIT;
277 			break;
278 
279 		case SYN_RCVD:
280 		case L_SYN_RCVD:
281 			nstate = (tp->tc_flags&TC_FIN_RCVD) ?
282 			    CLOSE_WAIT : ESTAB;			 /* 33:5 */
283 			break;
284 
285 		case FIN_W1:
286 			j = ack_fin(tp, n);
287 			if ((tp->tc_flags & TC_FIN_RCVD) == 0) {
288 				if (j)
289 					nstate = FIN_W2;	/* 27 */
290 				break;
291 			}
292 			tp->t_finack = T_2ML;
293 			tp->tc_flags &= ~TC_WAITED_2_ML;
294 			nstate = j ? TIME_WAIT : CLOSING;	/* 28:26 */
295 			break;
296 
297 		case FIN_W2:
298 			if (tp->tc_flags&TC_FIN_RCVD) {
299 				tp->t_finack = T_2ML;		/* 29 */
300 				tp->tc_flags &= ~TC_WAITED_2_ML;
301 				nstate = TIME_WAIT;
302 				break;
303 			}
304 			break;
305 		}
306 		goto done;
307 
308 	case CLOSE_WAIT:
309 		if (thflags&TH_FIN) {
310 			if ((thflags&TH_ACK) &&
311 			    n->ti_ackno <= tp->seq_fin) {
312 				tcp_ctldat(tp, n, 0);		/* 30 */
313 				tp->t_finack = T_2ML;
314 				tp->tc_flags &= ~TC_WAITED_2_ML;
315 			} else
316 				tcp_sndctl(tp);			/* 31 */
317 			goto done;
318 		}
319 		goto input;
320 
321 	case CLOSING:
322 		j = ack_fin(tp, n);
323 		if (thflags&TH_FIN) {
324 			tcp_ctldat(tp, n, 0);
325 			tp->t_finack = T_2ML;
326 			tp->tc_flags &= ~TC_WAITED_2_ML;
327 			if (j)
328 				nstate = TIME_WAIT;		/* 23 */
329 			goto done;
330 		}
331 		if (j) {
332 			if (tp->tc_flags&TC_WAITED_2_ML)
333 				if (rcv_empty(tp)) {
334 					sorwakeup(inp->inp_socket);
335 					nstate = CLOSED;	/* 15 */
336 				} else
337 					nstate = RCV_WAIT;	/* 18 */
338 			else
339 				nstate = TIME_WAIT;
340 			goto done;
341 		}
342 		goto input;
343 
344 	case LAST_ACK:
345 		if (ack_fin(tp, n)) {
346 			if (rcv_empty(tp)) {		/* 16 */
347 				sorwakeup(inp->inp_socket);
348 				nstate = CLOSED;
349 			} else
350 				nstate = RCV_WAIT;		/* 19 */
351 			goto done;
352 		}
353 		if (thflags&TH_FIN) {
354 			tcp_sndctl(tp);				/* 31 */
355 			goto done;
356 		}
357 		goto input;
358 
359 	case RCV_WAIT:
360 		if ((thflags&TH_FIN) && (thflags&TH_ACK) &&
361 		    n->ti_ackno <= tp->seq_fin) {
362 			tcp_ctldat(tp, n, 0);
363 			tp->t_finack = T_2ML;
364 			tp->tc_flags &= ~TC_WAITED_2_ML;	/* 30 */
365 		}
366 		goto done;
367 	}
368 	panic("tcp_input");
369 done:
370 
371 	/*
372 	 * Done with state*input specific processing.
373 	 * Form trace records, free input if not needed,
374 	 * and enter new state.
375 	 */
376 #ifdef TCPDEBUG
377 	if (tdb.td_tod)
378 		tdb_stuff(&tdb, nstate);
379 #endif
380 	switch (nstate) {
381 
382 	case EFAILEC:
383 		m_freem(mp);
384 		return;
385 
386 	default:
387 		tp->t_state = nstate;
388 		/* fall into ... */
389 
390 	case CLOSED:
391 		/* IF CLOSED CANT LOOK AT tc_flags */
392 		if ((tp->tc_flags&TC_NET_KEEP) == 0)
393 			/* inline expansion of m_freem */
394 			while (mp) {
395 				MFREE(mp, m);
396 				mp = m;
397 			}
398 		return;
399 	}
400 	/* NOTREACHED */
401 
402 	/*
403 	 * Unwanted packed; free everything
404 	 * but the header and return an rst.
405 	 */
406 notwanted:
407 	m_freem(mp->m_next);
408 	mp->m_next = NULL;
409 	mp->m_len = sizeof(struct tcpiphdr);
410 #define xchg(a,b) j=a; a=b; b=j
411 	xchg(n->ti_dst.s_addr, n->ti_src.s_addr);
412 	xchg(n->ti_dport, n->ti_sport);
413 #undef xchg
414 	if (thflags&TH_ACK)
415 		n->ti_seq = n->ti_ackno;
416 	else {
417 		n->ti_ackno = htonl(ntohl(n->ti_seq) + tlen - hlen);
418 		n->ti_seq = 0;
419 	}
420 	n->ti_flags = ((thflags & TH_ACK) ? 0 : TH_ACK) | TH_RST;
421 	n->ti_len = htons(TCPSIZE);
422 	n->ti_off = 5;
423 	n->ti_sum = inet_cksum(mp, sizeof(struct tcpiphdr));
424 	((struct ip *)n)->ip_len = sizeof(struct tcpiphdr);
425 	ip_output(mp);
426 	netstat.t_badsegs++;
427 }
428 
429 tcp_ctldat(tp, n0, dataok)
430 	register struct tcpcb *tp;
431 	struct tcpiphdr *n0;
432 	int dataok;
433 {
434 	register struct mbuf *m;
435 	register struct tcpiphdr *n = n0;
436 	register int thflags = n->ti_flags;
437 	struct socket *so = tp->t_inpcb->inp_socket;
438 	seq_t past = n->ti_seq + n->ti_len;
439 	seq_t urgent;
440 	int sent;
441 COUNT(TCP_CTLDAT);
442 
443 	if (thflags & TH_URG)
444 		urgent = n->ti_seq + n->ti_urp;
445 	tp->tc_flags &= ~(TC_ACK_DUE|TC_NEW_WINDOW);
446 /* syn */
447 	if ((tp->tc_flags&TC_SYN_RCVD) == 0 && (thflags&TH_SYN)) {
448 		tp->irs = n->ti_seq;
449 		tp->rcv_nxt = n->ti_seq + 1;
450 		tp->snd_wl = tp->rcv_urp = tp->irs;
451 		tp->tc_flags |= (TC_SYN_RCVD|TC_ACK_DUE);
452 	}
453 /* ack */
454 	if ((thflags&TH_ACK) && (tp->tc_flags&TC_SYN_RCVD) &&
455 	    n->ti_ackno > tp->snd_una) {
456 		register struct mbuf *mn;
457 
458 		/*
459 		 * Reflect newly acknowledged data.
460 		 */
461 		tp->snd_una = n->ti_ackno;
462 		if (tp->snd_una > tp->snd_nxt)
463 			tp->snd_nxt = tp->snd_una;
464 
465 		/*
466 		 * If timed msg acked, update retransmit time value.
467 		 */
468 		if ((tp->tc_flags&TC_SYN_ACKED) &&
469 		    tp->snd_una > tp->t_xmt_val) {
470 			/* NEED SMOOTHING HERE */
471 			tp->t_xmtime = (tp->t_xmt != 0 ? tp->t_xmt : T_REXMT);
472 			if (tp->t_xmtime > T_REMAX)
473 				tp->t_xmtime = T_REMAX;
474 		}
475 
476 		/*
477 		 * Remove acked data from send buf
478 		 */
479 		sbdrop(&so->so_snd, tp->snd_una - tp->snd_off);
480 		tp->snd_off = tp->snd_una;
481 		if ((tp->tc_flags&TC_SYN_ACKED) == 0 &&
482 		    (tp->snd_una > tp->iss)) {
483 			tp->tc_flags |= TC_SYN_ACKED;
484 			tp->t_init = 0;
485 		}
486 		if (tp->seq_fin != tp->iss && tp->snd_una > tp->seq_fin)
487 			tp->tc_flags &= ~TC_SND_FIN;
488 		tp->t_rexmt = 0;
489 		tp->t_rexmttl = 0;
490 		tp->tc_flags |= TC_CANCELLED;
491 		sowwakeup(tp->t_inpcb->inp_socket);
492 	}
493 /* win */
494 	if ((tp->tc_flags & TC_SYN_RCVD) && n->ti_seq >= tp->snd_wl) {
495 		tp->snd_wl = n->ti_seq;
496 		tp->snd_wnd = n->ti_win;
497 		tp->tc_flags |= TC_NEW_WINDOW;
498 		tp->t_persist = 0;
499 	}
500 /* text */
501 	if (dataok && n->ti_len) {
502 		register struct tcpiphdr *p, *q;
503 		int overage;
504 
505 /* eol */
506 		if ((thflags&TH_EOL)) {
507 			register struct mbuf *m;
508 			for (m = dtom(n); m->m_next; m = m->m_next)
509 				;
510 			m->m_act = (struct mbuf *)(mtod(m, caddr_t) - 1);
511 		}
512 
513 /* text */
514 		/*
515 		 * Discard duplicate data already passed to user.
516 		 */
517 		if (SEQ_LT(n->ti_seq, tp->rcv_nxt)) {
518 			register int i = tp->rcv_nxt - n->ti_seq;
519 			if (i >= n->ti_len)
520 				goto notext;
521 			n->ti_seq += i;
522 			n->ti_len -= i;
523 			m_adj(dtom(n), i);
524 		}
525 
526 		/*
527 		 * Find a segment which begins after this one does.
528 		 */
529 		for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
530 		    q = (struct tcpiphdr *)q->ti_next)
531 			if (SEQ_GT(q->ti_seq, n->ti_seq))
532 				break;
533 
534 		/*
535 		 * If there is a preceding segment, it may provide some of
536 		 * our data already.  If so, drop the data from the incoming
537 		 * segment.  If it provides all of our data, drop us.
538 		 */
539 		if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
540 			register int i;
541 			q = (struct tcpiphdr *)(q->ti_prev);
542 			/* conversion to int (in i) handles seq wraparound */
543 			i = q->ti_seq + q->ti_len - n->th_seq;
544 			if (i > 0) {
545 				if (i >= n->ti_len)
546 					goto notext;
547 						/* w/o setting TC_NET_KEEP */
548 				m_adj(dtom(tp), i);
549 				n->ti_len -= i;
550 				n->ti_seq += i;
551 			}
552 			q = (struct tcpiphdr *)(q->ti_next);
553 		}
554 
555 		/*
556 		 * While we overlap succeeding segments trim them or,
557 		 * if they are completely covered, dequeue them.
558 		 */
559 		while (q != (struct tcpiphdr *)tp &&
560 		    SEQ_GT(n->ti_seq + n->ti_len, q->ti_seq)) {
561 			register int i = (n->ti_seq + n->ti_len) - q->ti_seq;
562 			if (i < q->ti_len) {
563 				q->ti_len -= i;
564 				m_adj(dtom(q), i);
565 				break;
566 			}
567 			q = (struct tcpiphdr *)q->ti_next;
568 			m_freem(dtom(q->ti_prev));
569 			remque(q->ti_prev);
570 		}
571 
572 		/*
573 		 * Stick new segment in its place.
574 		 */
575 		insque(n, q->ti_prev);
576 		tp->seqcnt += n->ti_len;
577 
578 		/*
579 		 * Calculate available space and discard segments for
580 		 * which there is too much.
581 		 */
582 		overage =
583 		    (so->so_rcv.sb_cc /*XXX+tp->rcv_seqcnt*/) - so->so_rcv.sb_hiwat;
584 		if (overage > 0) {
585 			q = tp->seg_prev;
586 			for (;;) {
587 				register int i = MIN(q->ti_len, overage);
588 				overage -= i;
589 				q->ti_len -= i;
590 				m_adj(q, -i);
591 				if (q->ti_len)
592 					break;
593 				if (q == n)
594 					panic("tcp_text dropall");
595 				q = (struct tcpiphdr *)q->ti_prev;
596 				remque(q->ti_next);
597 			}
598 		}
599 
600 		/*
601 		 * Advance rcv_next through newly completed sequence space.
602 		 */
603 		while (n->ti_seq == tp->rcv_nxt) {
604 			tp->rcv_nxt += n->ti_len;
605 			n = (struct tcpiphdr *)n->ti_next;
606 			if (n == (struct tcpiphdr *)tp)
607 				break;
608 		}
609 /* urg */
610 		if (thflags&TH_URG) {
611 			/* ... */
612 			if (SEQ_GT(urgent, tp->rcv_urp))
613 				tp->rcv_urp = urgent;
614 		}
615 		tp->tc_flags |= (TC_ACK_DUE|TC_NET_KEEP);
616 	}
617 notext:
618 /* fin */
619 	if ((thflags&TH_FIN) && past == tp->rcv_nxt) {
620 		if ((tp->tc_flags&TC_FIN_RCVD) == 0) {
621 			tp->tc_flags |= TC_FIN_RCVD;
622 			sorwakeup(so);
623 			tp->rcv_nxt++;
624 		}
625 		tp->tc_flags |= TC_ACK_DUE;
626 	}
627 /* respond */
628 	sent = 0;
629 	if (tp->tc_flags&TC_ACK_DUE)
630 		sent = tcp_sndctl(tp);
631 	else if ((tp->tc_flags&TC_NEW_WINDOW))
632 		if (tp->snd_nxt <= tp->snd_off + so->so_snd.sb_cc ||
633 		    (tp->tc_flags&TC_SND_FIN))
634 			sent = tcp_send(tp);
635 
636 /* set for retrans */
637 	if (!sent && tp->snd_una < tp->snd_nxt &&
638 	    (tp->tc_flags&TC_CANCELLED)) {
639 		tp->t_rexmt = tp->t_xmtime;
640 		tp->t_rexmttl = T_REXMTTL;
641 		tp->t_rexmt_val = tp->t_rtl_val = tp->snd_lst;
642 		tp->tc_flags &= ~TC_CANCELLED;
643 	}
644 /* present data to user */
645 	if ((tp->tc_flags&TC_SYN_ACKED) == 0)
646 		return;
647 	n = tp->seg_next;
648 	while (n != (struct tcpiphdr *)tp && n->ti_seq < tp->rcv_nxt) {
649 		remque(n);
650 		sbappend(so->so_rcv, dtom(n));
651 		tp->seqcnt -= n->ti_len;
652 		if (tp->seqcnt < 0)
653 			panic("tcp_input present");
654 		n = (struct tcpiphdr *)n->ti_next;
655 	}
656 	sorwakeup(so);
657 }
658