xref: /csrg-svn/sys/netinet/tcp_input.c (revision 4909)
1 /* tcp_input.c 1.24 81/11/16 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/socket.h"
7 #include "../h/socketvar.h"
8 #include "../net/inet_cksum.h"
9 #include "../net/inet.h"
10 #include "../net/inet_pcb.h"
11 #include "../net/inet_systm.h"
12 #include "../net/imp.h"
13 #include "../net/inet_host.h"
14 #include "../net/ip.h"
15 #include "../net/ip_var.h"
16 #include "../net/tcp.h"
17 #include "../net/tcp_fsm.h"
18 #include "../net/tcp_var.h"
19 #include "/usr/include/errno.h"
20 
/*
 * Checksum switch: when nonzero, tcp_input verifies the checksum of
 * every incoming segment and drops those that fail.
 */
int	tcpcksum = 1;
22 
23 tcp_drain()
24 {
25 	register struct inpcb *ip;
26 
27 	for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next)
28 		tcp_drainunack(intotcpcb(ip));
29 }
30 
31 tcp_drainunack(tp)
32 	register struct tcpcb *tp;
33 {
34 	register struct mbuf *m;
35 
36 	for (m = tp->seg_unack; m; m = m->m_act)
37 		m_freem(m);
38 	tp->seg_unack = 0;
39 }
40 
/*
 * Control input for tcp.  The message is not examined;
 * its mbuf chain is simply released.
 */
tcp_ctlinput(m)
	struct mbuf *m;
{

	m_freem(m);
	return;
}
47 
48 tcp_input(mp)
49 	register struct mbuf *mp;
50 {
51 	register struct tcpiphdr *n;		/* known to be r10 */
52 	register int j;
53 	register struct tcpcb *tp;
54 	struct inpcb *inp;
55 	register int thflags;
56 	int nstate;
57 	struct mbuf *m;
58 	struct socket *so;
59 	int hlen;
60 	u_short tlen, lport, fport;
61 #ifdef TCPDEBUG
62 	struct tcp_debug tdb;
63 #endif
64 COUNT(TCP_INPUT);
65 
66 	/*
67 	 * Build extended tcp header
68 	 */
69 	n = mtod(mp, struct tcpiphdr *);
70 	thflags = n->ti_flags;
71 	tlen = ((struct ip *)n)->ip_len;
72 	n->ti_len = htons(tlen);
73 	n->ti_next = NULL;
74 	n->ti_prev = NULL;
75 	n->ti_x1 = 0;
76 	lport = ntohs(n->ti_dport);
77 	fport = ntohs(n->ti_sport);
78 
79 	/* WONT BE POSSIBLE WHEN MBUFS ARE 256 BYTES */
80 	if ((hlen = n->ti_off << 2) > mp->m_len)
81 		{ printf("tcp header overflow\n"); m_freem(mp); return; }
82 
83 	if (tcpcksum) {
84 		/*
85 		 * Checksum extended header and data
86 		 */
87 		CKSUM_TCPCHK(mp, n, r10, sizeof (struct ip) + tlen);
88 		if (n->ti_sum != 0) {
89 			netstat.t_badsum++;
90 			m_freem(mp);
91 			return;
92 		}
93 	}
94 
95 	/*
96 	 * Find tcb for message.
97 	 */
98 	inp = in_pcblookup(&tcb, &n->ti_src, fport, &n_lhost, lport);
99 	if (inp == 0)
100 		goto notwanted;
101 	tp = intotcpcb(inp);
102 	if (tp == 0)
103 		goto notwanted;
104 
105 	/*
106 	 * Byte swap header
107 	 */
108 	n->ti_len = tlen - hlen;
109 	n->ti_sport = fport;
110 	n->ti_dport = lport;
111 	n->ti_seq = ntohl(n->ti_seq);
112 	n->ti_ackno = ntohl((n_long)n->ti_ackno);
113 	n->ti_win = ntohs(n->ti_win);
114 	n->ti_urp = ntohs(n->ti_urp);
115 
116 	/*
117 	 * Check segment seq # and do rst processing
118 	 */
119 	switch (tp->t_state) {
120 
121 	case LISTEN:
122 		if ((thflags&TH_ACK) || !syn_ok(tp, n)) {
123 			tcp_sndrst(tp, n);
124 			goto badseg;
125 		}
126 		if (thflags&TH_RST)
127 			goto badseg;
128 		goto goodseg;
129 
130 	case SYN_SENT:
131 		if (!ack_ok(tp, n) || !syn_ok(tp, n)) {
132 			tcp_sndrst(tp, n);			/* 71,72,75 */
133 			goto badseg;
134 		}
135 		if (thflags&TH_RST) {
136 			tcp_error(tp, ENETRESET);
137 			tcp_detach(tp);				/* 70 */
138 			tp->t_state = CLOSED;
139 			goto badseg;
140 		}
141 		goto goodseg;
142 
143 	default:
144         	if ((thflags&TH_RST) == 0)
145 			goto common;
146 		if (n->ti_seq < tp->rcv_nxt)		/* bad rst */
147 			goto badseg;				/* 69 */
148 		switch (tp->t_state) {
149 
150 		case L_SYN_RCVD:
151 			if (ack_ok(tp, n) == 0)
152 				goto badseg;			/* 69 */
153 			tp->t_rexmt = 0;
154 			tp->t_rexmttl = 0;
155 			tp->t_persist = 0;
156 			in_hostfree(inp->inp_fhost);
157 			inp->inp_fhost = 0;
158 			tp->t_state = LISTEN;
159 			goto badseg;
160 
161 		default:
162 			tcp_error(tp, ENETRESET);
163 			tcp_detach(tp);				/* 66 */
164 			tp->t_state = CLOSED;
165 			goto badseg;
166 		}
167 		/*NOTREACHED*/
168 
169 	case SYN_RCVD:
170 common:
171 		if (ack_ok(tp, n) == 0) {
172 			tcp_sndrst(tp, n);			/* 74 */
173 			goto badseg;
174 		}
175 		if (syn_ok(tp, n) && n->ti_seq != tp->irs) {
176 			tcp_sndnull(tp);			/* 74 */
177 			goto badseg;
178 		}
179 		goto goodseg;
180 	}
181 badseg:
182 	m_freem(mp);
183 	return;
184 
185 goodseg:
186 	/*
187 	 * Defer processing if no buffer space for this connection.
188 	 */
189 	so = inp->inp_socket;
190 	if (so->so_rcv.sb_cc >= so->so_rcv.sb_hiwat &&
191 	     n->ti_len != 0 && mbstat.m_bufs < mbstat.m_lowat) {
192 /*
193 		mp->m_act = (struct mbuf *)0;
194 		if ((m = tp->seg_unack) != NULL) {
195 			while (m->m_act != NULL)
196 				m = m->m_act;
197 			m->m_act = mp;
198 		} else
199 			tp->seg_unack = mp;
200 */
201 		m_freem(mp);
202 		return;
203 	}
204 
205 	/*
206 	 * Discard ip header, and do tcp input processing.
207 	 */
208 	hlen += sizeof(struct ip);
209 	mp->m_off += hlen;
210 	mp->m_len -= hlen;
211 	nstate = tp->t_state;
212 	tp->tc_flags &= ~TC_NET_KEEP;
213 #ifdef KPROF
214 	acounts[tp->t_state][INRECV]++;
215 #endif
216 #ifdef TCPDEBUG
217 	if ((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug) {
218 		tdb_setup(tp, n, INRECV, &tdb);
219 	} else
220 		tdb.td_tod = 0;
221 #endif
222 	switch (tp->t_state) {
223 
224 	case LISTEN:
225 		if (!syn_ok(tp, n) ||
226 		    ((inp->inp_lhost = in_hostalloc(&n->ti_src)) == 0)) {
227 			nstate = EFAILEC;
228 			goto done;
229 		}
230 		inp->inp_fport = n->ti_sport;
231 		tp->t_template = tcp_template(tp);
232 		tcp_ctldat(tp, n, 1);
233 		if (tp->tc_flags&TC_FIN_RCVD) {
234 			tp->t_finack = T_2ML;			/* 3 */
235 			tp->tc_flags &= ~TC_WAITED_2_ML;
236 			nstate = CLOSE_WAIT;
237 		} else {
238 			tp->t_init = T_INIT / 2;		/* 4 */
239 			nstate = L_SYN_RCVD;
240 		}
241 		goto done;
242 
243 	case SYN_SENT:
244 		if (!syn_ok(tp, n)) {
245 			nstate = EFAILEC;
246 			goto done;
247 		}
248 		tcp_ctldat(tp, n, 1);
249 		if (tp->tc_flags&TC_FIN_RCVD) {
250 			if ((thflags&TH_ACK) == 0) {
251 				tp->t_finack = T_2ML;		/* 9 */
252 				tp->tc_flags &= ~TC_WAITED_2_ML;
253 			}
254 			nstate = CLOSE_WAIT;
255 			goto done;
256 		}
257 		nstate = (thflags&TH_ACK) ? ESTAB : SYN_RCVD; /* 11:8 */
258 		goto done;
259 
260 	case SYN_RCVD:
261 	case L_SYN_RCVD:
262 		if ((thflags&TH_ACK) == 0 ||
263 		    (thflags&TH_ACK) && n->ti_ackno <= tp->iss) {
264 			nstate = EFAILEC;
265 			goto done;
266 		}
267 		goto input;
268 
269 	case ESTAB:
270 	case FIN_W1:
271 	case FIN_W2:
272 	case TIME_WAIT:
273 input:
274 		tcp_ctldat(tp, n, 1);				/* 39 */
275 		switch (tp->t_state) {
276 
277 		case ESTAB:
278 			if (tp->tc_flags&TC_FIN_RCVD)
279 				nstate = CLOSE_WAIT;
280 			break;
281 
282 		case SYN_RCVD:
283 		case L_SYN_RCVD:
284 			nstate = (tp->tc_flags&TC_FIN_RCVD) ?
285 			    CLOSE_WAIT : ESTAB;			 /* 33:5 */
286 			break;
287 
288 		case FIN_W1:
289 			j = ack_fin(tp, n);
290 			if ((tp->tc_flags & TC_FIN_RCVD) == 0) {
291 				if (j)
292 					nstate = FIN_W2;	/* 27 */
293 				break;
294 			}
295 			tp->t_finack = T_2ML;
296 			tp->tc_flags &= ~TC_WAITED_2_ML;
297 			nstate = j ? TIME_WAIT : CLOSING;	/* 28:26 */
298 			break;
299 
300 		case FIN_W2:
301 			if (tp->tc_flags&TC_FIN_RCVD) {
302 				tp->t_finack = T_2ML;		/* 29 */
303 				tp->tc_flags &= ~TC_WAITED_2_ML;
304 				nstate = TIME_WAIT;
305 				break;
306 			}
307 			break;
308 		}
309 		goto done;
310 
311 	case CLOSE_WAIT:
312 		if (thflags&TH_FIN) {
313 			if ((thflags&TH_ACK) &&
314 			    n->ti_ackno <= tp->seq_fin) {
315 				tcp_ctldat(tp, n, 0);		/* 30 */
316 				tp->t_finack = T_2ML;
317 				tp->tc_flags &= ~TC_WAITED_2_ML;
318 			} else
319 				(void) tcp_sndctl(tp);		/* 31 */
320 			goto done;
321 		}
322 		goto input;
323 
324 	case CLOSING:
325 		j = ack_fin(tp, n);
326 		if (thflags&TH_FIN) {
327 			tcp_ctldat(tp, n, 0);
328 			tp->t_finack = T_2ML;
329 			tp->tc_flags &= ~TC_WAITED_2_ML;
330 			if (j)
331 				nstate = TIME_WAIT;		/* 23 */
332 			goto done;
333 		}
334 		if (j) {
335 			if (tp->tc_flags&TC_WAITED_2_ML)
336 				if (rcv_empty(tp)) {
337 					sorwakeup(inp->inp_socket);
338 					nstate = CLOSED;	/* 15 */
339 				} else
340 					nstate = RCV_WAIT;	/* 18 */
341 			else
342 				nstate = TIME_WAIT;
343 			goto done;
344 		}
345 		goto input;
346 
347 	case LAST_ACK:
348 		if (ack_fin(tp, n)) {
349 			if (rcv_empty(tp)) {		/* 16 */
350 				sorwakeup(inp->inp_socket);
351 				nstate = CLOSED;
352 			} else
353 				nstate = RCV_WAIT;		/* 19 */
354 			goto done;
355 		}
356 		if (thflags&TH_FIN) {
357 			(void) tcp_sndctl(tp);			/* 31 */
358 			goto done;
359 		}
360 		goto input;
361 
362 	case RCV_WAIT:
363 		if ((thflags&TH_FIN) && (thflags&TH_ACK) &&
364 		    n->ti_ackno <= tp->seq_fin) {
365 			tcp_ctldat(tp, n, 0);
366 			tp->t_finack = T_2ML;
367 			tp->tc_flags &= ~TC_WAITED_2_ML;	/* 30 */
368 		}
369 		goto done;
370 	}
371 	panic("tcp_input");
372 done:
373 
374 	/*
375 	 * Done with state*input specific processing.
376 	 * Form trace records, free input if not needed,
377 	 * and enter new state.
378 	 */
379 #ifdef TCPDEBUG
380 	if (tdb.td_tod)
381 		tdb_stuff(&tdb, nstate);
382 #endif
383 	switch (nstate) {
384 
385 	case EFAILEC:
386 		m_freem(mp);
387 		return;
388 
389 	default:
390 		tp->t_state = nstate;
391 		/* fall into ... */
392 
393 	case CLOSED:
394 		/* IF CLOSED CANT LOOK AT tc_flags */
395 		if ((tp->tc_flags&TC_NET_KEEP) == 0)
396 			/* inline expansion of m_freem */
397 			while (mp) {
398 				MFREE(mp, m);
399 				mp = m;
400 			}
401 		return;
402 	}
403 	/* NOTREACHED */
404 
405 	/*
406 	 * Unwanted packed; free everything
407 	 * but the header and return an rst.
408 	 */
409 notwanted:
410 	m_freem(mp->m_next);
411 	mp->m_next = NULL;
412 	mp->m_len = sizeof(struct tcpiphdr);
413 #define xchg(a,b) j=a; a=b; b=j
414 	xchg(n->ti_dst.s_addr, n->ti_src.s_addr);
415 	xchg(n->ti_dport, n->ti_sport);
416 #undef xchg
417 	if (thflags&TH_ACK)
418 		n->ti_seq = n->ti_ackno;
419 	else {
420 		n->ti_ackno = htonl((unsigned)(ntohl(n->ti_seq) + tlen-hlen));
421 		n->ti_seq = 0;
422 	}
423 	n->ti_flags = ((thflags & TH_ACK) ? 0 : TH_ACK) | TH_RST;
424 	n->ti_len = htons(TCPSIZE);
425 	n->ti_off = 5;
426 	n->ti_sum = inet_cksum(mp, sizeof(struct tcpiphdr));
427 	((struct ip *)n)->ip_len = sizeof(struct tcpiphdr);
428 	ip_output(mp);
429 	netstat.t_badsegs++;
430 }
431 
432 tcp_ctldat(tp, n0, dataok)
433 	register struct tcpcb *tp;
434 	struct tcpiphdr *n0;
435 	int dataok;
436 {
437 	register struct tcpiphdr *n = n0;
438 	register int thflags = n->ti_flags;
439 	struct socket *so = tp->t_inpcb->inp_socket;
440 	seq_t past = n->ti_seq + n->ti_len;
441 	seq_t urgent;
442 	int sent;
443 COUNT(TCP_CTLDAT);
444 
445 	if (thflags & TH_URG)
446 		urgent = n->ti_seq + n->ti_urp;
447 	tp->tc_flags &= ~(TC_ACK_DUE|TC_NEW_WINDOW);
448 /* syn */
449 	if ((tp->tc_flags&TC_SYN_RCVD) == 0 && (thflags&TH_SYN)) {
450 		tp->irs = n->ti_seq;
451 		tp->rcv_nxt = n->ti_seq + 1;
452 		tp->snd_wl = tp->rcv_urp = tp->irs;
453 		tp->tc_flags |= (TC_SYN_RCVD|TC_ACK_DUE);
454 	}
455 /* ack */
456 	if ((thflags&TH_ACK) && (tp->tc_flags&TC_SYN_RCVD) &&
457 	    n->ti_ackno > tp->snd_una) {
458 		/*
459 		 * Reflect newly acknowledged data.
460 		 */
461 		tp->snd_una = n->ti_ackno;
462 		if (tp->snd_una > tp->snd_nxt)
463 			tp->snd_nxt = tp->snd_una;
464 
465 		/*
466 		 * If timed msg acked, update retransmit time value.
467 		 */
468 		if ((tp->tc_flags&TC_SYN_ACKED) &&
469 		    tp->snd_una > tp->t_xmt_val) {
470 			/* NEED SMOOTHING HERE */
471 			tp->t_xmtime = (tp->t_xmt != 0 ? tp->t_xmt : T_REXMT);
472 			if (tp->t_xmtime > T_REMAX)
473 				tp->t_xmtime = T_REMAX;
474 		}
475 
476 		/*
477 		 * Remove acked data from send buf
478 		 */
479 		sbdrop(&so->so_snd, (int)(tp->snd_una - tp->snd_off));
480 		tp->snd_off = tp->snd_una;
481 		if ((tp->tc_flags&TC_SYN_ACKED) == 0 &&
482 		    (tp->snd_una > tp->iss)) {
483 			tp->tc_flags |= TC_SYN_ACKED;
484 			tp->t_init = 0;
485 		}
486 		if (tp->seq_fin != tp->iss && tp->snd_una > tp->seq_fin)
487 			tp->tc_flags &= ~TC_SND_FIN;
488 		tp->t_rexmt = 0;
489 		tp->t_rexmttl = 0;
490 		tp->tc_flags |= TC_CANCELLED;
491 		sowwakeup(tp->t_inpcb->inp_socket);
492 	}
493 /* win */
494 	if ((tp->tc_flags & TC_SYN_RCVD) && n->ti_seq >= tp->snd_wl) {
495 		tp->snd_wl = n->ti_seq;
496 		tp->snd_wnd = n->ti_win;
497 		tp->tc_flags |= TC_NEW_WINDOW;
498 		tp->t_persist = 0;
499 	}
500 /* text */
501 	if (dataok && n->ti_len) {
502 		register struct tcpiphdr *q;
503 		int overage;
504 
505 /* eol */
506 		if ((thflags&TH_EOL)) {
507 			register struct mbuf *m;
508 			for (m = dtom(n); m->m_next; m = m->m_next)
509 				;
510 			m->m_act = (struct mbuf *)(mtod(m, caddr_t) - 1);
511 		}
512 
513 /* text */
514 		/*
515 		 * Discard duplicate data already passed to user.
516 		 */
517 		if (SEQ_LT(n->ti_seq, tp->rcv_nxt)) {
518 			register int i = tp->rcv_nxt - n->ti_seq;
519 			if (i >= n->ti_len)
520 				goto notext;
521 			n->ti_seq += i;
522 			n->ti_len -= i;
523 			m_adj(dtom(n), i);
524 		}
525 
526 		/*
527 		 * Find a segment which begins after this one does.
528 		 */
529 		for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
530 		    q = (struct tcpiphdr *)q->ti_next)
531 			if (SEQ_GT(q->ti_seq, n->ti_seq))
532 				break;
533 
534 		/*
535 		 * If there is a preceding segment, it may provide some of
536 		 * our data already.  If so, drop the data from the incoming
537 		 * segment.  If it provides all of our data, drop us.
538 		 */
539 		if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
540 			register int i;
541 			q = (struct tcpiphdr *)(q->ti_prev);
542 			/* conversion to int (in i) handles seq wraparound */
543 			i = q->ti_seq + q->ti_len - n->ti_seq;
544 			if (i > 0) {
545 				if (i >= n->ti_len)
546 					goto notext;
547 						/* w/o setting TC_NET_KEEP */
548 				m_adj(dtom(tp), i);
549 				n->ti_len -= i;
550 				n->ti_seq += i;
551 			}
552 			q = (struct tcpiphdr *)(q->ti_next);
553 		}
554 
555 		/*
556 		 * While we overlap succeeding segments trim them or,
557 		 * if they are completely covered, dequeue them.
558 		 */
559 		while (q != (struct tcpiphdr *)tp &&
560 		    SEQ_GT(n->ti_seq + n->ti_len, q->ti_seq)) {
561 			register int i = (n->ti_seq + n->ti_len) - q->ti_seq;
562 			if (i < q->ti_len) {
563 				q->ti_len -= i;
564 				m_adj(dtom(q), i);
565 				break;
566 			}
567 			q = (struct tcpiphdr *)q->ti_next;
568 			m_freem(dtom(q->ti_prev));
569 			remque(q->ti_prev);
570 		}
571 
572 		/*
573 		 * Stick new segment in its place.
574 		 */
575 		insque(n, q->ti_prev);
576 		tp->seqcnt += n->ti_len;
577 
578 		/*
579 		 * Calculate available space and discard segments for
580 		 * which there is too much.
581 		 */
582 		overage =
583 		    (so->so_rcv.sb_cc /*XXX+tp->rcv_seqcnt*/) - so->so_rcv.sb_hiwat;
584 		if (overage > 0) {
585 			q = tp->seg_prev;
586 			for (;;) {
587 				register int i = MIN(q->ti_len, overage);
588 				overage -= i;
589 				q->ti_len -= i;
590 				m_adj(dtom(q), -i);
591 				if (q->ti_len)
592 					break;
593 				if (q == n)
594 					panic("tcp_text dropall");
595 				q = (struct tcpiphdr *)q->ti_prev;
596 				remque(q->ti_next);
597 			}
598 		}
599 
600 		/*
601 		 * Advance rcv_next through newly completed sequence space.
602 		 */
603 		while (n->ti_seq == tp->rcv_nxt) {
604 			tp->rcv_nxt += n->ti_len;
605 			n = (struct tcpiphdr *)n->ti_next;
606 			if (n == (struct tcpiphdr *)tp)
607 				break;
608 		}
609 /* urg */
610 		if (thflags&TH_URG) {
611 			/* ... */
612 			if (SEQ_GT(urgent, tp->rcv_urp))
613 				tp->rcv_urp = urgent;
614 		}
615 		tp->tc_flags |= (TC_ACK_DUE|TC_NET_KEEP);
616 	}
617 notext:
618 /* fin */
619 	if ((thflags&TH_FIN) && past == tp->rcv_nxt) {
620 		if ((tp->tc_flags&TC_FIN_RCVD) == 0) {
621 			tp->tc_flags |= TC_FIN_RCVD;
622 			sorwakeup(so);
623 			tp->rcv_nxt++;
624 		}
625 		tp->tc_flags |= TC_ACK_DUE;
626 	}
627 /* respond */
628 	sent = 0;
629 	if (tp->tc_flags&TC_ACK_DUE)
630 		sent = tcp_sndctl(tp);
631 	else if ((tp->tc_flags&TC_NEW_WINDOW))
632 		if (tp->snd_nxt <= tp->snd_off + so->so_snd.sb_cc ||
633 		    (tp->tc_flags&TC_SND_FIN))
634 			sent = tcp_send(tp);
635 
636 /* set for retrans */
637 	if (!sent && tp->snd_una < tp->snd_nxt &&
638 	    (tp->tc_flags&TC_CANCELLED)) {
639 		tp->t_rexmt = tp->t_xmtime;
640 		tp->t_rexmttl = T_REXMTTL;
641 		tp->t_rexmt_val = tp->t_rtl_val = tp->snd_lst;
642 		tp->tc_flags &= ~TC_CANCELLED;
643 	}
644 /* present data to user */
645 	if ((tp->tc_flags&TC_SYN_ACKED) == 0)
646 		return;
647 	n = tp->seg_next;
648 	while (n != (struct tcpiphdr *)tp && n->ti_seq < tp->rcv_nxt) {
649 		remque(n);
650 		sbappend(&so->so_rcv, dtom(n));
651 		tp->seqcnt -= n->ti_len;
652 		if (tp->seqcnt < 0)
653 			panic("tcp_input present");
654 		n = (struct tcpiphdr *)n->ti_next;
655 	}
656 	sorwakeup(so);
657 }
658