xref: /csrg-svn/sys/netinet/tcp_usrreq.c (revision 4809)
1 /* tcp_usrreq.c 1.26 81/11/08 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/socket.h"
7 #include "../h/socketvar.h"
8 #include "../h/protosw.h"
9 #include "../net/inet.h"
10 #include "../net/inet_systm.h"
11 #include "../net/imp.h"
12 #include "../net/ip.h"
13 #include "../net/tcp.h"
14 #define TCPFSTAB
15 #ifdef TCPDEBUG
16 #define TCPSTATES
17 #endif
18 #include "../net/tcp_fsm.h"
19 #include "../net/tcp_var.h"
20 #include "/usr/include/errno.h"
21 
22 struct	tcb *tcp_attach();
23 
24 /*
25  * Tcp initialization
26  */
27 tcp_init()
28 {
29 
30 	tcp_iss = 1;		/* wrong */
31 	tcb.tcb_next = tcb.tcb_prev = (struct tcb *)&tcb;
32 }
33 
34 /*
35  * Tcp finite state machine entries for timer and user generated
36  * requests.  These routines raise the ipl to that of the network
37  * to prevent reentry.  In particular, this requires that the software
38  * clock interrupt have lower priority than the network so that
39  * we can enter the network from timeout routines without improperly
40  * nesting the interrupt stack.
41  */
42 
43 /*
44  * Tcp protocol timeout routine called every 500 ms.
45  * Updates the timers in all active tcb's and
46  * causes finite state machine actions if timers expire.
47  */
48 tcp_slowtimo()
49 {
50 	register struct tcb *tp;
51 	int s = splnet();
52 	register short *tmp;
53 	register int i;
54 COUNT(TCP_TIMEO);
55 
56 	/*
57 	 * Search through tcb's and update active timers.
58 	 */
59 	tp = tcb.tcb_next;
60 	for (; tp != (struct tcb *)&tcb; tp = tp->tcb_hd.tcb_next) {
61 		tmp = &tp->t_init;
62 		for (i = 0; i < TNTIMERS; i++) {
63 			if (*tmp && --*tmp == 0)
64 				tcp_usrreq(tp->t_socket, PRU_SLOWTIMO, 0, i);
65 			tmp++;
66 		}
67 		tp->t_xmt++;
68 	}
69 	tcp_iss += ISSINCR/2;		/* increment iss */
70 	splx(s);
71 }
72 
73 /*
74  * Cancel all timers for tcp tp.
75  */
76 tcp_tcancel(tp)
77 	struct tcb *tp;
78 {
79 	register short *tmp = &tp->t_init;
80 	register int i;
81 
82 	for (i = 0; i < TNTIMERS; i++)
83 		*tmp++ = 0;
84 }
85 
86 /*
87  * Process a TCP user request for tcp tb.  If this is a send request
88  * then m is the mbuf chain of send data.  If this is a timer expiration
89  * (called from the software clock routine), then timertype tells which timer.
90  */
91 tcp_usrreq(so, req, m, addr)
92 	struct socket *so;
93 	int req;
94 	struct mbuf *m;
95 	caddr_t addr;
96 {
97 	register struct tcb *tp = (struct tcb *)so->so_pcb;
98 	int s = splnet();
99 	register int nstate;
100 #ifdef TCPDEBUG
101 	struct tcp_debug tdb;
102 #endif
103 	int error = 0;
104 COUNT(TCP_USRREQ);
105 
106 	if (tp) {
107 		nstate = tp->t_state;
108 		tp->tc_flags &= ~TC_NET_KEEP;
109 	} else
110 		if (req != PRU_ATTACH)
111 			return (ENOTCONN);
112 #ifdef KPROF
113 	acounts[nstate][req]++;
114 #endif
115 #ifdef TCPDEBUG
116 	if (tp && ((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug)) {
117 		tdb_setup(tp, (struct th *)0, req, &tdb);
118 		tdb.td_tim = timertype;
119 	} else
120 		tdb.td_tod = 0;
121 #endif
122 	switch (req) {
123 
124 	/*
125 	 * Attach a tcp control block to this socket.
126 	 * TCP is not multicast, so this is possible
127 	 * only if no connection currently exists.
128 	 */
129 	case PRU_ATTACH:
130 		if (tp)
131 			error = EISCONN;
132 		else
133 			tp = tcp_attach(so, &error);
134 		nstate = CLOSED;
135 		break;
136 
137 	/*
138 	 * Detach the TCP from this socket.  This
139 	 * is possible only if a connection currently exists.
140 	 */
141 	case PRU_DETACH:
142 		so->so_pcb = 0;
143 		break;
144 
145 	/*
146 	 * Form connection: send a SYN.
147 	 */
148 	case PRU_CONNECT:
149 		if (nstate != 0 && nstate != CLOSED)
150 			goto bad;
151 		tcp_sndctl(tp);
152 		nstate = SYN_SENT;
153 		break;
154 
155 	case PRU_DISCONNECT:
156 		so->so_pcb = 0;
157 		tcp_destroy(tp);
158 		break;
159 
160 	/*
161 	 * Declare no further transmissions.
162 	 * Can be generated by a user ioctl (half-close),
163 	 * or when higher level close occurs, if a close hasn't happened
164 	 * already.
165 	 */
166 	case PRU_SHUTDOWN:
167 		switch (nstate) {
168 
169 		/*
170 		 * If we are aborting out of a listener or a active
171 		 * connection which has not yet completed we can just
172 		 * delete the tcb.
173 		 */
174 		case LISTEN:
175 		case SYN_SENT:
176 			nstate = CLOSED;
177 			break;
178 
179 		/*
180 		 * If we have gotten as far as receiving a syn from
181 		 * our foreign peer, we must be sure to send a FIN.
182 		 * If we have gotten a FIN from the foreign peer already
183 		 * (CLOSE_WAIT state), then all that remains is to wait
184 		 * for his ack of the FIN (LAST_ACK state).  If we have
185 		 * not gotten a FIN from the foreign peer then we need
186 		 * to either:
187 		 *	1. rcv ack of our FIN (to FIN_W2) and then
188 		 *	   send an ACK (to TIME_WAIT) and timeout at 2*MSL.
189 		 * or	2. receive hist FIN (to CLOSING), send an ACK
190 		 *	   (to TIME_WAIT), and then timeout.
191 		 * In any case this starts with a transition to FIN_W1 here.
192 		 */
193 		case SYN_RCVD:
194 		case L_SYN_RCVD:
195 		case ESTAB:
196 		case CLOSE_WAIT:
197 			tp->tc_flags |= TC_SND_FIN;
198 			tcp_sndctl(tp);
199 			tp->tc_flags |= TC_USR_CLOSED;
200 			nstate = nstate != CLOSE_WAIT ? FIN_W1 : LAST_ACK;
201 			break;
202 
203 		/*
204 		 * In these states the user has already closed;
205 		 * trying to close again is an error.
206 		 */
207 		case FIN_W1:
208 		case FIN_W2:
209 		case TIME_WAIT:
210 		case CLOSING:
211 		case LAST_ACK:
212 		case RCV_WAIT:
213 			break;
214 
215 		default:
216 			goto bad;
217 		}
218 		break;
219 
220 	/*
221 	 * User notification of more window availability after
222 	 * reading out data.  This should not happen before a connection
223 	 * is established or after it is closed.
224 	 * If the foreign peer has closed and the local entity
225 	 * has not, inform him of the FIN (give end of file).
226 	 * If the local entity is in RCV_WAIT state (draining data
227 	 * out of the TCP buffers after foreign close) and there
228 	 * is no more data, institute a close.
229 	 */
230 	case PRU_RCVD:
231 		if (nstate < ESTAB || nstate == CLOSED)
232 			goto bad;
233 		tcp_sndwin(tp);
234 		if ((tp->tc_flags&TC_FIN_RCVD) &&
235 		    (tp->tc_flags&TC_USR_CLOSED) == 0 &&
236 		    rcv_empty(tp))
237 			tcp_error(tp, ESHUTDOWN);
238 		if (nstate == RCV_WAIT && rcv_empty(tp))
239 			nstate = CLOSED;
240 		break;
241 
242 	/*
243 	 * Send request on open connection.
244 	 * Should not happen if the connection is not yet established.
245 	 * Allowed only on ESTAB connection and after FIN from
246 	 * foreign peer.
247 	 */
248 	case PRU_SEND:
249 		switch (nstate) {
250 
251 		case ESTAB:
252 		case CLOSE_WAIT:
253 			nstate = tcp_usrsend(tp, m);
254 			break;
255 
256 		default:
257 			if (nstate < ESTAB)
258 				goto bad;
259 			m_freem(m);
260 			/* tcp_user(tp, UCLSERR); */
261 			break;
262 		}
263 		break;
264 
265 	/*
266 	 * User abort of connection.
267 	 * If a SYN has been received, but we have not exchanged FINs
268 	 * then we need to send an RST.  In any case we then
269 	 * enter closed state.
270 	 */
271 	case PRU_ABORT:
272 		if (nstate == 0 || nstate == CLOSED)
273 			break;
274 		switch (nstate) {
275 
276 		case 0:
277 		case CLOSED:
278 			break;
279 
280 		case SYN_RCVD:
281 		case ESTAB:
282 		case FIN_W1:
283 		case FIN_W2:
284 		case CLOSE_WAIT:
285 			tp->tc_flags |= TC_SND_RST;
286 			tcp_sndnull(tp);
287 			/* fall into ... */
288 
289 		default:
290 			nstate = CLOSED;
291 		}
292 		break;
293 
294 	/*
295 	 * Network down entry.  Discard the tcb and force
296 	 * the state to be closed, ungracefully.
297 	 */
298 	case PRU_CLEAR:
299 		if (nstate == 0 || nstate == CLOSED)
300 			break;
301 		nstate = CLOSED;
302 		break;
303 
304 	/*
305 	 * Ioctl on protocols.
306 	 */
307 	case PRU_CONTROL:
308 		break;
309 
310 	/*
311 	 * TCP Timer processing.
312 	 * Timers should expire only on open connections
313 	 * not in LISTEN state.
314 	 */
315 	case PRU_SLOWTIMO:
316 		switch (nstate) {
317 
318 		case 0:
319 		case CLOSED:
320 		case LISTEN:
321 			goto bad;
322 
323 		default:
324 			nstate = tcp_timers(tp, (int)addr);
325 		}
326 		break;
327 
328 	default:
329 		panic("tcp_usrreq");
330 	bad:
331 		printf("tcp: bad state: tcb=%x state=%d input=%d\n",
332 		    tp, tp->t_state, req);
333 		nstate = EFAILEC;
334 		break;
335 	}
336 #ifdef TCPDEBUG
337 	if (tdb.td_tod)
338 		tdb_stuff(&tdb, nstate);
339 #endif
340 	/* YECH */
341 	switch (nstate) {
342 
343 	case CLOSED:
344 	case SAME:
345 		break;
346 
347 	case EFAILEC:
348 		if (m)
349 			m_freem(dtom(m));
350 		break;
351 
352 	default:
353 		tp->t_state = nstate;
354 		break;
355 	}
356 	splx(s);
357 }
358 
/*
 * Placeholder entry point: takes no arguments and does nothing.
 */
tcp_sense()
{
}
363 
364 /*
365  * Open routine, called to initialize newly created tcb fields.
366  */
367 struct tcb *
368 tcp_attach(so)
369 	register struct socket *so;
370 {
371 	register struct tcb *tp;
372 COUNT(TCP_ATTACH);
373 
374 	/*
375 	 * Link in tcb queue and make
376 	 * initialize empty reassembly queue.
377 	 */
378 	tp->tcb_hd.tcb_next = tcb.tcb_next;
379 	tcb.tcb_next->tcb_hd.tcb_prev = tp;
380 	tp->tcb_hd.tcb_prev = (struct tcb *)&tcb;
381 	tcb.tcb_next = tp;
382 	tp->tcb_hd.seg_next = tp->tcb_hd.seg_prev = (struct th *)tp;
383 
384 	/*
385 	 * Initialize sequence numbers and
386 	 * round trip retransmit timer.
387 	 * (Other fields were init'd to zero when tcb allocated.)
388 	 */
389 	tp->t_xmtime = T_REXMT;
390 	tp->snd_end = tp->seq_fin = tp->snd_nxt = tp->snd_hi = tp->snd_una =
391 	    tp->iss = tcp_iss;
392 	tp->snd_off = tp->iss + 1;
393 	tcp_iss += (ISSINCR >> 1) + 1;
394 }
395 
396 /*
397  * Destroy a tcb.
398  */
399 tcp_detach(tp)
400 	register struct tcb *tp;
401 {
402 	register struct socket *so = tp->t_socket;
403 	register struct th *t;
404 	register struct mbuf *m;
405 COUNT(TCP_DETACH);
406 
407 	/*
408 	 * Remove from tcb queue and cancel timers.
409 	 */
410 	tp->tcb_hd.tcb_prev->tcb_hd.tcb_next = tp->tcb_hd.tcb_next;
411 	tp->tcb_hd.tcb_next->tcb_hd.tcb_prev = tp->tcb_hd.tcb_prev;
412 	tcp_tcancel(tp);
413 
414 	/*
415 	 * Discard all buffers.
416 	 */
417 	for (t = tp->tcb_hd.seg_next; t != (struct th *)tp; t = t->t_next)
418 		m_freem(dtom(t));
419 	if (so->so_rcv.sb_mb)
420 	    { m_freem(so->so_rcv.sb_mb); so->so_rcv.sb_mb = 0; }
421 	so->so_rcv.sb_cc = 0; so->so_rcv.sb_mbcnt = 0;
422 	if (so->so_snd.sb_mb)
423 	    { m_freem(so->so_snd.sb_mb); so->so_rcv.sb_mb = 0; }
424 	so->so_snd.sb_cc = 0; so->so_snd.sb_mbcnt = 0;
425 
426 	for (m = tp->seg_unack; m; m = m->m_act)
427 		m_freem(m);
428 	tp->seg_unack = 0;
429 
430 	/*
431 	 * Free routing table entry.
432 	 */
433 	if (tp->t_host) {
434 		h_free(tp->t_host);
435 		tp->t_host = 0;
436 	}
437 
438 	/*
439 	 * Free tcp send template, the tcb itself,
440 	 * and the space we had reserved in the meory pool.
441 	 */
442 	if (tp->t_template) {
443 		m_free(dtom(tp->t_template));
444 		tp->t_template = 0;
445 	}
446 	wmemfree((caddr_t)tp, 1024);
447 	m_release(so->so_rcv.sb_hiwat + so->so_snd.sb_hiwat + 2 * MSIZE);
448 }
449 
450 /*
451  * Send data queue headed by m0 into the protocol.
452  */
453 tcp_usrsend(tp, m0)
454 	register struct tcb *tp;
455 	struct mbuf *m0;
456 {
457 	register struct mbuf *m, *n;
458 	register struct socket *so = tp->t_socket;
459 	register off;
460 	seq_t last;
461 COUNT(TCP_USRSEND);
462 
463 	last = tp->snd_off;
464 	for (m = n = m0; m != NULL; m = m->m_next) {
465 		so->so_snd.sb_mbcnt++;
466 		if (m->m_off > MMAXOFF)
467 			so->so_snd.sb_mbcnt += NMBPG;
468 		last += m->m_len;
469 	}
470 	if ((m = so->so_snd.sb_mb) == NULL)
471 		so->so_snd.sb_mb = n;
472 	else {
473 		while (m->m_next != NULL) {
474 			m = m->m_next;
475 			last += m->m_len;
476 		}
477 		if (m->m_off <= MMAXOFF) {
478 			last += m->m_len;
479 			off = m->m_off + m->m_len;
480 			while (n && n->m_off <= MMAXOFF &&
481 			    (MMAXOFF - off) >= n->m_len) {
482 				bcopy((caddr_t)((int)n + n->m_off),
483 				      (caddr_t)((int)m + off), n->m_len);
484 				m->m_len += n->m_len;
485 				off += n->m_len;
486 				so->so_snd.sb_mbcnt--;
487 				n = m_free(n);
488 			}
489 		}
490 		m->m_next = n;
491 	}
492 	if (tp->t_options & TO_EOL)
493 		tp->snd_end = last;
494 	if (tp->t_options & TO_URG) {
495 		tp->snd_urp = last+1;
496 		tp->tc_flags |= TC_SND_URG;
497 	}
498 	tcp_send(tp);
499 	return (SAME);
500 }
501 
502 /*
503  * TCP timer went off processing.
504  */
505 tcp_timers(tp, timertype)
506 	register struct tcb *tp;
507 	int timertype;
508 {
509 
510 COUNT(TCP_TIMERS);
511 	switch (timertype) {
512 
513 	case TINIT:		/* initialization timer */
514 		if ((tp->tc_flags&TC_SYN_ACKED) == 0) {		/* 35 */
515 /* XXX */		/* tcp_close(tp, UINTIMO); */
516 			return (CLOSED);
517 		}
518 		return (SAME);
519 
520 	case TFINACK:		/* fin-ack timer */
521 		switch (tp->t_state) {
522 
523 		case TIME_WAIT:
524 			/*
525 			 * We can be sure our ACK of foreign FIN was rcvd,
526 			 * and can close if no data left for user.
527 			 */
528 			if (rcv_empty(tp)) {
529 /* XXX */			/* tcp_close(tp, UCLOSED); */	/* 14 */
530 				return (CLOSED);
531 			}
532 			return (RCV_WAIT);			/* 17 */
533 
534 		case CLOSING:
535 			tp->tc_flags |= TC_WAITED_2_ML;
536 			return (SAME);
537 
538 		default:
539 			return (SAME);
540 		}
541 
542 	case TREXMT:		/* retransmission timer */
543 		if (tp->t_rexmt_val > tp->snd_una) {	 	/* 34 */
544 			/*
545 			 * Set so for a retransmission, increase rexmt time
546 			 * in case of multiple retransmissions.
547 			 */
548 			tp->snd_nxt = tp->snd_una;
549 			tp->tc_flags |= TC_REXMT;
550 			tp->t_xmtime = tp->t_xmtime << 1;
551 			if (tp->t_xmtime > T_REMAX)
552 				tp->t_xmtime = T_REMAX;
553 			tcp_send(tp);
554 		}
555 		return (SAME);
556 
557 	case TREXMTTL:		/* retransmit too long */
558 		if (tp->t_rtl_val > tp->snd_una)		/* 36 */
559 /* XXX */		/* to_user(tp->t_socket, URXTIMO); */;
560 		/*
561 		 * If user has already closed, abort the connection.
562 		 */
563 		if (tp->tc_flags & TC_USR_CLOSED) {
564 /* XXX */		/* tcp_close(tp, URXTIMO); */
565 			return (CLOSED);
566 		}
567 		return (SAME);
568 
569 	case TPERSIST:		/* persist timer */
570 		/*
571 		 * Force a byte send through closed window.
572 		 */
573 		tp->tc_flags |= TC_FORCE_ONE;
574 		tcp_send(tp);
575 		return (SAME);
576 	}
577 	panic("tcp_timers");
578 }
579 
580 tcp_error(so, errno)
581 	struct socket *so;
582 	int errno;
583 {
584 COUNT(TO_USER);
585 
586 	so->so_error = errno;
587 	sowakeup(so);
588 }
589 
590 #ifdef TCPDEBUG
591 /*
592  * TCP debugging utility subroutines.
593  * THE NAMES OF THE FIELDS USED BY THESE ROUTINES ARE STUPID.
594  */
595 tdb_setup(tp, n, input, tdp)
596 	struct tcb *tp;
597 	register struct th *n;
598 	int input;
599 	register struct tcp_debug *tdp;
600 {
601 
602 COUNT(TDB_SETUP);
603 	tdp->td_tod = time;
604 	tdp->td_tcb = tp;
605 	tdp->td_old = tp->t_state;
606 	tdp->td_inp = input;
607 	tdp->td_tim = 0;
608 	tdp->td_new = -1;
609 	if (n) {
610 		tdp->td_sno = n->t_seq;
611 		tdp->td_ano = n->t_ackno;
612 		tdp->td_wno = n->t_win;
613 		tdp->td_lno = n->t_len;
614 		tdp->td_flg = n->th_flags;
615 	} else
616 		tdp->td_sno = tdp->td_ano = tdp->td_wno = tdp->td_lno =
617 		    tdp->td_flg = 0;
618 }
619 
620 tdb_stuff(tdp, nstate)
621 	struct tcp_debug *tdp;
622 	int nstate;
623 {
624 COUNT(TDB_STUFF);
625 
626 	tdp->td_new = nstate;
627 	tcp_debug[tdbx++ % TDBSIZE] = *tdp;
628 	if (tcpconsdebug & 2)
629 		tcp_prt(tdp);
630 }
631 
632 tcp_prt(tdp)
633 	register struct tcp_debug *tdp;
634 {
635 COUNT(TCP_PRT);
636 
637 	printf("%x ", ((int)tdp->td_tcb)&0xffffff);
638 	if (tdp->td_inp == INSEND) {
639 		printf("SEND #%x", tdp->td_sno);
640 		tdp->td_lno = ntohs(tdp->td_lno);
641 		tdp->td_wno = ntohs(tdp->td_wno);
642 	} else {
643 		if (tdp->td_inp == INRECV)
644 			printf("RCV #%x ", tdp->td_sno);
645 		printf("%s.%s",
646 		    tcpstates[tdp->td_old], tcpinputs[tdp->td_inp]);
647 		if (tdp->td_inp == ISTIMER)
648 			printf("(%s)", tcptimers[tdp->td_tim]);
649 		printf(" -> %s",
650 		    tcpstates[(tdp->td_new > 0) ? tdp->td_new : tdp->td_old]);
651 		if (tdp->td_new == -1)
652 			printf(" (FAILED)");
653 	}
654 	/* GROSS... DEPENDS ON SIGN EXTENSION OF CHARACTERS */
655 	if (tdp->td_lno)
656 		printf(" len=%d", tdp->td_lno);
657 	if (tdp->td_wno)
658 		printf(" win=%d", tdp->td_wno);
659 	if (tdp->td_flg & TH_FIN) printf(" FIN");
660 	if (tdp->td_flg & TH_SYN) printf(" SYN");
661 	if (tdp->td_flg & TH_RST) printf(" RST");
662 	if (tdp->td_flg & TH_EOL) printf(" EOL");
663 	if (tdp->td_flg & TH_ACK)  printf(" ACK %x", tdp->td_ano);
664 	if (tdp->td_flg & TH_URG) printf(" URG");
665 	printf("\n");
666 }
667 #endif
668