125202Skarels #ifdef RCSIDENT
225202Skarels static char rcsident[] = "$Header: tcp_input.c,v 1.25 85/07/31 09:33:47 walsh Exp $";
325202Skarels #endif
425202Skarels
525202Skarels #include "../h/param.h"
625202Skarels #include "../h/dir.h"
725202Skarels #include "../h/user.h"
825202Skarels #include "../h/kernel.h"
925202Skarels #include "../h/inode.h"
1025202Skarels #include "../h/mbuf.h"
1125202Skarels #include "../h/socket.h"
1225202Skarels #include "../h/socketvar.h"
1325202Skarels #include "../h/syslog.h"
1425202Skarels
1525202Skarels #include "../net/if.h"
1625202Skarels #include "../net/route.h"
1725202Skarels
1825202Skarels #include "../bbnnet/in.h"
1925202Skarels #include "../bbnnet/net.h"
2025202Skarels #include "../bbnnet/in_pcb.h"
2125202Skarels #include "../bbnnet/in_var.h"
2225202Skarels #include "../bbnnet/fsm.h"
2325202Skarels #include "../bbnnet/tcp.h"
2425202Skarels #include "../bbnnet/seq.h"
2525202Skarels #include "../bbnnet/ip.h"
2625202Skarels #include "../bbnnet/fsmdef.h"
2725202Skarels #include "../bbnnet/macros.h"
2825202Skarels #include "../bbnnet/nopcb.h"
2925202Skarels #ifdef HMPTRAPS
3025202Skarels #include "../bbnnet/hmp_traps.h"
3125202Skarels #endif
3225202Skarels
3325202Skarels #ifdef HMPTRAPS
3425202Skarels #define HMP_TRAP(a,b,c) hmp_trap(a,b,c)
3525202Skarels #else
3625202Skarels #define HMP_TRAP(a,b,c)
3725202Skarels #endif
3825202Skarels
3925202Skarels extern int nosum;
4025202Skarels extern struct inpcb tcp;
4125202Skarels
/*
 * net preproc (66,67,68,69,70,71,72,73,74,75,76)
 *
 * Macro form of former function netprepr().  Screens an incoming segment
 * against the state of its connection before the segment is given to the
 * state machine, and does the RST processing.
 *
 *	tp	valid tcpcb
 *	n	valid th
 *	inp	valid inpcb ( == tp->t_in_pcb )
 *	retval	lvalue that receives the verdict:
 *		  SAME		segment is acceptable
 *		  CLOSED	t_close() was called on the connection
 *		  -1		unacceptable, caller should eat the segment
 *
 * Every argument is evaluated several times, so pass only simple
 * expressions without side effects.  The expansion is a brace-enclosed
 * block that declares its own locals (sorcv, xend).
 *
 * NOTE(review): when sbspace() is 0, the window test in the default case
 * also rejects a zero-length segment whose sequence number equals rcv_nxt.
 * RFC 793 lists that segment as acceptable -- confirm this is intended.
 */
#define NETPREPR(tp, n, inp, retval) \
{ \
	(retval) = (-1);	/* assume bad */ \
	/* tell caller to eat segment (unacceptable) */ \
 \
	switch ((tp)->t_state) { \
	case LISTEN: \
		/* Ignore resets, ACKs cause resets, must have SYN. */ \
		if ((n)->t_flags & T_RST) \
			break; \
		else if ((n)->t_flags & T_ACK) \
			send_rst((tp), (n)); \
		else if ((n)->t_flags & T_SYN) \
			(retval) = SAME; \
		break; \
 \
	case SYN_SENT: \
		/* Bad ACKs cause resets, good resets close, must have SYN. */ \
		if (((n)->t_flags & T_ACK) && \
		    (SEQ_GEQ((tp)->iss, (n)->t_ackno) || \
		     SEQ_GT((n)->t_ackno, (tp)->snd_hi))) \
			send_rst((tp), (n)); \
		else if ((n)->t_flags & T_RST) { \
			if ((n)->t_flags & T_ACK) { \
				t_close((tp), ECONNREFUSED); \
				(retval) = CLOSED; \
			} \
		} else if ((n)->t_flags & T_SYN) \
			(retval) = SAME; \
		break; \
 \
	case 0: \
		/* \
		 * after bind, but before we've had a chance to \
		 * listen or connect \
		 */ \
		break; \
 \
	default: \
	    { \
		struct sockbuf *sorcv; \
		sequence xend;	/* sequence number of last octet in segment */ \
 \
		/* \
		 * Part of packet must fall in window. \
		 * This allows for segments that are partially retransmits \
		 * and partially new. \
		 * otherwise just ACK and drop. \
		 */ \
		sorcv = &(inp)->inp_socket->so_rcv; \
		xend = (n)->t_seq; \
		if ((n)->t_len) \
			/* remember, could be an ACK-only packet */ \
			xend += (n)->t_len - 1; \
		if ((n)->t_flags & T_FIN) \
			xend++;	/* in case FIN + rxmitted data (TOPS-20) */ \
		if (SEQ_LT(xend, (tp)->rcv_nxt) || \
		    SEQ_GEQ((n)->t_seq, (tp)->rcv_nxt + sbspace(sorcv))) { \
			(tp)->t_preproc++; \
			send_tcp((tp), TCP_CTL); \
			HMP_TRAP(T_TCP_WINDOW, (caddr_t)0,0); \
		/* \
		 * Due to 4.2BSD net architecture, don't need to send \
		 * L_SYN_RCVD socket back to LISTEN on reset since server \
		 * socket and communication paths are separate. \
		 */ \
		} else if ((n)->t_flags & T_RST) { \
			t_close((tp), ENETRESET); \
			(retval) = CLOSED; \
		/* No SYNs allowed unless *SYN_RCVD */ \
		} else if (((n)->t_flags & T_SYN) && ((tp)->t_state >= ESTAB)) { \
			send_rst((tp), (n)); \
			t_close((tp), ENETRESET); \
			(retval) = CLOSED; \
		/* \
		 * Must have good ACK.  Bad ACKs cause resets only in \
		 * SYN_RCVD states.  In other states, this may be a slow pkt? \
		 * A segment without ACK leaves retval at -1. \
		 */ \
		} else if ((n)->t_flags & T_ACK) { \
			if (SEQ_GT((tp)->snd_una, (n)->t_ackno) || \
			    SEQ_GT((n)->t_ackno, (tp)->snd_hi)) { \
				if ((tp)->t_state == SYN_RCVD || \
				    (tp)->t_state == L_SYN_RCVD) \
					send_rst((tp), (n)); \
			} else { \
				/* \
				 * Acceptable segment: \
				 * Reset no activity timer on established and \
				 * closing connections. \
				 */ \
				if ((tp)->t_state >= ESTAB) \
					(tp)->t_timers[TNOACT] = (tp)->t_noact; \
				(retval) = SAME; \
			} \
		} \
	    } \
	} \
}
14125202Skarels
14225202Skarels
14325202Skarels int tcp_net_keep;
14425202Skarels
14525202Skarels /*
14625202Skarels * This is the scheduler for the tcp machine. It is called
14725202Skarels * from the lower network levels, either directly from the
14825202Skarels * internet level, in case of input from the network; or
14925202Skarels * indirectly from netmain, in case of user or timer events
15025202Skarels * which awaken the main loop.
15125202Skarels */
tcp_input(mp,fragsize)15225202Skarels tcp_input(mp, fragsize)
15325202Skarels register struct mbuf *mp;
15425202Skarels int fragsize;
15525202Skarels {
15625202Skarels register struct th *tp;
15725202Skarels register int hlen;
15825202Skarels register struct tcpcb *t;
15925202Skarels register struct inpcb *inp;
16025202Skarels struct mbuf *m;
16125202Skarels int i, tlen;
16225202Skarels struct work w;
16325202Skarels u_short cks;
16425202Skarels
16525202Skarels tcpstat.t_total ++;
16625202Skarels
16725202Skarels /*
16825202Skarels * see ip_input()
16925202Skarels */
17025202Skarels if ((mp->m_off > MMAXOFF) || (mp->m_len < sizeof(struct th)))
17125202Skarels {
17225202Skarels if ((mp = m_pullup(mp, sizeof(struct th))) == NULL)
17325202Skarels {
17425202Skarels tcpstat.t_tooshort ++;
17525202Skarels return;
17625202Skarels }
17725202Skarels }
17825202Skarels
17925202Skarels /* set up needed info from ip header, note that beginning
18025202Skarels of tcp header struct overlaps ip header. ip options
18125202Skarels have been removed by ip level option processing */
18225202Skarels
18325202Skarels tp = mtod(mp, struct th *);
18425202Skarels
18525202Skarels /* make sure header does not overflow mbuf */
18625202Skarels
18725202Skarels hlen = tp->t_off << TCP_OFFSHIFT;
18825202Skarels if (hlen < TCPSIZE)
18925202Skarels {
19025202Skarels ip_log ((struct ip *) tp, "tcp t_off too small");
19125202Skarels netlog(mp);
19225202Skarels return;
19325202Skarels }
19425202Skarels if (hlen > mp->m_len)
19525202Skarels {
19625202Skarels if ((mp = m_pullup(mp, hlen)) == NULL)
19725202Skarels {
19825202Skarels ip_log((struct ip *) tp, "tcp header overflow");
19925202Skarels #ifdef HMPTRAPS
20025202Skarels /* hmp_trap(T_TCP_OVFLO, (caddr_t)0, 0); */
20125202Skarels #else
20225202Skarels /* netlog(mp); */
20325202Skarels #endif
20425202Skarels return;
20525202Skarels }
20625202Skarels tp = mtod(mp, struct th *);
20725202Skarels }
20825202Skarels
20925202Skarels tlen = ((struct ip *)tp)->ip_len;
21025202Skarels tp->t_len = htons((u_short)tlen);
21125202Skarels tp->t_next = NULL;
21225202Skarels tp->t_prev = NULL;
21325202Skarels tp->t_x1 = 0;
21425202Skarels
21525202Skarels /*
21625202Skarels * do checksum calculation, drop seg if bad
21725202Skarels */
21825202Skarels i = (u_short)tp->t_sum;
21925202Skarels tp->t_sum = 0;
22025202Skarels if (i != (cks = (u_short)in_cksum(mp, tlen + sizeof(struct ip))))
22125202Skarels {
22225202Skarels tcpstat.t_badsum++;
22325202Skarels if (! nosum)
22425202Skarels {
22525202Skarels #ifdef HMPTRAPS
22625202Skarels /* hmp_trap(T_TCP_CKSUM, (caddr_t)0,0); */
22725202Skarels #endif
22825202Skarels inet_cksum_err ("tcp", (struct ip *) tp, (u_long) i, (u_long) cks);
22925202Skarels netlog(mp);
23025202Skarels return;
23125202Skarels }
23225202Skarels }
23325202Skarels
23425202Skarels /* find a tcb for incoming message */
23525202Skarels inp = in_pcblookup(&tcp, tp->t_s.s_addr, tp->t_src,
23625202Skarels tp->t_d.s_addr, tp->t_dst, TRUE);
23725202Skarels
23825202Skarels if ((inp != NULL) && ((t = (struct tcpcb *)inp->inp_ppcb) != NULL))
23925202Skarels {
24025202Skarels /* found a tcp for message */
24125202Skarels /* byte swap header */
24225202Skarels
24325202Skarels if ((int)(tp->t_len = tlen - hlen) < 0)
24425202Skarels {
24525202Skarels ip_log((struct ip *) tp, "tcp header length");
24625202Skarels #ifdef HMPTRAPS
24725202Skarels /* hmp_trap(T_TCP_HLEN, (caddr_t)0,0); */
24825202Skarels #else
24925202Skarels netlog(mp);
25025202Skarels #endif
25125202Skarels return;
25225202Skarels }
25325202Skarels tp->t_seq = ntohl(tp->t_seq);
25425202Skarels tp->t_ackno = ntohl(tp->t_ackno);
25525202Skarels tp->t_win = ntohs((u_short)tp->t_win);
25625202Skarels tp->t_urp = ntohs((u_short)tp->t_urp);
25725202Skarels
25825202Skarels /* record the max fragment size */
25925202Skarels
26025202Skarels t->t_maxfrag = MAX(t->t_maxfrag, fragsize);
26125202Skarels
26225202Skarels /* do TCP option processing */
26325202Skarels
26425202Skarels if (hlen > TCPSIZE)
26525202Skarels tcp_opt(t, tp, hlen);
26625202Skarels
26725202Skarels /* check seg seq #, do RST processing */
26825202Skarels
26925202Skarels NETPREPR(t, tp, inp, i);
27025202Skarels if (i != SAME)
27125202Skarels {
27225202Skarels /* segment failed preprocessing. Drop it and
27325202Skarels * possibly enter new state. For now, always
27425202Skarels * returns SAME/-1/CLOSED
27525202Skarels */
27625202Skarels m_freem(mp);
27725202Skarels /*
27825202Skarels if ((i != -1) && (i != CLOSED))
27925202Skarels t->t_state = i;
28025202Skarels */
28125202Skarels }
28225202Skarels else
28325202Skarels {
28425202Skarels if (sbspace(&inp->inp_socket->so_rcv) <= 0 &&
28525202Skarels tp->t_len != 0)
28625202Skarels {
28725202Skarels /*
28825202Skarels * The user's receive q is full. Either the
28925202Skarels * remote TCP is not paying attention to the
29025202Skarels * window, or this is a persistence packet.
29125202Skarels *
29225202Skarels * The first reason was once common with
29325202Skarels * TOPS-20. Let's conserve network resources
29425202Skarels * by holding onto the packet in the unack q.
29525202Skarels * Place it at the end of the list.
29625202Skarels */
29725202Skarels mp->m_act = NULL;
29825202Skarels if ((m = t->t_rcv_unack) != NULL)
29925202Skarels {
30025202Skarels while (m->m_act != NULL)
30125202Skarels m = m->m_act;
30225202Skarels m->m_act = mp;
30325202Skarels }
30425202Skarels else
30525202Skarels t->t_rcv_unack = mp;
30625202Skarels
30725202Skarels /*
30825202Skarels * ACK if it was a window probe, just in case
30925202Skarels * they have a TNOACT timer running.
31025202Skarels */
31125202Skarels send_tcp(t, TCP_CTL);
31225202Skarels }
31325202Skarels else
31425202Skarels {
31525202Skarels int act, newstate;
31625202Skarels struct socket *so;
31725202Skarels
31825202Skarels /* set up work entry for seg, and call
31925202Skarels the fsm to process it */
32025202Skarels
32125202Skarels hlen += sizeof(struct ip);
32225202Skarels mp->m_off += hlen;
32325202Skarels mp->m_len -= hlen;
32425202Skarels
32525202Skarels /** HAND CODED action() CALL **/
32625202Skarels
32725202Skarels w.w_type = INRECV;
32825202Skarels w.w_tcb = t;
32925202Skarels w.w_dat = (char *)tp;
33025202Skarels
33125202Skarels /* get index of action routine from
33225202Skarels * transition table
33325202Skarels */
33425202Skarels act = fstab[t->t_state][INRECV];
33525202Skarels
33625202Skarels /* invalid state transition, just
33725202Skarels * print a message and ignore */
33825202Skarels
33925202Skarels if (act == 0)
34025202Skarels {
341*25209Skarels log(LOG_INFO, "tcp bad state: tcb=%x state=%d INRECV\n", t, t->t_state);
34225202Skarels m_freem(mp);
34325202Skarels return;
34425202Skarels }
34525202Skarels
34625202Skarels so = t->t_in_pcb->inp_socket;
34725202Skarels tcp_net_keep = FALSE;
34825202Skarels newstate = (*fsactab[act])(&w);
34925202Skarels
35025202Skarels /* debugging info */
35125202Skarels TCP_DEBUG (so, t, &w, act, newstate);
35225202Skarels
35325202Skarels /* if CLOSED, lost tcpcb */
35425202Skarels if ((newstate != SAME) && (newstate != CLOSED))
35525202Skarels t->t_state = newstate;
35625202Skarels if (! tcp_net_keep)
35725202Skarels m_freem(mp);
35825202Skarels
35925202Skarels /** END action() **/
36025202Skarels }
36125202Skarels }
36225202Skarels }
36325202Skarels else
36425202Skarels /* nobody wants it */
36525202Skarels send_uncon_rst (tp, mp, tlen, hlen);
36625202Skarels }
36725202Skarels
send_uncon_rst(n,mp,tlen,hlen)36825202Skarels send_uncon_rst (n, mp, tlen, hlen)
36925202Skarels register struct th *n;
37025202Skarels register struct mbuf *mp;
37125202Skarels {
37225202Skarels struct in_addr tempinaddr;
37325202Skarels u_short tempport;
37425202Skarels int error;
37525202Skarels
37625202Skarels /* make sure we don't send a RST in response to an RST */
37725202Skarels
37825202Skarels if (n->t_flags & T_RST)
37925202Skarels {
38025202Skarels m_freem(mp);
38125202Skarels return;
38225202Skarels }
38325202Skarels
38425202Skarels /* free everything but the header */
38525202Skarels
38625202Skarels m_freem(mp->m_next);
38725202Skarels mp->m_next = NULL;
38825202Skarels mp->m_len = sizeof(struct th);
38925202Skarels
39025202Skarels /* form a reset from the packet and send */
39125202Skarels
39225202Skarels tempinaddr = n->t_d;
39325202Skarels n->t_d = n->t_s;
39425202Skarels n->t_s = tempinaddr;
39525202Skarels
39625202Skarels tempport = n->t_src;
39725202Skarels n->t_src = n->t_dst;
39825202Skarels n->t_dst = tempport;
39925202Skarels
40025202Skarels if (n->t_flags&T_ACK)
40125202Skarels n->t_seq = n->t_ackno;
40225202Skarels else
40325202Skarels {
40425202Skarels n->t_ackno = htonl((u_long)
40525202Skarels ntohl((u_long)n->t_seq)
40625202Skarels + tlen - hlen
40725202Skarels + (n->t_flags&T_SYN ? 1 : 0));
40825202Skarels n->t_seq = 0;
40925202Skarels }
41025202Skarels n->t_flags = (n->t_flags&T_ACK) ? T_RST : T_RST+T_ACK;
41125202Skarels n->t_len = htons((u_short)TCPSIZE);
41225202Skarels n->t_off = TCPSIZE >> TCP_OFFSHIFT;
41325202Skarels n->t_sum = in_cksum(mp, sizeof(struct th));
41425202Skarels
41525202Skarels NOPCB_IPSEND (mp, TCPSIZE, FALSE, error);
41625202Skarels tcpstat.t_badsegs++;
41725202Skarels
41825202Skarels #ifdef lint
41925202Skarels error = error;
42025202Skarels #endif
42125202Skarels }
42225202Skarels
42325202Skarels /*
42425202Skarels * Entry into TCP finite state machine
42525202Skarels */
action(wp)42625202Skarels action(wp)
42725202Skarels register struct work *wp;
42825202Skarels {
42925202Skarels register act, newstate;
43025202Skarels register struct tcpcb *tp;
43125202Skarels register struct socket *so;
43225202Skarels
43325202Skarels tp = wp->w_tcb;
43425202Skarels so = tp->t_in_pcb->inp_socket;
43525202Skarels
43625202Skarels ACTION (tp, so, wp, wp->w_type, wp->w_dat, act, newstate);
43725202Skarels return(newstate);
43825202Skarels }
43925202Skarels
44025202Skarels
44125202Skarels struct mbuf *tcpdebuf;
44225202Skarels int tcprint;
44325202Skarels
44425202Skarels /*
44525202Skarels * Write a record in the tcp debugging log
44625202Skarels */
tcp_debug(tp,wp,newstate)44725202Skarels tcp_debug(tp, wp, newstate)
44825202Skarels register struct tcpcb *tp;
44925202Skarels register struct work *wp;
45025202Skarels register newstate;
45125202Skarels {
45225202Skarels register struct t_debug *dp;
45325202Skarels register struct mbuf *m;
45425202Skarels
45525202Skarels #ifdef TCPDEBUG
45625202Skarels if (tcprint)
45725202Skarels {
45825202Skarels /*
45925202Skarels * Print debugging info directly on the console (use this for
46025202Skarels * intial testing only).
46125202Skarels */
46225202Skarels printf("TCP(%x) %s X %s", tp, tcpstates[tp->t_state],
46325202Skarels tcpinputs[wp->w_type]);
46425202Skarels
46525202Skarels if (wp->w_type == ISTIMER)
46625202Skarels printf("(%s)", tcptimers[wp->w_stype]);
46725202Skarels
46825202Skarels printf(" --> %s",
46925202Skarels tcpstates[ (newstate > 0) ? newstate : tp->t_state]);
47025202Skarels
47125202Skarels if (newstate < 0)
47225202Skarels printf(" (FAILED)\n");
47325202Skarels else
47425202Skarels putchar('\n', 0);
47525202Skarels }
47625202Skarels #endif
47725202Skarels
47825202Skarels /*
47925202Skarels * Get an mbuf to write the debugging record into. If we don't already
48025202Skarels * have one, allocate a new one.
48125202Skarels */
48225202Skarels if ((m = tcpdebuf) == NULL)
48325202Skarels {
48425202Skarels register struct mbuf *c;
48525202Skarels
48625202Skarels if ((tcpdebuf = m = m_get(M_DONTWAIT, MT_DATA)) == NULL)
48725202Skarels return;
48825202Skarels /*
48925202Skarels * If possible, use a cluster so that we need to wake up the
49025202Skarels * raw listener less often and reduce likelihood he misses
49125202Skarels * some information.
49225202Skarels */
49325202Skarels MCLGET(c, 1);
49425202Skarels if (c)
49525202Skarels {
49625202Skarels m->m_off = ((int) c) - ((int) m);
49725202Skarels m->m_act = (struct mbuf *) TCDBLEN;
49825202Skarels }
49925202Skarels else
50025202Skarels m->m_act = (struct mbuf *) TDBLEN;
50125202Skarels m->m_len = 0;
50225202Skarels }
50325202Skarels
50425202Skarels dp = (struct t_debug *) (mtod(m, char *) + m->m_len);
50525202Skarels /*
50625202Skarels * Set up the debugging record.
50725202Skarels */
50825202Skarels dp->t_iptime = iptime();
50925202Skarels dp->t_input = wp->w_type;
51025202Skarels dp->t_timer = wp->w_stype;
51125202Skarels dp->t_newstate = newstate;
51225202Skarels if (tp != NULL)
51325202Skarels {
51425202Skarels dp->t_oldstate = tp->t_state;
51525202Skarels dp->t_tcb = (*tp); /* structure copy */
51625202Skarels }
51725202Skarels else
51825202Skarels dp->t_oldstate = 0;
51925202Skarels
52025202Skarels if (wp->w_type == INRECV)
52125202Skarels {
52225202Skarels register struct th *n;
52325202Skarels
52425202Skarels n = (struct th *)wp->w_dat;
52525202Skarels dp->t_hdr = (*n); /* structure copy */
52625202Skarels }
52725202Skarels /*
52825202Skarels * If the mbuf is full, dispatch it to a raw listener.
52925202Skarels * Also flush if the connection we're debugging closes so that
53025202Skarels * packet-printer/systems analyst sees final transitions.
53125202Skarels */
53225202Skarels m->m_len += sizeof(struct t_debug);
53325202Skarels if ((m->m_len >= ((int) m->m_act)) || (newstate == CLOSED))
53425202Skarels {
53525202Skarels m->m_act = 0;
53625202Skarels tcpdebuglog(m);
53725202Skarels tcpdebuf = NULL;
53825202Skarels }
53925202Skarels }
540