xref: /csrg-svn/sys/netinet/ip_input.c (revision 5172)
1*5172Swnj /* ip_input.c 1.22 81/12/03 */
24571Swnj 
34495Swnj #include "../h/param.h"
44543Swnj #include "../h/systm.h"
54640Swnj #include "../h/clock.h"
64640Swnj #include "../h/mbuf.h"
74898Swnj #include "../h/protosw.h"
84923Swnj #include "../h/socket.h"
95084Swnj #include "../net/in.h"
105084Swnj #include "../net/in_systm.h"
114951Swnj #include "../net/if.h"
125084Swnj #include "../net/ip.h"			/* belongs before in.h */
134898Swnj #include "../net/ip_var.h"
144801Swnj #include "../net/ip_icmp.h"
154801Swnj #include "../net/tcp.h"
164495Swnj 
174898Swnj u_char	ip_protox[IPPROTO_MAX];
184898Swnj 
194801Swnj /*
20*5172Swnj  * IP initialization: fill in IP protocol switch table.
215161Swnj  * All protocols not implemented in kernel go to raw IP protocol handler.
224801Swnj  */
234801Swnj ip_init()
244801Swnj {
254898Swnj 	register struct protosw *pr;
264898Swnj 	register int i;
274495Swnj 
284951Swnj COUNT(IP_INIT);
294898Swnj 	pr = pffindproto(PF_INET, IPPROTO_RAW);
304898Swnj 	if (pr == 0)
314898Swnj 		panic("ip_init");
324898Swnj 	for (i = 0; i < IPPROTO_MAX; i++)
334898Swnj 		ip_protox[i] = pr - protosw;
344898Swnj 	for (pr = protosw; pr <= protoswLAST; pr++)
354898Swnj 		if (pr->pr_family == PF_INET &&
364898Swnj 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
374898Swnj 			ip_protox[pr->pr_protocol] = pr - protosw;
384801Swnj 	ipq.next = ipq.prev = &ipq;
394801Swnj 	ip_id = time & 0xffff;
404801Swnj }
414801Swnj 
424898Swnj u_char	ipcksum = 1;
434640Swnj struct	ip *ip_reass();
444640Swnj 
454640Swnj /*
464640Swnj  * Ip input routine.  Checksum and byte swap header.  If fragmented
474640Swnj  * try to reassamble.  If complete and fragment queue exists, discard.
484640Swnj  * Process options.  Pass to next level.
494640Swnj  */
505084Swnj ipintr()
514495Swnj {
524923Swnj 	register struct ip *ip;
535084Swnj 	register struct mbuf *m;
545084Swnj 	struct mbuf *m0;
554640Swnj 	register int i;
564495Swnj 	register struct ipq *fp;
575084Swnj 	int hlen, s;
584495Swnj 
595084Swnj COUNT(IPINTR);
605084Swnj next:
614640Swnj 	/*
625084Swnj 	 * Get next datagram off input queue and get IP header
635084Swnj 	 * in first mbuf.
644640Swnj 	 */
655084Swnj 	s = splimp();
665084Swnj 	IF_DEQUEUE(&ipintrq, m);
675084Swnj 	splx(s);
685084Swnj 	if (m == 0)
695084Swnj 		return;
705046Swnj 	if (m->m_len < sizeof (struct ip) &&
715046Swnj 	    m_pullup(m, sizeof (struct ip)) == 0)
725046Swnj 		goto bad;
734640Swnj 	ip = mtod(m, struct ip *);
745161Swnj 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
755161Swnj 		if (m_pullup(m, hlen) == 0)
765161Swnj 			goto bad;
775161Swnj 		ip = mtod(m, struct ip *);
785161Swnj 	}
794951Swnj 	if (ipcksum)
805084Swnj 		if ((ip->ip_sum = in_cksum(m, hlen)) != 0xffff) {
815161Swnj 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
824951Swnj 			ipstat.ips_badsum++;
834951Swnj 			goto bad;
844495Swnj 		}
854951Swnj 
864951Swnj 	/*
874951Swnj 	 * Convert fields to host representation.
884951Swnj 	 */
894907Swnj 	ip->ip_len = ntohs((u_short)ip->ip_len);
904640Swnj 	ip->ip_id = ntohs(ip->ip_id);
914951Swnj 	ip->ip_off = ntohs((u_short)ip->ip_off);
924495Swnj 
934543Swnj 	/*
944640Swnj 	 * Check that the amount of data in the buffers
954640Swnj 	 * is as at least much as the IP header would have us expect.
964640Swnj 	 * Trim mbufs if longer than we expect.
974640Swnj 	 * Drop packet if shorter than we expect.
984543Swnj 	 */
994640Swnj 	i = 0;
1005161Swnj 	m0 = m;
1015161Swnj 	for (; m != NULL; m = m->m_next)
1024495Swnj 		i += m->m_len;
1034640Swnj 	m = m0;
1044640Swnj 	if (i != ip->ip_len) {
1055161Swnj 		if (i < ip->ip_len) {
1065161Swnj 			ipstat.ips_tooshort++;
1074951Swnj 			goto bad;
1085161Swnj 		}
1094640Swnj 		m_adj(m, ip->ip_len - i);
1104495Swnj 	}
1114495Swnj 
1124640Swnj 	/*
1134640Swnj 	 * Process options and, if not destined for us,
1144640Swnj 	 * ship it on.
1154640Swnj 	 */
1164543Swnj 	if (hlen > sizeof (struct ip))
1174907Swnj 		ip_dooptions(ip);
1185084Swnj 	if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
1195045Swnj 	    if_ifwithaddr(ip->ip_dst) == 0) {
1204640Swnj 		if (--ip->ip_ttl == 0) {
1214907Swnj 			icmp_error(ip, ICMP_TIMXCEED, 0);
1225084Swnj 			goto next;
1234495Swnj 		}
1245108Swnj 		(void) ip_output(dtom(ip), (struct mbuf *)0);
1255084Swnj 		goto next;
1264543Swnj 	}
1274495Swnj 
1284640Swnj 	/*
1294640Swnj 	 * Look for queue of fragments
1304640Swnj 	 * of this datagram.
1314640Swnj 	 */
1324640Swnj 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
1334640Swnj 		if (ip->ip_id == fp->ipq_id &&
1344640Swnj 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
1354640Swnj 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
1364640Swnj 		    ip->ip_p == fp->ipq_p)
1374640Swnj 			goto found;
1384640Swnj 	fp = 0;
1394640Swnj found:
1404495Swnj 
1414640Swnj 	/*
1424640Swnj 	 * Adjust ip_len to not reflect header,
1434640Swnj 	 * set ip_mff if more fragments are expected,
1444640Swnj 	 * convert offset of this to bytes.
1454640Swnj 	 */
1464640Swnj 	ip->ip_len -= hlen;
1474898Swnj 	((struct ipasfrag *)ip)->ipf_mff = 0;
1484640Swnj 	if (ip->ip_off & IP_MF)
1494898Swnj 		((struct ipasfrag *)ip)->ipf_mff = 1;
1504640Swnj 	ip->ip_off <<= 3;
1514495Swnj 
1524640Swnj 	/*
1534640Swnj 	 * If datagram marked as having more fragments
1544640Swnj 	 * or if this is not the first fragment,
1554640Swnj 	 * attempt reassembly; if it succeeds, proceed.
1564640Swnj 	 */
1574898Swnj 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
1584898Swnj 		ip = ip_reass((struct ipasfrag *)ip, fp);
1594640Swnj 		if (ip == 0)
1605084Swnj 			goto next;
1614640Swnj 		hlen = ip->ip_hl << 2;
1624640Swnj 		m = dtom(ip);
1634640Swnj 	} else
1644640Swnj 		if (fp)
1654640Swnj 			(void) ip_freef(fp);
1664951Swnj 
1674951Swnj 	/*
1684951Swnj 	 * Switch out to protocol's input routine.
1694951Swnj 	 */
1704898Swnj 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
1715084Swnj 	goto next;
1724951Swnj bad:
1734951Swnj 	m_freem(m);
1745084Swnj 	goto next;
1754640Swnj }
1764495Swnj 
1774640Swnj /*
1784640Swnj  * Take incoming datagram fragment and try to
1794951Swnj  * reassemble it into whole datagram.  If a chain for
1804640Swnj  * reassembly of this datagram already exists, then it
1814640Swnj  * is given as fp; otherwise have to make a chain.
1824640Swnj  */
1834640Swnj struct ip *
1844640Swnj ip_reass(ip, fp)
1854898Swnj 	register struct ipasfrag *ip;
1864640Swnj 	register struct ipq *fp;
1874640Swnj {
1884640Swnj 	register struct mbuf *m = dtom(ip);
1894898Swnj 	register struct ipasfrag *q;
1904640Swnj 	struct mbuf *t;
1914640Swnj 	int hlen = ip->ip_hl << 2;
1924640Swnj 	int i, next;
1934951Swnj COUNT(IP_REASS);
1944543Swnj 
1954640Swnj 	/*
1964640Swnj 	 * Presence of header sizes in mbufs
1974640Swnj 	 * would confuse code below.
1984640Swnj 	 */
1994640Swnj 	m->m_off += hlen;
2004640Swnj 	m->m_len -= hlen;
2014495Swnj 
2024640Swnj 	/*
2034640Swnj 	 * If first fragment to arrive, create a reassembly queue.
2044640Swnj 	 */
2054640Swnj 	if (fp == 0) {
2064640Swnj 		if ((t = m_get(1)) == NULL)
2074640Swnj 			goto dropfrag;
2084640Swnj 		t->m_off = MMINOFF;
2094640Swnj 		fp = mtod(t, struct ipq *);
2104640Swnj 		insque(fp, &ipq);
2114640Swnj 		fp->ipq_ttl = IPFRAGTTL;
2124640Swnj 		fp->ipq_p = ip->ip_p;
2134640Swnj 		fp->ipq_id = ip->ip_id;
2144898Swnj 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
2154898Swnj 		fp->ipq_src = ((struct ip *)ip)->ip_src;
2164898Swnj 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
2175161Swnj 		q = (struct ipasfrag *)fp;
2185161Swnj 		goto insert;
2194640Swnj 	}
2204495Swnj 
2214640Swnj 	/*
2224640Swnj 	 * Find a segment which begins after this one does.
2234640Swnj 	 */
2244898Swnj 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
2254640Swnj 		if (q->ip_off > ip->ip_off)
2264640Swnj 			break;
2274495Swnj 
2284640Swnj 	/*
2294640Swnj 	 * If there is a preceding segment, it may provide some of
2304640Swnj 	 * our data already.  If so, drop the data from the incoming
2314640Swnj 	 * segment.  If it provides all of our data, drop us.
2324640Swnj 	 */
2334898Swnj 	if (q->ipf_prev != (struct ipasfrag *)fp) {
2344898Swnj 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
2354640Swnj 		if (i > 0) {
2364640Swnj 			if (i >= ip->ip_len)
2374640Swnj 				goto dropfrag;
2384640Swnj 			m_adj(dtom(ip), i);
2394640Swnj 			ip->ip_off += i;
2404640Swnj 			ip->ip_len -= i;
2414640Swnj 		}
2424640Swnj 	}
2434543Swnj 
2444640Swnj 	/*
2454640Swnj 	 * While we overlap succeeding segments trim them or,
2464640Swnj 	 * if they are completely covered, dequeue them.
2474640Swnj 	 */
2484898Swnj 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
2494640Swnj 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
2504640Swnj 		if (i < q->ip_len) {
2514640Swnj 			q->ip_len -= i;
2524640Swnj 			m_adj(dtom(q), i);
2534640Swnj 			break;
2544495Swnj 		}
2554898Swnj 		q = q->ipf_next;
2564898Swnj 		m_freem(dtom(q->ipf_prev));
2574898Swnj 		ip_deq(q->ipf_prev);
2584543Swnj 	}
2594495Swnj 
2605161Swnj insert:
2614640Swnj 	/*
2624640Swnj 	 * Stick new segment in its place;
2634640Swnj 	 * check for complete reassembly.
2644640Swnj 	 */
2654898Swnj 	ip_enq(ip, q->ipf_prev);
2664640Swnj 	next = 0;
2674898Swnj 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
2684640Swnj 		if (q->ip_off != next)
2694640Swnj 			return (0);
2704640Swnj 		next += q->ip_len;
2714640Swnj 	}
2724898Swnj 	if (q->ipf_prev->ipf_mff)
2734640Swnj 		return (0);
2744495Swnj 
2754640Swnj 	/*
2764640Swnj 	 * Reassembly is complete; concatenate fragments.
2774640Swnj 	 */
2784640Swnj 	q = fp->ipq_next;
2794640Swnj 	m = dtom(q);
2804640Swnj 	t = m->m_next;
2814640Swnj 	m->m_next = 0;
2824640Swnj 	m_cat(m, t);
2834898Swnj 	while ((q = q->ipf_next) != (struct ipasfrag *)fp)
2844640Swnj 		m_cat(m, dtom(q));
2854495Swnj 
2864640Swnj 	/*
2874640Swnj 	 * Create header for new ip packet by
2884640Swnj 	 * modifying header of first packet;
2894640Swnj 	 * dequeue and discard fragment reassembly header.
2904640Swnj 	 * Make header visible.
2914640Swnj 	 */
2924640Swnj 	ip = fp->ipq_next;
2934640Swnj 	ip->ip_len = next;
2944898Swnj 	((struct ip *)ip)->ip_src = fp->ipq_src;
2954898Swnj 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
2964640Swnj 	remque(fp);
2974907Swnj 	(void) m_free(dtom(fp));
2984640Swnj 	m = dtom(ip);
2994898Swnj 	m->m_len += sizeof (struct ipasfrag);
3004898Swnj 	m->m_off -= sizeof (struct ipasfrag);
3014898Swnj 	return ((struct ip *)ip);
3024495Swnj 
3034640Swnj dropfrag:
3044640Swnj 	m_freem(m);
3054640Swnj 	return (0);
3064495Swnj }
3074495Swnj 
3084640Swnj /*
3094640Swnj  * Free a fragment reassembly header and all
3104640Swnj  * associated datagrams.
3114640Swnj  */
3124640Swnj struct ipq *
3134640Swnj ip_freef(fp)
3144640Swnj 	struct ipq *fp;
3154495Swnj {
3164898Swnj 	register struct ipasfrag *q;
3174640Swnj 	struct mbuf *m;
3184951Swnj COUNT(IP_FREEF);
3194495Swnj 
3204898Swnj 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
3214640Swnj 		m_freem(dtom(q));
3224640Swnj 	m = dtom(fp);
3234640Swnj 	fp = fp->next;
3244640Swnj 	remque(fp->prev);
3254907Swnj 	(void) m_free(m);
3264640Swnj 	return (fp);
3274495Swnj }
3284495Swnj 
3294640Swnj /*
3304640Swnj  * Put an ip fragment on a reassembly chain.
3314640Swnj  * Like insque, but pointers in middle of structure.
3324640Swnj  */
3334640Swnj ip_enq(p, prev)
3344898Swnj 	register struct ipasfrag *p, *prev;
3354495Swnj {
3364951Swnj 
3374640Swnj COUNT(IP_ENQ);
3384898Swnj 	p->ipf_prev = prev;
3394898Swnj 	p->ipf_next = prev->ipf_next;
3404898Swnj 	prev->ipf_next->ipf_prev = p;
3414898Swnj 	prev->ipf_next = p;
3424495Swnj }
3434495Swnj 
3444640Swnj /*
3454640Swnj  * To ip_enq as remque is to insque.
3464640Swnj  */
3474640Swnj ip_deq(p)
3484898Swnj 	register struct ipasfrag *p;
3494640Swnj {
3504951Swnj 
3514640Swnj COUNT(IP_DEQ);
3524898Swnj 	p->ipf_prev->ipf_next = p->ipf_next;
3534898Swnj 	p->ipf_next->ipf_prev = p->ipf_prev;
3544495Swnj }
3554495Swnj 
3564640Swnj /*
3574640Swnj  * IP timer processing;
3584640Swnj  * if a timer expires on a reassembly
3594640Swnj  * queue, discard it.
3604640Swnj  */
3614801Swnj ip_slowtimo()
3624495Swnj {
3634495Swnj 	register struct ipq *fp;
3644640Swnj 	int s = splnet();
3654951Swnj 
3664801Swnj COUNT(IP_SLOWTIMO);
3674644Swnj 	for (fp = ipq.next; fp != &ipq; )
3684640Swnj 		if (--fp->ipq_ttl == 0)
3694640Swnj 			fp = ip_freef(fp);
3704640Swnj 		else
3714640Swnj 			fp = fp->next;
3724640Swnj 	splx(s);
3734495Swnj }
3744495Swnj 
3754951Swnj /*
3764951Swnj  * Drain off all datagram fragments.
3774951Swnj  */
3784801Swnj ip_drain()
3794801Swnj {
3804801Swnj 
3814951Swnj COUNT(IP_DRAIN);
3824951Swnj 	while (ipq.next != &ipq)
3834951Swnj 		(void) ip_freef(ipq.next);
3844801Swnj }
3854923Swnj 
3864640Swnj /*
3874640Swnj  * Do option processing on a datagram,
3884640Swnj  * possibly discarding it if bad options
3894640Swnj  * are encountered.
3904640Swnj  */
3914640Swnj ip_dooptions(ip)
3924640Swnj 	struct ip *ip;
3934495Swnj {
3944640Swnj 	register u_char *cp;
3954907Swnj 	int opt, optlen, cnt;
3964923Swnj 	struct in_addr *sin;
3974801Swnj 	register struct ip_timestamp *ipt;
3984951Swnj 	register struct ifnet *ifp;
3994951Swnj 	struct in_addr t;
4004495Swnj 
4014951Swnj COUNT(IP_DOOPTIONS);
4024640Swnj 	cp = (u_char *)(ip + 1);
4034640Swnj 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
4044640Swnj 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
4054640Swnj 		opt = cp[0];
4064640Swnj 		if (opt == IPOPT_EOL)
4074640Swnj 			break;
4084640Swnj 		if (opt == IPOPT_NOP)
4094640Swnj 			optlen = 1;
4104640Swnj 		else
4114640Swnj 			optlen = cp[1];
4124640Swnj 		switch (opt) {
4134495Swnj 
4144640Swnj 		default:
4154640Swnj 			break;
4164495Swnj 
4174951Swnj 		/*
4184951Swnj 		 * Source routing with record.
4194951Swnj 		 * Find interface with current destination address.
4204951Swnj 		 * If none on this machine then drop if strictly routed,
4214951Swnj 		 * or do nothing if loosely routed.
4224951Swnj 		 * Record interface address and bring up next address
4234951Swnj 		 * component.  If strictly routed make sure next
4244951Swnj 		 * address on directly accessible net.
4254951Swnj 		 */
4264640Swnj 		case IPOPT_LSRR:
4274801Swnj 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
4284640Swnj 				break;
4294923Swnj 			sin = (struct in_addr *)(cp + cp[2]);
4304951Swnj 			ifp = if_ifwithaddr(*sin);
4314951Swnj 			if (ifp == 0) {
4324951Swnj 				if (opt == IPOPT_SSRR)
4334951Swnj 					goto bad;
4344951Swnj 				break;
4354640Swnj 			}
4364951Swnj 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
4374951Swnj 			cp[2] += 4;
4384951Swnj 			if (cp[2] > optlen - (sizeof (long) - 1))
4394951Swnj 				break;
4404951Swnj 			ip->ip_dst = sin[1];
4414951Swnj 			if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
4424951Swnj 				goto bad;
4434640Swnj 			break;
4444495Swnj 
4454640Swnj 		case IPOPT_TS:
4464801Swnj 			ipt = (struct ip_timestamp *)cp;
4474801Swnj 			if (ipt->ipt_len < 5)
4484640Swnj 				goto bad;
4494801Swnj 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
4504801Swnj 				if (++ipt->ipt_oflw == 0)
4514640Swnj 					goto bad;
4524495Swnj 				break;
4534640Swnj 			}
4544923Swnj 			sin = (struct in_addr *)(cp+cp[2]);
4554801Swnj 			switch (ipt->ipt_flg) {
4564495Swnj 
4574640Swnj 			case IPOPT_TS_TSONLY:
4584640Swnj 				break;
4594640Swnj 
4604640Swnj 			case IPOPT_TS_TSANDADDR:
4614801Swnj 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
4624640Swnj 					goto bad;
4634951Swnj 				/* stamp with ``first'' interface address */
4644951Swnj 				*sin++ = ifnet->if_addr;
4654640Swnj 				break;
4664640Swnj 
4674640Swnj 			case IPOPT_TS_PRESPEC:
4684951Swnj 				if (if_ifwithaddr(*sin) == 0)
4694951Swnj 					continue;
4704801Swnj 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
4714640Swnj 					goto bad;
4724801Swnj 				ipt->ipt_ptr += 4;
4734640Swnj 				break;
4744640Swnj 
4754495Swnj 			default:
4764640Swnj 				goto bad;
4774495Swnj 			}
4784923Swnj 			*(n_time *)sin = iptime();
4794801Swnj 			ipt->ipt_ptr += 4;
4804640Swnj 		}
4814495Swnj 	}
4824907Swnj 	return;
4834640Swnj bad:
4844640Swnj 	/* SHOULD FORCE ICMP MESSAGE */
4854907Swnj 	return;
4864495Swnj }
4874495Swnj 
4884640Swnj /*
4894951Swnj  * Strip out IP options, at higher
4904951Swnj  * level protocol in the kernel.
4914951Swnj  * Second argument is buffer to which options
4924951Swnj  * will be moved, and return value is their length.
4934640Swnj  */
4944951Swnj ip_stripoptions(ip, cp)
4954640Swnj 	struct ip *ip;
4964951Swnj 	char *cp;
4974495Swnj {
4984640Swnj 	register int i;
4994640Swnj 	register struct mbuf *m;
5004640Swnj 	int olen;
5014951Swnj COUNT(IP_STRIPOPTIONS);
5024640Swnj 
5034640Swnj 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
5044951Swnj 	m = dtom(ip);
5054951Swnj 	ip++;
5064951Swnj 	if (cp)
5074951Swnj 		bcopy((caddr_t)ip, cp, (unsigned)olen);
5084640Swnj 	i = m->m_len - (sizeof (struct ip) + olen);
5094907Swnj 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
5104640Swnj 	m->m_len -= i;
5114495Swnj }
512