xref: /openbsd-src/sys/netinet/ip_mroute.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: ip_mroute.c,v 1.21 2001/06/23 16:15:56 fgsch Exp $	*/
2 /*	$NetBSD: ip_mroute.c,v 1.27 1996/05/07 02:40:50 thorpej Exp $	*/
3 
4 /*
5  * IP multicast forwarding procedures
6  *
7  * Written by David Waitzman, BBN Labs, August 1988.
8  * Modified by Steve Deering, Stanford, February 1989.
9  * Modified by Mark J. Steiglitz, Stanford, May, 1991
10  * Modified by Van Jacobson, LBL, January 1993
11  * Modified by Ajit Thyagarajan, PARC, August 1993
12  * Modified by Bill Fenner, PARC, April 1994
13  * Modified by Charles M. Hannum, NetBSD, May 1995.
14  *
15  * MROUTING Revision: 1.2
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/mbuf.h>
21 #include <sys/socket.h>
22 #include <sys/socketvar.h>
23 #include <sys/protosw.h>
24 #include <sys/errno.h>
25 #include <sys/time.h>
26 #include <sys/kernel.h>
27 #include <sys/ioctl.h>
28 #include <sys/syslog.h>
29 #include <net/if.h>
30 #include <net/route.h>
31 #include <net/raw_cb.h>
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/in_systm.h>
35 #include <netinet/ip.h>
36 #include <netinet/ip_var.h>
37 #include <netinet/in_pcb.h>
38 #include <netinet/udp.h>
39 #include <netinet/igmp.h>
40 #include <netinet/igmp_var.h>
41 #include <netinet/ip_mroute.h>
42 
43 #include <machine/stdarg.h>
44 
#define IP_MULTICASTOPTS 0

/*
 * Call m_pullup() on `m' for `len' bytes when the mbuf has M_EXT set in
 * its flags or when its m_len is smaller than `len'.  The result of
 * m_pullup() is stored back into `m'.  A null `m' is left untouched.
 * Both arguments are evaluated more than once.
 */
#define	M_PULLUP(m, len) \
	do { \
		if ((m) != NULL && \
		    (((m)->m_flags & M_EXT) != 0 || (m)->m_len < (len))) \
			(m) = m_pullup((m), (len)); \
	} while (0)
51 
52 /*
53  * Globals.  All but ip_mrouter and ip_mrtproto could be static,
54  * except for netstat or debugging purposes.
55  */
56 struct socket  *ip_mrouter  = NULL;
57 int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
58 
59 #define NO_RTE_FOUND 	0x1
60 #define RTE_FOUND	0x2
61 
62 #define	MFCHASH(a, g) \
63 	((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
64 	  ((g) >> 20) ^ ((g) >> 10) ^ (g)) & mfchash)
65 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
66 u_long	mfchash;
67 
68 u_char		nexpire[MFCTBLSIZ];
69 struct vif	viftable[MAXVIFS];
70 struct mrtstat	mrtstat;
71 u_int		mrtdebug = 0;	  /* debug level 	*/
72 #define		DEBUG_MFC	0x02
73 #define		DEBUG_FORWARD	0x04
74 #define		DEBUG_EXPIRE	0x08
75 #define		DEBUG_XMIT	0x10
76 u_int       	tbfdebug = 0;     /* tbf debug level 	*/
77 #ifdef RSVP_ISI
78 u_int		rsvpdebug = 0;	  /* rsvp debug level   */
79 extern struct socket *ip_rsvpd;
80 extern int rsvp_on;
81 #endif /* RSVP_ISI */
82 
83 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
84 #define		UPCALL_EXPIRE	6		/* number of timeouts */
85 
86 /*
87  * Define the token bucket filter structures
88  * qtable   -> each interface has an associated queue of pkts
89  */
90 
91 struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
92 
93 static int get_sg_cnt __P((struct sioc_sg_req *));
94 static int get_vif_cnt __P((struct sioc_vif_req *));
95 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
96 static int get_version __P((struct mbuf *));
97 static int set_assert __P((struct mbuf *));
98 static int get_assert __P((struct mbuf *));
99 static int add_vif __P((struct mbuf *));
100 static int del_vif __P((struct mbuf *));
101 static void update_mfc __P((struct mfcctl *, struct mfc *));
102 static void expire_mfc __P((struct mfc *));
103 static int add_mfc __P((struct mbuf *));
104 #ifdef UPCALL_TIMING
105 static void collate __P((struct timeval *));
106 #endif
107 static int del_mfc __P((struct mbuf *));
108 static int socket_send __P((struct socket *, struct mbuf *,
109 			    struct sockaddr_in *));
110 static void expire_upcalls __P((void *));
111 #ifdef RSVP_ISI
112 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
113 #else
114 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
115 #endif
116 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
117 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
118 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
119 			     u_int32_t));
120 static void tbf_queue __P((struct vif *, struct mbuf *, struct ip *));
121 static void tbf_process_q __P((struct vif *));
122 static void tbf_dequeue __P((struct vif *, int));
123 static void tbf_reprocess_q __P((void *));
124 static int tbf_dq_sel __P((struct vif *, struct ip *));
125 static void tbf_send_packet __P((struct vif *, struct mbuf *));
126 static void tbf_update_tokens __P((struct vif *));
127 static int priority __P((struct vif *, struct ip *));
128 
129 /*
130  * 'Interfaces' associated with decapsulator (so we can tell
131  * packets that went through it from ones that get reflected
132  * by a broken gateway).  These interfaces are never linked into
133  * the system ifnet list & no routes point to them.  I.e., packets
134  * can't be sent this way.  They only exist as a placeholder for
135  * multicast source verification.
136  */
137 #if 0
138 struct ifnet multicast_decap_if[MAXVIFS];
139 #endif
140 
141 #define	ENCAP_TTL	64
142 #define	ENCAP_PROTO	IPPROTO_IPIP	/* 4 */
143 
144 /* prototype IP hdr for encapsulated packets */
145 struct ip multicast_encap_iphdr = {
146 #if BYTE_ORDER == LITTLE_ENDIAN
147 	sizeof(struct ip) >> 2, IPVERSION,
148 #else
149 	IPVERSION, sizeof(struct ip) >> 2,
150 #endif
151 	0,				/* tos */
152 	sizeof(struct ip),		/* total length */
153 	0,				/* id */
154 	0,				/* frag offset */
155 	ENCAP_TTL, ENCAP_PROTO,
156 	0,				/* checksum */
157 };
158 
159 /*
160  * Private variables.
161  */
162 static vifi_t	   numvifs = 0;
163 static int have_encap_tunnel = 0;
164 
165 /*
166  * one-back cache used by ipip_mroute_input to locate a tunnel's vif
167  * given a datagram's src ip address.
168  */
169 static u_int32_t last_encap_src;
170 static struct vif *last_encap_vif;
171 
172 /*
173  * whether or not special PIM assert processing is enabled.
174  */
175 static int pim_assert;
176 /*
177  * Rate limit for assert notification messages, in usec
178  */
179 #define ASSERT_MSG_TIME		3000000
180 
181 /*
182  * Find a route for a given origin IP address and Multicast group address
183  * Type of service parameter to be added in the future!!!
184  */
185 
/*
 * Look up the forwarding cache entry for origin `o' and group `g' and
 * store it in `rt'; `rt' is NULL when nothing matches.  Entries that
 * still have packets queued on mfc_stall are not considered.  Every
 * call bumps mrts_mfc_lookups; a failed one also bumps mrts_mfc_misses.
 * `o' and `g' are evaluated more than once.
 */
#define MFCFIND(o, g, rt) { \
	register struct mfc *_mfcp; \
	(rt) = NULL; \
	++mrtstat.mrts_mfc_lookups; \
	_mfcp = mfchashtbl[MFCHASH(o, g)].lh_first; \
	while (_mfcp != NULL) { \
		if (_mfcp->mfc_origin.s_addr == (o) && \
		    _mfcp->mfc_mcastgrp.s_addr == (g) && \
		    _mfcp->mfc_stall == NULL) { \
			(rt) = _mfcp; \
			break; \
		} \
		_mfcp = _mfcp->mfc_hash.le_next; \
	} \
	if ((rt) == NULL) \
		++mrtstat.mrts_mfc_misses; \
}
202 
203 /*
204  * Macros to compute elapsed time efficiently
205  * Borrowed from Van Jacobson's scheduling code
206  */
/*
 * Store in `delta' the difference a - b of two timevals, expressed in
 * microseconds.  Differences of zero, one and two whole seconds are
 * handled without a multiplication.
 */
#define TV_DELTA(a, b, delta) { \
	register int xxs; \
	delta = (a).tv_usec - (b).tv_usec; \
	xxs = (a).tv_sec - (b).tv_sec; \
	if (xxs == 1) { \
		delta += 1000000; \
	} else if (xxs == 2) { \
		delta += 1000000; \
		delta += 1000000; \
	} else if (xxs != 0) { \
		delta += (1000000 * xxs); \
	} \
}
225 
226 #ifdef UPCALL_TIMING
227 u_int32_t upcall_data[51];
228 #endif /* UPCALL_TIMING */
229 
230 /*
231  * Handle MRT setsockopt commands to modify the multicast routing tables.
232  */
233 int
234 ip_mrouter_set(cmd, so, m)
235 	int cmd;
236 	struct socket *so;
237 	struct mbuf **m;
238 {
239 	int error;
240 
241 	if (cmd != MRT_INIT && so != ip_mrouter)
242 		error = EACCES;
243 	else
244 		switch (cmd) {
245 		case MRT_INIT:
246 			error = ip_mrouter_init(so, *m);
247 			break;
248 		case MRT_DONE:
249 			error = ip_mrouter_done();
250 			break;
251 		case MRT_ADD_VIF:
252 			error = add_vif(*m);
253 			break;
254 		case MRT_DEL_VIF:
255 			error = del_vif(*m);
256 			break;
257 		case MRT_ADD_MFC:
258 			error = add_mfc(*m);
259 			break;
260 		case MRT_DEL_MFC:
261 			error = del_mfc(*m);
262 			break;
263 		case MRT_ASSERT:
264 			error = set_assert(*m);
265 			break;
266 		default:
267 			error = EOPNOTSUPP;
268 			break;
269 		}
270 
271 	if (*m)
272 		m_free(*m);
273 	return (error);
274 }
275 
276 /*
277  * Handle MRT getsockopt commands
278  */
279 int
280 ip_mrouter_get(cmd, so, m)
281 	int cmd;
282 	struct socket *so;
283 	struct mbuf **m;
284 {
285 	struct mbuf *mb;
286 	int error;
287 
288 	if (so != ip_mrouter)
289 		error = EACCES;
290 	else {
291 		*m = mb = m_get(M_WAIT, MT_SOOPTS);
292 
293 		switch (cmd) {
294 		case MRT_VERSION:
295 			error = get_version(mb);
296 			break;
297 		case MRT_ASSERT:
298 			error = get_assert(mb);
299 			break;
300 		default:
301 			error = EOPNOTSUPP;
302 			break;
303 		}
304 
305 		if (error)
306 			m_free(mb);
307 	}
308 
309 	return (error);
310 }
311 
312 /*
313  * Handle ioctl commands to obtain information from the cache
314  */
315 int
316 mrt_ioctl(cmd, data)
317 	u_long cmd;
318 	caddr_t data;
319 {
320 	int error;
321 
322 	switch (cmd) {
323 	case SIOCGETVIFCNT:
324 		error = get_vif_cnt((struct sioc_vif_req *)data);
325 		break;
326 	case SIOCGETSGCNT:
327 		error = get_sg_cnt((struct sioc_sg_req *)data);
328 		break;
329 	default:
330 		error = EINVAL;
331 		break;
332 	}
333 
334 	return (error);
335 }
336 
337 /*
338  * returns the packet, byte, rpf-failure count for the source group provided
339  */
340 static int
341 get_sg_cnt(req)
342 	register struct sioc_sg_req *req;
343 {
344 	register struct mfc *rt;
345 	int s;
346 
347 	s = splsoftnet();
348 	MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
349 	splx(s);
350 	if (rt != NULL) {
351 		req->pktcnt = rt->mfc_pkt_cnt;
352 		req->bytecnt = rt->mfc_byte_cnt;
353 		req->wrong_if = rt->mfc_wrong_if;
354 	} else
355 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
356 
357 	return (0);
358 }
359 
360 /*
361  * returns the input and output packet and byte counts on the vif provided
362  */
363 static int
364 get_vif_cnt(req)
365 	register struct sioc_vif_req *req;
366 {
367 	register vifi_t vifi = req->vifi;
368 
369 	if (vifi >= numvifs)
370 		return (EINVAL);
371 
372 	req->icount = viftable[vifi].v_pkt_in;
373 	req->ocount = viftable[vifi].v_pkt_out;
374 	req->ibytes = viftable[vifi].v_bytes_in;
375 	req->obytes = viftable[vifi].v_bytes_out;
376 
377 	return (0);
378 }
379 
380 /*
381  * Enable multicast routing
382  */
383 static int
384 ip_mrouter_init(so, m)
385 	struct socket *so;
386 	struct mbuf *m;
387 {
388 	int *v;
389 
390 	if (mrtdebug)
391 		log(LOG_DEBUG,
392 		    "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
393 		    so->so_type, so->so_proto->pr_protocol);
394 
395 	if (so->so_type != SOCK_RAW ||
396 	    so->so_proto->pr_protocol != IPPROTO_IGMP)
397 		return (EOPNOTSUPP);
398 
399 	if (m == 0 || m->m_len < sizeof(int))
400 		return (EINVAL);
401 
402 	v = mtod(m, int *);
403 	if (*v != 1)
404 		return (EINVAL);
405 
406 	if (ip_mrouter != NULL)
407 		return (EADDRINUSE);
408 
409 	ip_mrouter = so;
410 
411 	mfchashtbl = hashinit(MFCTBLSIZ, M_MRTABLE, M_WAITOK, &mfchash);
412 	bzero((caddr_t)nexpire, sizeof(nexpire));
413 
414 	pim_assert = 0;
415 
416 	timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
417 
418 	if (mrtdebug)
419 		log(LOG_DEBUG, "ip_mrouter_init\n");
420 
421 	return (0);
422 }
423 
424 /*
425  * Disable multicast routing
426  */
427 int
428 ip_mrouter_done()
429 {
430 	vifi_t vifi;
431 	register struct vif *vifp;
432 	int i;
433 	int s;
434 
435 	s = splsoftnet();
436 
437 	/* Clear out all the vifs currently in use. */
438 	for (vifi = 0; vifi < numvifs; vifi++) {
439 		vifp = &viftable[vifi];
440 		if (vifp->v_lcl_addr.s_addr != 0)
441 			reset_vif(vifp);
442 	}
443 
444 	bzero((caddr_t)qtable, sizeof(qtable));
445 	numvifs = 0;
446 	pim_assert = 0;
447 
448 	untimeout(expire_upcalls, (caddr_t)NULL);
449 
450 	/*
451 	 * Free all multicast forwarding cache entries.
452 	 */
453 	for (i = 0; i < MFCTBLSIZ; i++) {
454 		register struct mfc *rt, *nrt;
455 
456 		for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
457 			nrt = rt->mfc_hash.le_next;
458 
459 			expire_mfc(rt);
460 		}
461 	}
462 
463 	free(mfchashtbl, M_MRTABLE);
464 	mfchashtbl = 0;
465 
466 	/* Reset de-encapsulation cache. */
467 	have_encap_tunnel = 0;
468 
469 	ip_mrouter = NULL;
470 
471 	splx(s);
472 
473 	if (mrtdebug)
474 		log(LOG_DEBUG, "ip_mrouter_done\n");
475 
476 	return (0);
477 }
478 
479 static int
480 get_version(m)
481 	struct mbuf *m;
482 {
483 	int *v = mtod(m, int *);
484 
485 	*v = 0x0305;	/* XXX !!!! */
486 	m->m_len = sizeof(int);
487 	return (0);
488 }
489 
490 /*
491  * Set PIM assert processing global
492  */
493 static int
494 set_assert(m)
495 	struct mbuf *m;
496 {
497 	int *i;
498 
499 	if (m == 0 || m->m_len < sizeof(int))
500 		return (EINVAL);
501 
502 	i = mtod(m, int *);
503 	pim_assert = !!*i;
504 	return (0);
505 }
506 
507 /*
508  * Get PIM assert processing global
509  */
510 static int
511 get_assert(m)
512 	struct mbuf *m;
513 {
514 	int *i = mtod(m, int *);
515 
516 	*i = pim_assert;
517 	m->m_len = sizeof(int);
518 	return (0);
519 }
520 
521 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
522 
523 /*
524  * Add a vif to the vif table
525  */
526 static int
527 add_vif(m)
528 	struct mbuf *m;
529 {
530 	register struct vifctl *vifcp;
531 	register struct vif *vifp;
532 	struct ifaddr *ifa;
533 	struct ifnet *ifp;
534 	struct ifreq ifr;
535 	int error, s;
536 
537 	if (m == 0 || m->m_len < sizeof(struct vifctl))
538 		return (EINVAL);
539 
540 	vifcp = mtod(m, struct vifctl *);
541 	if (vifcp->vifc_vifi >= MAXVIFS)
542 		return (EINVAL);
543 
544 	vifp = &viftable[vifcp->vifc_vifi];
545 	if (vifp->v_lcl_addr.s_addr != 0)
546 		return (EADDRINUSE);
547 
548 	/* Find the interface with an address in AF_INET family. */
549 	sin.sin_addr = vifcp->vifc_lcl_addr;
550 	ifa = ifa_ifwithaddr(sintosa(&sin));
551 	if (ifa == 0)
552 		return (EADDRNOTAVAIL);
553 
554 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
555 		if (vifcp->vifc_flags & VIFF_SRCRT) {
556 			log(LOG_ERR, "Source routed tunnels not supported.\n");
557 			return (EOPNOTSUPP);
558 		}
559 
560 		/* Create a fake encapsulation interface. */
561 		ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
562 		bzero(ifp, sizeof(*ifp));
563 		sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
564 
565 		/* Prepare cached route entry. */
566 		bzero(&vifp->v_route, sizeof(vifp->v_route));
567 
568 		/*
569 		 * Tell ipip_mroute_input() to start looking at
570 		 * encapsulated packets.
571 		 */
572 		have_encap_tunnel = 1;
573 	} else {
574 		/* Use the physical interface associated with the address. */
575 		ifp = ifa->ifa_ifp;
576 
577 		/* Make sure the interface supports multicast. */
578 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
579 			return (EOPNOTSUPP);
580 
581 		/* Enable promiscuous reception of all IP multicasts. */
582 		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
583 		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
584 		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
585 		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
586 		if (error)
587 			return (error);
588 	}
589 
590 	s = splsoftnet();
591 	/* Define parameters for the tbf structure. */
592 	vifp->v_tbf.q_len = 0;
593 	vifp->v_tbf.n_tok = 0;
594 	vifp->v_tbf.last_pkt_t = 0;
595 
596 	vifp->v_flags = vifcp->vifc_flags;
597 	vifp->v_threshold = vifcp->vifc_threshold;
598 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
599 	vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
600 	vifp->v_ifp = ifp;
601 	vifp->v_rate_limit = vifcp->vifc_rate_limit;
602 #ifdef RSVP_ISI
603 	vifp->v_rsvp_on = 0;
604 	vifp->v_rsvpd = NULL;
605 #endif /* RSVP_ISI */
606 	/* Initialize per vif pkt counters. */
607 	vifp->v_pkt_in = 0;
608 	vifp->v_pkt_out = 0;
609 	vifp->v_bytes_in = 0;
610 	vifp->v_bytes_out = 0;
611 	splx(s);
612 
613 	/* Adjust numvifs up if the vifi is higher than numvifs. */
614 	if (numvifs <= vifcp->vifc_vifi)
615 		numvifs = vifcp->vifc_vifi + 1;
616 
617 	if (mrtdebug)
618 		log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
619 		    vifcp->vifc_vifi,
620 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
621 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
622 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
623 		    vifcp->vifc_threshold,
624 		    vifcp->vifc_rate_limit);
625 
626 	return (0);
627 }
628 
629 void
630 reset_vif(vifp)
631 	register struct vif *vifp;
632 {
633 	struct ifnet *ifp;
634 	struct ifreq ifr;
635 
636 	if (vifp->v_flags & VIFF_TUNNEL) {
637 		free(vifp->v_ifp, M_MRTABLE);
638 		if (vifp == last_encap_vif) {
639 			last_encap_vif = 0;
640 			last_encap_src = 0;
641 		}
642 	} else {
643 		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
644 		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
645 		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
646 		ifp = vifp->v_ifp;
647 		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
648 	}
649 	bzero((caddr_t)vifp, sizeof(*vifp));
650 }
651 
652 /*
653  * Delete a vif from the vif table
654  */
655 static int
656 del_vif(m)
657 	struct mbuf *m;
658 {
659 	vifi_t *vifip;
660 	register struct vif *vifp;
661 	register vifi_t vifi;
662 	int s;
663 
664 	if (m == 0 || m->m_len < sizeof(vifi_t))
665 		return (EINVAL);
666 
667 	vifip = mtod(m, vifi_t *);
668 	if (*vifip >= numvifs)
669 		return (EINVAL);
670 
671 	vifp = &viftable[*vifip];
672 	if (vifp->v_lcl_addr.s_addr == 0)
673 		return (EADDRNOTAVAIL);
674 
675 	s = splsoftnet();
676 
677 	reset_vif(vifp);
678 
679 	bzero((caddr_t)qtable[*vifip], sizeof(qtable[*vifip]));
680 
681 	/* Adjust numvifs down */
682 	for (vifi = numvifs; vifi > 0; vifi--)
683 		if (viftable[vifi-1].v_lcl_addr.s_addr != 0)
684 			break;
685 	numvifs = vifi;
686 
687 	splx(s);
688 
689 	if (mrtdebug)
690 		log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
691 
692 	return (0);
693 }
694 
695 void
696 vif_delete(ifp)
697 	struct ifnet *ifp;
698 {
699 	int i;
700 	struct vif *vifp;
701 
702 	for (i = 0; i < numvifs; i++) {
703 		vifp = &viftable[i];
704 		if (vifp->v_ifp == ifp)
705 			bzero((caddr_t)vifp, sizeof *vifp);
706 	}
707 
708 	for (i = numvifs; i > 0; i--)
709 		if (viftable[i - 1].v_lcl_addr.s_addr != 0)
710 			break;
711 	numvifs = i;
712 }
713 
714 static void
715 update_mfc(mfccp, rt)
716 	struct mfcctl *mfccp;
717 	struct mfc *rt;
718 {
719 	vifi_t vifi;
720 
721 	rt->mfc_parent = mfccp->mfcc_parent;
722 	for (vifi = 0; vifi < numvifs; vifi++)
723 		rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
724 	rt->mfc_expire = 0;
725 	rt->mfc_stall = 0;
726 }
727 
728 static void
729 expire_mfc(rt)
730 	struct mfc *rt;
731 {
732 	struct rtdetq *rte, *nrte;
733 
734 	for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
735 		nrte = rte->next;
736 		m_freem(rte->m);
737 		free(rte, M_MRTABLE);
738 	}
739 
740 	LIST_REMOVE(rt, mfc_hash);
741 	free(rt, M_MRTABLE);
742 }
743 
744 /*
745  * Add an mfc entry
746  */
747 static int
748 add_mfc(m)
749 	struct mbuf *m;
750 {
751 	struct mfcctl *mfccp;
752 	struct mfc *rt;
753 	u_int32_t hash = 0;
754 	struct rtdetq *rte, *nrte;
755 	register u_short nstl;
756 	int s;
757 
758 	if (m == 0 || m->m_len < sizeof(struct mfcctl))
759 		return (EINVAL);
760 
761 	mfccp = mtod(m, struct mfcctl *);
762 
763 	s = splsoftnet();
764 	MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
765 
766 	/* If an entry already exists, just update the fields */
767 	if (rt) {
768 		if (mrtdebug & DEBUG_MFC)
769 			log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n",
770 			    ntohl(mfccp->mfcc_origin.s_addr),
771 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
772 			    mfccp->mfcc_parent);
773 
774 		if (rt->mfc_expire)
775 			nexpire[hash]--;
776 
777 		update_mfc(mfccp, rt);
778 
779 		splx(s);
780 		return (0);
781 	}
782 
783 	/*
784 	 * Find the entry for which the upcall was made and update
785 	 */
786 	nstl = 0;
787 	hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
788 	for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
789 		if (rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr &&
790 		    rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr &&
791 		    rt->mfc_stall != NULL) {
792 			if (nstl++)
793 				log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",
794 				    "multiple kernel entries",
795 				    ntohl(mfccp->mfcc_origin.s_addr),
796 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
797 				    mfccp->mfcc_parent, rt->mfc_stall);
798 
799 			if (mrtdebug & DEBUG_MFC)
800 				log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n",
801 				    ntohl(mfccp->mfcc_origin.s_addr),
802 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
803 				    mfccp->mfcc_parent, rt->mfc_stall);
804 
805 			if (rt->mfc_expire)
806 				nexpire[hash]--;
807 
808 			/* free packets Qed at the end of this entry */
809 			for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
810 				nrte = rte->next;
811 #ifdef RSVP_ISI
812 				ip_mdq(rte->m, rte->ifp, rt, -1);
813 #else
814 				ip_mdq(rte->m, rte->ifp, rt);
815 #endif /* RSVP_ISI */
816 				m_freem(rte->m);
817 #ifdef UPCALL_TIMING
818 				collate(&rte->t);
819 #endif /* UPCALL_TIMING */
820 				free(rte, M_MRTABLE);
821 			}
822 
823 			update_mfc(mfccp, rt);
824 		}
825 	}
826 
827 	if (nstl == 0) {
828 		/*
829 		 * No mfc; make a new one
830 		 */
831 		if (mrtdebug & DEBUG_MFC)
832 			log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n",
833 			    ntohl(mfccp->mfcc_origin.s_addr),
834 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
835 			    mfccp->mfcc_parent);
836 
837 		rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
838 		if (rt == NULL) {
839 			splx(s);
840 			return (ENOBUFS);
841 		}
842 
843 		rt->mfc_origin = mfccp->mfcc_origin;
844 		rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
845 		/* initialize pkt counters per src-grp */
846 		rt->mfc_pkt_cnt = 0;
847 		rt->mfc_byte_cnt = 0;
848 		rt->mfc_wrong_if = 0;
849 		timerclear(&rt->mfc_last_assert);
850 		update_mfc(mfccp, rt);
851 
852 		/* insert new entry at head of hash chain */
853 		LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
854 	}
855 
856 	splx(s);
857 	return (0);
858 }
859 
860 #ifdef UPCALL_TIMING
861 /*
862  * collect delay statistics on the upcalls
863  */
864 static void collate(t)
865 register struct timeval *t;
866 {
867     register u_int32_t d;
868     register struct timeval tp;
869     register u_int32_t delta;
870 
871     microtime(&tp);
872 
873     if (timercmp(t, &tp, <)) {
874 	TV_DELTA(tp, *t, delta);
875 
876 	d = delta >> 10;
877 	if (d > 50)
878 	    d = 50;
879 
880 	++upcall_data[d];
881     }
882 }
883 #endif /* UPCALL_TIMING */
884 
885 /*
886  * Delete an mfc entry
887  */
888 static int
889 del_mfc(m)
890 	struct mbuf *m;
891 {
892 	struct mfcctl *mfccp;
893 	struct mfc *rt;
894 	int s;
895 
896 	if (m == 0 || m->m_len < sizeof(struct mfcctl))
897 		return (EINVAL);
898 
899 	mfccp = mtod(m, struct mfcctl *);
900 
901 	if (mrtdebug & DEBUG_MFC)
902 		log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n",
903 		    ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr));
904 
905 	s = splsoftnet();
906 
907 	MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
908 	if (rt == NULL) {
909 		splx(s);
910 		return (EADDRNOTAVAIL);
911 	}
912 
913 	LIST_REMOVE(rt, mfc_hash);
914 	free(rt, M_MRTABLE);
915 
916 	splx(s);
917 	return (0);
918 }
919 
920 static int
921 socket_send(s, mm, src)
922     struct socket *s;
923     struct mbuf *mm;
924     struct sockaddr_in *src;
925 {
926     if (s) {
927 	if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
928 	    sorwakeup(s);
929 	    return (0);
930 	}
931     }
932     m_freem(mm);
933     return (-1);
934 }
935 
936 /*
937  * IP multicast forwarding function. This function assumes that the packet
938  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
939  * pointed to by "ifp", and the packet is to be relayed to other networks
940  * that have members of the packet's destination IP multicast group.
941  *
942  * The packet is returned unscathed to the caller, unless it is
943  * erroneous, in which case a non-zero return value tells the caller to
944  * discard it.
945  */
946 
947 #define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
948 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
949 
950 int
951 #ifdef RSVP_ISI
952 ip_mforward(m, ifp, imo)
953 #else
954 ip_mforward(m, ifp)
955 #endif /* RSVP_ISI */
956     struct mbuf *m;
957     struct ifnet *ifp;
958 #ifdef RSVP_ISI
959     struct ip_moptions *imo;
960 #endif /* RSVP_ISI */
961 {
962     register struct ip *ip = mtod(m, struct ip *);
963     register struct mfc *rt;
964     register u_char *ipoptions;
965     static int srctun = 0;
966     register struct mbuf *mm;
967     int s;
968 #ifdef RSVP_ISI
969     register struct vif *vifp;
970     vifi_t vifi;
971 #endif /* RSVP_ISI */
972 
973     if (mrtdebug & DEBUG_FORWARD)
974 	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
975 	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
976 
977     if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
978 	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
979 	/*
980 	 * Packet arrived via a physical interface or
981 	 * an encapuslated tunnel.
982 	 */
983     } else {
984 	/*
985 	 * Packet arrived through a source-route tunnel.
986 	 * Source-route tunnels are no longer supported.
987 	 */
988 	if ((srctun++ % 1000) == 0)
989 	    log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n",
990 		ntohl(ip->ip_src.s_addr));
991 
992 	return (1);
993     }
994 
995 #ifdef RSVP_ISI
996     if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
997 	if (ip->ip_ttl < 255)
998 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
999 	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1000 	    vifp = viftable + vifi;
1001 	    printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
1002 		ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
1003 		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1004 		vifp->v_ifp->if_xname);
1005 	}
1006 	return (ip_mdq(m, ifp, rt, vifi));
1007     }
1008     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1009 	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1010 	    ntohl(ip->ip_src), ntohl(ip->ip_dst));
1011     }
1012 #endif /* RSVP_ISI */
1013 
1014     /*
1015      * Don't forward a packet with time-to-live of zero or one,
1016      * or a packet destined to a local-only group.
1017      */
1018     if (ip->ip_ttl <= 1 ||
1019 	IN_LOCAL_GROUP(ip->ip_dst.s_addr))
1020 	return (0);
1021 
1022     /*
1023      * Determine forwarding vifs from the forwarding cache table
1024      */
1025     s = splsoftnet();
1026     MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1027 
1028     /* Entry exists, so forward if necessary */
1029     if (rt != NULL) {
1030 	splx(s);
1031 #ifdef RSVP_ISI
1032 	return (ip_mdq(m, ifp, rt, -1));
1033 #else
1034 	return (ip_mdq(m, ifp, rt));
1035 #endif /* RSVP_ISI */
1036     } else {
1037 	/*
1038 	 * If we don't have a route for packet's origin,
1039 	 * Make a copy of the packet &
1040 	 * send message to routing daemon
1041 	 */
1042 
1043 	register struct mbuf *mb0;
1044 	register struct rtdetq *rte;
1045 	register u_int32_t hash;
1046 #ifdef UPCALL_TIMING
1047 	struct timeval tp;
1048 
1049 	microtime(&tp);
1050 #endif /* UPCALL_TIMING */
1051 
1052 	mrtstat.mrts_no_route++;
1053 	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1054 	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
1055 		ntohl(ip->ip_src.s_addr),
1056 		ntohl(ip->ip_dst.s_addr));
1057 
1058 	/*
1059 	 * Allocate mbufs early so that we don't do extra work if we are
1060 	 * just going to fail anyway.
1061 	 */
1062 	rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1063 	if (rte == NULL) {
1064 	    splx(s);
1065 	    return (ENOBUFS);
1066 	}
1067 	mb0 = m_copy(m, 0, M_COPYALL);
1068 	if (mb0 == NULL) {
1069 	    free(rte, M_MRTABLE);
1070 	    splx(s);
1071 	    return (ENOBUFS);
1072 	}
1073 
1074 	/* is there an upcall waiting for this packet? */
1075 	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1076 	for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
1077 	    if (ip->ip_src.s_addr == rt->mfc_origin.s_addr &&
1078 		ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr &&
1079 		rt->mfc_stall != NULL)
1080 		break;
1081 	}
1082 
1083 	if (rt == NULL) {
1084 	    int hlen = ip->ip_hl << 2;
1085 	    int i;
1086 	    struct igmpmsg *im;
1087 
1088 	    /* no upcall, so make a new entry */
1089 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1090 	    if (rt == NULL) {
1091 		free(rte, M_MRTABLE);
1092 		m_free(mb0);
1093 		splx(s);
1094 		return (ENOBUFS);
1095 	    }
1096 	    /* Make a copy of the header to send to the user level process */
1097 	    mm = m_copy(m, 0, hlen);
1098 	    M_PULLUP(mm, hlen);
1099 	    if (mm == NULL) {
1100 		free(rte, M_MRTABLE);
1101 		m_free(mb0);
1102 		free(rt, M_MRTABLE);
1103 		splx(s);
1104 		return (ENOBUFS);
1105 	    }
1106 
1107 	    /*
1108 	     * Send message to routing daemon to install
1109 	     * a route into the kernel table
1110 	     */
1111 	    sin.sin_addr = ip->ip_src;
1112 
1113 	    im = mtod(mm, struct igmpmsg *);
1114 	    im->im_msgtype	= IGMPMSG_NOCACHE;
1115 	    im->im_mbz		= 0;
1116 
1117 	    mrtstat.mrts_upcalls++;
1118 
1119 	    if (socket_send(ip_mrouter, mm, &sin) < 0) {
1120 		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1121 		++mrtstat.mrts_upq_sockfull;
1122 		free(rte, M_MRTABLE);
1123 		m_free(mb0);
1124 		free(rt, M_MRTABLE);
1125 		splx(s);
1126 		return (ENOBUFS);
1127 	    }
1128 
1129 	    /* insert new entry at head of hash chain */
1130 	    rt->mfc_origin = ip->ip_src;
1131 	    rt->mfc_mcastgrp = ip->ip_dst;
1132 	    rt->mfc_pkt_cnt = 0;
1133 	    rt->mfc_byte_cnt = 0;
1134 	    rt->mfc_wrong_if = 0;
1135 	    rt->mfc_expire = UPCALL_EXPIRE;
1136 	    nexpire[hash]++;
1137 	    for (i = 0; i < numvifs; i++)
1138 		rt->mfc_ttls[i] = 0;
1139 	    rt->mfc_parent = -1;
1140 
1141 	    /* link into table */
1142 	    LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1143 	    /* Add this entry to the end of the queue */
1144 	    rt->mfc_stall = rte;
1145 	} else {
1146 	    /* determine if q has overflowed */
1147 	    struct rtdetq **p;
1148 	    register int npkts = 0;
1149 
1150 	    for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1151 		if (++npkts > MAX_UPQ) {
1152 		    mrtstat.mrts_upq_ovflw++;
1153 		    free(rte, M_MRTABLE);
1154 		    m_free(mb0);
1155 		    splx(s);
1156 		    return (0);
1157 	        }
1158 
1159 	    /* Add this entry to the end of the queue */
1160 	    *p = rte;
1161 	}
1162 
1163 	rte->next		= NULL;
1164 	rte->m 			= mb0;
1165 	rte->ifp 		= ifp;
1166 #ifdef UPCALL_TIMING
1167 	rte->t			= tp;
1168 #endif /* UPCALL_TIMING */
1169 
1170 
1171 	splx(s);
1172 
1173 	return (0);
1174     }
1175 }
1176 
1177 
1178 /*ARGSUSED*/
1179 static void
1180 expire_upcalls(v)
1181 	void *v;
1182 {
1183 	int i;
1184 	int s;
1185 
1186 	s = splsoftnet();
1187 
1188 	for (i = 0; i < MFCTBLSIZ; i++) {
1189 		register struct mfc *rt, *nrt;
1190 
1191 		if (nexpire[i] == 0)
1192 			continue;
1193 
1194 		for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
1195 			nrt = rt->mfc_hash.le_next;
1196 
1197 			if (rt->mfc_expire == 0 ||
1198 			    --rt->mfc_expire > 0)
1199 				continue;
1200 			nexpire[i]--;
1201 
1202 			++mrtstat.mrts_cache_cleanups;
1203 			if (mrtdebug & DEBUG_EXPIRE)
1204 				log(LOG_DEBUG,
1205 				    "expire_upcalls: expiring (%x %x)\n",
1206 				    ntohl(rt->mfc_origin.s_addr),
1207 				    ntohl(rt->mfc_mcastgrp.s_addr));
1208 
1209 			expire_mfc(rt);
1210 		}
1211 	}
1212 
1213 	splx(s);
1214 	timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
1215 }
1216 
1217 /*
1218  * Packet forwarding routine once entry in the cache is made
1219  */
1220 static int
1221 #ifdef RSVP_ISI
1222 ip_mdq(m, ifp, rt, xmt_vif)
1223 #else
1224 ip_mdq(m, ifp, rt)
1225 #endif /* RSVP_ISI */
1226     register struct mbuf *m;
1227     register struct ifnet *ifp;
1228     register struct mfc *rt;
1229 #ifdef RSVP_ISI
1230     register vifi_t xmt_vif;
1231 #endif /* RSVP_ISI */
1232 {
1233     register struct ip  *ip = mtod(m, struct ip *);
1234     register vifi_t vifi;
1235     register struct vif *vifp;
1236     register int plen = ip->ip_len;
1237 
1238 /*
1239  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1240  * input, they shouldn't get counted on output, so statistics keeping is
1241  * separate.
1242  */
1243 #define MC_SEND(ip,vifp,m) {                             \
1244                 if ((vifp)->v_flags & VIFF_TUNNEL)	 \
1245                     encap_send((ip), (vifp), (m));       \
1246                 else                                     \
1247                     phyint_send((ip), (vifp), (m));      \
1248 }
1249 
1250 #ifdef RSVP_ISI
1251     /*
1252      * If xmt_vif is not -1, send on only the requested vif.
1253      *
1254      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1255      */
1256     if (xmt_vif < numvifs) {
1257         MC_SEND(ip, viftable + xmt_vif, m);
1258 	return (1);
1259     }
1260 #endif /* RSVP_ISI */
1261 
1262     /*
1263      * Don't forward if it didn't arrive from the parent vif for its origin.
1264      */
1265     vifi = rt->mfc_parent;
1266     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1267 	/* came in the wrong interface */
1268 	if (mrtdebug & DEBUG_FORWARD)
1269 	    log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1270 		ifp, vifi, vifi >= numvifs ? 0 : viftable[vifi].v_ifp);
1271 	++mrtstat.mrts_wrong_if;
1272 	++rt->mfc_wrong_if;
1273 	/*
1274 	 * If we are doing PIM assert processing, and we are forwarding
1275 	 * packets on this interface, and it is a broadcast medium
1276 	 * interface (and not a tunnel), send a message to the routing daemon.
1277 	 */
1278 	if (pim_assert && rt->mfc_ttls[vifi] &&
1279 		(ifp->if_flags & IFF_BROADCAST) &&
1280 		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1281 	    struct mbuf *mm;
1282 	    struct igmpmsg *im;
1283 	    int hlen = ip->ip_hl << 2;
1284 	    struct timeval now;
1285 	    register u_int32_t delta;
1286 
1287 	    microtime(&now);
1288 
1289 	    TV_DELTA(rt->mfc_last_assert, now, delta);
1290 
1291 	    if (delta > ASSERT_MSG_TIME) {
1292 		mm = m_copy(m, 0, hlen);
1293 		M_PULLUP(mm, hlen);
1294 		if (mm == NULL) {
1295 		    return (ENOBUFS);
1296 		}
1297 
1298 		rt->mfc_last_assert = now;
1299 
1300 		im = mtod(mm, struct igmpmsg *);
1301 		im->im_msgtype	= IGMPMSG_WRONGVIF;
1302 		im->im_mbz	= 0;
1303 		im->im_vif	= vifi;
1304 
1305 		sin.sin_addr = im->im_src;
1306 
1307 		socket_send(ip_mrouter, m, &sin);
1308 	    }
1309 	}
1310 	return (0);
1311     }
1312 
1313     /* If I sourced this packet, it counts as output, else it was input. */
1314     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1315 	viftable[vifi].v_pkt_out++;
1316 	viftable[vifi].v_bytes_out += plen;
1317     } else {
1318 	viftable[vifi].v_pkt_in++;
1319 	viftable[vifi].v_bytes_in += plen;
1320     }
1321     rt->mfc_pkt_cnt++;
1322     rt->mfc_byte_cnt += plen;
1323 
1324     /*
1325      * For each vif, decide if a copy of the packet should be forwarded.
1326      * Forward if:
1327      *		- the ttl exceeds the vif's threshold
1328      *		- there are group members downstream on interface
1329      */
1330     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1331 	if ((rt->mfc_ttls[vifi] > 0) &&
1332 	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1333 	    vifp->v_pkt_out++;
1334 	    vifp->v_bytes_out += plen;
1335 	    MC_SEND(ip, vifp, m);
1336 	}
1337 
1338     return (0);
1339 }
1340 
1341 #ifdef RSVP_ISI
1342 /*
1343  * check if a vif number is legal/ok. This is used by ip_output, to export
1344  * numvifs there,
1345  */
1346 int
1347 legal_vif_num(vif)
1348     int vif;
1349 {
1350     if (vif >= 0 && vif < numvifs)
1351        return (1);
1352     else
1353        return (0);
1354 }
1355 #endif /* RSVP_ISI */
1356 
1357 static void
1358 phyint_send(ip, vifp, m)
1359 	struct ip *ip;
1360 	struct vif *vifp;
1361 	struct mbuf *m;
1362 {
1363 	register struct mbuf *mb_copy;
1364 	register int hlen = ip->ip_hl << 2;
1365 
1366 	/*
1367 	 * Make a new reference to the packet; make sure that
1368 	 * the IP header is actually copied, not just referenced,
1369 	 * so that ip_output() only scribbles on the copy.
1370 	 */
1371 	mb_copy = m_copy(m, 0, M_COPYALL);
1372 	M_PULLUP(mb_copy, hlen);
1373 	if (mb_copy == NULL)
1374 		return;
1375 
1376 	if (vifp->v_rate_limit <= 0)
1377 		tbf_send_packet(vifp, mb_copy);
1378 	else
1379 		tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1380 }
1381 
1382 static void
1383 encap_send(ip, vifp, m)
1384 	register struct ip *ip;
1385 	register struct vif *vifp;
1386 	register struct mbuf *m;
1387 {
1388 	register struct mbuf *mb_copy;
1389 	register struct ip *ip_copy;
1390 	register int i, len = ip->ip_len + sizeof(multicast_encap_iphdr);
1391 
1392 	/*
1393 	 * copy the old packet & pullup it's IP header into the
1394 	 * new mbuf so we can modify it.  Try to fill the new
1395 	 * mbuf since if we don't the ethernet driver will.
1396 	 */
1397 	MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1398 	if (mb_copy == NULL)
1399 		return;
1400 	mb_copy->m_data += max_linkhdr;
1401 	mb_copy->m_pkthdr.len = len;
1402 	mb_copy->m_len = sizeof(multicast_encap_iphdr);
1403 
1404 	if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1405 		m_freem(mb_copy);
1406 		return;
1407 	}
1408 	i = MHLEN - max_linkhdr;
1409 	if (i > len)
1410 		i = len;
1411 	mb_copy = m_pullup(mb_copy, i);
1412 	if (mb_copy == NULL)
1413 		return;
1414 
1415 	/*
1416 	 * fill in the encapsulating IP header.
1417 	 */
1418 	ip_copy = mtod(mb_copy, struct ip *);
1419 	*ip_copy = multicast_encap_iphdr;
1420 	ip_copy->ip_id = htons(ip_randomid());
1421 	ip_copy->ip_len = len;
1422 	ip_copy->ip_src = vifp->v_lcl_addr;
1423 	ip_copy->ip_dst = vifp->v_rmt_addr;
1424 
1425 	/*
1426 	 * turn the encapsulated IP header back into a valid one.
1427 	 */
1428 	ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1429 	--ip->ip_ttl;
1430 	HTONS(ip->ip_len);
1431 	HTONS(ip->ip_off);
1432 	ip->ip_sum = 0;
1433 #if defined(LBL) && !defined(ultrix) && !defined(i386)
1434 	ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1435 #else
1436 	mb_copy->m_data += sizeof(multicast_encap_iphdr);
1437 	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1438 	mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1439 #endif
1440 
1441 	if (vifp->v_rate_limit <= 0)
1442 		tbf_send_packet(vifp, mb_copy);
1443 	else
1444 		tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1445 }
1446 
1447 /*
1448  * De-encapsulate a packet and feed it back through ip input (this
1449  * routine is called whenever IP gets a packet with proto type
1450  * ENCAP_PROTO and a local destination address).
1451  */
1452 void
1453 #if __STDC__
1454 ipip_mroute_input(struct mbuf *m, ...)
1455 #else
1456 ipip_mroute_input(m, va_alist)
1457 	struct mbuf *m;
1458 	va_dcl
1459 #endif
1460 {
1461 	register int hlen;
1462 	register struct ip *ip = mtod(m, struct ip *);
1463 	register int s;
1464 	register struct ifqueue *ifq;
1465 	register struct vif *vifp;
1466 	va_list ap;
1467 
1468 	va_start(ap, m);
1469 	hlen = va_arg(ap, int);
1470 	va_end(ap);
1471 
1472 	if (!have_encap_tunnel) {
1473 		rip_input(m, 0);
1474 		return;
1475 	}
1476 
1477 	/*
1478 	 * dump the packet if we don't have an encapsulating tunnel
1479 	 * with the source.
1480 	 * Note:  This code assumes that the remote site IP address
1481 	 * uniquely identifies the tunnel (i.e., that this site has
1482 	 * at most one tunnel with the remote site).
1483 	 */
1484 	if (ip->ip_src.s_addr != last_encap_src) {
1485 		register struct vif *vife;
1486 
1487 		vifp = viftable;
1488 		vife = vifp + numvifs;
1489 		for (; vifp < vife; vifp++)
1490 			if (vifp->v_flags & VIFF_TUNNEL &&
1491 			    vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr)
1492 				break;
1493 		if (vifp == vife) {
1494 			mrtstat.mrts_cant_tunnel++; /*XXX*/
1495 			m_freem(m);
1496 			if (mrtdebug)
1497 				log(LOG_DEBUG,
1498 				    "ip_mforward: no tunnel with %x\n",
1499 				    ntohl(ip->ip_src.s_addr));
1500 			return;
1501 		}
1502 		last_encap_vif = vifp;
1503 		last_encap_src = ip->ip_src.s_addr;
1504 	} else
1505 		vifp = last_encap_vif;
1506 
1507 	m->m_data += hlen;
1508 	m->m_len -= hlen;
1509 	m->m_pkthdr.len -= hlen;
1510 	m->m_pkthdr.rcvif = vifp->v_ifp;
1511 	ifq = &ipintrq;
1512 	s = splimp();
1513 	if (IF_QFULL(ifq)) {
1514 		IF_DROP(ifq);
1515 		m_freem(m);
1516 	} else {
1517 		IF_ENQUEUE(ifq, m);
1518 		/*
1519 		 * normally we would need a "schednetisr(NETISR_IP)"
1520 		 * here but we were called by ip_input and it is going
1521 		 * to loop back & try to dequeue the packet we just
1522 		 * queued as soon as we return so we avoid the
1523 		 * unnecessary software interrrupt.
1524 		 */
1525 	}
1526 	splx(s);
1527 }
1528 
1529 /*
1530  * Token bucket filter module
1531  */
1532 static void
1533 tbf_control(vifp, m, ip, p_len)
1534 	register struct vif *vifp;
1535 	register struct mbuf *m;
1536 	register struct ip *ip;
1537 	register u_int32_t p_len;
1538 {
1539 
1540 	tbf_update_tokens(vifp);
1541 
1542 	/*
1543 	 * If there are enough tokens, and the queue is empty, send this packet
1544 	 * out immediately.  Otherwise, try to insert it on this vif's queue.
1545 	 */
1546 	if (vifp->v_tbf.q_len == 0) {
1547 		if (p_len <= vifp->v_tbf.n_tok) {
1548 			vifp->v_tbf.n_tok -= p_len;
1549 			tbf_send_packet(vifp, m);
1550 		} else if (p_len > MAX_BKT_SIZE) {
1551 			/* drop if packet is too large */
1552 			mrtstat.mrts_pkt2large++;
1553 			m_freem(m);
1554 		} else {
1555 			/* queue packet and timeout till later */
1556 			tbf_queue(vifp, m, ip);
1557 			timeout(tbf_reprocess_q, vifp, 1);
1558 		}
1559 	} else {
1560 		if (vifp->v_tbf.q_len >= MAXQSIZE &&
1561 		    !tbf_dq_sel(vifp, ip)) {
1562 			/* queue length too much, and couldn't make room */
1563 			mrtstat.mrts_q_overflow++;
1564 			m_freem(m);
1565 		} else {
1566 			/* queue length low enough, or made room */
1567 			tbf_queue(vifp, m, ip);
1568 			tbf_process_q(vifp);
1569 		}
1570 	}
1571 }
1572 
1573 /*
1574  * adds a packet to the queue at the interface
1575  */
1576 static void
1577 tbf_queue(vifp, m, ip)
1578     register struct vif *vifp;
1579     register struct mbuf *m;
1580     register struct ip *ip;
1581 {
1582     register u_int32_t ql;
1583     register int index = (vifp - viftable);
1584     register int s = splsoftnet();
1585 
1586     ql = vifp->v_tbf.q_len;
1587 
1588     qtable[index][ql].pkt_m = m;
1589     qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1590     qtable[index][ql].pkt_ip = ip;
1591 
1592     vifp->v_tbf.q_len++;
1593     splx(s);
1594 }
1595 
1596 
1597 /*
1598  * processes the queue at the interface
1599  */
1600 static void
1601 tbf_process_q(vifp)
1602     register struct vif *vifp;
1603 {
1604     register struct pkt_queue pkt_1;
1605     register int index = (vifp - viftable);
1606     register int s = splsoftnet();
1607 
1608     /* loop through the queue at the interface and send as many packets
1609      * as possible
1610      */
1611     while (vifp->v_tbf.q_len > 0) {
1612 	/* locate the first packet */
1613 	pkt_1 = qtable[index][0];
1614 
1615 	/* determine if the packet can be sent */
1616 	if (pkt_1.pkt_len <= vifp->v_tbf.n_tok) {
1617 	    /* if so,
1618 	     * reduce no of tokens, dequeue the queue,
1619 	     * send the packet.
1620 	     */
1621 	    vifp->v_tbf.n_tok -= pkt_1.pkt_len;
1622 
1623 	    tbf_dequeue(vifp, 0);
1624 	    tbf_send_packet(vifp, pkt_1.pkt_m);
1625 	} else
1626 	    break;
1627     }
1628     splx(s);
1629 }
1630 
1631 /*
1632  * removes the jth packet from the queue at the interface
1633  */
1634 static void
1635 tbf_dequeue(vifp, j)
1636     register struct vif *vifp;
1637     register int j;
1638 {
1639     register u_int32_t index = vifp - viftable;
1640     register int i;
1641 
1642     for (i=j+1; i <= vifp->v_tbf.q_len - 1; i++) {
1643 	qtable[index][i-1] = qtable[index][i];
1644     }
1645     qtable[index][i-1].pkt_m = NULL;
1646     qtable[index][i-1].pkt_len = NULL;
1647     qtable[index][i-1].pkt_ip = NULL;
1648 
1649     vifp->v_tbf.q_len--;
1650 
1651     if (tbfdebug > 1)
1652 	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1);
1653 }
1654 
1655 static void
1656 tbf_reprocess_q(arg)
1657 	void *arg;
1658 {
1659 	register struct vif *vifp = arg;
1660 
1661 	if (ip_mrouter == NULL)
1662 		return;
1663 
1664 	tbf_update_tokens(vifp);
1665 	tbf_process_q(vifp);
1666 
1667 	if (vifp->v_tbf.q_len)
1668 		timeout(tbf_reprocess_q, vifp, 1);
1669 }
1670 
1671 /* function that will selectively discard a member of the queue
1672  * based on the precedence value and the priority obtained through
1673  * a lookup table - not yet implemented accurately!
1674  */
1675 static int
1676 tbf_dq_sel(vifp, ip)
1677     register struct vif *vifp;
1678     register struct ip *ip;
1679 {
1680     register int i;
1681     register int s = splsoftnet();
1682     register u_int p;
1683 
1684     p = priority(vifp, ip);
1685 
1686     for(i=vifp->v_tbf.q_len-1;i >= 0;i--) {
1687 	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1688 	    m_freem(qtable[vifp-viftable][i].pkt_m);
1689 	    tbf_dequeue(vifp, i);
1690 	    splx(s);
1691 	    mrtstat.mrts_drop_sel++;
1692 	    return (1);
1693 	}
1694     }
1695     splx(s);
1696     return (0);
1697 }
1698 
1699 static void
1700 tbf_send_packet(vifp,m)
1701     register struct vif *vifp;
1702     register struct mbuf *m;
1703 {
1704     int error;
1705     int s = splsoftnet();
1706 
1707     if (vifp->v_flags & VIFF_TUNNEL) {
1708 	/* If tunnel options */
1709 	ip_output(m, (struct mbuf *)0, &vifp->v_route,
1710 		  IP_FORWARDING, NULL, NULL);
1711     } else {
1712 	/* if physical interface option, extract the options and then send */
1713 	struct ip *ip = mtod(m, struct ip *);
1714 	struct ip_moptions imo;
1715 	imo.imo_multicast_ifp  = vifp->v_ifp;
1716 	imo.imo_multicast_ttl  = ip->ip_ttl - 1;
1717 	imo.imo_multicast_loop = 1;
1718 #ifdef RSVP_ISI
1719 	imo.imo_multicast_vif  = -1;
1720 #endif
1721 
1722 	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1723 			  IP_FORWARDING|IP_MULTICASTOPTS, &imo, NULL);
1724 	if (mrtdebug & DEBUG_XMIT)
1725 	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error);
1726     }
1727     splx(s);
1728 }
1729 
1730 /* determine the current time and then
1731  * the elapsed time (between the last time and time now)
1732  * in milliseconds & update the no. of tokens in the bucket
1733  */
1734 static void
1735 tbf_update_tokens(vifp)
1736     register struct vif *vifp;
1737 {
1738     struct timeval tp;
1739     register u_int32_t t;
1740     register u_int32_t elapsed;
1741     register int s = splsoftnet();
1742 
1743     microtime(&tp);
1744 
1745     t = tp.tv_sec*1000 + tp.tv_usec/1000;
1746 
1747     elapsed = (t - vifp->v_tbf.last_pkt_t) * vifp->v_rate_limit /8;
1748     vifp->v_tbf.n_tok += elapsed;
1749     vifp->v_tbf.last_pkt_t = t;
1750 
1751     if (vifp->v_tbf.n_tok > MAX_BKT_SIZE)
1752 	vifp->v_tbf.n_tok = MAX_BKT_SIZE;
1753 
1754     splx(s);
1755 }
1756 
1757 static int
1758 priority(vifp, ip)
1759     register struct vif *vifp;
1760     register struct ip *ip;
1761 {
1762     register int prio;
1763 
1764     /* temporary hack; may add general packet classifier some day */
1765 
1766     /*
1767      * The UDP port space is divided up into four priority ranges:
1768      * [0, 16384)     : unclassified - lowest priority
1769      * [16384, 32768) : audio - highest priority
1770      * [32768, 49152) : whiteboard - medium priority
1771      * [49152, 65536) : video - low priority
1772      */
1773     if (ip->ip_p == IPPROTO_UDP) {
1774 	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1775 
1776 	switch (ntohs(udp->uh_dport) & 0xc000) {
1777 	    case 0x4000:
1778 		prio = 70;
1779 		break;
1780 	    case 0x8000:
1781 		prio = 60;
1782 		break;
1783 	    case 0xc000:
1784 		prio = 55;
1785 		break;
1786 	    default:
1787 		prio = 50;
1788 		break;
1789 	}
1790 
1791 	if (tbfdebug > 1) log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio);
1792     } else
1793 	prio = 50;
1794 
1795 
1796     return (prio);
1797 }
1798 
1799 /*
1800  * End of token bucket filter modifications
1801  */
1802 
1803 #ifdef RSVP_ISI
1804 
1805 int
1806 ip_rsvp_vif_init(so, m)
1807     struct socket *so;
1808     struct mbuf *m;
1809 {
1810     int i;
1811     register int s;
1812 
1813     if (rsvpdebug)
1814 	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1815 	       so->so_type, so->so_proto->pr_protocol);
1816 
1817     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1818 	return (EOPNOTSUPP);
1819 
1820     /* Check mbuf. */
1821     if (m == NULL || m->m_len != sizeof(int)) {
1822 	return (EINVAL);
1823     }
1824     i = *(mtod(m, int *));
1825 
1826     if (rsvpdebug)
1827 	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1828 
1829     s = splsoftnet();
1830 
1831     /* Check vif. */
1832     if (!legal_vif_num(i)) {
1833 	splx(s);
1834 	return (EADDRNOTAVAIL);
1835     }
1836 
1837     /* Check if socket is available. */
1838     if (viftable[i].v_rsvpd != NULL) {
1839 	splx(s);
1840 	return (EADDRINUSE);
1841     }
1842 
1843     viftable[i].v_rsvpd = so;
1844     /* This may seem silly, but we need to be sure we don't over-increment
1845      * the RSVP counter, in case something slips up.
1846      */
1847     if (!viftable[i].v_rsvp_on) {
1848 	viftable[i].v_rsvp_on = 1;
1849 	rsvp_on++;
1850     }
1851 
1852     splx(s);
1853     return (0);
1854 }
1855 
1856 int
1857 ip_rsvp_vif_done(so, m)
1858     struct socket *so;
1859     struct mbuf *m;
1860 {
1861     int i;
1862     register int s;
1863 
1864     if (rsvpdebug)
1865 	printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1866 	       so->so_type, so->so_proto->pr_protocol);
1867 
1868     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1869 	return (EOPNOTSUPP);
1870 
1871     /* Check mbuf. */
1872     if (m == NULL || m->m_len != sizeof(int)) {
1873 	return (EINVAL);
1874     }
1875     i = *(mtod(m, int *));
1876 
1877     s = splsoftnet();
1878 
1879     /* Check vif. */
1880     if (!legal_vif_num(i)) {
1881 	splx(s);
1882         return (EADDRNOTAVAIL);
1883     }
1884 
1885     if (rsvpdebug)
1886 	printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1887 	       viftable[i].v_rsvpd, so);
1888 
1889     viftable[i].v_rsvpd = NULL;
1890     /* This may seem silly, but we need to be sure we don't over-decrement
1891      * the RSVP counter, in case something slips up.
1892      */
1893     if (viftable[i].v_rsvp_on) {
1894 	viftable[i].v_rsvp_on = 0;
1895 	rsvp_on--;
1896     }
1897 
1898     splx(s);
1899     return (0);
1900 }
1901 
1902 void
1903 ip_rsvp_force_done(so)
1904     struct socket *so;
1905 {
1906     int vifi;
1907     register int s;
1908 
1909     /* Don't bother if it is not the right type of socket. */
1910     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1911 	return;
1912 
1913     s = splsoftnet();
1914 
1915     /* The socket may be attached to more than one vif...this
1916      * is perfectly legal.
1917      */
1918     for (vifi = 0; vifi < numvifs; vifi++) {
1919 	if (viftable[vifi].v_rsvpd == so) {
1920 	    viftable[vifi].v_rsvpd = NULL;
1921 	    /* This may seem silly, but we need to be sure we don't
1922 	     * over-decrement the RSVP counter, in case something slips up.
1923 	     */
1924 	    if (viftable[vifi].v_rsvp_on) {
1925 		viftable[vifi].v_rsvp_on = 0;
1926 		rsvp_on--;
1927 	    }
1928 	}
1929     }
1930 
1931     splx(s);
1932     return;
1933 }
1934 
1935 void
1936 rsvp_input(m, ifp)
1937     struct mbuf *m;
1938     struct ifnet *ifp;
1939 {
1940     int vifi;
1941     register struct ip *ip = mtod(m, struct ip *);
1942     static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
1943     register int s;
1944 
1945     if (rsvpdebug)
1946 	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
1947 
1948     /* Can still get packets with rsvp_on = 0 if there is a local member
1949      * of the group to which the RSVP packet is addressed.  But in this
1950      * case we want to throw the packet away.
1951      */
1952     if (!rsvp_on) {
1953 	m_freem(m);
1954 	return;
1955     }
1956 
1957     /* If the old-style non-vif-associated socket is set, then use
1958      * it and ignore the new ones.
1959      */
1960     if (ip_rsvpd != NULL) {
1961 	if (rsvpdebug)
1962 	    printf("rsvp_input: Sending packet up old-style socket\n");
1963 	rip_input(m, 0);
1964 	return;
1965     }
1966 
1967     s = splsoftnet();
1968 
1969     if (rsvpdebug)
1970 	printf("rsvp_input: check vifs\n");
1971 
1972     /* Find which vif the packet arrived on. */
1973     for (vifi = 0; vifi < numvifs; vifi++) {
1974 	if (viftable[vifi].v_ifp == ifp)
1975 	    break;
1976     }
1977 
1978     if (vifi == numvifs) {
1979 	/* Can't find vif packet arrived on. Drop packet. */
1980 	if (rsvpdebug)
1981 	    printf("rsvp_input: Can't find vif for packet...dropping it.\n");
1982 	m_freem(m);
1983 	splx(s);
1984 	return;
1985     }
1986 
1987     if (rsvpdebug)
1988 	printf("rsvp_input: check socket\n");
1989 
1990     if (viftable[vifi].v_rsvpd == NULL) {
1991 	/* drop packet, since there is no specific socket for this
1992 	 * interface */
1993 	if (rsvpdebug)
1994 	    printf("rsvp_input: No socket defined for vif %d\n",vifi);
1995 	m_freem(m);
1996 	splx(s);
1997 	return;
1998     }
1999 
2000     rsvp_src.sin_addr = ip->ip_src;
2001 
2002     if (rsvpdebug && m)
2003 	printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2004 	       m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
2005 
2006     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2007 	if (rsvpdebug)
2008 	    printf("rsvp_input: Failed to append to socket\n");
2009     else
2010 	if (rsvpdebug)
2011 	    printf("rsvp_input: send packet up\n");
2012 
2013     splx(s);
2014 }
2015 #endif /* RSVP_ISI */
2016