xref: /onnv-gate/usr/src/uts/common/inet/ip/ip_mroute.c (revision 8485:633e5b5eb268)
17098Smeem /*
2*8485SPeter.Memishian@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
37098Smeem  * Use is subject to license terms.
47098Smeem  */
50Sstevel@tonic-gate /*
60Sstevel@tonic-gate  * CDDL HEADER START
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
91676Sjpk  * Common Development and Distribution License (the "License").
101676Sjpk  * You may not use this file except in compliance with the License.
110Sstevel@tonic-gate  *
120Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
130Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
140Sstevel@tonic-gate  * See the License for the specific language governing permissions
150Sstevel@tonic-gate  * and limitations under the License.
160Sstevel@tonic-gate  *
170Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
180Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
190Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
200Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
210Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
220Sstevel@tonic-gate  *
230Sstevel@tonic-gate  * CDDL HEADER END
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /*
267240Srh87107  * Copyright 2008 Sun Microsystems, Inc.
270Sstevel@tonic-gate  * All rights reserved.  Use is subject to license terms.
280Sstevel@tonic-gate  */
290Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*
320Sstevel@tonic-gate  * Procedures for the kernel part of DVMRP,
330Sstevel@tonic-gate  * a Distance-Vector Multicast Routing Protocol.
340Sstevel@tonic-gate  * (See RFC-1075)
350Sstevel@tonic-gate  * Written by David Waitzman, BBN Labs, August 1988.
360Sstevel@tonic-gate  * Modified by Steve Deering, Stanford, February 1989.
370Sstevel@tonic-gate  * Modified by Mark J. Steiglitz, Stanford, May, 1991
380Sstevel@tonic-gate  * Modified by Van Jacobson, LBL, January 1993
390Sstevel@tonic-gate  * Modified by Ajit Thyagarajan, PARC, August 1993
400Sstevel@tonic-gate  * Modified by Bill Fenner, PARC, April 1995
410Sstevel@tonic-gate  *
420Sstevel@tonic-gate  * MROUTING 3.5
430Sstevel@tonic-gate  */
440Sstevel@tonic-gate 
450Sstevel@tonic-gate /*
460Sstevel@tonic-gate  * TODO
470Sstevel@tonic-gate  * - function pointer field in vif, void *vif_sendit()
480Sstevel@tonic-gate  */
490Sstevel@tonic-gate 
500Sstevel@tonic-gate #include <sys/types.h>
510Sstevel@tonic-gate #include <sys/stream.h>
520Sstevel@tonic-gate #include <sys/stropts.h>
530Sstevel@tonic-gate #include <sys/strlog.h>
540Sstevel@tonic-gate #include <sys/systm.h>
550Sstevel@tonic-gate #include <sys/ddi.h>
560Sstevel@tonic-gate #include <sys/cmn_err.h>
570Sstevel@tonic-gate #include <sys/zone.h>
580Sstevel@tonic-gate 
590Sstevel@tonic-gate #include <sys/param.h>
600Sstevel@tonic-gate #include <sys/socket.h>
610Sstevel@tonic-gate #include <sys/vtrace.h>
620Sstevel@tonic-gate #include <sys/debug.h>
630Sstevel@tonic-gate #include <net/if.h>
640Sstevel@tonic-gate #include <sys/sockio.h>
650Sstevel@tonic-gate #include <netinet/in.h>
660Sstevel@tonic-gate #include <net/if_dl.h>
670Sstevel@tonic-gate 
680Sstevel@tonic-gate #include <inet/common.h>
690Sstevel@tonic-gate #include <inet/mi.h>
700Sstevel@tonic-gate #include <inet/nd.h>
710Sstevel@tonic-gate #include <inet/mib2.h>
720Sstevel@tonic-gate #include <netinet/ip6.h>
730Sstevel@tonic-gate #include <inet/ip.h>
740Sstevel@tonic-gate #include <inet/snmpcom.h>
750Sstevel@tonic-gate 
760Sstevel@tonic-gate #include <netinet/igmp.h>
770Sstevel@tonic-gate #include <netinet/igmp_var.h>
780Sstevel@tonic-gate #include <netinet/udp.h>
790Sstevel@tonic-gate #include <netinet/ip_mroute.h>
800Sstevel@tonic-gate #include <inet/ip_multi.h>
810Sstevel@tonic-gate #include <inet/ip_ire.h>
820Sstevel@tonic-gate #include <inet/ip_if.h>
830Sstevel@tonic-gate #include <inet/ipclassifier.h>
840Sstevel@tonic-gate 
850Sstevel@tonic-gate #include <netinet/pim.h>
860Sstevel@tonic-gate 
870Sstevel@tonic-gate 
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate  * MT Design:
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  * There are three main data structures viftable, mfctable and tbftable that
920Sstevel@tonic-gate  * need to be protected against MT races.
930Sstevel@tonic-gate  *
 * viftable is a fixed length array of vif structs. There is no lock to protect
 * the whole array, instead each struct is protected by its own individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines the
 * current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use.  When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. See
 * lock_good_vif() and unlock_good_vif().
1070Sstevel@tonic-gate  *
1080Sstevel@tonic-gate  * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
1090Sstevel@tonic-gate  * of the vif struct.
1100Sstevel@tonic-gate  *
1110Sstevel@tonic-gate  * tbftable is also a fixed length array of tbf structs and is only accessed
1120Sstevel@tonic-gate  * via v_tbf.  It is protected by its own lock tbf_lock.
1130Sstevel@tonic-gate  *
 * Lock Ordering is
 * v_lock --> tbf_lock
 * v_lock --> ill_lock
1170Sstevel@tonic-gate  *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
 * protect the struct elements.
1230Sstevel@tonic-gate  *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
1290Sstevel@tonic-gate  *
1300Sstevel@tonic-gate  * Locking Hierarchy:
1310Sstevel@tonic-gate  * The bucket lock should be acquired before the mfc struct lock.
1320Sstevel@tonic-gate  * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
1330Sstevel@tonic-gate  * operations on the bucket struct.
1340Sstevel@tonic-gate  *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock,
 * causing the stats to be approximate, not exact.
1400Sstevel@tonic-gate  */
1410Sstevel@tonic-gate 
/* Vif index meaning "no route for src"; the value comes from mrouted. */
#define	NO_VIF	MAXVIFS
1430Sstevel@tonic-gate 
1440Sstevel@tonic-gate /*
1450Sstevel@tonic-gate  * Timeouts:
1460Sstevel@tonic-gate  * 	Upcall timeouts - BSD uses boolean_t mfc->expire and
1470Sstevel@tonic-gate  *	nexpire[MFCTBLSIZE], the number of times expire has been called.
1480Sstevel@tonic-gate  *	SunOS 5.x uses mfc->timeout for each mfc.
1490Sstevel@tonic-gate  *	Some Unixes are limited in the number of simultaneous timeouts
1500Sstevel@tonic-gate  * 	that can be run, SunOS 5.x does not have this restriction.
1510Sstevel@tonic-gate  */
1520Sstevel@tonic-gate 
/*
 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE.
 */
#define	EXPIRE_TIMEOUT	(hz/4)		/* 4x / second */
#define	UPCALL_EXPIRE	6		/* number of timeouts */
1600Sstevel@tonic-gate 
/*
 * Hash function for a (source, group) entry: the source address (a) and
 * the group address (g) are each folded with their own >> 10 and >> 20
 * shifts, everything is XORed together, and MFCHASHMOD() reduces the result.
 */
#define	MFCHASH(a, g)							\
	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^			\
	    ((g) >> 20) ^ ((g) >> 10) ^ (g))
1660Sstevel@tonic-gate 
/* Interval used for token bucket filter reprocessing: 100x / second. */
#define	TBF_REPROCESS	(hz / 100)
1680Sstevel@tonic-gate 
/* Marker value identifying a PIM packet that came on a Register interface. */
#define	PIM_REGISTER_MARKER	0xffffffffU
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate /* Function declarations */
1733448Sdh155122 static int	add_mfc(struct mfcctl *, ip_stack_t *);
1745240Snordmark static int	add_vif(struct vifctl *, conn_t *, mblk_t *, ip_stack_t *);
1753448Sdh155122 static int	del_mfc(struct mfcctl *, ip_stack_t *);
1765240Snordmark static int	del_vif(vifi_t *, conn_t *, mblk_t *, ip_stack_t *);
1770Sstevel@tonic-gate static void	del_vifp(struct vif *);
1780Sstevel@tonic-gate static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1790Sstevel@tonic-gate static void	expire_upcalls(void *);
1803448Sdh155122 static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
1813448Sdh155122 static void	free_queue(struct mfc *);
1823448Sdh155122 static int	get_assert(uchar_t *, ip_stack_t *);
1833448Sdh155122 static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
1843448Sdh155122 static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
1850Sstevel@tonic-gate static int	get_version(uchar_t *);
1863448Sdh155122 static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
1870Sstevel@tonic-gate static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
1880Sstevel@tonic-gate 		    ipaddr_t, struct mfc *);
1895240Snordmark static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
1900Sstevel@tonic-gate static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1913448Sdh155122 static int	register_mforward(queue_t *, mblk_t *, ill_t *);
1920Sstevel@tonic-gate static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1933448Sdh155122 static int	set_assert(int *, ip_stack_t *);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate /*
1960Sstevel@tonic-gate  * Token Bucket Filter functions
1970Sstevel@tonic-gate  */
1980Sstevel@tonic-gate static int  priority(struct vif *, ipha_t *);
1990Sstevel@tonic-gate static void tbf_control(struct vif *, mblk_t *, ipha_t *);
2000Sstevel@tonic-gate static int  tbf_dq_sel(struct vif *, ipha_t *);
2010Sstevel@tonic-gate static void tbf_process_q(struct vif *);
2020Sstevel@tonic-gate static void tbf_queue(struct vif *, mblk_t *);
2030Sstevel@tonic-gate static void tbf_reprocess_q(void *);
2040Sstevel@tonic-gate static void tbf_send_packet(struct vif *, mblk_t *);
2050Sstevel@tonic-gate static void tbf_update_tokens(struct vif *);
2060Sstevel@tonic-gate static void release_mfc(struct mfcb *);
2070Sstevel@tonic-gate 
2083448Sdh155122 static boolean_t is_mrouter_off(ip_stack_t *);
/*
 * Encapsulation packets
 */

/* TTL placed in the prototype IP header for encapsulated packets. */
#define	ENCAP_TTL	64
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate /* prototype IP hdr for encapsulated packets */
2160Sstevel@tonic-gate static ipha_t multicast_encap_iphdr = {
2170Sstevel@tonic-gate 	IP_SIMPLE_HDR_VERSION,
2180Sstevel@tonic-gate 	0,				/* tos */
2190Sstevel@tonic-gate 	sizeof (ipha_t),		/* total length */
2200Sstevel@tonic-gate 	0,				/* id */
2210Sstevel@tonic-gate 	0,				/* frag offset */
2220Sstevel@tonic-gate 	ENCAP_TTL, IPPROTO_ENCAP,
2230Sstevel@tonic-gate 	0,				/* checksum */
2240Sstevel@tonic-gate };
2250Sstevel@tonic-gate 
/*
 * Rate limit for assert notification messages, in nsec (3 seconds).
 *
 * The value does not fit in a 32-bit long, so without a suffix its type
 * depends on the data model and the C dialect in use.  The LL suffix pins
 * it to a signed 64-bit type.
 */
#define	ASSERT_MSG_TIME		3000000000LL
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 
/*
 * Take a reference on a vif.  v_lock is acquired and released around the
 * increment of v_refcnt.
 */
#define	VIF_REFHOLD(vifp) {				\
	mutex_enter(&(vifp)->v_lock);			\
	++(vifp)->v_refcnt;				\
	mutex_exit(&(vifp)->v_lock);			\
}
2370Sstevel@tonic-gate 
/*
 * Drop a reference on a vif; this macro never acquires v_lock itself.
 * If references remain, or the vif is not marked VIF_MARK_CONDEMNED,
 * v_lock is simply released.  Otherwise the vif is handed to del_vifp()
 * with v_lock still held.
 * NOTE(review): del_vifp() is presumably responsible for dropping v_lock
 * on that path -- confirm against its definition.
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	if (--(vifp)->v_refcnt != 0 ||				\
	    !((vifp)->v_marks & VIF_MARK_CONDEMNED)) {		\
		mutex_exit(&(vifp)->v_lock);			\
	} else {						\
		del_vifp(vifp);					\
	}							\
}
2470Sstevel@tonic-gate 
/*
 * Drop a reference on a vif: acquire v_lock, then perform the same
 * release step as VIF_REFRELE_LOCKED() (which releases v_lock or passes
 * the vif to del_vifp()).
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	VIF_REFRELE_LOCKED(vifp);				\
}
2580Sstevel@tonic-gate 
/*
 * Register a walker on an mfc hash bucket by incrementing mfcb_refcnt
 * under mfcb_lock.  The ASSERT checks that the count is non-zero after
 * the increment.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	++(mfcb)->mfcb_refcnt;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}
2650Sstevel@tonic-gate 
/*
 * Unregister a walker from an mfc hash bucket.  Under mfcb_lock the
 * refcnt is decremented; when it reaches zero and the bucket is marked
 * MFCB_MARK_CONDEMNED, release_mfc() is called with mfcb_lock held.
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	(mfcb)->mfcb_refcnt--;					\
	if ((mfcb)->mfcb_refcnt == 0 &&				\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
		release_mfc(mfcb);				\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}
2750Sstevel@tonic-gate 
/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls (mfc_rte != NULL) and entries that are
 * marked condemned.
 * Type of service parameter to be added in the future!
 *
 *	mfcbp	pointer to the hash bucket (struct mfcb) to search
 *	o	origin (source) address to match against mfc_origin.s_addr
 *	g	group address to match against mfc_mcastgrp.s_addr
 *	rt	lvalue that receives the matching struct mfc *, or NULL
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as "base | 1" or "cond ? a : b" can be passed safely.  Note that
 * o and g are evaluated once per chain entry, so they must not have side
 * effects.
 */
#define	MFCFIND(mfcbp, o, g, rt) { \
	struct mfc *_mb_rt; \
 \
	(rt) = NULL; \
	for (_mb_rt = (mfcbp)->mfcb_mfc; _mb_rt != NULL; \
	    _mb_rt = _mb_rt->mfc_next) { \
		if ((_mb_rt->mfc_origin.s_addr == (o)) && \
		    (_mb_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_mb_rt->mfc_rte == NULL) && \
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
			(rt) = _mb_rt; \
			break; \
		} \
	} \
}
2970Sstevel@tonic-gate 
/*
 * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
 * are inefficient. We use gethrestime() which returns a timespec_t with
 * sec and nsec, the resolution is machine dependent.
 * The following 2 macros have been changed to use nsec instead of usec.
 */
/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 *
 * TV_DELTA(a, b, delta) stores (a - b), in nanoseconds, into delta.
 * a and b are structures with tv_sec and tv_nsec members; delta should be
 * a hrtime_t (a 64-bit signed lvalue).
 *
 * The seconds difference is held in a 64-bit temporary and scaled with a
 * 64-bit multiply.  A plain int multiply of 1000000000 overflows as soon
 * as the difference exceeds 2 seconds.
 */
#define	TV_DELTA(a, b, delta) { \
	long long xxs; \
 \
	(delta) = (a).tv_nsec - (b).tv_nsec; \
	if ((xxs = (long long)(a).tv_sec - (b).tv_sec) != 0) { \
		switch (xxs) { \
		case 2: \
		    (delta) += 1000000000; \
		    /*FALLTHROUGH*/ \
		case 1: \
		    (delta) += 1000000000; \
		    break; \
		default: \
		    (delta) += (1000000000LL * xxs); \
		} \
	} \
}
3260Sstevel@tonic-gate 
/*
 * TV_LT(a, b) is true when time a is strictly earlier than time b:
 * either its seconds are smaller, or the seconds are equal and its
 * nanoseconds are smaller.
 */
#define	TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	((a).tv_sec == (b).tv_sec && (a).tv_nsec < (b).tv_nsec))
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate /*
3310Sstevel@tonic-gate  * Handle MRT setsockopt commands to modify the multicast routing tables.
3320Sstevel@tonic-gate  */
3330Sstevel@tonic-gate int
3340Sstevel@tonic-gate ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data,
3350Sstevel@tonic-gate     int datalen, mblk_t *first_mp)
3360Sstevel@tonic-gate {
3375240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
3385240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
3393448Sdh155122 
3403448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
3415240Snordmark 	if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) {
3423448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3430Sstevel@tonic-gate 		return (EACCES);
3440Sstevel@tonic-gate 	}
3453448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3460Sstevel@tonic-gate 
3470Sstevel@tonic-gate 	if (checkonly) {
3480Sstevel@tonic-gate 		/*
3490Sstevel@tonic-gate 		 * do not do operation, just pretend to - new T_CHECK
3500Sstevel@tonic-gate 		 * Note: Even routines further on can probably fail but
3510Sstevel@tonic-gate 		 * this T_CHECK stuff is only to please XTI so it not
3520Sstevel@tonic-gate 		 * necessary to be perfect.
3530Sstevel@tonic-gate 		 */
3540Sstevel@tonic-gate 		switch (cmd) {
3550Sstevel@tonic-gate 		case MRT_INIT:
3560Sstevel@tonic-gate 		case MRT_DONE:
3570Sstevel@tonic-gate 		case MRT_ADD_VIF:
3580Sstevel@tonic-gate 		case MRT_DEL_VIF:
3590Sstevel@tonic-gate 		case MRT_ADD_MFC:
3600Sstevel@tonic-gate 		case MRT_DEL_MFC:
3610Sstevel@tonic-gate 		case MRT_ASSERT:
3625240Snordmark 			return (0);
3630Sstevel@tonic-gate 		default:
3645240Snordmark 			return (EOPNOTSUPP);
3650Sstevel@tonic-gate 		}
3660Sstevel@tonic-gate 	}
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate 	/*
3690Sstevel@tonic-gate 	 * make sure no command is issued after multicast routing has been
3700Sstevel@tonic-gate 	 * turned off.
3710Sstevel@tonic-gate 	 */
3720Sstevel@tonic-gate 	if (cmd != MRT_INIT && cmd != MRT_DONE) {
3733448Sdh155122 		if (is_mrouter_off(ipst))
3740Sstevel@tonic-gate 			return (EINVAL);
3750Sstevel@tonic-gate 	}
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate 	switch (cmd) {
3785240Snordmark 	case MRT_INIT:	return (ip_mrouter_init(connp, data, datalen, ipst));
3793448Sdh155122 	case MRT_DONE:	return (ip_mrouter_done(first_mp, ipst));
3805240Snordmark 	case MRT_ADD_VIF:  return (add_vif((struct vifctl *)data, connp,
3815240Snordmark 			    first_mp, ipst));
3825240Snordmark 	case MRT_DEL_VIF:  return (del_vif((vifi_t *)data, connp, first_mp,
3835240Snordmark 			    ipst));
3843448Sdh155122 	case MRT_ADD_MFC:  return (add_mfc((struct mfcctl *)data, ipst));
3853448Sdh155122 	case MRT_DEL_MFC:  return (del_mfc((struct mfcctl *)data, ipst));
3863448Sdh155122 	case MRT_ASSERT:   return (set_assert((int *)data, ipst));
3870Sstevel@tonic-gate 	default:	   return (EOPNOTSUPP);
3880Sstevel@tonic-gate 	}
3890Sstevel@tonic-gate }
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate /*
3920Sstevel@tonic-gate  * Handle MRT getsockopt commands
3930Sstevel@tonic-gate  */
3940Sstevel@tonic-gate int
3950Sstevel@tonic-gate ip_mrouter_get(int cmd, queue_t *q, uchar_t *data)
3960Sstevel@tonic-gate {
3975240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
3985240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
3995240Snordmark 
4005240Snordmark 	if (connp != ipst->ips_ip_g_mrouter)
4010Sstevel@tonic-gate 		return (EACCES);
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 	switch (cmd) {
4040Sstevel@tonic-gate 	case MRT_VERSION:	return (get_version((uchar_t *)data));
4053448Sdh155122 	case MRT_ASSERT:	return (get_assert((uchar_t *)data, ipst));
4060Sstevel@tonic-gate 	default:		return (EOPNOTSUPP);
4070Sstevel@tonic-gate 	}
4080Sstevel@tonic-gate }
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate /*
4110Sstevel@tonic-gate  * Handle ioctl commands to obtain information from the cache.
4120Sstevel@tonic-gate  * Called with shared access to IP. These are read_only ioctls.
4130Sstevel@tonic-gate  */
4140Sstevel@tonic-gate /* ARGSUSED */
4150Sstevel@tonic-gate int
4160Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
4170Sstevel@tonic-gate     ip_ioctl_cmd_t *ipip, void *if_req)
4180Sstevel@tonic-gate {
4190Sstevel@tonic-gate 	mblk_t	*mp1;
4200Sstevel@tonic-gate 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4215240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
4225240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate 	/* Existence verified in ip_wput_nondata */
4250Sstevel@tonic-gate 	mp1 = mp->b_cont->b_cont;
4260Sstevel@tonic-gate 
4270Sstevel@tonic-gate 	switch (iocp->ioc_cmd) {
4280Sstevel@tonic-gate 	case (SIOCGETVIFCNT):
4293448Sdh155122 		return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst));
4300Sstevel@tonic-gate 	case (SIOCGETSGCNT):
4313448Sdh155122 		return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst));
4320Sstevel@tonic-gate 	case (SIOCGETLSGCNT):
4333448Sdh155122 		return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst));
4340Sstevel@tonic-gate 	default:
4350Sstevel@tonic-gate 		return (EINVAL);
4360Sstevel@tonic-gate 	}
4370Sstevel@tonic-gate }
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate /*
4400Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4410Sstevel@tonic-gate  */
4420Sstevel@tonic-gate static int
4433448Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
4440Sstevel@tonic-gate {
4450Sstevel@tonic-gate 	struct mfc *rt;
4460Sstevel@tonic-gate 	struct mfcb *mfcbp;
4470Sstevel@tonic-gate 
4483448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];
4490Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
4500Sstevel@tonic-gate 	MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt);
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	if (rt != NULL) {
4530Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
4540Sstevel@tonic-gate 		req->pktcnt   = rt->mfc_pkt_cnt;
4550Sstevel@tonic-gate 		req->bytecnt  = rt->mfc_byte_cnt;
4560Sstevel@tonic-gate 		req->wrong_if = rt->mfc_wrong_if;
4570Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
4580Sstevel@tonic-gate 	} else
4590Sstevel@tonic-gate 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
4620Sstevel@tonic-gate 	return (0);
4630Sstevel@tonic-gate }
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate /*
4660Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4670Sstevel@tonic-gate  * Uses larger counters and IPv6 addresses.
4680Sstevel@tonic-gate  */
4690Sstevel@tonic-gate /* ARGSUSED XXX until implemented */
4700Sstevel@tonic-gate static int
4713448Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
4720Sstevel@tonic-gate {
4730Sstevel@tonic-gate 	/* XXX TODO SIOCGETLSGCNT */
4740Sstevel@tonic-gate 	return (ENXIO);
4750Sstevel@tonic-gate }
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate /*
4780Sstevel@tonic-gate  * Returns the input and output packet and byte counts on the vif provided.
4790Sstevel@tonic-gate  */
4800Sstevel@tonic-gate static int
4813448Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
4820Sstevel@tonic-gate {
4830Sstevel@tonic-gate 	vifi_t vifi = req->vifi;
4840Sstevel@tonic-gate 
4853448Sdh155122 	if (vifi >= ipst->ips_numvifs)
4860Sstevel@tonic-gate 		return (EINVAL);
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 	/*
4890Sstevel@tonic-gate 	 * No locks here, an approximation is fine.
4900Sstevel@tonic-gate 	 */
4913448Sdh155122 	req->icount = ipst->ips_vifs[vifi].v_pkt_in;
4923448Sdh155122 	req->ocount = ipst->ips_vifs[vifi].v_pkt_out;
4933448Sdh155122 	req->ibytes = ipst->ips_vifs[vifi].v_bytes_in;
4943448Sdh155122 	req->obytes = ipst->ips_vifs[vifi].v_bytes_out;
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 	return (0);
4970Sstevel@tonic-gate }
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate static int
5000Sstevel@tonic-gate get_version(uchar_t *data)
5010Sstevel@tonic-gate {
5020Sstevel@tonic-gate 	int *v = (int *)data;
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate 	*v = 0x0305;	/* XXX !!!! */
5050Sstevel@tonic-gate 
5060Sstevel@tonic-gate 	return (0);
5070Sstevel@tonic-gate }
5080Sstevel@tonic-gate 
5090Sstevel@tonic-gate /*
5100Sstevel@tonic-gate  * Set PIM assert processing global.
5110Sstevel@tonic-gate  */
5120Sstevel@tonic-gate static int
5133448Sdh155122 set_assert(int *i, ip_stack_t *ipst)
5140Sstevel@tonic-gate {
5150Sstevel@tonic-gate 	if ((*i != 1) && (*i != 0))
5160Sstevel@tonic-gate 		return (EINVAL);
5170Sstevel@tonic-gate 
5183448Sdh155122 	ipst->ips_pim_assert = *i;
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate 	return (0);
5210Sstevel@tonic-gate }
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate /*
5240Sstevel@tonic-gate  * Get PIM assert processing global.
5250Sstevel@tonic-gate  */
5260Sstevel@tonic-gate static int
5273448Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst)
5280Sstevel@tonic-gate {
5290Sstevel@tonic-gate 	int *i = (int *)data;
5300Sstevel@tonic-gate 
5313448Sdh155122 	*i = ipst->ips_pim_assert;
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate 	return (0);
5340Sstevel@tonic-gate }
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate /*
5370Sstevel@tonic-gate  * Enable multicast routing.
5380Sstevel@tonic-gate  */
5390Sstevel@tonic-gate static int
5405240Snordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
5410Sstevel@tonic-gate {
5420Sstevel@tonic-gate 	int	*v;
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 	if (data == NULL || (datalen != sizeof (int)))
5450Sstevel@tonic-gate 		return (ENOPROTOOPT);
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate 	v = (int *)data;
5480Sstevel@tonic-gate 	if (*v != 1)
5490Sstevel@tonic-gate 		return (ENOPROTOOPT);
5500Sstevel@tonic-gate 
5513448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
5523448Sdh155122 	if (ipst->ips_ip_g_mrouter != NULL) {
5533448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5540Sstevel@tonic-gate 		return (EADDRINUSE);
5550Sstevel@tonic-gate 	}
5560Sstevel@tonic-gate 
5575240Snordmark 	/*
5585240Snordmark 	 * MRT_INIT should only be allowed for RAW sockets, but we double
5595240Snordmark 	 * check.
5605240Snordmark 	 */
5615240Snordmark 	if (!IPCL_IS_RAWIP(connp)) {
5625240Snordmark 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5635240Snordmark 		return (EINVAL);
5645240Snordmark 	}
5655240Snordmark 
5665240Snordmark 	ipst->ips_ip_g_mrouter = connp;
5670Sstevel@tonic-gate 	connp->conn_multi_router = 1;
5680Sstevel@tonic-gate 	/* In order for tunnels to work we have to turn ip_g_forward on */
5693448Sdh155122 	if (!WE_ARE_FORWARDING(ipst)) {
5703448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
5715240Snordmark 			(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
5720Sstevel@tonic-gate 			    "ip_mrouter_init: turning on forwarding");
5730Sstevel@tonic-gate 		}
5743448Sdh155122 		ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward;
5753448Sdh155122 		ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS;
5760Sstevel@tonic-gate 	}
5770Sstevel@tonic-gate 
5783448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5790Sstevel@tonic-gate 	return (0);
5800Sstevel@tonic-gate }
5810Sstevel@tonic-gate 
5823448Sdh155122 void
5833448Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst)
5843448Sdh155122 {
5853448Sdh155122 	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);
5863448Sdh155122 
5873448Sdh155122 	ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
5883448Sdh155122 	    KM_SLEEP);
5893448Sdh155122 	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
5903448Sdh155122 	/*
5913448Sdh155122 	 * mfctable:
5923448Sdh155122 	 * Includes all mfcs, including waiting upcalls.
5933448Sdh155122 	 * Multiple mfcs per bucket.
5943448Sdh155122 	 */
5953448Sdh155122 	ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
5963448Sdh155122 	    KM_SLEEP);
5973448Sdh155122 	/*
5983448Sdh155122 	 * Define the token bucket filter structures.
5993448Sdh155122 	 * tbftable -> each vif has one of these for storing info.
6003448Sdh155122 	 */
6013448Sdh155122 	ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);
6023448Sdh155122 
6033448Sdh155122 	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);
6043448Sdh155122 
6053448Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
6063448Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
6073448Sdh155122 }
6083448Sdh155122 
6090Sstevel@tonic-gate /*
6100Sstevel@tonic-gate  * Disable multicast routing.
6110Sstevel@tonic-gate  * Didn't use global timeout_val (BSD version), instead check the mfctable.
6120Sstevel@tonic-gate  */
6130Sstevel@tonic-gate int
6143448Sdh155122 ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst)
6150Sstevel@tonic-gate {
6165240Snordmark 	conn_t		*mrouter;
6170Sstevel@tonic-gate 	vifi_t 		vifi;
6180Sstevel@tonic-gate 	struct mfc	*mfc_rt;
6190Sstevel@tonic-gate 	int		i;
6200Sstevel@tonic-gate 
6213448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
6223448Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
6233448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6240Sstevel@tonic-gate 		return (EINVAL);
6250Sstevel@tonic-gate 	}
6260Sstevel@tonic-gate 
6275240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
6283448Sdh155122 
6293448Sdh155122 	if (ipst->ips_saved_ip_g_forward != -1) {
6303448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
6315240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
6320Sstevel@tonic-gate 			    "ip_mrouter_done: turning off forwarding");
6330Sstevel@tonic-gate 		}
6343448Sdh155122 		ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward;
6353448Sdh155122 		ipst->ips_saved_ip_g_forward = -1;
6360Sstevel@tonic-gate 	}
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	/*
6390Sstevel@tonic-gate 	 * Always clear cache when vifs change.
6403448Sdh155122 	 * No need to get ipst->ips_last_encap_lock since we are running as
6413448Sdh155122 	 * a writer.
6420Sstevel@tonic-gate 	 */
6433448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
6443448Sdh155122 	ipst->ips_last_encap_src = 0;
6453448Sdh155122 	ipst->ips_last_encap_vif = NULL;
6463448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
6475240Snordmark 	mrouter->conn_multi_router = 0;
6480Sstevel@tonic-gate 
6493448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6500Sstevel@tonic-gate 
6510Sstevel@tonic-gate 	/*
6520Sstevel@tonic-gate 	 * For each phyint in use,
6530Sstevel@tonic-gate 	 * disable promiscuous reception of all IP multicasts.
6540Sstevel@tonic-gate 	 */
6550Sstevel@tonic-gate 	for (vifi = 0; vifi < MAXVIFS; vifi++) {
6563448Sdh155122 		struct vif *vifp = ipst->ips_vifs + vifi;
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
6590Sstevel@tonic-gate 		/*
6600Sstevel@tonic-gate 		 * if the vif is active mark it condemned.
6610Sstevel@tonic-gate 		 */
6620Sstevel@tonic-gate 		if (vifp->v_marks & VIF_MARK_GOOD) {
6630Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
6640Sstevel@tonic-gate 			ipif_refhold(vifp->v_ipif);
6650Sstevel@tonic-gate 			/* Phyint only */
6660Sstevel@tonic-gate 			if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
6670Sstevel@tonic-gate 				ipif_t *ipif = vifp->v_ipif;
6680Sstevel@tonic-gate 				ipsq_t  *ipsq;
6690Sstevel@tonic-gate 				boolean_t suc;
6700Sstevel@tonic-gate 				ill_t *ill;
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 				ill = ipif->ipif_ill;
6730Sstevel@tonic-gate 				suc = B_FALSE;
6740Sstevel@tonic-gate 				if (mp == NULL) {
6750Sstevel@tonic-gate 					/*
6760Sstevel@tonic-gate 					 * being called from ip_close,
6770Sstevel@tonic-gate 					 * lets do it synchronously.
6780Sstevel@tonic-gate 					 * Clear VIF_MARK_GOOD and
6790Sstevel@tonic-gate 					 * set VIF_MARK_CONDEMNED.
6800Sstevel@tonic-gate 					 */
6810Sstevel@tonic-gate 					vifp->v_marks &= ~VIF_MARK_GOOD;
6820Sstevel@tonic-gate 					vifp->v_marks |= VIF_MARK_CONDEMNED;
6830Sstevel@tonic-gate 					mutex_exit(&(vifp)->v_lock);
6848275SEric Cheng 					suc = ipsq_enter(ill, B_FALSE, NEW_OP);
6850Sstevel@tonic-gate 					ipsq = ill->ill_phyint->phyint_ipsq;
6860Sstevel@tonic-gate 				} else {
6870Sstevel@tonic-gate 					ipsq = ipsq_try_enter(ipif, NULL,
6885240Snordmark 					    mrouter->conn_wq, mp,
6890Sstevel@tonic-gate 					    ip_restart_optmgmt, NEW_OP, B_TRUE);
6900Sstevel@tonic-gate 					if (ipsq == NULL) {
6910Sstevel@tonic-gate 						mutex_exit(&(vifp)->v_lock);
6925240Snordmark 						ipif_refrele(ipif);
6930Sstevel@tonic-gate 						return (EINPROGRESS);
6940Sstevel@tonic-gate 					}
6950Sstevel@tonic-gate 					/*
6960Sstevel@tonic-gate 					 * Clear VIF_MARK_GOOD and
6970Sstevel@tonic-gate 					 * set VIF_MARK_CONDEMNED.
6980Sstevel@tonic-gate 					 */
6990Sstevel@tonic-gate 					vifp->v_marks &= ~VIF_MARK_GOOD;
7000Sstevel@tonic-gate 					vifp->v_marks |= VIF_MARK_CONDEMNED;
7015240Snordmark 					mutex_exit(&(vifp)->v_lock);
7020Sstevel@tonic-gate 					suc = B_TRUE;
7030Sstevel@tonic-gate 				}
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 				if (suc) {
7060Sstevel@tonic-gate 					(void) ip_delmulti(INADDR_ANY, ipif,
7070Sstevel@tonic-gate 					    B_TRUE, B_TRUE);
7087098Smeem 					ipsq_exit(ipsq);
7090Sstevel@tonic-gate 				}
7100Sstevel@tonic-gate 				mutex_enter(&vifp->v_lock);
7110Sstevel@tonic-gate 			}
7120Sstevel@tonic-gate 			/*
7130Sstevel@tonic-gate 			 * decreases the refcnt added in add_vif.
7140Sstevel@tonic-gate 			 * and release v_lock.
7150Sstevel@tonic-gate 			 */
7160Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
7170Sstevel@tonic-gate 		} else {
7180Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
7190Sstevel@tonic-gate 			continue;
7200Sstevel@tonic-gate 		}
7210Sstevel@tonic-gate 	}
7220Sstevel@tonic-gate 
7233448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
7243448Sdh155122 	ipst->ips_numvifs = 0;
7253448Sdh155122 	ipst->ips_pim_assert = 0;
7263448Sdh155122 	ipst->ips_reg_vif_num = ALL_VIFS;
7273448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 	/*
7300Sstevel@tonic-gate 	 * Free upcall msgs.
7310Sstevel@tonic-gate 	 * Go through mfctable and stop any outstanding upcall
7320Sstevel@tonic-gate 	 * timeouts remaining on mfcs.
7330Sstevel@tonic-gate 	 */
7340Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
7353448Sdh155122 		mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
7363448Sdh155122 		ipst->ips_mfcs[i].mfcb_refcnt++;
7373448Sdh155122 		ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
7383448Sdh155122 		mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
7393448Sdh155122 		mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
7400Sstevel@tonic-gate 		while (mfc_rt) {
7410Sstevel@tonic-gate 			/* Free upcalls */
7420Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
7430Sstevel@tonic-gate 			if (mfc_rt->mfc_rte != NULL) {
7440Sstevel@tonic-gate 				if (mfc_rt->mfc_timeout_id != 0) {
7450Sstevel@tonic-gate 					/*
7460Sstevel@tonic-gate 					 * OK to drop the lock as we have
7470Sstevel@tonic-gate 					 * a refcnt on the bucket. timeout
7480Sstevel@tonic-gate 					 * can fire but it will see that
7490Sstevel@tonic-gate 					 * mfc_timeout_id == 0 and not do
7500Sstevel@tonic-gate 					 * anything. see expire_upcalls().
7510Sstevel@tonic-gate 					 */
7520Sstevel@tonic-gate 					mfc_rt->mfc_timeout_id = 0;
7530Sstevel@tonic-gate 					mutex_exit(&mfc_rt->mfc_mutex);
7540Sstevel@tonic-gate 					(void) untimeout(
7550Sstevel@tonic-gate 					    mfc_rt->mfc_timeout_id);
7560Sstevel@tonic-gate 						mfc_rt->mfc_timeout_id = 0;
7570Sstevel@tonic-gate 					mutex_enter(&mfc_rt->mfc_mutex);
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 					/*
7600Sstevel@tonic-gate 					 * all queued upcall packets
7610Sstevel@tonic-gate 					 * and mblk will be freed in
7620Sstevel@tonic-gate 					 * release_mfc().
7630Sstevel@tonic-gate 					 */
7640Sstevel@tonic-gate 				}
7650Sstevel@tonic-gate 			}
7660Sstevel@tonic-gate 
7670Sstevel@tonic-gate 			mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
7700Sstevel@tonic-gate 			mfc_rt = mfc_rt->mfc_next;
7710Sstevel@tonic-gate 		}
7723448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
7730Sstevel@tonic-gate 	}
7740Sstevel@tonic-gate 
7753448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
7763448Sdh155122 	ipst->ips_ip_g_mrouter = NULL;
7773448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7780Sstevel@tonic-gate 	return (0);
7790Sstevel@tonic-gate }
7800Sstevel@tonic-gate 
7813448Sdh155122 void
7823448Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst)
7833448Sdh155122 {
7843448Sdh155122 	struct mfcb *mfcbp;
7853448Sdh155122 	struct mfc  *rt;
7863448Sdh155122 	int i;
7873448Sdh155122 
7883448Sdh155122 	for (i = 0; i < MFCTBLSIZ; i++) {
7893448Sdh155122 		mfcbp = &ipst->ips_mfcs[i];
7903448Sdh155122 
7913448Sdh155122 		while ((rt = mfcbp->mfcb_mfc) != NULL) {
7923448Sdh155122 			(void) printf("ip_mrouter_stack_destroy: free for %d\n",
7933448Sdh155122 			    i);
7943448Sdh155122 
7953448Sdh155122 			mfcbp->mfcb_mfc = rt->mfc_next;
7963448Sdh155122 			free_queue(rt);
7973448Sdh155122 			mi_free(rt);
7983448Sdh155122 		}
7993448Sdh155122 	}
8003448Sdh155122 	kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
8013448Sdh155122 	ipst->ips_vifs = NULL;
8023448Sdh155122 	kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
8033448Sdh155122 	ipst->ips_mrtstat = NULL;
8043448Sdh155122 	kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
8053448Sdh155122 	ipst->ips_mfcs = NULL;
8063448Sdh155122 	kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
8073448Sdh155122 	ipst->ips_tbfs = NULL;
8083448Sdh155122 
8093448Sdh155122 	mutex_destroy(&ipst->ips_last_encap_lock);
8103448Sdh155122 	mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
8113448Sdh155122 }
8123448Sdh155122 
8130Sstevel@tonic-gate static boolean_t
8143448Sdh155122 is_mrouter_off(ip_stack_t *ipst)
8150Sstevel@tonic-gate {
8165240Snordmark 	conn_t	*mrouter;
8170Sstevel@tonic-gate 
8183448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
8193448Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
8203448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8210Sstevel@tonic-gate 		return (B_TRUE);
8220Sstevel@tonic-gate 	}
8230Sstevel@tonic-gate 
8245240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
8255240Snordmark 	if (mrouter->conn_multi_router == 0) {
8263448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8270Sstevel@tonic-gate 		return (B_TRUE);
8280Sstevel@tonic-gate 	}
8293448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8300Sstevel@tonic-gate 	return (B_FALSE);
8310Sstevel@tonic-gate }
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate static void
8340Sstevel@tonic-gate unlock_good_vif(struct vif *vifp)
8350Sstevel@tonic-gate {
8360Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8370Sstevel@tonic-gate 	ipif_refrele(vifp->v_ipif);
8380Sstevel@tonic-gate 	VIF_REFRELE(vifp);
8390Sstevel@tonic-gate }
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate static boolean_t
8420Sstevel@tonic-gate lock_good_vif(struct vif *vifp)
8430Sstevel@tonic-gate {
8440Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8450Sstevel@tonic-gate 	if (!(vifp->v_marks & VIF_MARK_GOOD)) {
8460Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8470Sstevel@tonic-gate 		return (B_FALSE);
8480Sstevel@tonic-gate 	}
8490Sstevel@tonic-gate 
8500Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8510Sstevel@tonic-gate 	mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
8520Sstevel@tonic-gate 	if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
8530Sstevel@tonic-gate 		mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8540Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8550Sstevel@tonic-gate 		return (B_FALSE);
8560Sstevel@tonic-gate 	}
8570Sstevel@tonic-gate 	ipif_refhold_locked(vifp->v_ipif);
8580Sstevel@tonic-gate 	mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8590Sstevel@tonic-gate 	vifp->v_refcnt++;
8600Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
8610Sstevel@tonic-gate 	return (B_TRUE);
8620Sstevel@tonic-gate }
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate /*
8650Sstevel@tonic-gate  * Add a vif to the vif table.
8660Sstevel@tonic-gate  */
8670Sstevel@tonic-gate static int
8685240Snordmark add_vif(struct vifctl *vifcp, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst)
8690Sstevel@tonic-gate {
8703448Sdh155122 	struct vif	*vifp = ipst->ips_vifs + vifcp->vifc_vifi;
8710Sstevel@tonic-gate 	ipif_t		*ipif;
8720Sstevel@tonic-gate 	int		error;
8733448Sdh155122 	struct tbf	*v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
8740Sstevel@tonic-gate 	ipsq_t  	*ipsq;
8755240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
8760Sstevel@tonic-gate 
8770Sstevel@tonic-gate 	ASSERT(connp != NULL);
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate 	if (vifcp->vifc_vifi >= MAXVIFS)
8800Sstevel@tonic-gate 		return (EINVAL);
8810Sstevel@tonic-gate 
8823448Sdh155122 	if (is_mrouter_off(ipst))
8830Sstevel@tonic-gate 		return (EINVAL);
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8860Sstevel@tonic-gate 	/*
8870Sstevel@tonic-gate 	 * Viftable entry should be 0.
8880Sstevel@tonic-gate 	 * if v_marks == 0 but v_refcnt != 0 means struct is being
8890Sstevel@tonic-gate 	 * initialized.
8900Sstevel@tonic-gate 	 *
8910Sstevel@tonic-gate 	 * Also note that it is very unlikely that we will get a MRT_ADD_VIF
8920Sstevel@tonic-gate 	 * request while the delete is in progress, mrouted only sends add
8930Sstevel@tonic-gate 	 * requests when a new interface is added and the new interface cannot
8940Sstevel@tonic-gate 	 * have the same vifi as an existing interface. We make sure that
8950Sstevel@tonic-gate 	 * ill_delete will block till the vif is deleted by adding a refcnt
8960Sstevel@tonic-gate 	 * to ipif in del_vif().
8970Sstevel@tonic-gate 	 */
8980Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr != 0 ||
8990Sstevel@tonic-gate 	    vifp->v_marks != 0 ||
9000Sstevel@tonic-gate 	    vifp->v_refcnt != 0) {
9010Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
9020Sstevel@tonic-gate 		return (EADDRINUSE);
9030Sstevel@tonic-gate 	}
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	/* Incoming vif should not be 0 */
9060Sstevel@tonic-gate 	if (vifcp->vifc_lcl_addr.s_addr == 0) {
9070Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
9080Sstevel@tonic-gate 		return (EINVAL);
9090Sstevel@tonic-gate 	}
9100Sstevel@tonic-gate 
9110Sstevel@tonic-gate 	vifp->v_refcnt++;
9120Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
9130Sstevel@tonic-gate 	/* Find the interface with the local address */
9140Sstevel@tonic-gate 	ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
9150Sstevel@tonic-gate 	    connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp,
9163448Sdh155122 	    ip_restart_optmgmt, &error, ipst);
9170Sstevel@tonic-gate 	if (ipif == NULL) {
9180Sstevel@tonic-gate 		VIF_REFRELE(vifp);
9190Sstevel@tonic-gate 		if (error == EINPROGRESS)
9200Sstevel@tonic-gate 			return (error);
9210Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
9220Sstevel@tonic-gate 	}
9230Sstevel@tonic-gate 
9240Sstevel@tonic-gate 	/*
9250Sstevel@tonic-gate 	 * We have to be exclusive as we have to call ip_addmulti()
9260Sstevel@tonic-gate 	 * This is the best position to try to be exclusive in case
9270Sstevel@tonic-gate 	 * we have to wait.
9280Sstevel@tonic-gate 	 */
9290Sstevel@tonic-gate 	ipsq = ipsq_try_enter(ipif, NULL, CONNP_TO_WQ(connp), first_mp,
9300Sstevel@tonic-gate 	    ip_restart_optmgmt, NEW_OP, B_TRUE);
9310Sstevel@tonic-gate 	if ((ipsq) == NULL) {
9320Sstevel@tonic-gate 		VIF_REFRELE(vifp);
9330Sstevel@tonic-gate 		ipif_refrele(ipif);
9340Sstevel@tonic-gate 		return (EINPROGRESS);
9350Sstevel@tonic-gate 	}
9360Sstevel@tonic-gate 
9373448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
9385240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
9390Sstevel@tonic-gate 		    "add_vif: src 0x%x enter",
9400Sstevel@tonic-gate 		    vifcp->vifc_lcl_addr.s_addr);
9410Sstevel@tonic-gate 	}
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
9440Sstevel@tonic-gate 	/*
9450Sstevel@tonic-gate 	 * Always clear cache when vifs change.
9460Sstevel@tonic-gate 	 * Needed to ensure that src isn't left over from before vif was added.
9470Sstevel@tonic-gate 	 * No need to get last_encap_lock, since we are running as a writer.
9480Sstevel@tonic-gate 	 */
9490Sstevel@tonic-gate 
9503448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
9513448Sdh155122 	ipst->ips_last_encap_src = 0;
9523448Sdh155122 	ipst->ips_last_encap_vif = NULL;
9533448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
9540Sstevel@tonic-gate 
9550Sstevel@tonic-gate 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
9560Sstevel@tonic-gate 		if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
9570Sstevel@tonic-gate 			cmn_err(CE_WARN,
9580Sstevel@tonic-gate 			    "add_vif: source route tunnels not supported\n");
9590Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9600Sstevel@tonic-gate 			ipif_refrele(ipif);
9617098Smeem 			ipsq_exit(ipsq);
9620Sstevel@tonic-gate 			return (EOPNOTSUPP);
9630Sstevel@tonic-gate 		}
9640Sstevel@tonic-gate 		vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate 	} else {
9670Sstevel@tonic-gate 		/* Phyint or Register vif */
9680Sstevel@tonic-gate 		if (vifcp->vifc_flags & VIFF_REGISTER) {
9690Sstevel@tonic-gate 			/*
9700Sstevel@tonic-gate 			 * Note: Since all IPPROTO_IP level options (including
9710Sstevel@tonic-gate 			 * MRT_ADD_VIF) are done exclusively via
9720Sstevel@tonic-gate 			 * ip_optmgmt_writer(), a lock is not necessary to
9730Sstevel@tonic-gate 			 * protect reg_vif_num.
9740Sstevel@tonic-gate 			 */
9753448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
9763448Sdh155122 			if (ipst->ips_reg_vif_num == ALL_VIFS) {
9773448Sdh155122 				ipst->ips_reg_vif_num = vifcp->vifc_vifi;
9783448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9790Sstevel@tonic-gate 			} else {
9803448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9810Sstevel@tonic-gate 				VIF_REFRELE_LOCKED(vifp);
9820Sstevel@tonic-gate 				ipif_refrele(ipif);
9837098Smeem 				ipsq_exit(ipsq);
9840Sstevel@tonic-gate 				return (EADDRINUSE);
9850Sstevel@tonic-gate 			}
9860Sstevel@tonic-gate 		}
9870Sstevel@tonic-gate 
9880Sstevel@tonic-gate 		/* Make sure the interface supports multicast */
9890Sstevel@tonic-gate 		if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
9900Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9910Sstevel@tonic-gate 			ipif_refrele(ipif);
9920Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
9933448Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
9943448Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
9953448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9960Sstevel@tonic-gate 			}
9977098Smeem 			ipsq_exit(ipsq);
9980Sstevel@tonic-gate 			return (EOPNOTSUPP);
9990Sstevel@tonic-gate 		}
10000Sstevel@tonic-gate 		/* Enable promiscuous reception of all IP mcasts from the if */
10010Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
10020Sstevel@tonic-gate 		error = ip_addmulti(INADDR_ANY, ipif, ILGSTAT_NONE,
10030Sstevel@tonic-gate 		    MODE_IS_EXCLUDE, NULL);
10040Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
10050Sstevel@tonic-gate 		/*
10060Sstevel@tonic-gate 		 * since we released the lock lets make sure that
10070Sstevel@tonic-gate 		 * ip_mrouter_done() has not been called.
10080Sstevel@tonic-gate 		 */
10093448Sdh155122 		if (error != 0 || is_mrouter_off(ipst)) {
10100Sstevel@tonic-gate 			if (error == 0)
10110Sstevel@tonic-gate 				(void) ip_delmulti(INADDR_ANY, ipif, B_TRUE,
10120Sstevel@tonic-gate 				    B_TRUE);
10130Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
10143448Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
10153448Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
10163448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
10170Sstevel@tonic-gate 			}
10180Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
10190Sstevel@tonic-gate 			ipif_refrele(ipif);
10207098Smeem 			ipsq_exit(ipsq);
10210Sstevel@tonic-gate 			return (error?error:EINVAL);
10220Sstevel@tonic-gate 		}
10230Sstevel@tonic-gate 	}
10240Sstevel@tonic-gate 	/* Define parameters for the tbf structure */
10250Sstevel@tonic-gate 	vifp->v_tbf = v_tbf;
10260Sstevel@tonic-gate 	gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
10270Sstevel@tonic-gate 	vifp->v_tbf->tbf_n_tok = 0;
10280Sstevel@tonic-gate 	vifp->v_tbf->tbf_q_len = 0;
10290Sstevel@tonic-gate 	vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
10300Sstevel@tonic-gate 	vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
10310Sstevel@tonic-gate 
10320Sstevel@tonic-gate 	vifp->v_flags = vifcp->vifc_flags;
10330Sstevel@tonic-gate 	vifp->v_threshold = vifcp->vifc_threshold;
10340Sstevel@tonic-gate 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
10350Sstevel@tonic-gate 	vifp->v_ipif = ipif;
10360Sstevel@tonic-gate 	ipif_refrele(ipif);
10370Sstevel@tonic-gate 	/* Scaling up here, allows division by 1024 in critical code.	*/
10380Sstevel@tonic-gate 	vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
10390Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
10400Sstevel@tonic-gate 	/* initialize per vif pkt counters */
10410Sstevel@tonic-gate 	vifp->v_pkt_in = 0;
10420Sstevel@tonic-gate 	vifp->v_pkt_out = 0;
10430Sstevel@tonic-gate 	vifp->v_bytes_in = 0;
10440Sstevel@tonic-gate 	vifp->v_bytes_out = 0;
10450Sstevel@tonic-gate 	mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 	/* Adjust numvifs up, if the vifi is higher than numvifs */
10483448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
10493448Sdh155122 	if (ipst->ips_numvifs <= vifcp->vifc_vifi)
10503448Sdh155122 		ipst->ips_numvifs = vifcp->vifc_vifi + 1;
10513448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
10523448Sdh155122 
10533448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
10545240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10550Sstevel@tonic-gate 		    "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
10560Sstevel@tonic-gate 		    vifcp->vifc_vifi,
10570Sstevel@tonic-gate 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
10580Sstevel@tonic-gate 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
10590Sstevel@tonic-gate 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
10600Sstevel@tonic-gate 		    vifcp->vifc_threshold, vifcp->vifc_rate_limit);
10610Sstevel@tonic-gate 	}
10620Sstevel@tonic-gate 
10630Sstevel@tonic-gate 	vifp->v_marks = VIF_MARK_GOOD;
10640Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
10657098Smeem 	ipsq_exit(ipsq);
10660Sstevel@tonic-gate 	return (0);
10670Sstevel@tonic-gate }
10680Sstevel@tonic-gate 
10690Sstevel@tonic-gate 
10700Sstevel@tonic-gate /* Delete a vif from the vif table. */
10710Sstevel@tonic-gate static void
10720Sstevel@tonic-gate del_vifp(struct vif *vifp)
10730Sstevel@tonic-gate {
10740Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
10750Sstevel@tonic-gate 	mblk_t  *mp0;
10760Sstevel@tonic-gate 	vifi_t  vifi;
10773448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
10785240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
10790Sstevel@tonic-gate 
10800Sstevel@tonic-gate 	ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
10810Sstevel@tonic-gate 	ASSERT(t != NULL);
10820Sstevel@tonic-gate 
10830Sstevel@tonic-gate 	/*
10840Sstevel@tonic-gate 	 * release the ref we put in vif_del.
10850Sstevel@tonic-gate 	 */
10860Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
10870Sstevel@tonic-gate 	ipif_refrele(vifp->v_ipif);
10880Sstevel@tonic-gate 
10893448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
10905240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10910Sstevel@tonic-gate 		    "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
10920Sstevel@tonic-gate 	}
10930Sstevel@tonic-gate 
10940Sstevel@tonic-gate 	if (vifp->v_timeout_id != 0) {
10950Sstevel@tonic-gate 		(void) untimeout(vifp->v_timeout_id);
10960Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
10970Sstevel@tonic-gate 	}
10980Sstevel@tonic-gate 
10990Sstevel@tonic-gate 	/*
11000Sstevel@tonic-gate 	 * Free packets queued at the interface.
11010Sstevel@tonic-gate 	 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
11020Sstevel@tonic-gate 	 */
11030Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
11040Sstevel@tonic-gate 	while (t->tbf_q != NULL) {
11050Sstevel@tonic-gate 		mp0 = t->tbf_q;
11060Sstevel@tonic-gate 		t->tbf_q = t->tbf_q->b_next;
11070Sstevel@tonic-gate 		mp0->b_prev = mp0->b_next = NULL;
11080Sstevel@tonic-gate 		freemsg(mp0);
11090Sstevel@tonic-gate 	}
11100Sstevel@tonic-gate 	mutex_exit(&t->tbf_lock);
11110Sstevel@tonic-gate 
11120Sstevel@tonic-gate 	/*
11130Sstevel@tonic-gate 	 * Always clear cache when vifs change.
11140Sstevel@tonic-gate 	 * No need to get last_encap_lock since we are running as a writer.
11150Sstevel@tonic-gate 	 */
11163448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
11173448Sdh155122 	if (vifp == ipst->ips_last_encap_vif) {
11183448Sdh155122 		ipst->ips_last_encap_vif = NULL;
11193448Sdh155122 		ipst->ips_last_encap_src = 0;
11200Sstevel@tonic-gate 	}
11213448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 	mutex_destroy(&t->tbf_lock);
11240Sstevel@tonic-gate 
11250Sstevel@tonic-gate 	bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
11260Sstevel@tonic-gate 
11270Sstevel@tonic-gate 	/* Adjust numvifs down */
11283448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
11293448Sdh155122 	for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */
11303448Sdh155122 		if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
11310Sstevel@tonic-gate 			break;
11323448Sdh155122 	ipst->ips_numvifs = vifi;
11333448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
11340Sstevel@tonic-gate 
11350Sstevel@tonic-gate 	bzero(vifp, sizeof (*vifp));
11360Sstevel@tonic-gate }
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate static int
11395240Snordmark del_vif(vifi_t *vifip, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst)
11400Sstevel@tonic-gate {
11413448Sdh155122 	struct vif	*vifp = ipst->ips_vifs + *vifip;
11420Sstevel@tonic-gate 	ipsq_t  	*ipsq;
11430Sstevel@tonic-gate 
11443448Sdh155122 	if (*vifip >= ipst->ips_numvifs)
11450Sstevel@tonic-gate 		return (EINVAL);
11460Sstevel@tonic-gate 
11470Sstevel@tonic-gate 
11480Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
11490Sstevel@tonic-gate 	/*
11500Sstevel@tonic-gate 	 * Not initialized
11510Sstevel@tonic-gate 	 * Here we are not looking at the vif that is being initialized
11520Sstevel@tonic-gate 	 * i.e vifp->v_marks == 0 and refcnt > 0.
11530Sstevel@tonic-gate 	 */
11540Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr == 0 ||
11550Sstevel@tonic-gate 	    !(vifp->v_marks & VIF_MARK_GOOD)) {
11560Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
11570Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
11580Sstevel@tonic-gate 	}
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate 	/*
11610Sstevel@tonic-gate 	 * This is an optimization, if first_mp == NULL
11620Sstevel@tonic-gate 	 * than we are being called from reset_mrt_vif_ipif()
11630Sstevel@tonic-gate 	 * so we already have exclusive access to the ipsq.
11640Sstevel@tonic-gate 	 * the ASSERT below is a check for this condition.
11650Sstevel@tonic-gate 	 */
11660Sstevel@tonic-gate 	if (first_mp != NULL &&
11670Sstevel@tonic-gate 	    !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
11680Sstevel@tonic-gate 		ASSERT(connp != NULL);
11690Sstevel@tonic-gate 		/*
11700Sstevel@tonic-gate 		 * We have to be exclusive as we have to call ip_delmulti()
11710Sstevel@tonic-gate 		 * This is the best position to try to be exclusive in case
11720Sstevel@tonic-gate 		 * we have to wait.
11730Sstevel@tonic-gate 		 */
11740Sstevel@tonic-gate 		ipsq = ipsq_try_enter(vifp->v_ipif, NULL, CONNP_TO_WQ(connp),
11750Sstevel@tonic-gate 		    first_mp, ip_restart_optmgmt, NEW_OP, B_TRUE);
11760Sstevel@tonic-gate 		if ((ipsq) == NULL) {
11770Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
11780Sstevel@tonic-gate 			return (EINPROGRESS);
11790Sstevel@tonic-gate 		}
11800Sstevel@tonic-gate 		/* recheck after being exclusive */
11810Sstevel@tonic-gate 		if (vifp->v_lcl_addr.s_addr == 0 ||
11820Sstevel@tonic-gate 		    !vifp->v_marks & VIF_MARK_GOOD) {
11830Sstevel@tonic-gate 			/*
11840Sstevel@tonic-gate 			 * someone beat us.
11850Sstevel@tonic-gate 			 */
11860Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
11877098Smeem 			ipsq_exit(ipsq);
11880Sstevel@tonic-gate 			return (EADDRNOTAVAIL);
11890Sstevel@tonic-gate 		}
11900Sstevel@tonic-gate 	}
11910Sstevel@tonic-gate 
11920Sstevel@tonic-gate 
11930Sstevel@tonic-gate 	ASSERT(IAM_WRITER_IPIF(vifp->v_ipif));
11940Sstevel@tonic-gate 
11950Sstevel@tonic-gate 
11960Sstevel@tonic-gate 	/*
11970Sstevel@tonic-gate 	 * add a refhold so that ipif does not go away while
11980Sstevel@tonic-gate 	 * there are still users, this will be released in del_vifp
11990Sstevel@tonic-gate 	 * when we free the vif.
12000Sstevel@tonic-gate 	 */
12010Sstevel@tonic-gate 	ipif_refhold(vifp->v_ipif);
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate 	/* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
12040Sstevel@tonic-gate 	vifp->v_marks &= ~VIF_MARK_GOOD;
12050Sstevel@tonic-gate 	vifp->v_marks |= VIF_MARK_CONDEMNED;
12060Sstevel@tonic-gate 
12070Sstevel@tonic-gate 	/* Phyint only */
12080Sstevel@tonic-gate 	if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
12090Sstevel@tonic-gate 		ipif_t *ipif = vifp->v_ipif;
12100Sstevel@tonic-gate 		ASSERT(ipif != NULL);
12110Sstevel@tonic-gate 		/*
12120Sstevel@tonic-gate 		 * should be OK to drop the lock as we
12130Sstevel@tonic-gate 		 * have marked this as CONDEMNED.
12140Sstevel@tonic-gate 		 */
12150Sstevel@tonic-gate 		mutex_exit(&(vifp)->v_lock);
12160Sstevel@tonic-gate 		(void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE);
12170Sstevel@tonic-gate 		if (first_mp != NULL)
12187098Smeem 			ipsq_exit(ipsq);
12190Sstevel@tonic-gate 		mutex_enter(&(vifp)->v_lock);
12200Sstevel@tonic-gate 	}
12210Sstevel@tonic-gate 
12220Sstevel@tonic-gate 	/*
12230Sstevel@tonic-gate 	 * decreases the refcnt added in add_vif.
12240Sstevel@tonic-gate 	 */
12250Sstevel@tonic-gate 	VIF_REFRELE_LOCKED(vifp);
12260Sstevel@tonic-gate 	return (0);
12270Sstevel@tonic-gate }
12280Sstevel@tonic-gate 
12290Sstevel@tonic-gate /*
12300Sstevel@tonic-gate  * Add an mfc entry.
12310Sstevel@tonic-gate  */
12320Sstevel@tonic-gate static int
12333448Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
12340Sstevel@tonic-gate {
12350Sstevel@tonic-gate 	struct mfc *rt;
12360Sstevel@tonic-gate 	struct rtdetq *rte;
12370Sstevel@tonic-gate 	ushort_t nstl;
12380Sstevel@tonic-gate 	int i;
12390Sstevel@tonic-gate 	struct mfcb *mfcbp;
12405240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
12410Sstevel@tonic-gate 
12420Sstevel@tonic-gate 	/*
12430Sstevel@tonic-gate 	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
12440Sstevel@tonic-gate 	 * did not have a real route for pkt.
12450Sstevel@tonic-gate 	 * We want this pkt without rt installed in the mfctable to prevent
12460Sstevel@tonic-gate 	 * multiple tries, so go ahead and put it in mfctable, it will
12470Sstevel@tonic-gate 	 * be discarded later in ip_mdq() because the child is NULL.
12480Sstevel@tonic-gate 	 */
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate 	/* Error checking, out of bounds? */
12510Sstevel@tonic-gate 	if (mfccp->mfcc_parent > MAXVIFS) {
12520Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
12530Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
12540Sstevel@tonic-gate 		return (EINVAL);
12550Sstevel@tonic-gate 	}
12560Sstevel@tonic-gate 
12570Sstevel@tonic-gate 	if ((mfccp->mfcc_parent != NO_VIF) &&
12583448Sdh155122 	    (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
12590Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
12600Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
12610Sstevel@tonic-gate 		return (EINVAL);
12620Sstevel@tonic-gate 	}
12630Sstevel@tonic-gate 
12643448Sdh155122 	if (is_mrouter_off(ipst)) {
12650Sstevel@tonic-gate 		return (EINVAL);
12660Sstevel@tonic-gate 	}
12670Sstevel@tonic-gate 
12683448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
12690Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr)];
12700Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
12710Sstevel@tonic-gate 	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
12720Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr, rt);
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate 	/* If an entry already exists, just update the fields */
12750Sstevel@tonic-gate 	if (rt) {
12763448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
12775240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
12780Sstevel@tonic-gate 			    "add_mfc: update o %x grp %x parent %x",
12790Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
12800Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
12810Sstevel@tonic-gate 			    mfccp->mfcc_parent);
12820Sstevel@tonic-gate 		}
12830Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
12840Sstevel@tonic-gate 		rt->mfc_parent = mfccp->mfcc_parent;
12850Sstevel@tonic-gate 
12863448Sdh155122 		mutex_enter(&ipst->ips_numvifs_mutex);
12873448Sdh155122 		for (i = 0; i < (int)ipst->ips_numvifs; i++)
12880Sstevel@tonic-gate 			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
12893448Sdh155122 		mutex_exit(&ipst->ips_numvifs_mutex);
12900Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
12930Sstevel@tonic-gate 		return (0);
12940Sstevel@tonic-gate 	}
12950Sstevel@tonic-gate 
12960Sstevel@tonic-gate 	/*
12970Sstevel@tonic-gate 	 * Find the entry for which the upcall was made and update.
12980Sstevel@tonic-gate 	 */
12990Sstevel@tonic-gate 	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
13000Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
13010Sstevel@tonic-gate 		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
13020Sstevel@tonic-gate 		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
13030Sstevel@tonic-gate 		    (rt->mfc_rte != NULL) &&
13040Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
13050Sstevel@tonic-gate 			if (nstl++ != 0)
13060Sstevel@tonic-gate 				cmn_err(CE_WARN,
13070Sstevel@tonic-gate 				    "add_mfc: %s o %x g %x p %x",
13080Sstevel@tonic-gate 				    "multiple kernel entries",
13090Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
13100Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13110Sstevel@tonic-gate 				    mfccp->mfcc_parent);
13120Sstevel@tonic-gate 
13133448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
13145240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
13153448Sdh155122 				    SL_TRACE,
13160Sstevel@tonic-gate 				    "add_mfc: o %x g %x p %x",
13170Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
13180Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13190Sstevel@tonic-gate 				    mfccp->mfcc_parent);
13200Sstevel@tonic-gate 			}
13213448Sdh155122 			fill_route(rt, mfccp, ipst);
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate 			/*
13240Sstevel@tonic-gate 			 * Prevent cleanup of cache entry.
13250Sstevel@tonic-gate 			 * Timer starts in ip_mforward.
13260Sstevel@tonic-gate 			 */
13270Sstevel@tonic-gate 			if (rt->mfc_timeout_id != 0) {
13280Sstevel@tonic-gate 				timeout_id_t id;
13290Sstevel@tonic-gate 				id = rt->mfc_timeout_id;
13300Sstevel@tonic-gate 				/*
13310Sstevel@tonic-gate 				 * setting id to zero will avoid this
13320Sstevel@tonic-gate 				 * entry from being cleaned up in
13330Sstevel@tonic-gate 				 * expire_up_calls().
13340Sstevel@tonic-gate 				 */
13350Sstevel@tonic-gate 				rt->mfc_timeout_id = 0;
13360Sstevel@tonic-gate 				/*
13370Sstevel@tonic-gate 				 * dropping the lock is fine as we
13380Sstevel@tonic-gate 				 * have a refhold on the bucket.
13390Sstevel@tonic-gate 				 * so mfc cannot be freed.
13400Sstevel@tonic-gate 				 * The timeout can fire but it will see
13410Sstevel@tonic-gate 				 * that mfc_timeout_id == 0 and not cleanup.
13420Sstevel@tonic-gate 				 */
13430Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13440Sstevel@tonic-gate 				(void) untimeout(id);
13450Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
13460Sstevel@tonic-gate 			}
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 			/*
13490Sstevel@tonic-gate 			 * Send all pkts that are queued waiting for the upcall.
13500Sstevel@tonic-gate 			 * ip_mdq param tun set to 0 -
13510Sstevel@tonic-gate 			 * the return value of ip_mdq() isn't used here,
13520Sstevel@tonic-gate 			 * so value we send doesn't matter.
13530Sstevel@tonic-gate 			 */
13540Sstevel@tonic-gate 			while (rt->mfc_rte != NULL) {
13550Sstevel@tonic-gate 				rte = rt->mfc_rte;
13560Sstevel@tonic-gate 				rt->mfc_rte = rte->rte_next;
13570Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13580Sstevel@tonic-gate 				(void) ip_mdq(rte->mp, (ipha_t *)
13590Sstevel@tonic-gate 				    rte->mp->b_rptr, rte->ill, 0, rt);
13600Sstevel@tonic-gate 				freemsg(rte->mp);
13610Sstevel@tonic-gate 				mi_free((char *)rte);
13620Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
13630Sstevel@tonic-gate 			}
13640Sstevel@tonic-gate 		}
13650Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
13660Sstevel@tonic-gate 	}
13670Sstevel@tonic-gate 
13680Sstevel@tonic-gate 
13690Sstevel@tonic-gate 	/*
13700Sstevel@tonic-gate 	 * It is possible that an entry is being inserted without an upcall
13710Sstevel@tonic-gate 	 */
13720Sstevel@tonic-gate 	if (nstl == 0) {
13730Sstevel@tonic-gate 		mutex_enter(&(mfcbp->mfcb_lock));
13743448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
13755240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
13760Sstevel@tonic-gate 			    "add_mfc: no upcall o %x g %x p %x",
13770Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
13780Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13790Sstevel@tonic-gate 			    mfccp->mfcc_parent);
13800Sstevel@tonic-gate 		}
13813448Sdh155122 		if (is_mrouter_off(ipst)) {
13820Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
13830Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
13840Sstevel@tonic-gate 			return (EINVAL);
13850Sstevel@tonic-gate 		}
13860Sstevel@tonic-gate 
13870Sstevel@tonic-gate 		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
13880Sstevel@tonic-gate 
13890Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
13900Sstevel@tonic-gate 			if ((rt->mfc_origin.s_addr ==
13910Sstevel@tonic-gate 			    mfccp->mfcc_origin.s_addr) &&
13920Sstevel@tonic-gate 			    (rt->mfc_mcastgrp.s_addr ==
13935240Snordmark 			    mfccp->mfcc_mcastgrp.s_addr) &&
13945240Snordmark 			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
13953448Sdh155122 				fill_route(rt, mfccp, ipst);
13960Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13970Sstevel@tonic-gate 				break;
13980Sstevel@tonic-gate 			}
13990Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
14000Sstevel@tonic-gate 		}
14010Sstevel@tonic-gate 
14020Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
14030Sstevel@tonic-gate 		if (rt == NULL) {
14040Sstevel@tonic-gate 			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
14050Sstevel@tonic-gate 			if (rt == NULL) {
14060Sstevel@tonic-gate 				ip1dbg(("add_mfc: out of memory\n"));
14070Sstevel@tonic-gate 				mutex_exit(&mfcbp->mfcb_lock);
14080Sstevel@tonic-gate 				MFCB_REFRELE(mfcbp);
14090Sstevel@tonic-gate 				return (ENOBUFS);
14100Sstevel@tonic-gate 			}
14110Sstevel@tonic-gate 
14120Sstevel@tonic-gate 			/* Insert new entry at head of hash chain */
14130Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
14143448Sdh155122 			fill_route(rt, mfccp, ipst);
14150Sstevel@tonic-gate 
14160Sstevel@tonic-gate 			/* Link into table */
14170Sstevel@tonic-gate 			rt->mfc_next   = mfcbp->mfcb_mfc;
14180Sstevel@tonic-gate 			mfcbp->mfcb_mfc = rt;
14190Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
14200Sstevel@tonic-gate 		}
14210Sstevel@tonic-gate 		mutex_exit(&mfcbp->mfcb_lock);
14220Sstevel@tonic-gate 	}
14230Sstevel@tonic-gate 
14240Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
14250Sstevel@tonic-gate 	return (0);
14260Sstevel@tonic-gate }
14270Sstevel@tonic-gate 
14280Sstevel@tonic-gate /*
14290Sstevel@tonic-gate  * Fills in mfc structure from mrouted mfcctl.
14300Sstevel@tonic-gate  */
14310Sstevel@tonic-gate static void
14323448Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
14330Sstevel@tonic-gate {
14340Sstevel@tonic-gate 	int i;
14350Sstevel@tonic-gate 
14360Sstevel@tonic-gate 	rt->mfc_origin		= mfccp->mfcc_origin;
14370Sstevel@tonic-gate 	rt->mfc_mcastgrp	= mfccp->mfcc_mcastgrp;
14380Sstevel@tonic-gate 	rt->mfc_parent		= mfccp->mfcc_parent;
14393448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
14403448Sdh155122 	for (i = 0; i < (int)ipst->ips_numvifs; i++) {
14410Sstevel@tonic-gate 		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
14420Sstevel@tonic-gate 	}
14433448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
14440Sstevel@tonic-gate 	/* Initialize pkt counters per src-grp */
14450Sstevel@tonic-gate 	rt->mfc_pkt_cnt	= 0;
14460Sstevel@tonic-gate 	rt->mfc_byte_cnt	= 0;
14470Sstevel@tonic-gate 	rt->mfc_wrong_if	= 0;
14480Sstevel@tonic-gate 	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;
14490Sstevel@tonic-gate 
14500Sstevel@tonic-gate }
14510Sstevel@tonic-gate 
14520Sstevel@tonic-gate static void
14530Sstevel@tonic-gate free_queue(struct mfc *mfcp)
14540Sstevel@tonic-gate {
14550Sstevel@tonic-gate 	struct rtdetq *rte0;
14560Sstevel@tonic-gate 
14570Sstevel@tonic-gate 	/*
14580Sstevel@tonic-gate 	 * Drop all queued upcall packets.
14590Sstevel@tonic-gate 	 * Free the mbuf with the pkt.
14600Sstevel@tonic-gate 	 */
14610Sstevel@tonic-gate 	while ((rte0 = mfcp->mfc_rte) != NULL) {
14620Sstevel@tonic-gate 		mfcp->mfc_rte = rte0->rte_next;
14630Sstevel@tonic-gate 		freemsg(rte0->mp);
14640Sstevel@tonic-gate 		mi_free((char *)rte0);
14650Sstevel@tonic-gate 	}
14660Sstevel@tonic-gate }
14670Sstevel@tonic-gate /*
14680Sstevel@tonic-gate  * go thorugh the hash bucket and free all the entries marked condemned.
14690Sstevel@tonic-gate  */
14700Sstevel@tonic-gate void
14710Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp)
14720Sstevel@tonic-gate {
14730Sstevel@tonic-gate 	struct mfc *current_mfcp;
14740Sstevel@tonic-gate 	struct mfc *prev_mfcp;
14750Sstevel@tonic-gate 
14760Sstevel@tonic-gate 	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate 	while (current_mfcp != NULL) {
14790Sstevel@tonic-gate 		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
14800Sstevel@tonic-gate 			if (current_mfcp == mfcbp->mfcb_mfc) {
14810Sstevel@tonic-gate 				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
14820Sstevel@tonic-gate 				free_queue(current_mfcp);
14830Sstevel@tonic-gate 				mi_free(current_mfcp);
14840Sstevel@tonic-gate 				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14850Sstevel@tonic-gate 				continue;
14860Sstevel@tonic-gate 			}
14870Sstevel@tonic-gate 			ASSERT(prev_mfcp != NULL);
14880Sstevel@tonic-gate 			prev_mfcp->mfc_next = current_mfcp->mfc_next;
14890Sstevel@tonic-gate 			free_queue(current_mfcp);
14900Sstevel@tonic-gate 			mi_free(current_mfcp);
14910Sstevel@tonic-gate 			current_mfcp = NULL;
14920Sstevel@tonic-gate 		} else {
14930Sstevel@tonic-gate 			prev_mfcp = current_mfcp;
14940Sstevel@tonic-gate 		}
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 		current_mfcp = prev_mfcp->mfc_next;
14970Sstevel@tonic-gate 
14980Sstevel@tonic-gate 	}
14990Sstevel@tonic-gate 	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
15000Sstevel@tonic-gate 	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
15010Sstevel@tonic-gate }
15020Sstevel@tonic-gate 
15030Sstevel@tonic-gate /*
15040Sstevel@tonic-gate  * Delete an mfc entry.
15050Sstevel@tonic-gate  */
15060Sstevel@tonic-gate static int
15073448Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
15080Sstevel@tonic-gate {
15090Sstevel@tonic-gate 	struct in_addr	origin;
15100Sstevel@tonic-gate 	struct in_addr	mcastgrp;
15115240Snordmark 	struct mfc 	*rt;
15125240Snordmark 	uint_t		hash;
15135240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
15140Sstevel@tonic-gate 
15150Sstevel@tonic-gate 	origin = mfccp->mfcc_origin;
15160Sstevel@tonic-gate 	mcastgrp = mfccp->mfcc_mcastgrp;
15170Sstevel@tonic-gate 	hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
15180Sstevel@tonic-gate 
15193448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
15205240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15210Sstevel@tonic-gate 		    "del_mfc: o %x g %x",
15220Sstevel@tonic-gate 		    ntohl(origin.s_addr),
15230Sstevel@tonic-gate 		    ntohl(mcastgrp.s_addr));
15240Sstevel@tonic-gate 	}
15250Sstevel@tonic-gate 
15263448Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
15270Sstevel@tonic-gate 
15280Sstevel@tonic-gate 	/* Find mfc in mfctable, finds only entries without upcalls */
15293448Sdh155122 	for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
15300Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
15310Sstevel@tonic-gate 		if (origin.s_addr == rt->mfc_origin.s_addr &&
15320Sstevel@tonic-gate 		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
15330Sstevel@tonic-gate 		    rt->mfc_rte == NULL &&
15340Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
15350Sstevel@tonic-gate 			break;
15360Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
15370Sstevel@tonic-gate 	}
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate 	/*
15400Sstevel@tonic-gate 	 * Return if there was an upcall (mfc_rte != NULL,
15410Sstevel@tonic-gate 	 * or rt not in mfctable.
15420Sstevel@tonic-gate 	 */
15430Sstevel@tonic-gate 	if (rt == NULL) {
15443448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15450Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
15460Sstevel@tonic-gate 	}
15470Sstevel@tonic-gate 
15480Sstevel@tonic-gate 
15490Sstevel@tonic-gate 	/*
15500Sstevel@tonic-gate 	 * no need to hold lock as we have a reference.
15510Sstevel@tonic-gate 	 */
15523448Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
15530Sstevel@tonic-gate 	/* error checking */
15540Sstevel@tonic-gate 	if (rt->mfc_timeout_id != 0) {
15550Sstevel@tonic-gate 		ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
15560Sstevel@tonic-gate 		/*
15570Sstevel@tonic-gate 		 * Its ok to drop the lock,  the struct cannot be freed
15580Sstevel@tonic-gate 		 * since we have a ref on the hash bucket.
15590Sstevel@tonic-gate 		 */
15600Sstevel@tonic-gate 		rt->mfc_timeout_id = 0;
15610Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
15620Sstevel@tonic-gate 		(void) untimeout(rt->mfc_timeout_id);
15630Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
15640Sstevel@tonic-gate 	}
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate 	ASSERT(rt->mfc_rte == NULL);
15670Sstevel@tonic-gate 
15680Sstevel@tonic-gate 
15690Sstevel@tonic-gate 	/*
15700Sstevel@tonic-gate 	 * Delete the entry from the cache
15710Sstevel@tonic-gate 	 */
15720Sstevel@tonic-gate 	rt->mfc_marks |= MFCB_MARK_CONDEMNED;
15730Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
15740Sstevel@tonic-gate 
15753448Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15760Sstevel@tonic-gate 
15770Sstevel@tonic-gate 	return (0);
15780Sstevel@tonic-gate }
15790Sstevel@tonic-gate 
15800Sstevel@tonic-gate #define	TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate /*
15830Sstevel@tonic-gate  * IP multicast forwarding function. This function assumes that the packet
15840Sstevel@tonic-gate  * pointed to by ipha has arrived on (or is about to be sent to) the interface
15850Sstevel@tonic-gate  * pointed to by "ill", and the packet is to be relayed to other networks
15860Sstevel@tonic-gate  * that have members of the packet's destination IP multicast group.
15870Sstevel@tonic-gate  *
15880Sstevel@tonic-gate  * The packet is returned unscathed to the caller, unless it is
15890Sstevel@tonic-gate  * erroneous, in which case a -1 value tells the caller (IP)
15900Sstevel@tonic-gate  * to discard it.
15910Sstevel@tonic-gate  *
15920Sstevel@tonic-gate  * Unlike BSD, SunOS 5.x needs to return to IP info about
15930Sstevel@tonic-gate  * whether pkt came in thru a tunnel, so it can be discarded, unless
15940Sstevel@tonic-gate  * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
15950Sstevel@tonic-gate  * to be delivered.
15960Sstevel@tonic-gate  * Return values are 0 - pkt is okay and phyint
15970Sstevel@tonic-gate  *		    -1 - pkt is malformed and to be tossed
15980Sstevel@tonic-gate  *                   1 - pkt came in on tunnel
15990Sstevel@tonic-gate  */
16000Sstevel@tonic-gate int
16010Sstevel@tonic-gate ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
16020Sstevel@tonic-gate {
16030Sstevel@tonic-gate 	struct mfc 	*rt;
16040Sstevel@tonic-gate 	ipaddr_t	src, dst, tunnel_src = 0;
	/*
	 * Counts source-routed packets seen; only used to rate-limit the
	 * warning below.  NOTE(review): function-static and updated without
	 * any lock visible here -- confirm that an approximate count is
	 * acceptable.
	 */
16050Sstevel@tonic-gate 	static int	srctun = 0;
16060Sstevel@tonic-gate 	vifi_t		vifi;
16070Sstevel@tonic-gate 	boolean_t	pim_reg_packet = B_FALSE;
16080Sstevel@tonic-gate 	struct mfcb *mfcbp;
16093448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
16105240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
16113448Sdh155122 
	/*
	 * NOTE(review): the debug logging in this function dereferences
	 * mrouter before is_mrouter_off() is consulted further down; confirm
	 * ips_ip_g_mrouter cannot be NULL here when ips_ip_mrtdebug > 1.
	 */
16123448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
16135240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16140Sstevel@tonic-gate 		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
16150Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
16160Sstevel@tonic-gate 		    ill->ill_name);
16170Sstevel@tonic-gate 	}
16180Sstevel@tonic-gate 
	/*
	 * mp->b_prev is used as a side channel: it holds either
	 * PIM_REGISTER_MARKER or a tunnel source address.  A value of 0
	 * leaves tunnel_src at 0, which the code below treats as "neither".
	 */
16190Sstevel@tonic-gate 	dst = ipha->ipha_dst;
16200Sstevel@tonic-gate 	if ((uint32_t)(uintptr_t)mp->b_prev == PIM_REGISTER_MARKER)
16210Sstevel@tonic-gate 		pim_reg_packet = B_TRUE;
16220Sstevel@tonic-gate 	else
16230Sstevel@tonic-gate 		tunnel_src = (ipaddr_t)(uintptr_t)mp->b_prev;
16240Sstevel@tonic-gate 
16250Sstevel@tonic-gate 	/*
16260Sstevel@tonic-gate 	 * Don't forward a packet with time-to-live of zero or one,
16270Sstevel@tonic-gate 	 * or a packet destined to a local-only group.
16280Sstevel@tonic-gate 	 */
16290Sstevel@tonic-gate 	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
16305240Snordmark 	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
16313448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16325240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16330Sstevel@tonic-gate 			    "ip_mforward: not forwarded ttl %d,"
16340Sstevel@tonic-gate 			    " dst 0x%x ill %s",
16350Sstevel@tonic-gate 			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
16360Sstevel@tonic-gate 		}
		/*
		 * Not forwarded, but not an error either: clear the side
		 * channel and report only whether it came in on a tunnel.
		 */
16370Sstevel@tonic-gate 		mp->b_prev = NULL;
16380Sstevel@tonic-gate 		if (tunnel_src != 0)
16390Sstevel@tonic-gate 			return (1);
16400Sstevel@tonic-gate 		else
16410Sstevel@tonic-gate 			return (0);
16420Sstevel@tonic-gate 	}
16430Sstevel@tonic-gate 
16440Sstevel@tonic-gate 	if ((tunnel_src != 0) || pim_reg_packet) {
16450Sstevel@tonic-gate 		/*
16460Sstevel@tonic-gate 		 * Packet arrived over an encapsulated tunnel or via a PIM
16470Sstevel@tonic-gate 		 * register message. Both ip_mroute_decap() and pim_input()
16480Sstevel@tonic-gate 		 * encode information in mp->b_prev.
16490Sstevel@tonic-gate 		 */
16500Sstevel@tonic-gate 		mp->b_prev = NULL;
16513448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16520Sstevel@tonic-gate 			if (tunnel_src != 0) {
16535240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
16543448Sdh155122 				    SL_TRACE,
16550Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via ENCAP TUN",
16560Sstevel@tonic-gate 				    ill->ill_name);
16570Sstevel@tonic-gate 			} else if (pim_reg_packet) {
16585240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
16593448Sdh155122 				    SL_TRACE,
16600Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via"
16610Sstevel@tonic-gate 				    "  REGISTER VIF",
16620Sstevel@tonic-gate 				    ill->ill_name);
16630Sstevel@tonic-gate 			}
16640Sstevel@tonic-gate 		}
16650Sstevel@tonic-gate 	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
16660Sstevel@tonic-gate 	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
16670Sstevel@tonic-gate 	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
16680Sstevel@tonic-gate 		/* Packet arrived via a physical interface. */
		/*
		 * The test above compares the header length field (in 32-bit
		 * words, hence the >> 2 on the byte count) against a simple
		 * header plus TUNNEL_LEN option bytes, and checks the second
		 * byte after the simple header for IPOPT_LSRR.  Anything that
		 * fails either check is handled as a plain arrival.
		 */
16693448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16705240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16710Sstevel@tonic-gate 			    "ip_mforward: ill %s arrived via PHYINT",
16720Sstevel@tonic-gate 			    ill->ill_name);
16730Sstevel@tonic-gate 		}
16740Sstevel@tonic-gate 
16750Sstevel@tonic-gate 	} else {
16760Sstevel@tonic-gate 		/*
16770Sstevel@tonic-gate 		 * Packet arrived through a SRCRT tunnel.
16780Sstevel@tonic-gate 		 * Source-route tunnels are no longer supported.
16790Sstevel@tonic-gate 		 * Error message printed every 1000 times.
16800Sstevel@tonic-gate 		 */
16810Sstevel@tonic-gate 		if ((srctun++ % 1000) == 0) {
16820Sstevel@tonic-gate 			cmn_err(CE_WARN,
16830Sstevel@tonic-gate 			    "ip_mforward: received source-routed pkt from %x",
16840Sstevel@tonic-gate 			    ntohl(ipha->ipha_src));
16850Sstevel@tonic-gate 		}
16860Sstevel@tonic-gate 		return (-1);
16870Sstevel@tonic-gate 	}
16880Sstevel@tonic-gate 
16893448Sdh155122 	ipst->ips_mrtstat->mrts_fwd_in++;
16900Sstevel@tonic-gate 	src = ipha->ipha_src;
16910Sstevel@tonic-gate 
16920Sstevel@tonic-gate 	/* Find route in cache, return NULL if not there or upcalls q'ed. */
16930Sstevel@tonic-gate 
16940Sstevel@tonic-gate 	/*
16950Sstevel@tonic-gate 	 * Lock the mfctable against changes made by ip_mforward.
16960Sstevel@tonic-gate 	 * Note that only add_mfc and del_mfc can remove entries and
16970Sstevel@tonic-gate 	 * they run with exclusive access to IP. So we do not need to
16980Sstevel@tonic-gate 	 * guard against the rt being deleted, so release lock after reading.
16990Sstevel@tonic-gate 	 */
17000Sstevel@tonic-gate 
17013448Sdh155122 	if (is_mrouter_off(ipst))
17020Sstevel@tonic-gate 		return (-1);
17030Sstevel@tonic-gate 
	/*
	 * Take a reference on the hash bucket for the rest of the function;
	 * every return path from here on drops it with MFCB_REFRELE().
	 */
17043448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
17050Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
17060Sstevel@tonic-gate 	MFCFIND(mfcbp, src, dst, rt);
17070Sstevel@tonic-gate 
17080Sstevel@tonic-gate 	/* Entry exists, so forward if necessary */
17090Sstevel@tonic-gate 	if (rt != NULL) {
17100Sstevel@tonic-gate 		int ret = 0;
17113448Sdh155122 		ipst->ips_mrtstat->mrts_mfc_hits++;
		/*
		 * PIM register packets are forwarded as if they had arrived
		 * on the register vif's ill, with a tunnel source of 0.
		 */
17120Sstevel@tonic-gate 		if (pim_reg_packet) {
17133448Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
17140Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha,
17153448Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
17163448Sdh155122 			    v_ipif->ipif_ill,
17173448Sdh155122 			    0, rt);
17180Sstevel@tonic-gate 		} else {
17190Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
17200Sstevel@tonic-gate 		}
17210Sstevel@tonic-gate 
17220Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
17230Sstevel@tonic-gate 		return (ret);
17240Sstevel@tonic-gate 
17250Sstevel@tonic-gate 		/*
17260Sstevel@tonic-gate 		 * Don't forward if we don't have a cache entry.  Mrouted will
17270Sstevel@tonic-gate 		 * always provide a cache entry in response to an upcall.
17280Sstevel@tonic-gate 		 */
17290Sstevel@tonic-gate 	} else {
17300Sstevel@tonic-gate 		/*
17310Sstevel@tonic-gate 		 * If we don't have a route for packet's origin, make a copy
17320Sstevel@tonic-gate 		 * of the packet and send message to routing daemon.
17330Sstevel@tonic-gate 		 */
		/*
		 * Resources acquired below; all start out NULL so that
		 * error_return can tell which ones need releasing:
		 *   mfc_rt  - cache entry, either found pending or new
		 *   mp_copy - packet copy that becomes the upcall message
		 *   mp0     - packet copy queued on the entry
		 *   rte     - queue element that will carry mp0
		 */
17340Sstevel@tonic-gate 		struct mfc	*mfc_rt	 = NULL;
17350Sstevel@tonic-gate 		mblk_t		*mp0	 = NULL;
17360Sstevel@tonic-gate 		mblk_t		*mp_copy = NULL;
17370Sstevel@tonic-gate 		struct rtdetq	*rte	 = NULL;
17380Sstevel@tonic-gate 		struct rtdetq	*rte_m, *rte1, *prev_rte;
17390Sstevel@tonic-gate 		uint_t		hash;
17400Sstevel@tonic-gate 		int		npkts;
		/*
		 * B_TRUE while mfc_rt was allocated here; error_return uses
		 * it to decide whether mfc_rt may be freed.
		 */
17410Sstevel@tonic-gate 		boolean_t	new_mfc = B_FALSE;
17423448Sdh155122 		ipst->ips_mrtstat->mrts_mfc_misses++;
17430Sstevel@tonic-gate 		/* BSD uses mrts_no_route++ */
17443448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
17455240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
17460Sstevel@tonic-gate 			    "ip_mforward: no rte ill %s src %x g %x misses %d",
17470Sstevel@tonic-gate 			    ill->ill_name, ntohl(src), ntohl(dst),
17483448Sdh155122 			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
17490Sstevel@tonic-gate 		}
17500Sstevel@tonic-gate 		/*
17510Sstevel@tonic-gate 		 * The order of the following code differs from the BSD code.
17520Sstevel@tonic-gate 		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
17530Sstevel@tonic-gate 		 * code works, so SunOS 5.x wasn't changed to conform to the
17540Sstevel@tonic-gate 		 * BSD version.
17550Sstevel@tonic-gate 		 */
17560Sstevel@tonic-gate 
		/*
		 * hash is recomputed from the same (src, dst) used for mfcbp
		 * above, so ips_mfcs[hash] and *mfcbp are the same bucket and
		 * ips_mfcs[hash].mfcb_lock and mfcbp->mfcb_lock are the same
		 * lock.  Both spellings are used below.
		 */
17570Sstevel@tonic-gate 		/* Lock mfctable. */
17580Sstevel@tonic-gate 		hash = MFCHASH(src, dst);
17593448Sdh155122 		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
17600Sstevel@tonic-gate 
17610Sstevel@tonic-gate 		/*
17620Sstevel@tonic-gate 		 * If we are turning off mrouted return an error
17630Sstevel@tonic-gate 		 */
17643448Sdh155122 		if (is_mrouter_off(ipst)) {
17650Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
17660Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
17670Sstevel@tonic-gate 			return (-1);
17680Sstevel@tonic-gate 		}
17690Sstevel@tonic-gate 
		/*
		 * Scan the bucket for an entry that already has packets
		 * queued for this (src, dst).  On a match the loop is left
		 * with mfc_rt->mfc_mutex still held.
		 */
17700Sstevel@tonic-gate 		/* Is there an upcall waiting for this packet? */
17713448Sdh155122 		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
17720Sstevel@tonic-gate 		    mfc_rt = mfc_rt->mfc_next) {
17730Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
17743448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
17755240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
17763448Sdh155122 				    SL_TRACE,
17770Sstevel@tonic-gate 				    "ip_mforward: MFCTAB hash %d o 0x%x"
17780Sstevel@tonic-gate 				    " g 0x%x\n",
17790Sstevel@tonic-gate 				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
17800Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
17810Sstevel@tonic-gate 			}
17820Sstevel@tonic-gate 			/* There is an upcall */
17830Sstevel@tonic-gate 			if ((src == mfc_rt->mfc_origin.s_addr) &&
17840Sstevel@tonic-gate 			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
17850Sstevel@tonic-gate 			    (mfc_rt->mfc_rte != NULL) &&
17860Sstevel@tonic-gate 			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
17870Sstevel@tonic-gate 				break;
17880Sstevel@tonic-gate 			}
17890Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17900Sstevel@tonic-gate 		}
17910Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
17920Sstevel@tonic-gate 		if (mfc_rt == NULL) {
17930Sstevel@tonic-gate 			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
17940Sstevel@tonic-gate 			if (mfc_rt == NULL) {
17953448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
17960Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory "
17970Sstevel@tonic-gate 				    "for mfc, mfc_rt\n"));
17980Sstevel@tonic-gate 				goto error_return;
17990Sstevel@tonic-gate 			} else
18000Sstevel@tonic-gate 				new_mfc = B_TRUE;
18010Sstevel@tonic-gate 			/* Get resources */
18020Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
18030Sstevel@tonic-gate 			mp_copy = copymsg(mp);
18040Sstevel@tonic-gate 			if (mp_copy == NULL) {
18053448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
18060Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory for "
18070Sstevel@tonic-gate 				    "mblk, mp_copy\n"));
18080Sstevel@tonic-gate 				goto error_return;
18090Sstevel@tonic-gate 			}
			/*
			 * From here on the new entry's mutex is held, just
			 * as it is for an entry found by the loop above.
			 */
18100Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
18110Sstevel@tonic-gate 		}
18120Sstevel@tonic-gate 		/* Get resources for rte, whether first rte or not first. */
18130Sstevel@tonic-gate 		/* Add this packet into rtdetq */
18140Sstevel@tonic-gate 		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
18150Sstevel@tonic-gate 		if (rte == NULL) {
18163448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
18170Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18180Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for"
18190Sstevel@tonic-gate 			    " rtdetq, rte\n"));
18200Sstevel@tonic-gate 			goto error_return;
18210Sstevel@tonic-gate 		}
18220Sstevel@tonic-gate 
18230Sstevel@tonic-gate 		mp0 = copymsg(mp);
18240Sstevel@tonic-gate 		if (mp0 == NULL) {
18253448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
18260Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
18270Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18280Sstevel@tonic-gate 			goto error_return;
18290Sstevel@tonic-gate 		}
18300Sstevel@tonic-gate 		rte->mp		= mp0;
18310Sstevel@tonic-gate 		if (pim_reg_packet) {
18323448Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
18333448Sdh155122 			rte->ill =
18343448Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
18353448Sdh155122 			    v_ipif->ipif_ill;
18360Sstevel@tonic-gate 		} else {
18370Sstevel@tonic-gate 			rte->ill = ill;
18380Sstevel@tonic-gate 		}
18390Sstevel@tonic-gate 		rte->rte_next	= NULL;
18400Sstevel@tonic-gate 
18410Sstevel@tonic-gate 		/*
18420Sstevel@tonic-gate 		 * Determine if upcall q (rtdetq) has overflowed.
18430Sstevel@tonic-gate 		 * mfc_rt->mfc_rte is null by mi_zalloc
18440Sstevel@tonic-gate 		 * if it is the first message.
18450Sstevel@tonic-gate 		 */
18460Sstevel@tonic-gate 		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
18470Sstevel@tonic-gate 		    rte_m = rte_m->rte_next)
18480Sstevel@tonic-gate 			npkts++;
18493448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
18505240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
18510Sstevel@tonic-gate 			    "ip_mforward: upcalls %d\n", npkts);
18520Sstevel@tonic-gate 		}
		/*
		 * rte and mp0 have not been linked into the entry yet, so
		 * error_return frees both of them.
		 */
18530Sstevel@tonic-gate 		if (npkts > MAX_UPQ) {
18543448Sdh155122 			ipst->ips_mrtstat->mrts_upq_ovflw++;
18550Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18560Sstevel@tonic-gate 			goto error_return;
18570Sstevel@tonic-gate 		}
18580Sstevel@tonic-gate 
18590Sstevel@tonic-gate 		if (npkts == 0) {	/* first upcall */
18600Sstevel@tonic-gate 			int i = 0;
18610Sstevel@tonic-gate 			/*
18620Sstevel@tonic-gate 			 * Now finish installing the new mfc! Now that we have
18630Sstevel@tonic-gate 			 * resources!  Insert new entry at head of hash chain.
18640Sstevel@tonic-gate 			 * Use src and dst which are ipaddr_t's.
18650Sstevel@tonic-gate 			 */
18660Sstevel@tonic-gate 			mfc_rt->mfc_origin.s_addr = src;
18670Sstevel@tonic-gate 			mfc_rt->mfc_mcastgrp.s_addr = dst;
18680Sstevel@tonic-gate 
18693448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
18703448Sdh155122 			for (i = 0; i < (int)ipst->ips_numvifs; i++)
18710Sstevel@tonic-gate 				mfc_rt->mfc_ttls[i] = 0;
18723448Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
18730Sstevel@tonic-gate 			mfc_rt->mfc_parent = ALL_VIFS;
18740Sstevel@tonic-gate 
18750Sstevel@tonic-gate 			/* Link into table */
18763448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
18775240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
18783448Sdh155122 				    SL_TRACE,
18790Sstevel@tonic-gate 				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
18800Sstevel@tonic-gate 				    "g 0x%x\n", hash,
18810Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_origin.s_addr),
18820Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
18830Sstevel@tonic-gate 			}
18843448Sdh155122 			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
18853448Sdh155122 			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
18860Sstevel@tonic-gate 			mfc_rt->mfc_rte = NULL;
18870Sstevel@tonic-gate 		}
18880Sstevel@tonic-gate 
		/*
		 * Append rte at the tail of the entry's queue so queued
		 * packets stay in arrival order.  No goto error_return
		 * follows this point, so rte and mp0 now belong to the entry.
		 */
18890Sstevel@tonic-gate 		/* Link in the upcall */
18900Sstevel@tonic-gate 		/* First upcall */
18910Sstevel@tonic-gate 		if (mfc_rt->mfc_rte == NULL)
18920Sstevel@tonic-gate 			mfc_rt->mfc_rte = rte;
18930Sstevel@tonic-gate 		else {
18940Sstevel@tonic-gate 			/* not the first upcall */
18950Sstevel@tonic-gate 			prev_rte = mfc_rt->mfc_rte;
18960Sstevel@tonic-gate 			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
18975240Snordmark 			    prev_rte = rte1, rte1 = rte1->rte_next)
18985240Snordmark 				;
18990Sstevel@tonic-gate 			prev_rte->rte_next = rte;
19000Sstevel@tonic-gate 		}
19010Sstevel@tonic-gate 
19020Sstevel@tonic-gate 		/*
19030Sstevel@tonic-gate 		 * No upcalls waiting, this is first one, so send a message to
19040Sstevel@tonic-gate 		 * routing daemon to install a route into kernel table.
19050Sstevel@tonic-gate 		 */
19060Sstevel@tonic-gate 		if (npkts == 0) {
19070Sstevel@tonic-gate 			struct igmpmsg	*im;
19080Sstevel@tonic-gate 			/* ipha_protocol is 0, for upcall */
19090Sstevel@tonic-gate 			ASSERT(mp_copy != NULL);
			/*
			 * The upcall header is overlaid on the start of the
			 * copied packet rather than built in a fresh buffer.
			 */
19100Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
19110Sstevel@tonic-gate 			im->im_msgtype	= IGMPMSG_NOCACHE;
19120Sstevel@tonic-gate 			im->im_mbz = 0;
19133448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
19140Sstevel@tonic-gate 			if (pim_reg_packet) {
19153448Sdh155122 				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
19163448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
19170Sstevel@tonic-gate 			} else {
19180Sstevel@tonic-gate 				/*
19190Sstevel@tonic-gate 				 * XXX do we need to hold locks here ?
19200Sstevel@tonic-gate 				 */
19213448Sdh155122 				for (vifi = 0;
19223448Sdh155122 				    vifi < ipst->ips_numvifs;
19233448Sdh155122 				    vifi++) {
19243448Sdh155122 					if (ipst->ips_vifs[vifi].v_ipif == NULL)
19250Sstevel@tonic-gate 						continue;
19263448Sdh155122 					if (ipst->ips_vifs[vifi].
19273448Sdh155122 					    v_ipif->ipif_ill == ill) {
19280Sstevel@tonic-gate 						im->im_vif = (uchar_t)vifi;
19290Sstevel@tonic-gate 						break;
19300Sstevel@tonic-gate 					}
19310Sstevel@tonic-gate 				}
				/*
				 * NOTE(review): if no vif matches ill, im_vif
				 * is never written and keeps whatever byte the
				 * copied packet had there; only the ASSERT
				 * below checks for that.  The ASSERT also
				 * reads ips_numvifs after its mutex has been
				 * dropped -- confirm both are acceptable.
				 */
19323448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
19333448Sdh155122 				ASSERT(vifi < ipst->ips_numvifs);
19340Sstevel@tonic-gate 			}
19350Sstevel@tonic-gate 
19363448Sdh155122 			ipst->ips_mrtstat->mrts_upcalls++;
19370Sstevel@tonic-gate 			/* Timer to discard upcalls if mrouted is too slow */
19380Sstevel@tonic-gate 			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
19390Sstevel@tonic-gate 			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			/*
			 * Both the entry mutex and the bucket lock are
			 * dropped before mp_copy is handed to the mrouter
			 * conn's receive function.
			 */
19400Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
19413448Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
19425240Snordmark 			/* Pass to RAWIP */
19435240Snordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL);
19440Sstevel@tonic-gate 		} else {
			/*
			 * An upcall is already outstanding for this entry;
			 * the packet has only been queued.  mp_copy is only
			 * allocated for a new entry, so it is still NULL on
			 * this path.
			 */
19450Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
19463448Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
19470Sstevel@tonic-gate 			freemsg(mp_copy);
19480Sstevel@tonic-gate 		}
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
19510Sstevel@tonic-gate 		if (tunnel_src != 0)
19520Sstevel@tonic-gate 			return (1);
19530Sstevel@tonic-gate 		else
19540Sstevel@tonic-gate 			return (0);
		/*
		 * Common failure exit.  Every goto above has already dropped
		 * mfc_rt->mfc_mutex if it was held, and is taken before rte
		 * is linked into the entry; the bucket lock is still held.
		 * rte->mp may already point at mp0, but the two are released
		 * separately here.
		 */
19550Sstevel@tonic-gate 	error_return:
19563448Sdh155122 		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
19570Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
19580Sstevel@tonic-gate 		if (mfc_rt != NULL && (new_mfc == B_TRUE))
19590Sstevel@tonic-gate 			mi_free((char *)mfc_rt);
19600Sstevel@tonic-gate 		if (rte != NULL)
19610Sstevel@tonic-gate 			mi_free((char *)rte);
19620Sstevel@tonic-gate 		if (mp_copy != NULL)
19630Sstevel@tonic-gate 			freemsg(mp_copy);
19640Sstevel@tonic-gate 		if (mp0 != NULL)
19650Sstevel@tonic-gate 			freemsg(mp0);
19660Sstevel@tonic-gate 		return (-1);
19670Sstevel@tonic-gate 	}
19680Sstevel@tonic-gate }
19690Sstevel@tonic-gate 
19700Sstevel@tonic-gate /*
19710Sstevel@tonic-gate  * Clean up the mfctable cache entry if upcall is not serviced.
19720Sstevel@tonic-gate  * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19730Sstevel@tonic-gate  */
19740Sstevel@tonic-gate static void
19750Sstevel@tonic-gate expire_upcalls(void *arg)
19760Sstevel@tonic-gate {
19770Sstevel@tonic-gate 	struct mfc *mfc_rt = arg;
19780Sstevel@tonic-gate 	uint_t hash;
19790Sstevel@tonic-gate 	struct mfc *prev_mfc, *mfc0;
19803448Sdh155122 	ip_stack_t	*ipst;
19815240Snordmark 	conn_t		*mrouter;
19823448Sdh155122 
19833448Sdh155122 	if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) {
19843448Sdh155122 		cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
19853448Sdh155122 		return;
19863448Sdh155122 	}
19873448Sdh155122 	ipst = mfc_rt->mfc_rte->ill->ill_ipst;
19885240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
19890Sstevel@tonic-gate 
19900Sstevel@tonic-gate 	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
19913448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
19925240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19930Sstevel@tonic-gate 		    "expire_upcalls: hash %d s %x g %x",
19940Sstevel@tonic-gate 		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
19950Sstevel@tonic-gate 		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
19960Sstevel@tonic-gate 	}
19973448Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
19980Sstevel@tonic-gate 	mutex_enter(&mfc_rt->mfc_mutex);
19990Sstevel@tonic-gate 	/*
20000Sstevel@tonic-gate 	 * if timeout has been set to zero, than the
20010Sstevel@tonic-gate 	 * entry has been filled, no need to delete it.
20020Sstevel@tonic-gate 	 */
20030Sstevel@tonic-gate 	if (mfc_rt->mfc_timeout_id == 0)
20040Sstevel@tonic-gate 		goto done;
20053448Sdh155122 	ipst->ips_mrtstat->mrts_cache_cleanups++;
20060Sstevel@tonic-gate 	mfc_rt->mfc_timeout_id = 0;
20070Sstevel@tonic-gate 
20080Sstevel@tonic-gate 	/* Determine entry to be cleaned up in cache table. */
20093448Sdh155122 	for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
20100Sstevel@tonic-gate 	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
20110Sstevel@tonic-gate 		if (mfc0 == mfc_rt)
20120Sstevel@tonic-gate 			break;
20130Sstevel@tonic-gate 
20140Sstevel@tonic-gate 	/* del_mfc takes care of gone mfcs */
20150Sstevel@tonic-gate 	ASSERT(prev_mfc != NULL);
20160Sstevel@tonic-gate 	ASSERT(mfc0 != NULL);
20170Sstevel@tonic-gate 
20180Sstevel@tonic-gate 	/*
20190Sstevel@tonic-gate 	 * Delete the entry from the cache
20200Sstevel@tonic-gate 	 */
20213448Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
20220Sstevel@tonic-gate 	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
20230Sstevel@tonic-gate 
20240Sstevel@tonic-gate 	/*
20250Sstevel@tonic-gate 	 * release_mfc will drop all queued upcall packets.
20260Sstevel@tonic-gate 	 * and will free the mbuf with the pkt, if, timing info.
20270Sstevel@tonic-gate 	 */
20280Sstevel@tonic-gate done:
20290Sstevel@tonic-gate 	mutex_exit(&mfc_rt->mfc_mutex);
20303448Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
20310Sstevel@tonic-gate }
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate /*
20340Sstevel@tonic-gate  * Packet forwarding routine once entry in the cache is made.
20350Sstevel@tonic-gate  */
20360Sstevel@tonic-gate static int
20370Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
20380Sstevel@tonic-gate     struct mfc *rt)
20390Sstevel@tonic-gate {
2040*8485SPeter.Memishian@Sun.COM 	ill_t *vill;
20410Sstevel@tonic-gate 	vifi_t vifi;
20420Sstevel@tonic-gate 	struct vif *vifp;
20430Sstevel@tonic-gate 	ipaddr_t dst = ipha->ipha_dst;
20440Sstevel@tonic-gate 	size_t  plen = msgdsize(mp);
20450Sstevel@tonic-gate 	vifi_t num_of_vifs;
20463448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
20475240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
20483448Sdh155122 
20493448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
20505240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20510Sstevel@tonic-gate 		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
20520Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
20530Sstevel@tonic-gate 		    ill->ill_name);
20540Sstevel@tonic-gate 	}
20550Sstevel@tonic-gate 
20560Sstevel@tonic-gate 	/* Macro to send packet on vif */
20570Sstevel@tonic-gate #define	MC_SEND(ipha, mp, vifp, dst) { \
20580Sstevel@tonic-gate 	if ((vifp)->v_flags & VIFF_TUNNEL) \
20590Sstevel@tonic-gate 		encap_send((ipha), (mp), (vifp), (dst)); \
20600Sstevel@tonic-gate 	else if ((vifp)->v_flags & VIFF_REGISTER) \
20610Sstevel@tonic-gate 		register_send((ipha), (mp), (vifp), (dst)); \
20620Sstevel@tonic-gate 	else \
20630Sstevel@tonic-gate 		phyint_send((ipha), (mp), (vifp), (dst)); \
20640Sstevel@tonic-gate }
20650Sstevel@tonic-gate 
20660Sstevel@tonic-gate 	vifi = rt->mfc_parent;
20670Sstevel@tonic-gate 
20680Sstevel@tonic-gate 	/*
20690Sstevel@tonic-gate 	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
20700Sstevel@tonic-gate 	 * Mrouted had no route.
20710Sstevel@tonic-gate 	 * We wanted the route installed in the mfctable to prevent multiple
20720Sstevel@tonic-gate 	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
20730Sstevel@tonic-gate 	 * NULL so we don't want to check the ill. Still needed as of Mrouted
20740Sstevel@tonic-gate 	 * 3.6.
20750Sstevel@tonic-gate 	 */
20760Sstevel@tonic-gate 	if (vifi == NO_VIF) {
20770Sstevel@tonic-gate 		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
20780Sstevel@tonic-gate 		    ill->ill_name));
20793448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
20805240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20810Sstevel@tonic-gate 			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
20820Sstevel@tonic-gate 		}
20830Sstevel@tonic-gate 		return (-1);	/* drop pkt */
20840Sstevel@tonic-gate 	}
20850Sstevel@tonic-gate 
20863448Sdh155122 	if (!lock_good_vif(&ipst->ips_vifs[vifi]))
20870Sstevel@tonic-gate 		return (-1);
20880Sstevel@tonic-gate 	/*
20890Sstevel@tonic-gate 	 * The MFC entries are not cleaned up when an ipif goes
20900Sstevel@tonic-gate 	 * away thus this code has to guard against an MFC referencing
20910Sstevel@tonic-gate 	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
20920Sstevel@tonic-gate 	 * sets the v_ipif to NULL when the ipif disappears.
20930Sstevel@tonic-gate 	 */
20943448Sdh155122 	ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
20953448Sdh155122 
20963448Sdh155122 	if (vifi >= ipst->ips_numvifs) {
20970Sstevel@tonic-gate 		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
20980Sstevel@tonic-gate 		    "%d ill %s viftable ill %s\n",
20993448Sdh155122 		    (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
21003448Sdh155122 		    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
21013448Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
21020Sstevel@tonic-gate 		return (-1);
21030Sstevel@tonic-gate 	}
21040Sstevel@tonic-gate 	/*
21050Sstevel@tonic-gate 	 * Don't forward if it didn't arrive from the parent vif for its
2106*8485SPeter.Memishian@Sun.COM 	 * origin.
21070Sstevel@tonic-gate 	 */
2108*8485SPeter.Memishian@Sun.COM 	vill = ipst->ips_vifs[vifi].v_ipif->ipif_ill;
2109*8485SPeter.Memishian@Sun.COM 	if ((vill != ill && !IS_IN_SAME_ILLGRP(vill, ill)) ||
21103448Sdh155122 	    (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
21110Sstevel@tonic-gate 		/* Came in the wrong interface */
21120Sstevel@tonic-gate 		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
21130Sstevel@tonic-gate 			"numvifs %d ill %s viftable ill %s\n",
21143448Sdh155122 			(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
2115*8485SPeter.Memishian@Sun.COM 			vill->ill_name));
21163448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
21175240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
21180Sstevel@tonic-gate 			    "ip_mdq: arrived wrong if, vifi %d ill "
21190Sstevel@tonic-gate 			    "%s viftable ill %s\n",
2120*8485SPeter.Memishian@Sun.COM 			    (int)vifi, ill->ill_name, vill->ill_name);
21210Sstevel@tonic-gate 		}
21223448Sdh155122 		ipst->ips_mrtstat->mrts_wrong_if++;
21230Sstevel@tonic-gate 		rt->mfc_wrong_if++;
21240Sstevel@tonic-gate 
21250Sstevel@tonic-gate 		/*
21260Sstevel@tonic-gate 		 * If we are doing PIM assert processing and we are forwarding
21270Sstevel@tonic-gate 		 * packets on this interface, and it is a broadcast medium
21280Sstevel@tonic-gate 		 * interface (and not a tunnel), send a message to the routing.
21290Sstevel@tonic-gate 		 *
21300Sstevel@tonic-gate 		 * We use the first ipif on the list, since it's all we have.
21310Sstevel@tonic-gate 		 * Chances are the ipif_flags are the same for ipifs on the ill.
21320Sstevel@tonic-gate 		 */
21333448Sdh155122 		if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
21340Sstevel@tonic-gate 		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
21353448Sdh155122 		    !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
21360Sstevel@tonic-gate 			mblk_t		*mp_copy;
21370Sstevel@tonic-gate 			struct igmpmsg	*im;
21380Sstevel@tonic-gate 
21390Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
21400Sstevel@tonic-gate 			mp_copy = copymsg(mp);
21410Sstevel@tonic-gate 			if (mp_copy == NULL) {
21423448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
21430Sstevel@tonic-gate 				ip1dbg(("ip_mdq: out of memory "
21440Sstevel@tonic-gate 				    "for mblk, mp_copy\n"));
21453448Sdh155122 				unlock_good_vif(&ipst->ips_vifs[vifi]);
21460Sstevel@tonic-gate 				return (-1);
21470Sstevel@tonic-gate 			}
21480Sstevel@tonic-gate 
21490Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
21500Sstevel@tonic-gate 			im->im_msgtype = IGMPMSG_WRONGVIF;
21510Sstevel@tonic-gate 			im->im_mbz = 0;
21520Sstevel@tonic-gate 			im->im_vif = (ushort_t)vifi;
21535240Snordmark 			/* Pass to RAWIP */
21545240Snordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL);
21550Sstevel@tonic-gate 		}
21563448Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
21570Sstevel@tonic-gate 		if (tunnel_src != 0)
21580Sstevel@tonic-gate 			return (1);
21590Sstevel@tonic-gate 		else
21600Sstevel@tonic-gate 			return (0);
21610Sstevel@tonic-gate 	}
21620Sstevel@tonic-gate 	/*
21630Sstevel@tonic-gate 	 * If I sourced this packet, it counts as output, else it was input.
21640Sstevel@tonic-gate 	 */
21653448Sdh155122 	if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
21663448Sdh155122 		ipst->ips_vifs[vifi].v_pkt_out++;
21673448Sdh155122 		ipst->ips_vifs[vifi].v_bytes_out += plen;
21680Sstevel@tonic-gate 	} else {
21693448Sdh155122 		ipst->ips_vifs[vifi].v_pkt_in++;
21703448Sdh155122 		ipst->ips_vifs[vifi].v_bytes_in += plen;
21710Sstevel@tonic-gate 	}
21720Sstevel@tonic-gate 	mutex_enter(&rt->mfc_mutex);
21730Sstevel@tonic-gate 	rt->mfc_pkt_cnt++;
21740Sstevel@tonic-gate 	rt->mfc_byte_cnt += plen;
21750Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
21763448Sdh155122 	unlock_good_vif(&ipst->ips_vifs[vifi]);
21770Sstevel@tonic-gate 	/*
21780Sstevel@tonic-gate 	 * For each vif, decide if a copy of the packet should be forwarded.
21790Sstevel@tonic-gate 	 * Forward if:
21800Sstevel@tonic-gate 	 *		- the vif threshold ttl is non-zero AND
21810Sstevel@tonic-gate 	 *		- the pkt ttl exceeds the vif's threshold
21820Sstevel@tonic-gate 	 * A non-zero mfc_ttl indicates that the vif is part of
21830Sstevel@tonic-gate 	 * the output set for the mfc entry.
21840Sstevel@tonic-gate 	 */
21853448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
21863448Sdh155122 	num_of_vifs = ipst->ips_numvifs;
21873448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
21883448Sdh155122 	for (vifp = ipst->ips_vifs, vifi = 0;
21893448Sdh155122 	    vifi < num_of_vifs;
21903448Sdh155122 	    vifp++, vifi++) {
21910Sstevel@tonic-gate 		if (!lock_good_vif(vifp))
21920Sstevel@tonic-gate 			continue;
21930Sstevel@tonic-gate 		if ((rt->mfc_ttls[vifi] > 0) &&
21940Sstevel@tonic-gate 		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
21950Sstevel@tonic-gate 			/*
21960Sstevel@tonic-gate 			 * lock_good_vif should not have succedded if
21970Sstevel@tonic-gate 			 * v_ipif is null.
21980Sstevel@tonic-gate 			 */
21990Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
22000Sstevel@tonic-gate 			vifp->v_pkt_out++;
22010Sstevel@tonic-gate 			vifp->v_bytes_out += plen;
22020Sstevel@tonic-gate 			MC_SEND(ipha, mp, vifp, dst);
22033448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_out++;
22040Sstevel@tonic-gate 		}
22050Sstevel@tonic-gate 		unlock_good_vif(vifp);
22060Sstevel@tonic-gate 	}
22070Sstevel@tonic-gate 	if (tunnel_src != 0)
22080Sstevel@tonic-gate 		return (1);
22090Sstevel@tonic-gate 	else
22100Sstevel@tonic-gate 		return (0);
22110Sstevel@tonic-gate }
22120Sstevel@tonic-gate 
22130Sstevel@tonic-gate /*
22140Sstevel@tonic-gate  * Send the packet on physical interface.
22150Sstevel@tonic-gate  * Caller assumes can continue to use mp on return.
22160Sstevel@tonic-gate  */
22170Sstevel@tonic-gate /* ARGSUSED */
22180Sstevel@tonic-gate static void
22190Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
22200Sstevel@tonic-gate {
22210Sstevel@tonic-gate 	mblk_t 	*mp_copy;
22223448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
22235240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
22240Sstevel@tonic-gate 
22250Sstevel@tonic-gate 	/* Make a new reference to the packet */
22260Sstevel@tonic-gate 	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
22270Sstevel@tonic-gate 	if (mp_copy == NULL) {
22283448Sdh155122 		ipst->ips_mrtstat->mrts_fwd_drop++;
22290Sstevel@tonic-gate 		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
22300Sstevel@tonic-gate 		return;
22310Sstevel@tonic-gate 	}
22320Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
22330Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
22340Sstevel@tonic-gate 	else  {
22353448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
22365240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22370Sstevel@tonic-gate 			    "phyint_send: tbf_contr rate %d "
22380Sstevel@tonic-gate 			    "vifp 0x%p mp 0x%p dst 0x%x",
22390Sstevel@tonic-gate 			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
22400Sstevel@tonic-gate 		}
22410Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
22420Sstevel@tonic-gate 	}
22430Sstevel@tonic-gate }
22440Sstevel@tonic-gate 
22450Sstevel@tonic-gate /*
22460Sstevel@tonic-gate  * Send the whole packet for REGISTER encapsulation to PIM daemon
22470Sstevel@tonic-gate  * Caller assumes it can continue to use mp on return.
22480Sstevel@tonic-gate  */
22490Sstevel@tonic-gate /* ARGSUSED */
22500Sstevel@tonic-gate static void
22510Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
22520Sstevel@tonic-gate {
22530Sstevel@tonic-gate 	struct igmpmsg	*im;
22540Sstevel@tonic-gate 	mblk_t		*mp_copy;
22550Sstevel@tonic-gate 	ipha_t		*ipha_copy;
22563448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
22575240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
22583448Sdh155122 
22593448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
22605240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22610Sstevel@tonic-gate 		    "register_send: src %x, dst %x\n",
22620Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
22630Sstevel@tonic-gate 	}
22640Sstevel@tonic-gate 
22650Sstevel@tonic-gate 	/*
22660Sstevel@tonic-gate 	 * Copy the old packet & pullup its IP header into the new mblk_t so we
22670Sstevel@tonic-gate 	 * can modify it.  Try to fill the new mblk_t since if we don't the
22680Sstevel@tonic-gate 	 * ethernet driver will.
22690Sstevel@tonic-gate 	 */
22700Sstevel@tonic-gate 	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
22710Sstevel@tonic-gate 	if (mp_copy == NULL) {
22723448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
22733448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
22745240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22750Sstevel@tonic-gate 			    "register_send: allocb failure.");
22760Sstevel@tonic-gate 		}
22770Sstevel@tonic-gate 		return;
22780Sstevel@tonic-gate 	}
22790Sstevel@tonic-gate 
22800Sstevel@tonic-gate 	/*
22810Sstevel@tonic-gate 	 * Bump write pointer to account for igmpmsg being added.
22820Sstevel@tonic-gate 	 */
22830Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 	/*
22860Sstevel@tonic-gate 	 * Chain packet to new mblk_t.
22870Sstevel@tonic-gate 	 */
22880Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
22893448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
22903448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
22915240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22920Sstevel@tonic-gate 			    "register_send: copymsg failure.");
22930Sstevel@tonic-gate 		}
22940Sstevel@tonic-gate 		freeb(mp_copy);
22950Sstevel@tonic-gate 		return;
22960Sstevel@tonic-gate 	}
22970Sstevel@tonic-gate 
22980Sstevel@tonic-gate 	/*
22995240Snordmark 	 * icmp_input() asserts that IP version field is set to an
23000Sstevel@tonic-gate 	 * appropriate version. Hence, the struct igmpmsg that this really
23010Sstevel@tonic-gate 	 * becomes, needs to have the correct IP version field.
23020Sstevel@tonic-gate 	 */
23030Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
23040Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
23050Sstevel@tonic-gate 
23060Sstevel@tonic-gate 	/*
23070Sstevel@tonic-gate 	 * The kernel uses the struct igmpmsg header to encode the messages to
23080Sstevel@tonic-gate 	 * the multicast routing daemon. Fill in the fields in the header
23090Sstevel@tonic-gate 	 * starting with the message type which is IGMPMSG_WHOLEPKT
23100Sstevel@tonic-gate 	 */
23110Sstevel@tonic-gate 	im = (struct igmpmsg *)mp_copy->b_rptr;
23120Sstevel@tonic-gate 	im->im_msgtype = IGMPMSG_WHOLEPKT;
23130Sstevel@tonic-gate 	im->im_src.s_addr = ipha->ipha_src;
23140Sstevel@tonic-gate 	im->im_dst.s_addr = ipha->ipha_dst;
23150Sstevel@tonic-gate 
23160Sstevel@tonic-gate 	/*
23170Sstevel@tonic-gate 	 * Must Be Zero. This is because the struct igmpmsg is really an IP
23180Sstevel@tonic-gate 	 * header with renamed fields and the multicast routing daemon uses
23190Sstevel@tonic-gate 	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
23200Sstevel@tonic-gate 	 */
23210Sstevel@tonic-gate 	im->im_mbz = 0;
23220Sstevel@tonic-gate 
23233448Sdh155122 	++ipst->ips_mrtstat->mrts_upcalls;
23245240Snordmark 	if (!canputnext(mrouter->conn_rq)) {
23253448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_regsend_drops;
23263448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
23275240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23280Sstevel@tonic-gate 			    "register_send: register upcall failure.");
23290Sstevel@tonic-gate 		}
23300Sstevel@tonic-gate 		freemsg(mp_copy);
23310Sstevel@tonic-gate 	} else {
23325240Snordmark 		/* Pass to RAWIP */
23335240Snordmark 		(mrouter->conn_recv)(mrouter, mp_copy, NULL);
23340Sstevel@tonic-gate 	}
23350Sstevel@tonic-gate }
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate /*
23380Sstevel@tonic-gate  * pim_validate_cksum handles verification of the checksum in the
23390Sstevel@tonic-gate  * pim header.  For PIM Register packets, the checksum is calculated
23400Sstevel@tonic-gate  * across the PIM header only.  For all other packets, the checksum
23410Sstevel@tonic-gate  * is for the PIM header and remainder of the packet.
23420Sstevel@tonic-gate  *
23430Sstevel@tonic-gate  * returns: B_TRUE, if checksum is okay.
23440Sstevel@tonic-gate  *          B_FALSE, if checksum is not valid.
23450Sstevel@tonic-gate  */
23460Sstevel@tonic-gate static boolean_t
23470Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
23480Sstevel@tonic-gate {
23490Sstevel@tonic-gate 	mblk_t *mp_dup;
23500Sstevel@tonic-gate 
23510Sstevel@tonic-gate 	if ((mp_dup = dupmsg(mp)) == NULL)
23520Sstevel@tonic-gate 		return (B_FALSE);
23530Sstevel@tonic-gate 
23540Sstevel@tonic-gate 	mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
23550Sstevel@tonic-gate 	if (pimp->pim_type == PIM_REGISTER)
23560Sstevel@tonic-gate 		mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
23570Sstevel@tonic-gate 	if (IP_CSUM(mp_dup, 0, 0)) {
23580Sstevel@tonic-gate 		freemsg(mp_dup);
23590Sstevel@tonic-gate 		return (B_FALSE);
23600Sstevel@tonic-gate 	}
23610Sstevel@tonic-gate 	freemsg(mp_dup);
23620Sstevel@tonic-gate 	return (B_TRUE);
23630Sstevel@tonic-gate }
23640Sstevel@tonic-gate 
23650Sstevel@tonic-gate /*
23660Sstevel@tonic-gate  * int
23673448Sdh155122  * pim_input(queue_t *, mblk_t *, ill_t *ill) - Process PIM protocol packets.
23680Sstevel@tonic-gate  *	IP Protocol 103. Register messages are decapsulated and sent
23690Sstevel@tonic-gate  *	onto multicast forwarding.
23700Sstevel@tonic-gate  */
23710Sstevel@tonic-gate int
23723448Sdh155122 pim_input(queue_t *q, mblk_t *mp, ill_t *ill)
23730Sstevel@tonic-gate {
23740Sstevel@tonic-gate 	ipha_t		*eip, *ip;
23750Sstevel@tonic-gate 	int		iplen, pimlen, iphlen;
23760Sstevel@tonic-gate 	struct pim	*pimp;	/* pointer to a pim struct */
23770Sstevel@tonic-gate 	uint32_t	*reghdr;
23783448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
23795240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
23800Sstevel@tonic-gate 
23810Sstevel@tonic-gate 	/*
23820Sstevel@tonic-gate 	 * Pullup the msg for PIM protocol processing.
23830Sstevel@tonic-gate 	 */
23840Sstevel@tonic-gate 	if (pullupmsg(mp, -1) == 0) {
23853448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
23860Sstevel@tonic-gate 		freemsg(mp);
23870Sstevel@tonic-gate 		return (-1);
23880Sstevel@tonic-gate 	}
23890Sstevel@tonic-gate 
23900Sstevel@tonic-gate 	ip = (ipha_t *)mp->b_rptr;
23910Sstevel@tonic-gate 	iplen = ip->ipha_length;
23920Sstevel@tonic-gate 	iphlen = IPH_HDR_LENGTH(ip);
23930Sstevel@tonic-gate 	pimlen = ntohs(iplen) - iphlen;
23940Sstevel@tonic-gate 
23950Sstevel@tonic-gate 	/*
23960Sstevel@tonic-gate 	 * Validate lengths
23970Sstevel@tonic-gate 	 */
23980Sstevel@tonic-gate 	if (pimlen < PIM_MINLEN) {
23993448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_malformed;
24003448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24015240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24020Sstevel@tonic-gate 			    "pim_input: length not at least minlen");
24030Sstevel@tonic-gate 		}
24040Sstevel@tonic-gate 		freemsg(mp);
24050Sstevel@tonic-gate 		return (-1);
24060Sstevel@tonic-gate 	}
24070Sstevel@tonic-gate 
24080Sstevel@tonic-gate 	/*
24090Sstevel@tonic-gate 	 * Point to the PIM header.
24100Sstevel@tonic-gate 	 */
24110Sstevel@tonic-gate 	pimp = (struct pim *)((caddr_t)ip + iphlen);
24120Sstevel@tonic-gate 
24130Sstevel@tonic-gate 	/*
24140Sstevel@tonic-gate 	 * Check the version number.
24150Sstevel@tonic-gate 	 */
24160Sstevel@tonic-gate 	if (pimp->pim_vers != PIM_VERSION) {
24173448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badversion;
24183448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24195240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24200Sstevel@tonic-gate 			    "pim_input: unknown version of PIM");
24210Sstevel@tonic-gate 		}
24220Sstevel@tonic-gate 		freemsg(mp);
24230Sstevel@tonic-gate 		return (-1);
24240Sstevel@tonic-gate 	}
24250Sstevel@tonic-gate 
24260Sstevel@tonic-gate 	/*
24270Sstevel@tonic-gate 	 * Validate the checksum
24280Sstevel@tonic-gate 	 */
24290Sstevel@tonic-gate 	if (!pim_validate_cksum(mp, ip, pimp)) {
24303448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
24313448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24325240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24330Sstevel@tonic-gate 			    "pim_input: invalid checksum");
24340Sstevel@tonic-gate 		}
24350Sstevel@tonic-gate 		freemsg(mp);
24360Sstevel@tonic-gate 		return (-1);
24370Sstevel@tonic-gate 	}
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate 	if (pimp->pim_type != PIM_REGISTER)
24400Sstevel@tonic-gate 		return (0);
24410Sstevel@tonic-gate 
24420Sstevel@tonic-gate 	reghdr = (uint32_t *)(pimp + 1);
24430Sstevel@tonic-gate 	eip = (ipha_t *)(reghdr + 1);
24440Sstevel@tonic-gate 
24450Sstevel@tonic-gate 	/*
24460Sstevel@tonic-gate 	 * check if the inner packet is destined to mcast group
24470Sstevel@tonic-gate 	 */
24480Sstevel@tonic-gate 	if (!CLASSD(eip->ipha_dst)) {
24493448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badregisters;
24503448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24515240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24520Sstevel@tonic-gate 			    "pim_input: Inner pkt not mcast .. !");
24530Sstevel@tonic-gate 		}
24540Sstevel@tonic-gate 		freemsg(mp);
24550Sstevel@tonic-gate 		return (-1);
24560Sstevel@tonic-gate 	}
24573448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
24585240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24590Sstevel@tonic-gate 		    "register from %x, to %x, len %d",
24600Sstevel@tonic-gate 		    ntohl(eip->ipha_src),
24610Sstevel@tonic-gate 		    ntohl(eip->ipha_dst),
24620Sstevel@tonic-gate 		    ntohs(eip->ipha_length));
24630Sstevel@tonic-gate 	}
24640Sstevel@tonic-gate 	/*
24650Sstevel@tonic-gate 	 * If the null register bit is not set, decapsulate
24660Sstevel@tonic-gate 	 * the packet before forwarding it.
24670Sstevel@tonic-gate 	 */
24680Sstevel@tonic-gate 	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER)) {
24690Sstevel@tonic-gate 		mblk_t *mp_copy;
24700Sstevel@tonic-gate 
24710Sstevel@tonic-gate 		/* Copy the message */
24720Sstevel@tonic-gate 		if ((mp_copy = copymsg(mp)) == NULL) {
24733448Sdh155122 			++ipst->ips_mrtstat->mrts_pim_nomemory;
24740Sstevel@tonic-gate 			freemsg(mp);
24750Sstevel@tonic-gate 			return (-1);
24760Sstevel@tonic-gate 		}
24770Sstevel@tonic-gate 
24780Sstevel@tonic-gate 		/*
24790Sstevel@tonic-gate 		 * Decapsulate the packet and give it to
24800Sstevel@tonic-gate 		 * register_mforward.
24810Sstevel@tonic-gate 		 */
24820Sstevel@tonic-gate 		mp_copy->b_rptr += iphlen + sizeof (pim_t) +
24830Sstevel@tonic-gate 		    sizeof (*reghdr);
24843448Sdh155122 		if (register_mforward(q, mp_copy, ill) != 0) {
24850Sstevel@tonic-gate 			freemsg(mp);
24860Sstevel@tonic-gate 			return (-1);
24870Sstevel@tonic-gate 		}
24880Sstevel@tonic-gate 	}
24890Sstevel@tonic-gate 
24900Sstevel@tonic-gate 	/*
24910Sstevel@tonic-gate 	 * Pass all valid PIM packets up to any process(es) listening on a raw
24920Sstevel@tonic-gate 	 * PIM socket. For Solaris it is done right after pim_input() is
24930Sstevel@tonic-gate 	 * called.
24940Sstevel@tonic-gate 	 */
24950Sstevel@tonic-gate 	return (0);
24960Sstevel@tonic-gate }
24970Sstevel@tonic-gate 
24980Sstevel@tonic-gate /*
24990Sstevel@tonic-gate  * PIM sparse mode hook.  Called by pim_input after decapsulating
25000Sstevel@tonic-gate  * the packet. Loop back the packet, as if we have received it.
25010Sstevel@tonic-gate  * In pim_input() we have to check if the destination is a multicast address.
25020Sstevel@tonic-gate  */
25030Sstevel@tonic-gate /* ARGSUSED */
25040Sstevel@tonic-gate static int
25053448Sdh155122 register_mforward(queue_t *q, mblk_t *mp, ill_t *ill)
25060Sstevel@tonic-gate {
25073448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
25085240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
25093448Sdh155122 
25103448Sdh155122 	ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);
25113448Sdh155122 
25123448Sdh155122 	if (ipst->ips_ip_mrtdebug > 3) {
25130Sstevel@tonic-gate 		ipha_t *ipha;
25140Sstevel@tonic-gate 
25150Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
25165240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25170Sstevel@tonic-gate 		    "register_mforward: src %x, dst %x\n",
25180Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
25190Sstevel@tonic-gate 	}
25200Sstevel@tonic-gate 	/*
25210Sstevel@tonic-gate 	 * Need to pass in to ip_mforward() the information that the
25220Sstevel@tonic-gate 	 * packet has arrived on the register_vif. We use the solution that
25230Sstevel@tonic-gate 	 * ip_mroute_decap() employs: use mp->b_prev to pass some information
25240Sstevel@tonic-gate 	 * to ip_mforward(). Nonzero value means the packet has arrived on a
25250Sstevel@tonic-gate 	 * tunnel (ip_mroute_decap() puts the address of the other side of the
25260Sstevel@tonic-gate 	 * tunnel there.) This is safe since ip_rput() either frees the packet
25270Sstevel@tonic-gate 	 * or passes it to ip_mforward(). We use
25280Sstevel@tonic-gate 	 * PIM_REGISTER_MARKER = 0xffffffff to indicate the has arrived on the
25290Sstevel@tonic-gate 	 * register vif. If in the future we have more than one register vifs,
25300Sstevel@tonic-gate 	 * then this will need re-examination.
25310Sstevel@tonic-gate 	 */
25320Sstevel@tonic-gate 	mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER;
25333448Sdh155122 	++ipst->ips_mrtstat->mrts_pim_regforwards;
25340Sstevel@tonic-gate 	ip_rput(q, mp);
25350Sstevel@tonic-gate 	return (0);
25360Sstevel@tonic-gate }
25370Sstevel@tonic-gate 
25380Sstevel@tonic-gate /*
25390Sstevel@tonic-gate  * Send an encapsulated packet.
25400Sstevel@tonic-gate  * Caller assumes can continue to use mp when routine returns.
25410Sstevel@tonic-gate  */
25420Sstevel@tonic-gate /* ARGSUSED */
25430Sstevel@tonic-gate static void
25440Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
25450Sstevel@tonic-gate {
25460Sstevel@tonic-gate 	mblk_t 	*mp_copy;
25470Sstevel@tonic-gate 	ipha_t 	*ipha_copy;
25480Sstevel@tonic-gate 	size_t	len;
25493448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
25505240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
25513448Sdh155122 
25523448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
25535240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25543448Sdh155122 		    "encap_send: vif %ld enter",
25553448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs));
25560Sstevel@tonic-gate 	}
25570Sstevel@tonic-gate 	len = ntohs(ipha->ipha_length);
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate 	/*
25600Sstevel@tonic-gate 	 * Copy the old packet & pullup it's IP header into the
25610Sstevel@tonic-gate 	 * new mbuf so we can modify it.  Try to fill the new
25620Sstevel@tonic-gate 	 * mbuf since if we don't the ethernet driver will.
25630Sstevel@tonic-gate 	 */
25640Sstevel@tonic-gate 	mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
25650Sstevel@tonic-gate 	if (mp_copy == NULL)
25660Sstevel@tonic-gate 		return;
25670Sstevel@tonic-gate 	mp_copy->b_rptr += 32;
25680Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
25690Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
25700Sstevel@tonic-gate 		freeb(mp_copy);
25710Sstevel@tonic-gate 		return;
25720Sstevel@tonic-gate 	}
25730Sstevel@tonic-gate 
25740Sstevel@tonic-gate 	/*
25750Sstevel@tonic-gate 	 * Fill in the encapsulating IP header.
25760Sstevel@tonic-gate 	 * Remote tunnel dst in rmt_addr, from add_vif().
25770Sstevel@tonic-gate 	 */
25780Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
25790Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
25800Sstevel@tonic-gate 	ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
25810Sstevel@tonic-gate 	ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
25820Sstevel@tonic-gate 	ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
25830Sstevel@tonic-gate 	ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
25840Sstevel@tonic-gate 	ASSERT(ipha_copy->ipha_ident == 0);
25850Sstevel@tonic-gate 
25860Sstevel@tonic-gate 	/* Turn the encapsulated IP header back into a valid one. */
25870Sstevel@tonic-gate 	ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
25880Sstevel@tonic-gate 	ipha->ipha_ttl--;
25890Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = 0;
25900Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
25910Sstevel@tonic-gate 
25923448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
25935240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25940Sstevel@tonic-gate 		    "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
25950Sstevel@tonic-gate 	}
25960Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
25970Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
25980Sstevel@tonic-gate 	else
25990Sstevel@tonic-gate 		/* ipha is from the original header */
26000Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, ipha);
26010Sstevel@tonic-gate }
26020Sstevel@tonic-gate 
26030Sstevel@tonic-gate /*
26040Sstevel@tonic-gate  * De-encapsulate a packet and feed it back through IP input.
26050Sstevel@tonic-gate  * This routine is called whenever IP gets a packet with prototype
26060Sstevel@tonic-gate  * IPPROTO_ENCAP and a local destination address.
26070Sstevel@tonic-gate  */
26080Sstevel@tonic-gate void
26093448Sdh155122 ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill)
26100Sstevel@tonic-gate {
26110Sstevel@tonic-gate 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
26120Sstevel@tonic-gate 	ipha_t		*ipha_encap;
26130Sstevel@tonic-gate 	int		hlen = IPH_HDR_LENGTH(ipha);
26140Sstevel@tonic-gate 	ipaddr_t	src;
26150Sstevel@tonic-gate 	struct vif	*vifp;
26163448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
26175240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
26180Sstevel@tonic-gate 
26190Sstevel@tonic-gate 	/*
26200Sstevel@tonic-gate 	 * Dump the packet if it's not to a multicast destination or if
26210Sstevel@tonic-gate 	 * we don't have an encapsulating tunnel with the source.
26220Sstevel@tonic-gate 	 * Note:  This code assumes that the remote site IP address
26230Sstevel@tonic-gate 	 * uniquely identifies the tunnel (i.e., that this site has
26240Sstevel@tonic-gate 	 * at most one tunnel with the remote site).
26250Sstevel@tonic-gate 	 */
26260Sstevel@tonic-gate 	ipha_encap = (ipha_t *)((char *)ipha + hlen);
26270Sstevel@tonic-gate 	if (!CLASSD(ipha_encap->ipha_dst)) {
26283448Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
26290Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: bad tunnel\n"));
26300Sstevel@tonic-gate 		freemsg(mp);
26310Sstevel@tonic-gate 		return;
26320Sstevel@tonic-gate 	}
26330Sstevel@tonic-gate 	src = (ipaddr_t)ipha->ipha_src;
26343448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
26353448Sdh155122 	if (src != ipst->ips_last_encap_src) {
26360Sstevel@tonic-gate 		struct vif *vife;
26370Sstevel@tonic-gate 
26383448Sdh155122 		vifp = ipst->ips_vifs;
26393448Sdh155122 		vife = vifp + ipst->ips_numvifs;
26403448Sdh155122 		ipst->ips_last_encap_src = src;
26413448Sdh155122 		ipst->ips_last_encap_vif = 0;
26420Sstevel@tonic-gate 		for (; vifp < vife; ++vifp) {
26430Sstevel@tonic-gate 			if (!lock_good_vif(vifp))
26440Sstevel@tonic-gate 				continue;
26450Sstevel@tonic-gate 			if (vifp->v_rmt_addr.s_addr == src) {
26460Sstevel@tonic-gate 				if (vifp->v_flags & VIFF_TUNNEL)
26473448Sdh155122 					ipst->ips_last_encap_vif = vifp;
26483448Sdh155122 				if (ipst->ips_ip_mrtdebug > 1) {
26495240Snordmark 					(void) mi_strlog(mrouter->conn_rq,
26500Sstevel@tonic-gate 					    1, SL_TRACE,
26510Sstevel@tonic-gate 					    "ip_mroute_decap: good tun "
26520Sstevel@tonic-gate 					    "vif %ld with %x",
26533448Sdh155122 					    (ptrdiff_t)(vifp - ipst->ips_vifs),
26540Sstevel@tonic-gate 					    ntohl(src));
26550Sstevel@tonic-gate 				}
26560Sstevel@tonic-gate 				unlock_good_vif(vifp);
26570Sstevel@tonic-gate 				break;
26580Sstevel@tonic-gate 			}
26590Sstevel@tonic-gate 			unlock_good_vif(vifp);
26600Sstevel@tonic-gate 		}
26610Sstevel@tonic-gate 	}
26623448Sdh155122 	if ((vifp = ipst->ips_last_encap_vif) == 0) {
26633448Sdh155122 		mutex_exit(&ipst->ips_last_encap_lock);
26643448Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
26650Sstevel@tonic-gate 		freemsg(mp);
26660Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
26673448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
26680Sstevel@tonic-gate 		return;
26690Sstevel@tonic-gate 	}
26703448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
26710Sstevel@tonic-gate 
26720Sstevel@tonic-gate 	/*
26730Sstevel@tonic-gate 	 * Need to pass in the tunnel source to ip_mforward (so that it can
26740Sstevel@tonic-gate 	 * verify that the packet arrived over the correct vif.)  We use b_prev
26750Sstevel@tonic-gate 	 * to pass this information. This is safe since the ip_rput either
26760Sstevel@tonic-gate 	 * frees the packet or passes it to ip_mforward.
26770Sstevel@tonic-gate 	 */
26780Sstevel@tonic-gate 	mp->b_prev = (mblk_t *)(uintptr_t)src;
26790Sstevel@tonic-gate 	mp->b_rptr += hlen;
26800Sstevel@tonic-gate 	/* Feed back into ip_rput as an M_DATA. */
26810Sstevel@tonic-gate 	ip_rput(q, mp);
26820Sstevel@tonic-gate }
26830Sstevel@tonic-gate 
26840Sstevel@tonic-gate /*
26850Sstevel@tonic-gate  * Remove all records with v_ipif == ipif.  Called when an interface goes away
26860Sstevel@tonic-gate  * (stream closed).  Called as writer.
26870Sstevel@tonic-gate  */
26880Sstevel@tonic-gate void
26890Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif)
26900Sstevel@tonic-gate {
26910Sstevel@tonic-gate 	vifi_t vifi, tmp_vifi;
26920Sstevel@tonic-gate 	vifi_t num_of_vifs;
26933448Sdh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
26940Sstevel@tonic-gate 
26950Sstevel@tonic-gate 	/* Can't check vifi >= 0 since vifi_t is unsigned! */
26960Sstevel@tonic-gate 
26973448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
26983448Sdh155122 	num_of_vifs = ipst->ips_numvifs;
26993448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
27000Sstevel@tonic-gate 
27010Sstevel@tonic-gate 	for (vifi = num_of_vifs; vifi != 0; vifi--) {
27020Sstevel@tonic-gate 		tmp_vifi = vifi - 1;
27033448Sdh155122 		if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) {
27043448Sdh155122 			(void) del_vif(&tmp_vifi, NULL, NULL, ipst);
27050Sstevel@tonic-gate 		}
27060Sstevel@tonic-gate 	}
27070Sstevel@tonic-gate }
27080Sstevel@tonic-gate 
27090Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away.  Called by ill_delete.  */
27100Sstevel@tonic-gate void
27110Sstevel@tonic-gate reset_mrt_ill(ill_t *ill)
27120Sstevel@tonic-gate {
27130Sstevel@tonic-gate 	struct mfc		*rt;
27140Sstevel@tonic-gate 	struct rtdetq	*rte;
27150Sstevel@tonic-gate 	int			i;
27163448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
27175240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
27180Sstevel@tonic-gate 
27190Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
27203448Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
27213448Sdh155122 		if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
27223448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
27235240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
27243448Sdh155122 				    SL_TRACE,
27250Sstevel@tonic-gate 				    "reset_mrt_ill: mfctable [%d]", i);
27260Sstevel@tonic-gate 			}
27270Sstevel@tonic-gate 			while (rt != NULL) {
27280Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
27290Sstevel@tonic-gate 				while ((rte = rt->mfc_rte) != NULL) {
27300Sstevel@tonic-gate 					if (rte->ill == ill) {
27313448Sdh155122 						if (ipst->ips_ip_mrtdebug > 1) {
27323448Sdh155122 						(void) mi_strlog(
27335240Snordmark 						    mrouter->conn_rq,
27343448Sdh155122 						    1, SL_TRACE,
27353448Sdh155122 						    "reset_mrt_ill: "
27367240Srh87107 						    "ill 0x%p", (void *)ill);
27370Sstevel@tonic-gate 						}
27380Sstevel@tonic-gate 						rt->mfc_rte = rte->rte_next;
27390Sstevel@tonic-gate 						freemsg(rte->mp);
27400Sstevel@tonic-gate 						mi_free((char *)rte);
27410Sstevel@tonic-gate 					}
27420Sstevel@tonic-gate 				}
27430Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
27440Sstevel@tonic-gate 				rt = rt->mfc_next;
27450Sstevel@tonic-gate 			}
27460Sstevel@tonic-gate 		}
27473448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
27480Sstevel@tonic-gate 	}
27490Sstevel@tonic-gate }
27500Sstevel@tonic-gate 
27510Sstevel@tonic-gate /*
27520Sstevel@tonic-gate  * Token bucket filter module.
27530Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
27540Sstevel@tonic-gate  */
27550Sstevel@tonic-gate static void
27560Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
27570Sstevel@tonic-gate {
27580Sstevel@tonic-gate 	size_t 	p_len =  msgdsize(mp);
27590Sstevel@tonic-gate 	struct tbf	*t    = vifp->v_tbf;
27600Sstevel@tonic-gate 	timeout_id_t id = 0;
27613448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
27625240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
27630Sstevel@tonic-gate 
27640Sstevel@tonic-gate 	/* Drop if packet is too large */
27650Sstevel@tonic-gate 	if (p_len > MAX_BKT_SIZE) {
27663448Sdh155122 		ipst->ips_mrtstat->mrts_pkt2large++;
27670Sstevel@tonic-gate 		freemsg(mp);
27680Sstevel@tonic-gate 		return;
27690Sstevel@tonic-gate 	}
27703448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
27715240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
27720Sstevel@tonic-gate 		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
27733448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
27740Sstevel@tonic-gate 		    ntohl(ipha->ipha_dst));
27750Sstevel@tonic-gate 	}
27760Sstevel@tonic-gate 
27770Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
27780Sstevel@tonic-gate 
27790Sstevel@tonic-gate 	tbf_update_tokens(vifp);
27800Sstevel@tonic-gate 
27810Sstevel@tonic-gate 	/*
27820Sstevel@tonic-gate 	 * If there are enough tokens,
27830Sstevel@tonic-gate 	 * and the queue is empty, send this packet out.
27840Sstevel@tonic-gate 	 */
27853448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
27865240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
27870Sstevel@tonic-gate 		    "tbf_control: vif %ld, TOKENS  %d, pkt len  %lu, qlen  %d",
27883448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
27890Sstevel@tonic-gate 		    t->tbf_q_len);
27900Sstevel@tonic-gate 	}
27910Sstevel@tonic-gate 	/* No packets are queued */
27920Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
27930Sstevel@tonic-gate 		/* queue empty, send packet if enough tokens */
27940Sstevel@tonic-gate 		if (p_len <= t->tbf_n_tok) {
27950Sstevel@tonic-gate 			t->tbf_n_tok -= p_len;
27960Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
27970Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
27980Sstevel@tonic-gate 			return;
27990Sstevel@tonic-gate 		} else {
28000Sstevel@tonic-gate 			/* Queue packet and timeout till later */
28010Sstevel@tonic-gate 			tbf_queue(vifp, mp);
28020Sstevel@tonic-gate 			ASSERT(vifp->v_timeout_id == 0);
28030Sstevel@tonic-gate 			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
28040Sstevel@tonic-gate 			    TBF_REPROCESS);
28050Sstevel@tonic-gate 		}
28060Sstevel@tonic-gate 	} else if (t->tbf_q_len < t->tbf_max_q_len) {
28070Sstevel@tonic-gate 		/* Finite queue length, so queue pkts and process queue */
28080Sstevel@tonic-gate 		tbf_queue(vifp, mp);
28090Sstevel@tonic-gate 		tbf_process_q(vifp);
28100Sstevel@tonic-gate 	} else {
28110Sstevel@tonic-gate 		/* Check that we have UDP header with IP header */
28120Sstevel@tonic-gate 		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
28135240Snordmark 		    sizeof (struct udphdr);
28140Sstevel@tonic-gate 
28150Sstevel@tonic-gate 		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
28160Sstevel@tonic-gate 			if (!pullupmsg(mp, hdr_length)) {
28170Sstevel@tonic-gate 				freemsg(mp);
28180Sstevel@tonic-gate 				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
28190Sstevel@tonic-gate 				    "vif %ld src 0x%x dst 0x%x\n",
28203448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs),
28210Sstevel@tonic-gate 				    ntohl(ipha->ipha_src),
28220Sstevel@tonic-gate 				    ntohl(ipha->ipha_dst)));
28230Sstevel@tonic-gate 				mutex_exit(&vifp->v_tbf->tbf_lock);
28240Sstevel@tonic-gate 				return;
28250Sstevel@tonic-gate 			} else
28260Sstevel@tonic-gate 				/* Have to reassign ipha after pullupmsg */
28270Sstevel@tonic-gate 				ipha = (ipha_t *)mp->b_rptr;
28280Sstevel@tonic-gate 		}
28290Sstevel@tonic-gate 		/*
28300Sstevel@tonic-gate 		 * Queue length too much,
28310Sstevel@tonic-gate 		 * try to selectively dq, or queue and process
28320Sstevel@tonic-gate 		 */
28330Sstevel@tonic-gate 		if (!tbf_dq_sel(vifp, ipha)) {
28343448Sdh155122 			ipst->ips_mrtstat->mrts_q_overflow++;
28350Sstevel@tonic-gate 			freemsg(mp);
28360Sstevel@tonic-gate 		} else {
28370Sstevel@tonic-gate 			tbf_queue(vifp, mp);
28380Sstevel@tonic-gate 			tbf_process_q(vifp);
28390Sstevel@tonic-gate 		}
28400Sstevel@tonic-gate 	}
28410Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
28420Sstevel@tonic-gate 		id = vifp->v_timeout_id;
28430Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
28440Sstevel@tonic-gate 	}
28450Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
28460Sstevel@tonic-gate 	if (id != 0)
28470Sstevel@tonic-gate 		(void) untimeout(id);
28480Sstevel@tonic-gate }
28490Sstevel@tonic-gate 
28500Sstevel@tonic-gate /*
28510Sstevel@tonic-gate  * Adds a packet to the tbf queue at the interface.
28520Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
28530Sstevel@tonic-gate  */
28540Sstevel@tonic-gate static void
28550Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp)
28560Sstevel@tonic-gate {
28570Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
28583448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
28595240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
28603448Sdh155122 
28613448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
28625240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28633448Sdh155122 		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
28640Sstevel@tonic-gate 	}
28650Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
28660Sstevel@tonic-gate 
28670Sstevel@tonic-gate 	if (t->tbf_t == NULL) {
28680Sstevel@tonic-gate 		/* Queue was empty */
28690Sstevel@tonic-gate 		t->tbf_q = mp;
28700Sstevel@tonic-gate 	} else {
28710Sstevel@tonic-gate 		/* Insert at tail */
28720Sstevel@tonic-gate 		t->tbf_t->b_next = mp;
28730Sstevel@tonic-gate 	}
28740Sstevel@tonic-gate 	/* set new tail pointer */
28750Sstevel@tonic-gate 	t->tbf_t = mp;
28760Sstevel@tonic-gate 
28770Sstevel@tonic-gate 	mp->b_next = mp->b_prev = NULL;
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate 	t->tbf_q_len++;
28800Sstevel@tonic-gate }
28810Sstevel@tonic-gate 
28820Sstevel@tonic-gate /*
28830Sstevel@tonic-gate  * Process the queue at the vif interface.
28840Sstevel@tonic-gate  * Drops the tbf_lock when sending packets.
28850Sstevel@tonic-gate  *
28860Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
28870Sstevel@tonic-gate  */
28880Sstevel@tonic-gate static void
28890Sstevel@tonic-gate tbf_process_q(struct vif *vifp)
28900Sstevel@tonic-gate {
28910Sstevel@tonic-gate 	mblk_t	*mp;
28920Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
28930Sstevel@tonic-gate 	size_t	len;
28943448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
28955240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
28963448Sdh155122 
28973448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
28985240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28990Sstevel@tonic-gate 		    "tbf_process_q 1: vif %ld qlen = %d",
29003448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len);
29010Sstevel@tonic-gate 	}
29020Sstevel@tonic-gate 
29030Sstevel@tonic-gate 	/*
29040Sstevel@tonic-gate 	 * Loop through the queue at the interface and send
29050Sstevel@tonic-gate 	 * as many packets as possible.
29060Sstevel@tonic-gate 	 */
29070Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29080Sstevel@tonic-gate 
29090Sstevel@tonic-gate 	while (t->tbf_q_len > 0) {
29100Sstevel@tonic-gate 		mp = t->tbf_q;
29110Sstevel@tonic-gate 		len = (size_t)msgdsize(mp); /* length of ip pkt */
29120Sstevel@tonic-gate 
29130Sstevel@tonic-gate 		/* Determine if the packet can be sent */
29140Sstevel@tonic-gate 		if (len <= t->tbf_n_tok) {
29150Sstevel@tonic-gate 			/*
29160Sstevel@tonic-gate 			 * If so, reduce no. of tokens, dequeue the packet,
29170Sstevel@tonic-gate 			 * send the packet.
29180Sstevel@tonic-gate 			 */
29190Sstevel@tonic-gate 			t->tbf_n_tok -= len;
29200Sstevel@tonic-gate 
29210Sstevel@tonic-gate 			t->tbf_q = mp->b_next;
29220Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
29230Sstevel@tonic-gate 				t->tbf_t = NULL;
29240Sstevel@tonic-gate 			}
29250Sstevel@tonic-gate 			mp->b_next = NULL;
29260Sstevel@tonic-gate 			/* Exit mutex before sending packet, then re-enter */
29270Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
29280Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
29290Sstevel@tonic-gate 			mutex_enter(&t->tbf_lock);
29300Sstevel@tonic-gate 		} else
29310Sstevel@tonic-gate 			break;
29320Sstevel@tonic-gate 	}
29330Sstevel@tonic-gate }
29340Sstevel@tonic-gate 
29350Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer.  */
29360Sstevel@tonic-gate static void
29370Sstevel@tonic-gate tbf_reprocess_q(void *arg)
29380Sstevel@tonic-gate {
29390Sstevel@tonic-gate 	struct vif *vifp = arg;
29403448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29415240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29420Sstevel@tonic-gate 
29430Sstevel@tonic-gate 	mutex_enter(&vifp->v_tbf->tbf_lock);
29440Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
29450Sstevel@tonic-gate 	tbf_update_tokens(vifp);
29460Sstevel@tonic-gate 
29470Sstevel@tonic-gate 	tbf_process_q(vifp);
29480Sstevel@tonic-gate 
29490Sstevel@tonic-gate 	if (vifp->v_tbf->tbf_q_len > 0) {
29500Sstevel@tonic-gate 		vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
29510Sstevel@tonic-gate 		    TBF_REPROCESS);
29520Sstevel@tonic-gate 	}
29530Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
29540Sstevel@tonic-gate 
29553448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
29565240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29570Sstevel@tonic-gate 		    "tbf_reprcess_q: vif %ld timeout id = %p",
29583448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id);
29590Sstevel@tonic-gate 	}
29600Sstevel@tonic-gate }
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate /*
29630Sstevel@tonic-gate  * Function that will selectively discard a member of the tbf queue,
29640Sstevel@tonic-gate  * based on the precedence value and the priority.
29650Sstevel@tonic-gate  *
29660Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
29670Sstevel@tonic-gate  */
29680Sstevel@tonic-gate static int
29690Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
29700Sstevel@tonic-gate {
29710Sstevel@tonic-gate 	uint_t		p;
29720Sstevel@tonic-gate 	struct tbf		*t = vifp->v_tbf;
29730Sstevel@tonic-gate 	mblk_t		**np;
29740Sstevel@tonic-gate 	mblk_t		*last, *mp;
29753448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29765240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29773448Sdh155122 
29783448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
29795240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29800Sstevel@tonic-gate 		    "dq_sel: vif %ld dst 0x%x",
29813448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst));
29820Sstevel@tonic-gate 	}
29830Sstevel@tonic-gate 
29840Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29850Sstevel@tonic-gate 	p = priority(vifp, ipha);
29860Sstevel@tonic-gate 
29870Sstevel@tonic-gate 	np = &t->tbf_q;
29880Sstevel@tonic-gate 	last = NULL;
29890Sstevel@tonic-gate 	while ((mp = *np) != NULL) {
29900Sstevel@tonic-gate 		if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) {
29910Sstevel@tonic-gate 			*np = mp->b_next;
29920Sstevel@tonic-gate 			/* If removing the last packet, fix the tail pointer */
29930Sstevel@tonic-gate 			if (mp == t->tbf_t)
29940Sstevel@tonic-gate 				t->tbf_t = last;
29950Sstevel@tonic-gate 			mp->b_prev = mp->b_next = NULL;
29960Sstevel@tonic-gate 			freemsg(mp);
29970Sstevel@tonic-gate 			/*
29980Sstevel@tonic-gate 			 * It's impossible for the queue to be empty, but
29990Sstevel@tonic-gate 			 * we check anyway.
30000Sstevel@tonic-gate 			 */
30010Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
30020Sstevel@tonic-gate 				t->tbf_t = NULL;
30030Sstevel@tonic-gate 			}
30043448Sdh155122 			ipst->ips_mrtstat->mrts_drop_sel++;
30050Sstevel@tonic-gate 			return (1);
30060Sstevel@tonic-gate 		}
30070Sstevel@tonic-gate 		np = &mp->b_next;
30080Sstevel@tonic-gate 		last = mp;
30090Sstevel@tonic-gate 	}
30100Sstevel@tonic-gate 	return (0);
30110Sstevel@tonic-gate }
30120Sstevel@tonic-gate 
30130Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint.  */
30140Sstevel@tonic-gate static void
30150Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp)
30160Sstevel@tonic-gate {
30170Sstevel@tonic-gate 	ipif_t  *ipif;
30183448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
30195240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
30200Sstevel@tonic-gate 
30210Sstevel@tonic-gate 	/* If encap tunnel options */
30220Sstevel@tonic-gate 	if (vifp->v_flags & VIFF_TUNNEL)  {
30233448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
30245240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30250Sstevel@tonic-gate 			    "tbf_send_pkt: ENCAP tunnel vif %ld",
30263448Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs));
30270Sstevel@tonic-gate 		}
30280Sstevel@tonic-gate 
30290Sstevel@tonic-gate 		/*
30300Sstevel@tonic-gate 		 * Feed into ip_wput which will set the ident field and
30310Sstevel@tonic-gate 		 * checksum the encapsulating header.
30320Sstevel@tonic-gate 		 * BSD gets the cached route vifp->v_route from ip_output()
30330Sstevel@tonic-gate 		 * to speed up route table lookups. Not necessary in SunOS 5.x.
30340Sstevel@tonic-gate 		 */
30350Sstevel@tonic-gate 		put(vifp->v_ipif->ipif_wq, mp);
30360Sstevel@tonic-gate 		return;
30370Sstevel@tonic-gate 
30380Sstevel@tonic-gate 		/* phyint */
30390Sstevel@tonic-gate 	} else {
30400Sstevel@tonic-gate 		/* Need to loop back to members on the outgoing interface. */
30410Sstevel@tonic-gate 		ipha_t  *ipha;
30420Sstevel@tonic-gate 		ipaddr_t    dst;
30430Sstevel@tonic-gate 		ipha  = (ipha_t *)mp->b_rptr;
30440Sstevel@tonic-gate 		dst  = ipha->ipha_dst;
30450Sstevel@tonic-gate 		ipif = vifp->v_ipif;
30460Sstevel@tonic-gate 
30470Sstevel@tonic-gate 		if (ilm_lookup_ipif(ipif, dst) != NULL) {
30480Sstevel@tonic-gate 			/*
30490Sstevel@tonic-gate 			 * The packet is not yet reassembled, thus we need to
30500Sstevel@tonic-gate 			 * pass it to ip_rput_local for checksum verification
30510Sstevel@tonic-gate 			 * and reassembly (and fanout the user stream).
30520Sstevel@tonic-gate 			 */
30530Sstevel@tonic-gate 			mblk_t 	*mp_loop;
30540Sstevel@tonic-gate 			ire_t	*ire;
30550Sstevel@tonic-gate 
30563448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
30575240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
30583448Sdh155122 				    SL_TRACE,
30590Sstevel@tonic-gate 				    "tbf_send_pkt: loopback vif %ld",
30603448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs));
30610Sstevel@tonic-gate 			}
30620Sstevel@tonic-gate 			mp_loop = copymsg(mp);
30630Sstevel@tonic-gate 			ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL,
30643448Sdh155122 			    ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst);
30650Sstevel@tonic-gate 
30660Sstevel@tonic-gate 			if (mp_loop != NULL && ire != NULL) {
30670Sstevel@tonic-gate 				IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop,
30680Sstevel@tonic-gate 				    ((ipha_t *)mp_loop->b_rptr),
30690Sstevel@tonic-gate 				    ire, (ill_t *)ipif->ipif_rq->q_ptr);
30700Sstevel@tonic-gate 			} else {
30710Sstevel@tonic-gate 				/* Either copymsg failed or no ire */
30725240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
30733448Sdh155122 				    SL_TRACE,
30740Sstevel@tonic-gate 				    "tbf_send_pkt: mp_loop 0x%p, ire 0x%p "
30757240Srh87107 				    "vif %ld\n", (void *)mp_loop, (void *)ire,
30763448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs));
30770Sstevel@tonic-gate 			}
30780Sstevel@tonic-gate 			if (ire != NULL)
30790Sstevel@tonic-gate 				ire_refrele(ire);
30800Sstevel@tonic-gate 		}
30813448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
30825240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30830Sstevel@tonic-gate 			    "tbf_send_pkt: phyint forward  vif %ld dst = 0x%x",
30843448Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst));
30850Sstevel@tonic-gate 		}
30860Sstevel@tonic-gate 		ip_rput_forward_multicast(dst, mp, ipif);
30870Sstevel@tonic-gate 	}
30880Sstevel@tonic-gate }
30890Sstevel@tonic-gate 
30900Sstevel@tonic-gate /*
30910Sstevel@tonic-gate  * Determine the current time and then the elapsed time (between the last time
30920Sstevel@tonic-gate  * and time now).  Update the no. of tokens in the bucket.
30930Sstevel@tonic-gate  */
30940Sstevel@tonic-gate static void
30950Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp)
30960Sstevel@tonic-gate {
30970Sstevel@tonic-gate 	timespec_t	tp;
30980Sstevel@tonic-gate 	hrtime_t	tm;
30990Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
31003448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
31015240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
31020Sstevel@tonic-gate 
31030Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
31040Sstevel@tonic-gate 
31050Sstevel@tonic-gate 	/* Time in secs and nsecs, rate limit in kbits/sec */
31060Sstevel@tonic-gate 	gethrestime(&tp);
31070Sstevel@tonic-gate 
31080Sstevel@tonic-gate 	/*LINTED*/
31090Sstevel@tonic-gate 	TV_DELTA(tp, t->tbf_last_pkt_t, tm);
31100Sstevel@tonic-gate 
31110Sstevel@tonic-gate 	/*
31120Sstevel@tonic-gate 	 * This formula is actually
31130Sstevel@tonic-gate 	 * "time in seconds" * "bytes/second".  Scaled for nsec.
31140Sstevel@tonic-gate 	 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
31150Sstevel@tonic-gate 	 *
31160Sstevel@tonic-gate 	 * The (1000/1024) was introduced in add_vif to optimize
31170Sstevel@tonic-gate 	 * this divide into a shift.
31180Sstevel@tonic-gate 	 */
31190Sstevel@tonic-gate 	t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
31200Sstevel@tonic-gate 	t->tbf_last_pkt_t = tp;
31210Sstevel@tonic-gate 
31220Sstevel@tonic-gate 	if (t->tbf_n_tok > MAX_BKT_SIZE)
31230Sstevel@tonic-gate 		t->tbf_n_tok = MAX_BKT_SIZE;
31243448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
31255240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
31260Sstevel@tonic-gate 		    "tbf_update_tok: tm %lld tok %d vif %ld",
31273448Sdh155122 		    tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs));
31280Sstevel@tonic-gate 	}
31290Sstevel@tonic-gate }
31300Sstevel@tonic-gate 
31310Sstevel@tonic-gate /*
31320Sstevel@tonic-gate  * Priority currently is based on port nos.
31330Sstevel@tonic-gate  * Different forwarding mechanisms have different ways
31340Sstevel@tonic-gate  * of obtaining the port no. Hence, the vif must be
31350Sstevel@tonic-gate  * given along with the packet itself.
31360Sstevel@tonic-gate  *
31370Sstevel@tonic-gate  */
31380Sstevel@tonic-gate static int
31390Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha)
31400Sstevel@tonic-gate {
31410Sstevel@tonic-gate 	int prio;
31423448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
31435240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
31440Sstevel@tonic-gate 
31450Sstevel@tonic-gate 	/* Temporary hack; may add general packet classifier some day */
31460Sstevel@tonic-gate 
31470Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock));
31480Sstevel@tonic-gate 
31490Sstevel@tonic-gate 	/*
31500Sstevel@tonic-gate 	 * The UDP port space is divided up into four priority ranges:
31510Sstevel@tonic-gate 	 * [0, 16384)	: unclassified - lowest priority
31520Sstevel@tonic-gate 	 * [16384, 32768)	: audio - highest priority
31530Sstevel@tonic-gate 	 * [32768, 49152)	: whiteboard - medium priority
31540Sstevel@tonic-gate 	 * [49152, 65536)	: video - low priority
31550Sstevel@tonic-gate 	 */
31560Sstevel@tonic-gate 
31570Sstevel@tonic-gate 	if (ipha->ipha_protocol == IPPROTO_UDP) {
31580Sstevel@tonic-gate 		struct udphdr *udp =
31590Sstevel@tonic-gate 		    (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha));
31600Sstevel@tonic-gate 		switch (ntohs(udp->uh_dport) & 0xc000) {
31610Sstevel@tonic-gate 		case 0x4000:
31620Sstevel@tonic-gate 			prio = 70;
31630Sstevel@tonic-gate 			break;
31640Sstevel@tonic-gate 		case 0x8000:
31650Sstevel@tonic-gate 			prio = 60;
31660Sstevel@tonic-gate 			break;
31670Sstevel@tonic-gate 		case 0xc000:
31680Sstevel@tonic-gate 			prio = 55;
31690Sstevel@tonic-gate 			break;
31700Sstevel@tonic-gate 		default:
31710Sstevel@tonic-gate 			prio = 50;
31720Sstevel@tonic-gate 			break;
31730Sstevel@tonic-gate 		}
31743448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
31755240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
31760Sstevel@tonic-gate 			    "priority: port %x prio %d\n",
31770Sstevel@tonic-gate 			    ntohs(udp->uh_dport), prio);
31780Sstevel@tonic-gate 		}
31790Sstevel@tonic-gate 	} else
31800Sstevel@tonic-gate 		prio = 50;  /* default priority */
31810Sstevel@tonic-gate 	return (prio);
31820Sstevel@tonic-gate }
31830Sstevel@tonic-gate 
31840Sstevel@tonic-gate /*
31850Sstevel@tonic-gate  * End of token bucket filter modifications
31860Sstevel@tonic-gate  */
31870Sstevel@tonic-gate 
31880Sstevel@tonic-gate 
31890Sstevel@tonic-gate 
31900Sstevel@tonic-gate /*
31910Sstevel@tonic-gate  * Produces data for netstat -M.
31920Sstevel@tonic-gate  */
31930Sstevel@tonic-gate int
31943448Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst)
31950Sstevel@tonic-gate {
31963448Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
31973448Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
31983448Sdh155122 	if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat,
31993448Sdh155122 		sizeof (struct mrtstat))) {
32000Sstevel@tonic-gate 		ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
32013448Sdh155122 		    (size_t)sizeof (struct mrtstat)));
32020Sstevel@tonic-gate 		return (0);
32030Sstevel@tonic-gate 	}
32040Sstevel@tonic-gate 	return (1);
32050Sstevel@tonic-gate }
32060Sstevel@tonic-gate 
32070Sstevel@tonic-gate /*
32080Sstevel@tonic-gate  * Sends info for SNMP's MIB.
32090Sstevel@tonic-gate  */
32100Sstevel@tonic-gate int
32113448Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst)
32120Sstevel@tonic-gate {
32130Sstevel@tonic-gate 	struct vifctl 	vi;
32140Sstevel@tonic-gate 	vifi_t		vifi;
32150Sstevel@tonic-gate 
32163448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
32173448Sdh155122 	for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) {
32183448Sdh155122 		if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0)
32190Sstevel@tonic-gate 			continue;
32200Sstevel@tonic-gate 		/*
32210Sstevel@tonic-gate 		 * No locks here, an approximation is fine.
32220Sstevel@tonic-gate 		 */
32230Sstevel@tonic-gate 		vi.vifc_vifi = vifi;
32243448Sdh155122 		vi.vifc_flags = ipst->ips_vifs[vifi].v_flags;
32253448Sdh155122 		vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold;
32263448Sdh155122 		vi.vifc_rate_limit	= ipst->ips_vifs[vifi].v_rate_limit;
32273448Sdh155122 		vi.vifc_lcl_addr	= ipst->ips_vifs[vifi].v_lcl_addr;
32283448Sdh155122 		vi.vifc_rmt_addr	= ipst->ips_vifs[vifi].v_rmt_addr;
32293448Sdh155122 		vi.vifc_pkt_in		= ipst->ips_vifs[vifi].v_pkt_in;
32303448Sdh155122 		vi.vifc_pkt_out		= ipst->ips_vifs[vifi].v_pkt_out;
32310Sstevel@tonic-gate 
32320Sstevel@tonic-gate 		if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) {
32330Sstevel@tonic-gate 			ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
32340Sstevel@tonic-gate 			    (size_t)sizeof (vi)));
32350Sstevel@tonic-gate 			return (0);
32360Sstevel@tonic-gate 		}
32370Sstevel@tonic-gate 	}
32383448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
32390Sstevel@tonic-gate 	return (1);
32400Sstevel@tonic-gate }
32410Sstevel@tonic-gate 
32420Sstevel@tonic-gate /*
32430Sstevel@tonic-gate  * Called by ip_snmp_get to send up multicast routing table.
32440Sstevel@tonic-gate  */
32450Sstevel@tonic-gate int
32463448Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst)
32470Sstevel@tonic-gate {
32480Sstevel@tonic-gate 	int			i, j;
32490Sstevel@tonic-gate 	struct mfc		*rt;
32500Sstevel@tonic-gate 	struct mfcctl	mfcc;
32510Sstevel@tonic-gate 
32520Sstevel@tonic-gate 	/*
32530Sstevel@tonic-gate 	 * Make sure multicast has not been turned off.
32540Sstevel@tonic-gate 	 */
32553448Sdh155122 	if (is_mrouter_off(ipst))
32560Sstevel@tonic-gate 		return (1);
32570Sstevel@tonic-gate 
32580Sstevel@tonic-gate 	/* Loop over all hash buckets and their chains */
32590Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
32603448Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
32613448Sdh155122 		for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) {
32620Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
32630Sstevel@tonic-gate 			if (rt->mfc_rte != NULL ||
32640Sstevel@tonic-gate 			    (rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
32650Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
32660Sstevel@tonic-gate 				continue;
32670Sstevel@tonic-gate 			}
32680Sstevel@tonic-gate 			mfcc.mfcc_origin = rt->mfc_origin;
32690Sstevel@tonic-gate 			mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp;
32700Sstevel@tonic-gate 			mfcc.mfcc_parent = rt->mfc_parent;
32710Sstevel@tonic-gate 			mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt;
32723448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
32733448Sdh155122 			for (j = 0; j < (int)ipst->ips_numvifs; j++)
32740Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = rt->mfc_ttls[j];
32753448Sdh155122 			for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++)
32760Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = 0;
32773448Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
32780Sstevel@tonic-gate 
32790Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
32800Sstevel@tonic-gate 			if (!snmp_append_data(mp, (char *)&mfcc,
32810Sstevel@tonic-gate 			    sizeof (mfcc))) {
32823448Sdh155122 				MFCB_REFRELE(&ipst->ips_mfcs[i]);
32830Sstevel@tonic-gate 				ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
32840Sstevel@tonic-gate 				    (size_t)sizeof (mfcc)));
32850Sstevel@tonic-gate 				return (0);
32860Sstevel@tonic-gate 			}
32870Sstevel@tonic-gate 		}
32883448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
32890Sstevel@tonic-gate 	}
32900Sstevel@tonic-gate 	return (1);
32910Sstevel@tonic-gate }
3292