xref: /onnv-gate/usr/src/uts/common/inet/ip/ip_mroute.c (revision 7240:c4957ab6a78e)
17098Smeem /*
27098Smeem  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
37098Smeem  * Use is subject to license terms.
47098Smeem  */
50Sstevel@tonic-gate /*
60Sstevel@tonic-gate  * CDDL HEADER START
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
91676Sjpk  * Common Development and Distribution License (the "License").
101676Sjpk  * You may not use this file except in compliance with the License.
110Sstevel@tonic-gate  *
120Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
130Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
140Sstevel@tonic-gate  * See the License for the specific language governing permissions
150Sstevel@tonic-gate  * and limitations under the License.
160Sstevel@tonic-gate  *
170Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
180Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
190Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
200Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
210Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
220Sstevel@tonic-gate  *
230Sstevel@tonic-gate  * CDDL HEADER END
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /*
26*7240Srh87107  * Copyright 2008 Sun Microsystems, Inc.
270Sstevel@tonic-gate  * All rights reserved.  Use is subject to license terms.
280Sstevel@tonic-gate  */
290Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */
300Sstevel@tonic-gate 
310Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
320Sstevel@tonic-gate 
330Sstevel@tonic-gate /*
340Sstevel@tonic-gate  * Procedures for the kernel part of DVMRP,
350Sstevel@tonic-gate  * a Distance-Vector Multicast Routing Protocol.
360Sstevel@tonic-gate  * (See RFC-1075)
370Sstevel@tonic-gate  * Written by David Waitzman, BBN Labs, August 1988.
380Sstevel@tonic-gate  * Modified by Steve Deering, Stanford, February 1989.
390Sstevel@tonic-gate  * Modified by Mark J. Steiglitz, Stanford, May, 1991
400Sstevel@tonic-gate  * Modified by Van Jacobson, LBL, January 1993
410Sstevel@tonic-gate  * Modified by Ajit Thyagarajan, PARC, August 1993
420Sstevel@tonic-gate  * Modified by Bill Fenner, PARC, April 1995
430Sstevel@tonic-gate  *
440Sstevel@tonic-gate  * MROUTING 3.5
450Sstevel@tonic-gate  */
460Sstevel@tonic-gate 
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate  * TODO
490Sstevel@tonic-gate  * - function pointer field in vif, void *vif_sendit()
500Sstevel@tonic-gate  */
510Sstevel@tonic-gate 
520Sstevel@tonic-gate #include <sys/types.h>
530Sstevel@tonic-gate #include <sys/stream.h>
540Sstevel@tonic-gate #include <sys/stropts.h>
550Sstevel@tonic-gate #include <sys/strlog.h>
560Sstevel@tonic-gate #include <sys/systm.h>
570Sstevel@tonic-gate #include <sys/ddi.h>
580Sstevel@tonic-gate #include <sys/cmn_err.h>
590Sstevel@tonic-gate #include <sys/zone.h>
600Sstevel@tonic-gate 
610Sstevel@tonic-gate #include <sys/param.h>
620Sstevel@tonic-gate #include <sys/socket.h>
630Sstevel@tonic-gate #include <sys/vtrace.h>
640Sstevel@tonic-gate #include <sys/debug.h>
650Sstevel@tonic-gate #include <net/if.h>
660Sstevel@tonic-gate #include <sys/sockio.h>
670Sstevel@tonic-gate #include <netinet/in.h>
680Sstevel@tonic-gate #include <net/if_dl.h>
690Sstevel@tonic-gate 
700Sstevel@tonic-gate #include <inet/common.h>
710Sstevel@tonic-gate #include <inet/mi.h>
720Sstevel@tonic-gate #include <inet/nd.h>
730Sstevel@tonic-gate #include <inet/mib2.h>
740Sstevel@tonic-gate #include <netinet/ip6.h>
750Sstevel@tonic-gate #include <inet/ip.h>
760Sstevel@tonic-gate #include <inet/snmpcom.h>
770Sstevel@tonic-gate 
780Sstevel@tonic-gate #include <netinet/igmp.h>
790Sstevel@tonic-gate #include <netinet/igmp_var.h>
800Sstevel@tonic-gate #include <netinet/udp.h>
810Sstevel@tonic-gate #include <netinet/ip_mroute.h>
820Sstevel@tonic-gate #include <inet/ip_multi.h>
830Sstevel@tonic-gate #include <inet/ip_ire.h>
840Sstevel@tonic-gate #include <inet/ip_if.h>
850Sstevel@tonic-gate #include <inet/ipclassifier.h>
860Sstevel@tonic-gate 
870Sstevel@tonic-gate #include <netinet/pim.h>
880Sstevel@tonic-gate 
890Sstevel@tonic-gate 
900Sstevel@tonic-gate /*
910Sstevel@tonic-gate  * MT Design:
920Sstevel@tonic-gate  *
930Sstevel@tonic-gate  * There are three main data structures viftable, mfctable and tbftable that
940Sstevel@tonic-gate  * need to be protected against MT races.
950Sstevel@tonic-gate  *
 * viftable is a fixed length array of vif structs. There is no lock to
 * protect the whole array, instead each struct is protected by its own
 * individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines
 * the current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use.  When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. See
 * lock_good_vif() and unlock_good_vif().
1090Sstevel@tonic-gate  *
1100Sstevel@tonic-gate  * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
1110Sstevel@tonic-gate  * of the vif struct.
1120Sstevel@tonic-gate  *
1130Sstevel@tonic-gate  * tbftable is also a fixed length array of tbf structs and is only accessed
1140Sstevel@tonic-gate  * via v_tbf.  It is protected by its own lock tbf_lock.
1150Sstevel@tonic-gate  *
1160Sstevel@tonic-gate  * Lock Ordering is
1170Sstevel@tonic-gate  * v_lock --> tbf_lock
 * v_lock --> ill_lock
1190Sstevel@tonic-gate  *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
 * protect the struct elements.
 *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
1310Sstevel@tonic-gate  *
1320Sstevel@tonic-gate  * Locking Hierarchy:
1330Sstevel@tonic-gate  * The bucket lock should be acquired before the mfc struct lock.
1340Sstevel@tonic-gate  * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
1350Sstevel@tonic-gate  * operations on the bucket struct.
1360Sstevel@tonic-gate  *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock,
 * causing the stats to be approximate, not exact.
1420Sstevel@tonic-gate  */
1430Sstevel@tonic-gate 
/* Sentinel vif index: from mrouted, no route for src. */
#define	NO_VIF	MAXVIFS

/*
 * Timeouts:
 *	Upcall timeouts - BSD keeps a boolean_t mfc->expire together with
 *	nexpire[MFCTBLSIZE], the number of times expire has been called.
 *	SunOS 5.x uses mfc->timeout for each mfc instead.
 *	Some Unixes are limited in the number of simultaneous timeouts
 *	that can be run; SunOS 5.x does not have this restriction.
 */

/*
 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
 * expires.  Thus the time till expiration is
 * EXPIRE_TIMEOUT * UPCALL_EXPIRE.
 */
#define	EXPIRE_TIMEOUT	(hz/4)	/* 4x / second */
#define	UPCALL_EXPIRE	6	/* number of timeouts */

/*
 * Hash function for a (source, group) entry: the address and two
 * right-shifted copies of it (by 20 and by 10 bits) are XORed together
 * for both the source `a' and the group `g', and the result is reduced
 * by MFCHASHMOD().
 */
#define	MFCHASH(a, g) \
	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	    ((g) >> 20) ^ ((g) >> 10) ^ (g))

#define	TBF_REPROCESS	(hz / 100)	/* 100x / second */

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate /* Function declarations */
1753448Sdh155122 static int	add_mfc(struct mfcctl *, ip_stack_t *);
1765240Snordmark static int	add_vif(struct vifctl *, conn_t *, mblk_t *, ip_stack_t *);
1773448Sdh155122 static int	del_mfc(struct mfcctl *, ip_stack_t *);
1785240Snordmark static int	del_vif(vifi_t *, conn_t *, mblk_t *, ip_stack_t *);
1790Sstevel@tonic-gate static void	del_vifp(struct vif *);
1800Sstevel@tonic-gate static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1810Sstevel@tonic-gate static void	expire_upcalls(void *);
1823448Sdh155122 static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
1833448Sdh155122 static void	free_queue(struct mfc *);
1843448Sdh155122 static int	get_assert(uchar_t *, ip_stack_t *);
1853448Sdh155122 static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
1863448Sdh155122 static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
1870Sstevel@tonic-gate static int	get_version(uchar_t *);
1883448Sdh155122 static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
1890Sstevel@tonic-gate static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
1900Sstevel@tonic-gate 		    ipaddr_t, struct mfc *);
1915240Snordmark static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
1920Sstevel@tonic-gate static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1933448Sdh155122 static int	register_mforward(queue_t *, mblk_t *, ill_t *);
1940Sstevel@tonic-gate static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1953448Sdh155122 static int	set_assert(int *, ip_stack_t *);
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate /*
1980Sstevel@tonic-gate  * Token Bucket Filter functions
1990Sstevel@tonic-gate  */
2000Sstevel@tonic-gate static int  priority(struct vif *, ipha_t *);
2010Sstevel@tonic-gate static void tbf_control(struct vif *, mblk_t *, ipha_t *);
2020Sstevel@tonic-gate static int  tbf_dq_sel(struct vif *, ipha_t *);
2030Sstevel@tonic-gate static void tbf_process_q(struct vif *);
2040Sstevel@tonic-gate static void tbf_queue(struct vif *, mblk_t *);
2050Sstevel@tonic-gate static void tbf_reprocess_q(void *);
2060Sstevel@tonic-gate static void tbf_send_packet(struct vif *, mblk_t *);
2070Sstevel@tonic-gate static void tbf_update_tokens(struct vif *);
2080Sstevel@tonic-gate static void release_mfc(struct mfcb *);
2090Sstevel@tonic-gate 
2103448Sdh155122 static boolean_t is_mrouter_off(ip_stack_t *);
2110Sstevel@tonic-gate /*
2120Sstevel@tonic-gate  * Encapsulation packets
2130Sstevel@tonic-gate  */
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate #define	ENCAP_TTL	64
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate /* prototype IP hdr for encapsulated packets */
2180Sstevel@tonic-gate static ipha_t multicast_encap_iphdr = {
2190Sstevel@tonic-gate 	IP_SIMPLE_HDR_VERSION,
2200Sstevel@tonic-gate 	0,				/* tos */
2210Sstevel@tonic-gate 	sizeof (ipha_t),		/* total length */
2220Sstevel@tonic-gate 	0,				/* id */
2230Sstevel@tonic-gate 	0,				/* frag offset */
2240Sstevel@tonic-gate 	ENCAP_TTL, IPPROTO_ENCAP,
2250Sstevel@tonic-gate 	0,				/* checksum */
2260Sstevel@tonic-gate };
2270Sstevel@tonic-gate 
/*
 * Minimum spacing between assert notification messages (rate limit),
 * expressed in nanoseconds: 3000000000 nsec is 3 seconds.
 */
#define	ASSERT_MSG_TIME		3000000000
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 
/*
 * Take a reference on a vif: v_refcnt is incremented while v_lock is
 * held, and the lock is dropped again before the macro ends.
 */
#define	VIF_REFHOLD(vifp) {			\
	mutex_enter(&(vifp)->v_lock);		\
	++(vifp)->v_refcnt;			\
	mutex_exit(&(vifp)->v_lock);		\
}
2390Sstevel@tonic-gate 
/*
 * Drop a reference on a vif whose v_lock is already held by the caller
 * (this macro never acquires it).  When the count reaches zero and the
 * vif is marked VIF_MARK_CONDEMNED, del_vifp() is called with the lock
 * still held; in every other case v_lock is released here.
 * NOTE(review): del_vifp() is presumably responsible for releasing
 * v_lock on the condemned path - confirm against its definition.
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	if (--(vifp)->v_refcnt == 0 &&				\
	    ((vifp)->v_marks & VIF_MARK_CONDEMNED)) {		\
		del_vifp(vifp);					\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}
2490Sstevel@tonic-gate 
/*
 * Drop a reference on a vif whose v_lock is not held: acquire v_lock,
 * then perform the locked release.  VIF_REFRELE_LOCKED() either hands
 * the vif to del_vifp() (last reference on a condemned vif) or releases
 * v_lock itself.
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	VIF_REFRELE_LOCKED(vifp);				\
}
2600Sstevel@tonic-gate 
/*
 * Register a walker on an mfc hash bucket: mfcb_refcnt is incremented
 * under mfcb_lock.  The ASSERT catches the counter wrapping to zero.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	++(mfcb)->mfcb_refcnt;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}
2670Sstevel@tonic-gate 
/*
 * Unregister a walker from an mfc hash bucket.  Under mfcb_lock the
 * reference count is decremented; if that was the last walker and the
 * bucket is marked MFCB_MARK_CONDEMNED, release_mfc() is called while
 * the lock is still held.  The lock is always dropped before leaving.
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	(mfcb)->mfcb_refcnt--;					\
	if ((mfcb)->mfcb_refcnt == 0 &&				\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
		release_mfc(mfcb);				\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}
2770Sstevel@tonic-gate 
/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 *
 *	mfcbp	pointer to the hash bucket (struct mfcb) to search
 *	o	origin address, compared against mfc_origin.s_addr
 *	g	group address, compared against mfc_mcastgrp.s_addr
 *	rt	lvalue that receives the first matching struct mfc, or
 *		NULL when the chain holds no match
 *
 * Entries with pending upcalls (mfc_rte != NULL) and entries marked
 * condemned are skipped.  Every argument is parenthesized in the
 * expansion so that expressions such as `base | 1' or `&bucket' can be
 * passed safely; `o' and `g' are evaluated once per chain entry.
 * NOTE(review): the per-entry mfc_marks field is tested with the bucket
 * flag MFCB_MARK_CONDEMNED - confirm this is the intended constant.
 * Type of service parameter to be added in the future!
 */
#define	MFCFIND(mfcbp, o, g, rt) { \
	struct mfc *_mb_rt; \
	\
	(rt) = NULL; \
	for (_mb_rt = (mfcbp)->mfcb_mfc; _mb_rt != NULL; \
	    _mb_rt = _mb_rt->mfc_next) { \
		if ((_mb_rt->mfc_origin.s_addr == (o)) && \
		    (_mb_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_mb_rt->mfc_rte == NULL) && \
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
			(rt) = _mb_rt; \
			break; \
		} \
	} \
}
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate /*
3010Sstevel@tonic-gate  * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
3020Sstevel@tonic-gate  * are inefficient. We use gethrestime() which returns a timespec_t with
3030Sstevel@tonic-gate  * sec and nsec, the resolution is machine dependent.
3040Sstevel@tonic-gate  * The following 2 macros have been changed to use nsec instead of usec.
3050Sstevel@tonic-gate  */
/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 * Delta should be a hrtime_t.
 *
 * TV_DELTA(a, b, delta) stores the difference a - b, in nanoseconds,
 * into delta.  Gaps of one or two whole seconds are handled by plain
 * additions; any other non-zero gap in seconds is multiplied out.
 * That multiplication is done in 64-bit arithmetic: in plain int,
 * 1000000000 * xxs overflows as soon as the gap reaches 3 seconds
 * (or -3 seconds).
 */
#define	TV_DELTA(a, b, delta) { \
	int xxs; \
 \
	(delta) = (a).tv_nsec - (b).tv_nsec; \
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
		switch (xxs) { \
		case 2: \
		    (delta) += 1000000000; \
		    /*FALLTHROUGH*/ \
		case 1: \
		    (delta) += 1000000000; \
		    break; \
		default: \
		    (delta) += (1000000000LL * xxs); \
		} \
	} \
}
3280Sstevel@tonic-gate 
/*
 * TV_LT(a, b) is true when time a is strictly earlier than time b:
 * either a has fewer seconds, or the seconds are equal and a has fewer
 * nanoseconds.
 */
#define	TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	((a).tv_sec == (b).tv_sec && (a).tv_nsec < (b).tv_nsec))
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate /*
3330Sstevel@tonic-gate  * Handle MRT setsockopt commands to modify the multicast routing tables.
3340Sstevel@tonic-gate  */
3350Sstevel@tonic-gate int
3360Sstevel@tonic-gate ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data,
3370Sstevel@tonic-gate     int datalen, mblk_t *first_mp)
3380Sstevel@tonic-gate {
3395240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
3405240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
3413448Sdh155122 
3423448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
3435240Snordmark 	if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) {
3443448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3450Sstevel@tonic-gate 		return (EACCES);
3460Sstevel@tonic-gate 	}
3473448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate 	if (checkonly) {
3500Sstevel@tonic-gate 		/*
3510Sstevel@tonic-gate 		 * do not do operation, just pretend to - new T_CHECK
3520Sstevel@tonic-gate 		 * Note: Even routines further on can probably fail but
3530Sstevel@tonic-gate 		 * this T_CHECK stuff is only to please XTI so it not
3540Sstevel@tonic-gate 		 * necessary to be perfect.
3550Sstevel@tonic-gate 		 */
3560Sstevel@tonic-gate 		switch (cmd) {
3570Sstevel@tonic-gate 		case MRT_INIT:
3580Sstevel@tonic-gate 		case MRT_DONE:
3590Sstevel@tonic-gate 		case MRT_ADD_VIF:
3600Sstevel@tonic-gate 		case MRT_DEL_VIF:
3610Sstevel@tonic-gate 		case MRT_ADD_MFC:
3620Sstevel@tonic-gate 		case MRT_DEL_MFC:
3630Sstevel@tonic-gate 		case MRT_ASSERT:
3645240Snordmark 			return (0);
3650Sstevel@tonic-gate 		default:
3665240Snordmark 			return (EOPNOTSUPP);
3670Sstevel@tonic-gate 		}
3680Sstevel@tonic-gate 	}
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 	/*
3710Sstevel@tonic-gate 	 * make sure no command is issued after multicast routing has been
3720Sstevel@tonic-gate 	 * turned off.
3730Sstevel@tonic-gate 	 */
3740Sstevel@tonic-gate 	if (cmd != MRT_INIT && cmd != MRT_DONE) {
3753448Sdh155122 		if (is_mrouter_off(ipst))
3760Sstevel@tonic-gate 			return (EINVAL);
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	switch (cmd) {
3805240Snordmark 	case MRT_INIT:	return (ip_mrouter_init(connp, data, datalen, ipst));
3813448Sdh155122 	case MRT_DONE:	return (ip_mrouter_done(first_mp, ipst));
3825240Snordmark 	case MRT_ADD_VIF:  return (add_vif((struct vifctl *)data, connp,
3835240Snordmark 			    first_mp, ipst));
3845240Snordmark 	case MRT_DEL_VIF:  return (del_vif((vifi_t *)data, connp, first_mp,
3855240Snordmark 			    ipst));
3863448Sdh155122 	case MRT_ADD_MFC:  return (add_mfc((struct mfcctl *)data, ipst));
3873448Sdh155122 	case MRT_DEL_MFC:  return (del_mfc((struct mfcctl *)data, ipst));
3883448Sdh155122 	case MRT_ASSERT:   return (set_assert((int *)data, ipst));
3890Sstevel@tonic-gate 	default:	   return (EOPNOTSUPP);
3900Sstevel@tonic-gate 	}
3910Sstevel@tonic-gate }
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate /*
3940Sstevel@tonic-gate  * Handle MRT getsockopt commands
3950Sstevel@tonic-gate  */
3960Sstevel@tonic-gate int
3970Sstevel@tonic-gate ip_mrouter_get(int cmd, queue_t *q, uchar_t *data)
3980Sstevel@tonic-gate {
3995240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
4005240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4015240Snordmark 
4025240Snordmark 	if (connp != ipst->ips_ip_g_mrouter)
4030Sstevel@tonic-gate 		return (EACCES);
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	switch (cmd) {
4060Sstevel@tonic-gate 	case MRT_VERSION:	return (get_version((uchar_t *)data));
4073448Sdh155122 	case MRT_ASSERT:	return (get_assert((uchar_t *)data, ipst));
4080Sstevel@tonic-gate 	default:		return (EOPNOTSUPP);
4090Sstevel@tonic-gate 	}
4100Sstevel@tonic-gate }
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate /*
4130Sstevel@tonic-gate  * Handle ioctl commands to obtain information from the cache.
4140Sstevel@tonic-gate  * Called with shared access to IP. These are read_only ioctls.
4150Sstevel@tonic-gate  */
4160Sstevel@tonic-gate /* ARGSUSED */
4170Sstevel@tonic-gate int
4180Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
4190Sstevel@tonic-gate     ip_ioctl_cmd_t *ipip, void *if_req)
4200Sstevel@tonic-gate {
4210Sstevel@tonic-gate 	mblk_t	*mp1;
4220Sstevel@tonic-gate 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4235240Snordmark 	conn_t		*connp = Q_TO_CONN(q);
4245240Snordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	/* Existence verified in ip_wput_nondata */
4270Sstevel@tonic-gate 	mp1 = mp->b_cont->b_cont;
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 	switch (iocp->ioc_cmd) {
4300Sstevel@tonic-gate 	case (SIOCGETVIFCNT):
4313448Sdh155122 		return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst));
4320Sstevel@tonic-gate 	case (SIOCGETSGCNT):
4333448Sdh155122 		return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst));
4340Sstevel@tonic-gate 	case (SIOCGETLSGCNT):
4353448Sdh155122 		return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst));
4360Sstevel@tonic-gate 	default:
4370Sstevel@tonic-gate 		return (EINVAL);
4380Sstevel@tonic-gate 	}
4390Sstevel@tonic-gate }
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate /*
4420Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4430Sstevel@tonic-gate  */
4440Sstevel@tonic-gate static int
4453448Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
4460Sstevel@tonic-gate {
4470Sstevel@tonic-gate 	struct mfc *rt;
4480Sstevel@tonic-gate 	struct mfcb *mfcbp;
4490Sstevel@tonic-gate 
4503448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];
4510Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
4520Sstevel@tonic-gate 	MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt);
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate 	if (rt != NULL) {
4550Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
4560Sstevel@tonic-gate 		req->pktcnt   = rt->mfc_pkt_cnt;
4570Sstevel@tonic-gate 		req->bytecnt  = rt->mfc_byte_cnt;
4580Sstevel@tonic-gate 		req->wrong_if = rt->mfc_wrong_if;
4590Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
4600Sstevel@tonic-gate 	} else
4610Sstevel@tonic-gate 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
4620Sstevel@tonic-gate 
4630Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
4640Sstevel@tonic-gate 	return (0);
4650Sstevel@tonic-gate }
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate /*
4680Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4690Sstevel@tonic-gate  * Uses larger counters and IPv6 addresses.
4700Sstevel@tonic-gate  */
4710Sstevel@tonic-gate /* ARGSUSED XXX until implemented */
4720Sstevel@tonic-gate static int
4733448Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
4740Sstevel@tonic-gate {
4750Sstevel@tonic-gate 	/* XXX TODO SIOCGETLSGCNT */
4760Sstevel@tonic-gate 	return (ENXIO);
4770Sstevel@tonic-gate }
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate /*
4800Sstevel@tonic-gate  * Returns the input and output packet and byte counts on the vif provided.
4810Sstevel@tonic-gate  */
4820Sstevel@tonic-gate static int
4833448Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
4840Sstevel@tonic-gate {
4850Sstevel@tonic-gate 	vifi_t vifi = req->vifi;
4860Sstevel@tonic-gate 
4873448Sdh155122 	if (vifi >= ipst->ips_numvifs)
4880Sstevel@tonic-gate 		return (EINVAL);
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 	/*
4910Sstevel@tonic-gate 	 * No locks here, an approximation is fine.
4920Sstevel@tonic-gate 	 */
4933448Sdh155122 	req->icount = ipst->ips_vifs[vifi].v_pkt_in;
4943448Sdh155122 	req->ocount = ipst->ips_vifs[vifi].v_pkt_out;
4953448Sdh155122 	req->ibytes = ipst->ips_vifs[vifi].v_bytes_in;
4963448Sdh155122 	req->obytes = ipst->ips_vifs[vifi].v_bytes_out;
4970Sstevel@tonic-gate 
4980Sstevel@tonic-gate 	return (0);
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate static int
5020Sstevel@tonic-gate get_version(uchar_t *data)
5030Sstevel@tonic-gate {
5040Sstevel@tonic-gate 	int *v = (int *)data;
5050Sstevel@tonic-gate 
5060Sstevel@tonic-gate 	*v = 0x0305;	/* XXX !!!! */
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 	return (0);
5090Sstevel@tonic-gate }
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate /*
5120Sstevel@tonic-gate  * Set PIM assert processing global.
5130Sstevel@tonic-gate  */
5140Sstevel@tonic-gate static int
5153448Sdh155122 set_assert(int *i, ip_stack_t *ipst)
5160Sstevel@tonic-gate {
5170Sstevel@tonic-gate 	if ((*i != 1) && (*i != 0))
5180Sstevel@tonic-gate 		return (EINVAL);
5190Sstevel@tonic-gate 
5203448Sdh155122 	ipst->ips_pim_assert = *i;
5210Sstevel@tonic-gate 
5220Sstevel@tonic-gate 	return (0);
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate /*
5260Sstevel@tonic-gate  * Get PIM assert processing global.
5270Sstevel@tonic-gate  */
5280Sstevel@tonic-gate static int
5293448Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst)
5300Sstevel@tonic-gate {
5310Sstevel@tonic-gate 	int *i = (int *)data;
5320Sstevel@tonic-gate 
5333448Sdh155122 	*i = ipst->ips_pim_assert;
5340Sstevel@tonic-gate 
5350Sstevel@tonic-gate 	return (0);
5360Sstevel@tonic-gate }
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate /*
5390Sstevel@tonic-gate  * Enable multicast routing.
5400Sstevel@tonic-gate  */
5410Sstevel@tonic-gate static int
5425240Snordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
5430Sstevel@tonic-gate {
5440Sstevel@tonic-gate 	int	*v;
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	if (data == NULL || (datalen != sizeof (int)))
5470Sstevel@tonic-gate 		return (ENOPROTOOPT);
5480Sstevel@tonic-gate 
5490Sstevel@tonic-gate 	v = (int *)data;
5500Sstevel@tonic-gate 	if (*v != 1)
5510Sstevel@tonic-gate 		return (ENOPROTOOPT);
5520Sstevel@tonic-gate 
5533448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
5543448Sdh155122 	if (ipst->ips_ip_g_mrouter != NULL) {
5553448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5560Sstevel@tonic-gate 		return (EADDRINUSE);
5570Sstevel@tonic-gate 	}
5580Sstevel@tonic-gate 
5595240Snordmark 	/*
5605240Snordmark 	 * MRT_INIT should only be allowed for RAW sockets, but we double
5615240Snordmark 	 * check.
5625240Snordmark 	 */
5635240Snordmark 	if (!IPCL_IS_RAWIP(connp)) {
5645240Snordmark 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5655240Snordmark 		return (EINVAL);
5665240Snordmark 	}
5675240Snordmark 
5685240Snordmark 	ipst->ips_ip_g_mrouter = connp;
5690Sstevel@tonic-gate 	connp->conn_multi_router = 1;
5700Sstevel@tonic-gate 	/* In order for tunnels to work we have to turn ip_g_forward on */
5713448Sdh155122 	if (!WE_ARE_FORWARDING(ipst)) {
5723448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
5735240Snordmark 			(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
5740Sstevel@tonic-gate 			    "ip_mrouter_init: turning on forwarding");
5750Sstevel@tonic-gate 		}
5763448Sdh155122 		ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward;
5773448Sdh155122 		ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS;
5780Sstevel@tonic-gate 	}
5790Sstevel@tonic-gate 
5803448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5810Sstevel@tonic-gate 	return (0);
5820Sstevel@tonic-gate }
5830Sstevel@tonic-gate 
5843448Sdh155122 void
5853448Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst)
5863448Sdh155122 {
5873448Sdh155122 	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);
5883448Sdh155122 
5893448Sdh155122 	ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
5903448Sdh155122 	    KM_SLEEP);
5913448Sdh155122 	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
5923448Sdh155122 	/*
5933448Sdh155122 	 * mfctable:
5943448Sdh155122 	 * Includes all mfcs, including waiting upcalls.
5953448Sdh155122 	 * Multiple mfcs per bucket.
5963448Sdh155122 	 */
5973448Sdh155122 	ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
5983448Sdh155122 	    KM_SLEEP);
5993448Sdh155122 	/*
6003448Sdh155122 	 * Define the token bucket filter structures.
6013448Sdh155122 	 * tbftable -> each vif has one of these for storing info.
6023448Sdh155122 	 */
6033448Sdh155122 	ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);
6043448Sdh155122 
6053448Sdh155122 	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);
6063448Sdh155122 
6073448Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
6083448Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
6093448Sdh155122 }
6103448Sdh155122 
6110Sstevel@tonic-gate /*
6120Sstevel@tonic-gate  * Disable multicast routing.
6130Sstevel@tonic-gate  * Didn't use global timeout_val (BSD version), instead check the mfctable.
6140Sstevel@tonic-gate  */
6150Sstevel@tonic-gate int
6163448Sdh155122 ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst)
6170Sstevel@tonic-gate {
6185240Snordmark 	conn_t		*mrouter;
6190Sstevel@tonic-gate 	vifi_t 		vifi;
6200Sstevel@tonic-gate 	struct mfc	*mfc_rt;
6210Sstevel@tonic-gate 	int		i;
6220Sstevel@tonic-gate 
6233448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
6243448Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
6253448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6260Sstevel@tonic-gate 		return (EINVAL);
6270Sstevel@tonic-gate 	}
6280Sstevel@tonic-gate 
6295240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
6303448Sdh155122 
6313448Sdh155122 	if (ipst->ips_saved_ip_g_forward != -1) {
6323448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
6335240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
6340Sstevel@tonic-gate 			    "ip_mrouter_done: turning off forwarding");
6350Sstevel@tonic-gate 		}
6363448Sdh155122 		ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward;
6373448Sdh155122 		ipst->ips_saved_ip_g_forward = -1;
6380Sstevel@tonic-gate 	}
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 	/*
6410Sstevel@tonic-gate 	 * Always clear cache when vifs change.
6423448Sdh155122 	 * No need to get ipst->ips_last_encap_lock since we are running as
6433448Sdh155122 	 * a writer.
6440Sstevel@tonic-gate 	 */
6453448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
6463448Sdh155122 	ipst->ips_last_encap_src = 0;
6473448Sdh155122 	ipst->ips_last_encap_vif = NULL;
6483448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
6495240Snordmark 	mrouter->conn_multi_router = 0;
6500Sstevel@tonic-gate 
6513448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 	/*
6540Sstevel@tonic-gate 	 * For each phyint in use,
6550Sstevel@tonic-gate 	 * disable promiscuous reception of all IP multicasts.
6560Sstevel@tonic-gate 	 */
6570Sstevel@tonic-gate 	for (vifi = 0; vifi < MAXVIFS; vifi++) {
6583448Sdh155122 		struct vif *vifp = ipst->ips_vifs + vifi;
6590Sstevel@tonic-gate 
6600Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
6610Sstevel@tonic-gate 		/*
6620Sstevel@tonic-gate 		 * if the vif is active mark it condemned.
6630Sstevel@tonic-gate 		 */
6640Sstevel@tonic-gate 		if (vifp->v_marks & VIF_MARK_GOOD) {
6650Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
6660Sstevel@tonic-gate 			ipif_refhold(vifp->v_ipif);
6670Sstevel@tonic-gate 			/* Phyint only */
6680Sstevel@tonic-gate 			if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
6690Sstevel@tonic-gate 				ipif_t *ipif = vifp->v_ipif;
6700Sstevel@tonic-gate 				ipsq_t  *ipsq;
6710Sstevel@tonic-gate 				boolean_t suc;
6720Sstevel@tonic-gate 				ill_t *ill;
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 				ill = ipif->ipif_ill;
6750Sstevel@tonic-gate 				suc = B_FALSE;
6760Sstevel@tonic-gate 				if (mp == NULL) {
6770Sstevel@tonic-gate 					/*
6780Sstevel@tonic-gate 					 * being called from ip_close,
6790Sstevel@tonic-gate 					 * lets do it synchronously.
6800Sstevel@tonic-gate 					 * Clear VIF_MARK_GOOD and
6810Sstevel@tonic-gate 					 * set VIF_MARK_CONDEMNED.
6820Sstevel@tonic-gate 					 */
6830Sstevel@tonic-gate 					vifp->v_marks &= ~VIF_MARK_GOOD;
6840Sstevel@tonic-gate 					vifp->v_marks |= VIF_MARK_CONDEMNED;
6850Sstevel@tonic-gate 					mutex_exit(&(vifp)->v_lock);
6860Sstevel@tonic-gate 					suc = ipsq_enter(ill, B_FALSE);
6870Sstevel@tonic-gate 					ipsq = ill->ill_phyint->phyint_ipsq;
6880Sstevel@tonic-gate 				} else {
6890Sstevel@tonic-gate 					ipsq = ipsq_try_enter(ipif, NULL,
6905240Snordmark 					    mrouter->conn_wq, mp,
6910Sstevel@tonic-gate 					    ip_restart_optmgmt, NEW_OP, B_TRUE);
6920Sstevel@tonic-gate 					if (ipsq == NULL) {
6930Sstevel@tonic-gate 						mutex_exit(&(vifp)->v_lock);
6945240Snordmark 						ipif_refrele(ipif);
6950Sstevel@tonic-gate 						return (EINPROGRESS);
6960Sstevel@tonic-gate 					}
6970Sstevel@tonic-gate 					/*
6980Sstevel@tonic-gate 					 * Clear VIF_MARK_GOOD and
6990Sstevel@tonic-gate 					 * set VIF_MARK_CONDEMNED.
7000Sstevel@tonic-gate 					 */
7010Sstevel@tonic-gate 					vifp->v_marks &= ~VIF_MARK_GOOD;
7020Sstevel@tonic-gate 					vifp->v_marks |= VIF_MARK_CONDEMNED;
7035240Snordmark 					mutex_exit(&(vifp)->v_lock);
7040Sstevel@tonic-gate 					suc = B_TRUE;
7050Sstevel@tonic-gate 				}
7060Sstevel@tonic-gate 
7070Sstevel@tonic-gate 				if (suc) {
7080Sstevel@tonic-gate 					(void) ip_delmulti(INADDR_ANY, ipif,
7090Sstevel@tonic-gate 					    B_TRUE, B_TRUE);
7107098Smeem 					ipsq_exit(ipsq);
7110Sstevel@tonic-gate 				}
7120Sstevel@tonic-gate 				mutex_enter(&vifp->v_lock);
7130Sstevel@tonic-gate 			}
7140Sstevel@tonic-gate 			/*
7150Sstevel@tonic-gate 			 * decreases the refcnt added in add_vif.
7160Sstevel@tonic-gate 			 * and release v_lock.
7170Sstevel@tonic-gate 			 */
7180Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
7190Sstevel@tonic-gate 		} else {
7200Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
7210Sstevel@tonic-gate 			continue;
7220Sstevel@tonic-gate 		}
7230Sstevel@tonic-gate 	}
7240Sstevel@tonic-gate 
7253448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
7263448Sdh155122 	ipst->ips_numvifs = 0;
7273448Sdh155122 	ipst->ips_pim_assert = 0;
7283448Sdh155122 	ipst->ips_reg_vif_num = ALL_VIFS;
7293448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	/*
7320Sstevel@tonic-gate 	 * Free upcall msgs.
7330Sstevel@tonic-gate 	 * Go through mfctable and stop any outstanding upcall
7340Sstevel@tonic-gate 	 * timeouts remaining on mfcs.
7350Sstevel@tonic-gate 	 */
7360Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
7373448Sdh155122 		mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
7383448Sdh155122 		ipst->ips_mfcs[i].mfcb_refcnt++;
7393448Sdh155122 		ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
7403448Sdh155122 		mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
7413448Sdh155122 		mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
7420Sstevel@tonic-gate 		while (mfc_rt) {
7430Sstevel@tonic-gate 			/* Free upcalls */
7440Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
7450Sstevel@tonic-gate 			if (mfc_rt->mfc_rte != NULL) {
7460Sstevel@tonic-gate 				if (mfc_rt->mfc_timeout_id != 0) {
7470Sstevel@tonic-gate 					/*
7480Sstevel@tonic-gate 					 * OK to drop the lock as we have
7490Sstevel@tonic-gate 					 * a refcnt on the bucket. timeout
7500Sstevel@tonic-gate 					 * can fire but it will see that
7510Sstevel@tonic-gate 					 * mfc_timeout_id == 0 and not do
7520Sstevel@tonic-gate 					 * anything. see expire_upcalls().
7530Sstevel@tonic-gate 					 */
7540Sstevel@tonic-gate 					mfc_rt->mfc_timeout_id = 0;
7550Sstevel@tonic-gate 					mutex_exit(&mfc_rt->mfc_mutex);
7560Sstevel@tonic-gate 					(void) untimeout(
7570Sstevel@tonic-gate 					    mfc_rt->mfc_timeout_id);
7580Sstevel@tonic-gate 						mfc_rt->mfc_timeout_id = 0;
7590Sstevel@tonic-gate 					mutex_enter(&mfc_rt->mfc_mutex);
7600Sstevel@tonic-gate 
7610Sstevel@tonic-gate 					/*
7620Sstevel@tonic-gate 					 * all queued upcall packets
7630Sstevel@tonic-gate 					 * and mblk will be freed in
7640Sstevel@tonic-gate 					 * release_mfc().
7650Sstevel@tonic-gate 					 */
7660Sstevel@tonic-gate 				}
7670Sstevel@tonic-gate 			}
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate 			mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
7720Sstevel@tonic-gate 			mfc_rt = mfc_rt->mfc_next;
7730Sstevel@tonic-gate 		}
7743448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
7750Sstevel@tonic-gate 	}
7760Sstevel@tonic-gate 
7773448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
7783448Sdh155122 	ipst->ips_ip_g_mrouter = NULL;
7793448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7800Sstevel@tonic-gate 	return (0);
7810Sstevel@tonic-gate }
7820Sstevel@tonic-gate 
7833448Sdh155122 void
7843448Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst)
7853448Sdh155122 {
7863448Sdh155122 	struct mfcb *mfcbp;
7873448Sdh155122 	struct mfc  *rt;
7883448Sdh155122 	int i;
7893448Sdh155122 
7903448Sdh155122 	for (i = 0; i < MFCTBLSIZ; i++) {
7913448Sdh155122 		mfcbp = &ipst->ips_mfcs[i];
7923448Sdh155122 
7933448Sdh155122 		while ((rt = mfcbp->mfcb_mfc) != NULL) {
7943448Sdh155122 			(void) printf("ip_mrouter_stack_destroy: free for %d\n",
7953448Sdh155122 			    i);
7963448Sdh155122 
7973448Sdh155122 			mfcbp->mfcb_mfc = rt->mfc_next;
7983448Sdh155122 			free_queue(rt);
7993448Sdh155122 			mi_free(rt);
8003448Sdh155122 		}
8013448Sdh155122 	}
8023448Sdh155122 	kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
8033448Sdh155122 	ipst->ips_vifs = NULL;
8043448Sdh155122 	kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
8053448Sdh155122 	ipst->ips_mrtstat = NULL;
8063448Sdh155122 	kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
8073448Sdh155122 	ipst->ips_mfcs = NULL;
8083448Sdh155122 	kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
8093448Sdh155122 	ipst->ips_tbfs = NULL;
8103448Sdh155122 
8113448Sdh155122 	mutex_destroy(&ipst->ips_last_encap_lock);
8123448Sdh155122 	mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
8133448Sdh155122 }
8143448Sdh155122 
8150Sstevel@tonic-gate static boolean_t
8163448Sdh155122 is_mrouter_off(ip_stack_t *ipst)
8170Sstevel@tonic-gate {
8185240Snordmark 	conn_t	*mrouter;
8190Sstevel@tonic-gate 
8203448Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
8213448Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
8223448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8230Sstevel@tonic-gate 		return (B_TRUE);
8240Sstevel@tonic-gate 	}
8250Sstevel@tonic-gate 
8265240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
8275240Snordmark 	if (mrouter->conn_multi_router == 0) {
8283448Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8290Sstevel@tonic-gate 		return (B_TRUE);
8300Sstevel@tonic-gate 	}
8313448Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8320Sstevel@tonic-gate 	return (B_FALSE);
8330Sstevel@tonic-gate }
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate static void
8360Sstevel@tonic-gate unlock_good_vif(struct vif *vifp)
8370Sstevel@tonic-gate {
8380Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8390Sstevel@tonic-gate 	ipif_refrele(vifp->v_ipif);
8400Sstevel@tonic-gate 	VIF_REFRELE(vifp);
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate static boolean_t
8440Sstevel@tonic-gate lock_good_vif(struct vif *vifp)
8450Sstevel@tonic-gate {
8460Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8470Sstevel@tonic-gate 	if (!(vifp->v_marks & VIF_MARK_GOOD)) {
8480Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8490Sstevel@tonic-gate 		return (B_FALSE);
8500Sstevel@tonic-gate 	}
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8530Sstevel@tonic-gate 	mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
8540Sstevel@tonic-gate 	if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
8550Sstevel@tonic-gate 		mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8560Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8570Sstevel@tonic-gate 		return (B_FALSE);
8580Sstevel@tonic-gate 	}
8590Sstevel@tonic-gate 	ipif_refhold_locked(vifp->v_ipif);
8600Sstevel@tonic-gate 	mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8610Sstevel@tonic-gate 	vifp->v_refcnt++;
8620Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
8630Sstevel@tonic-gate 	return (B_TRUE);
8640Sstevel@tonic-gate }
8650Sstevel@tonic-gate 
8660Sstevel@tonic-gate /*
8670Sstevel@tonic-gate  * Add a vif to the vif table.
8680Sstevel@tonic-gate  */
8690Sstevel@tonic-gate static int
8705240Snordmark add_vif(struct vifctl *vifcp, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst)
8710Sstevel@tonic-gate {
8723448Sdh155122 	struct vif	*vifp = ipst->ips_vifs + vifcp->vifc_vifi;
8730Sstevel@tonic-gate 	ipif_t		*ipif;
8740Sstevel@tonic-gate 	int		error;
8753448Sdh155122 	struct tbf	*v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
8760Sstevel@tonic-gate 	ipsq_t  	*ipsq;
8775240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate 	ASSERT(connp != NULL);
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 	if (vifcp->vifc_vifi >= MAXVIFS)
8820Sstevel@tonic-gate 		return (EINVAL);
8830Sstevel@tonic-gate 
8843448Sdh155122 	if (is_mrouter_off(ipst))
8850Sstevel@tonic-gate 		return (EINVAL);
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8880Sstevel@tonic-gate 	/*
8890Sstevel@tonic-gate 	 * Viftable entry should be 0.
8900Sstevel@tonic-gate 	 * if v_marks == 0 but v_refcnt != 0 means struct is being
8910Sstevel@tonic-gate 	 * initialized.
8920Sstevel@tonic-gate 	 *
8930Sstevel@tonic-gate 	 * Also note that it is very unlikely that we will get a MRT_ADD_VIF
8940Sstevel@tonic-gate 	 * request while the delete is in progress, mrouted only sends add
8950Sstevel@tonic-gate 	 * requests when a new interface is added and the new interface cannot
8960Sstevel@tonic-gate 	 * have the same vifi as an existing interface. We make sure that
8970Sstevel@tonic-gate 	 * ill_delete will block till the vif is deleted by adding a refcnt
8980Sstevel@tonic-gate 	 * to ipif in del_vif().
8990Sstevel@tonic-gate 	 */
9000Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr != 0 ||
9010Sstevel@tonic-gate 	    vifp->v_marks != 0 ||
9020Sstevel@tonic-gate 	    vifp->v_refcnt != 0) {
9030Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
9040Sstevel@tonic-gate 		return (EADDRINUSE);
9050Sstevel@tonic-gate 	}
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate 	/* Incoming vif should not be 0 */
9080Sstevel@tonic-gate 	if (vifcp->vifc_lcl_addr.s_addr == 0) {
9090Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
9100Sstevel@tonic-gate 		return (EINVAL);
9110Sstevel@tonic-gate 	}
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate 	vifp->v_refcnt++;
9140Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
9150Sstevel@tonic-gate 	/* Find the interface with the local address */
9160Sstevel@tonic-gate 	ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
9170Sstevel@tonic-gate 	    connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp,
9183448Sdh155122 	    ip_restart_optmgmt, &error, ipst);
9190Sstevel@tonic-gate 	if (ipif == NULL) {
9200Sstevel@tonic-gate 		VIF_REFRELE(vifp);
9210Sstevel@tonic-gate 		if (error == EINPROGRESS)
9220Sstevel@tonic-gate 			return (error);
9230Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
9240Sstevel@tonic-gate 	}
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	/*
9270Sstevel@tonic-gate 	 * We have to be exclusive as we have to call ip_addmulti()
9280Sstevel@tonic-gate 	 * This is the best position to try to be exclusive in case
9290Sstevel@tonic-gate 	 * we have to wait.
9300Sstevel@tonic-gate 	 */
9310Sstevel@tonic-gate 	ipsq = ipsq_try_enter(ipif, NULL, CONNP_TO_WQ(connp), first_mp,
9320Sstevel@tonic-gate 	    ip_restart_optmgmt, NEW_OP, B_TRUE);
9330Sstevel@tonic-gate 	if ((ipsq) == NULL) {
9340Sstevel@tonic-gate 		VIF_REFRELE(vifp);
9350Sstevel@tonic-gate 		ipif_refrele(ipif);
9360Sstevel@tonic-gate 		return (EINPROGRESS);
9370Sstevel@tonic-gate 	}
9380Sstevel@tonic-gate 
9393448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
9405240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
9410Sstevel@tonic-gate 		    "add_vif: src 0x%x enter",
9420Sstevel@tonic-gate 		    vifcp->vifc_lcl_addr.s_addr);
9430Sstevel@tonic-gate 	}
9440Sstevel@tonic-gate 
9450Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
9460Sstevel@tonic-gate 	/*
9470Sstevel@tonic-gate 	 * Always clear cache when vifs change.
9480Sstevel@tonic-gate 	 * Needed to ensure that src isn't left over from before vif was added.
9490Sstevel@tonic-gate 	 * No need to get last_encap_lock, since we are running as a writer.
9500Sstevel@tonic-gate 	 */
9510Sstevel@tonic-gate 
9523448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
9533448Sdh155122 	ipst->ips_last_encap_src = 0;
9543448Sdh155122 	ipst->ips_last_encap_vif = NULL;
9553448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
9560Sstevel@tonic-gate 
9570Sstevel@tonic-gate 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
9580Sstevel@tonic-gate 		if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
9590Sstevel@tonic-gate 			cmn_err(CE_WARN,
9600Sstevel@tonic-gate 			    "add_vif: source route tunnels not supported\n");
9610Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9620Sstevel@tonic-gate 			ipif_refrele(ipif);
9637098Smeem 			ipsq_exit(ipsq);
9640Sstevel@tonic-gate 			return (EOPNOTSUPP);
9650Sstevel@tonic-gate 		}
9660Sstevel@tonic-gate 		vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
9670Sstevel@tonic-gate 
9680Sstevel@tonic-gate 	} else {
9690Sstevel@tonic-gate 		/* Phyint or Register vif */
9700Sstevel@tonic-gate 		if (vifcp->vifc_flags & VIFF_REGISTER) {
9710Sstevel@tonic-gate 			/*
9720Sstevel@tonic-gate 			 * Note: Since all IPPROTO_IP level options (including
9730Sstevel@tonic-gate 			 * MRT_ADD_VIF) are done exclusively via
9740Sstevel@tonic-gate 			 * ip_optmgmt_writer(), a lock is not necessary to
9750Sstevel@tonic-gate 			 * protect reg_vif_num.
9760Sstevel@tonic-gate 			 */
9773448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
9783448Sdh155122 			if (ipst->ips_reg_vif_num == ALL_VIFS) {
9793448Sdh155122 				ipst->ips_reg_vif_num = vifcp->vifc_vifi;
9803448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9810Sstevel@tonic-gate 			} else {
9823448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9830Sstevel@tonic-gate 				VIF_REFRELE_LOCKED(vifp);
9840Sstevel@tonic-gate 				ipif_refrele(ipif);
9857098Smeem 				ipsq_exit(ipsq);
9860Sstevel@tonic-gate 				return (EADDRINUSE);
9870Sstevel@tonic-gate 			}
9880Sstevel@tonic-gate 		}
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 		/* Make sure the interface supports multicast */
9910Sstevel@tonic-gate 		if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
9920Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9930Sstevel@tonic-gate 			ipif_refrele(ipif);
9940Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
9953448Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
9963448Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
9973448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9980Sstevel@tonic-gate 			}
9997098Smeem 			ipsq_exit(ipsq);
10000Sstevel@tonic-gate 			return (EOPNOTSUPP);
10010Sstevel@tonic-gate 		}
10020Sstevel@tonic-gate 		/* Enable promiscuous reception of all IP mcasts from the if */
10030Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
10040Sstevel@tonic-gate 		error = ip_addmulti(INADDR_ANY, ipif, ILGSTAT_NONE,
10050Sstevel@tonic-gate 		    MODE_IS_EXCLUDE, NULL);
10060Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
10070Sstevel@tonic-gate 		/*
10080Sstevel@tonic-gate 		 * since we released the lock lets make sure that
10090Sstevel@tonic-gate 		 * ip_mrouter_done() has not been called.
10100Sstevel@tonic-gate 		 */
10113448Sdh155122 		if (error != 0 || is_mrouter_off(ipst)) {
10120Sstevel@tonic-gate 			if (error == 0)
10130Sstevel@tonic-gate 				(void) ip_delmulti(INADDR_ANY, ipif, B_TRUE,
10140Sstevel@tonic-gate 				    B_TRUE);
10150Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
10163448Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
10173448Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
10183448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
10190Sstevel@tonic-gate 			}
10200Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
10210Sstevel@tonic-gate 			ipif_refrele(ipif);
10227098Smeem 			ipsq_exit(ipsq);
10230Sstevel@tonic-gate 			return (error?error:EINVAL);
10240Sstevel@tonic-gate 		}
10250Sstevel@tonic-gate 	}
10260Sstevel@tonic-gate 	/* Define parameters for the tbf structure */
10270Sstevel@tonic-gate 	vifp->v_tbf = v_tbf;
10280Sstevel@tonic-gate 	gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
10290Sstevel@tonic-gate 	vifp->v_tbf->tbf_n_tok = 0;
10300Sstevel@tonic-gate 	vifp->v_tbf->tbf_q_len = 0;
10310Sstevel@tonic-gate 	vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
10320Sstevel@tonic-gate 	vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 	vifp->v_flags = vifcp->vifc_flags;
10350Sstevel@tonic-gate 	vifp->v_threshold = vifcp->vifc_threshold;
10360Sstevel@tonic-gate 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
10370Sstevel@tonic-gate 	vifp->v_ipif = ipif;
10380Sstevel@tonic-gate 	ipif_refrele(ipif);
10390Sstevel@tonic-gate 	/* Scaling up here, allows division by 1024 in critical code.	*/
10400Sstevel@tonic-gate 	vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
10410Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
10420Sstevel@tonic-gate 	/* initialize per vif pkt counters */
10430Sstevel@tonic-gate 	vifp->v_pkt_in = 0;
10440Sstevel@tonic-gate 	vifp->v_pkt_out = 0;
10450Sstevel@tonic-gate 	vifp->v_bytes_in = 0;
10460Sstevel@tonic-gate 	vifp->v_bytes_out = 0;
10470Sstevel@tonic-gate 	mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
10480Sstevel@tonic-gate 
10490Sstevel@tonic-gate 	/* Adjust numvifs up, if the vifi is higher than numvifs */
10503448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
10513448Sdh155122 	if (ipst->ips_numvifs <= vifcp->vifc_vifi)
10523448Sdh155122 		ipst->ips_numvifs = vifcp->vifc_vifi + 1;
10533448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
10543448Sdh155122 
10553448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
10565240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10570Sstevel@tonic-gate 		    "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
10580Sstevel@tonic-gate 		    vifcp->vifc_vifi,
10590Sstevel@tonic-gate 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
10600Sstevel@tonic-gate 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
10610Sstevel@tonic-gate 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
10620Sstevel@tonic-gate 		    vifcp->vifc_threshold, vifcp->vifc_rate_limit);
10630Sstevel@tonic-gate 	}
10640Sstevel@tonic-gate 
10650Sstevel@tonic-gate 	vifp->v_marks = VIF_MARK_GOOD;
10660Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
10677098Smeem 	ipsq_exit(ipsq);
10680Sstevel@tonic-gate 	return (0);
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate 
10710Sstevel@tonic-gate 
10720Sstevel@tonic-gate /* Delete a vif from the vif table. */
10730Sstevel@tonic-gate static void
10740Sstevel@tonic-gate del_vifp(struct vif *vifp)
10750Sstevel@tonic-gate {
10760Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
10770Sstevel@tonic-gate 	mblk_t  *mp0;
10780Sstevel@tonic-gate 	vifi_t  vifi;
10793448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
10805240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
10810Sstevel@tonic-gate 
10820Sstevel@tonic-gate 	ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
10830Sstevel@tonic-gate 	ASSERT(t != NULL);
10840Sstevel@tonic-gate 
10850Sstevel@tonic-gate 	/*
10860Sstevel@tonic-gate 	 * release the ref we put in vif_del.
10870Sstevel@tonic-gate 	 */
10880Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
10890Sstevel@tonic-gate 	ipif_refrele(vifp->v_ipif);
10900Sstevel@tonic-gate 
10913448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
10925240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10930Sstevel@tonic-gate 		    "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
10940Sstevel@tonic-gate 	}
10950Sstevel@tonic-gate 
10960Sstevel@tonic-gate 	if (vifp->v_timeout_id != 0) {
10970Sstevel@tonic-gate 		(void) untimeout(vifp->v_timeout_id);
10980Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
10990Sstevel@tonic-gate 	}
11000Sstevel@tonic-gate 
11010Sstevel@tonic-gate 	/*
11020Sstevel@tonic-gate 	 * Free packets queued at the interface.
11030Sstevel@tonic-gate 	 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
11040Sstevel@tonic-gate 	 */
11050Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
11060Sstevel@tonic-gate 	while (t->tbf_q != NULL) {
11070Sstevel@tonic-gate 		mp0 = t->tbf_q;
11080Sstevel@tonic-gate 		t->tbf_q = t->tbf_q->b_next;
11090Sstevel@tonic-gate 		mp0->b_prev = mp0->b_next = NULL;
11100Sstevel@tonic-gate 		freemsg(mp0);
11110Sstevel@tonic-gate 	}
11120Sstevel@tonic-gate 	mutex_exit(&t->tbf_lock);
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate 	/*
11150Sstevel@tonic-gate 	 * Always clear cache when vifs change.
11160Sstevel@tonic-gate 	 * No need to get last_encap_lock since we are running as a writer.
11170Sstevel@tonic-gate 	 */
11183448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
11193448Sdh155122 	if (vifp == ipst->ips_last_encap_vif) {
11203448Sdh155122 		ipst->ips_last_encap_vif = NULL;
11213448Sdh155122 		ipst->ips_last_encap_src = 0;
11220Sstevel@tonic-gate 	}
11233448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
11240Sstevel@tonic-gate 
11250Sstevel@tonic-gate 	mutex_destroy(&t->tbf_lock);
11260Sstevel@tonic-gate 
11270Sstevel@tonic-gate 	bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate 	/* Adjust numvifs down */
11303448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
11313448Sdh155122 	for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */
11323448Sdh155122 		if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
11330Sstevel@tonic-gate 			break;
11343448Sdh155122 	ipst->ips_numvifs = vifi;
11353448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
11360Sstevel@tonic-gate 
11370Sstevel@tonic-gate 	bzero(vifp, sizeof (*vifp));
11380Sstevel@tonic-gate }
11390Sstevel@tonic-gate 
11400Sstevel@tonic-gate static int
11415240Snordmark del_vif(vifi_t *vifip, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst)
11420Sstevel@tonic-gate {
11433448Sdh155122 	struct vif	*vifp = ipst->ips_vifs + *vifip;
11440Sstevel@tonic-gate 	ipsq_t  	*ipsq;
11450Sstevel@tonic-gate 
11463448Sdh155122 	if (*vifip >= ipst->ips_numvifs)
11470Sstevel@tonic-gate 		return (EINVAL);
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate 
11500Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
11510Sstevel@tonic-gate 	/*
11520Sstevel@tonic-gate 	 * Not initialized
11530Sstevel@tonic-gate 	 * Here we are not looking at the vif that is being initialized
11540Sstevel@tonic-gate 	 * i.e vifp->v_marks == 0 and refcnt > 0.
11550Sstevel@tonic-gate 	 */
11560Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr == 0 ||
11570Sstevel@tonic-gate 	    !(vifp->v_marks & VIF_MARK_GOOD)) {
11580Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
11590Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
11600Sstevel@tonic-gate 	}
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate 	/*
11630Sstevel@tonic-gate 	 * This is an optimization, if first_mp == NULL
11640Sstevel@tonic-gate 	 * than we are being called from reset_mrt_vif_ipif()
11650Sstevel@tonic-gate 	 * so we already have exclusive access to the ipsq.
11660Sstevel@tonic-gate 	 * the ASSERT below is a check for this condition.
11670Sstevel@tonic-gate 	 */
11680Sstevel@tonic-gate 	if (first_mp != NULL &&
11690Sstevel@tonic-gate 	    !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
11700Sstevel@tonic-gate 		ASSERT(connp != NULL);
11710Sstevel@tonic-gate 		/*
11720Sstevel@tonic-gate 		 * We have to be exclusive as we have to call ip_delmulti()
11730Sstevel@tonic-gate 		 * This is the best position to try to be exclusive in case
11740Sstevel@tonic-gate 		 * we have to wait.
11750Sstevel@tonic-gate 		 */
11760Sstevel@tonic-gate 		ipsq = ipsq_try_enter(vifp->v_ipif, NULL, CONNP_TO_WQ(connp),
11770Sstevel@tonic-gate 		    first_mp, ip_restart_optmgmt, NEW_OP, B_TRUE);
11780Sstevel@tonic-gate 		if ((ipsq) == NULL) {
11790Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
11800Sstevel@tonic-gate 			return (EINPROGRESS);
11810Sstevel@tonic-gate 		}
11820Sstevel@tonic-gate 		/* recheck after being exclusive */
11830Sstevel@tonic-gate 		if (vifp->v_lcl_addr.s_addr == 0 ||
11840Sstevel@tonic-gate 		    !vifp->v_marks & VIF_MARK_GOOD) {
11850Sstevel@tonic-gate 			/*
11860Sstevel@tonic-gate 			 * someone beat us.
11870Sstevel@tonic-gate 			 */
11880Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
11897098Smeem 			ipsq_exit(ipsq);
11900Sstevel@tonic-gate 			return (EADDRNOTAVAIL);
11910Sstevel@tonic-gate 		}
11920Sstevel@tonic-gate 	}
11930Sstevel@tonic-gate 
11940Sstevel@tonic-gate 
11950Sstevel@tonic-gate 	ASSERT(IAM_WRITER_IPIF(vifp->v_ipif));
11960Sstevel@tonic-gate 
11970Sstevel@tonic-gate 
11980Sstevel@tonic-gate 	/*
11990Sstevel@tonic-gate 	 * add a refhold so that ipif does not go away while
12000Sstevel@tonic-gate 	 * there are still users, this will be released in del_vifp
12010Sstevel@tonic-gate 	 * when we free the vif.
12020Sstevel@tonic-gate 	 */
12030Sstevel@tonic-gate 	ipif_refhold(vifp->v_ipif);
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 	/* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
12060Sstevel@tonic-gate 	vifp->v_marks &= ~VIF_MARK_GOOD;
12070Sstevel@tonic-gate 	vifp->v_marks |= VIF_MARK_CONDEMNED;
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate 	/* Phyint only */
12100Sstevel@tonic-gate 	if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
12110Sstevel@tonic-gate 		ipif_t *ipif = vifp->v_ipif;
12120Sstevel@tonic-gate 		ASSERT(ipif != NULL);
12130Sstevel@tonic-gate 		/*
12140Sstevel@tonic-gate 		 * should be OK to drop the lock as we
12150Sstevel@tonic-gate 		 * have marked this as CONDEMNED.
12160Sstevel@tonic-gate 		 */
12170Sstevel@tonic-gate 		mutex_exit(&(vifp)->v_lock);
12180Sstevel@tonic-gate 		(void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE);
12190Sstevel@tonic-gate 		if (first_mp != NULL)
12207098Smeem 			ipsq_exit(ipsq);
12210Sstevel@tonic-gate 		mutex_enter(&(vifp)->v_lock);
12220Sstevel@tonic-gate 	}
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate 	/*
12250Sstevel@tonic-gate 	 * decreases the refcnt added in add_vif.
12260Sstevel@tonic-gate 	 */
12270Sstevel@tonic-gate 	VIF_REFRELE_LOCKED(vifp);
12280Sstevel@tonic-gate 	return (0);
12290Sstevel@tonic-gate }
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate /*
12320Sstevel@tonic-gate  * Add an mfc entry.
12330Sstevel@tonic-gate  */
12340Sstevel@tonic-gate static int
12353448Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
12360Sstevel@tonic-gate {
12370Sstevel@tonic-gate 	struct mfc *rt;
12380Sstevel@tonic-gate 	struct rtdetq *rte;
12390Sstevel@tonic-gate 	ushort_t nstl;
12400Sstevel@tonic-gate 	int i;
12410Sstevel@tonic-gate 	struct mfcb *mfcbp;
12425240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
12430Sstevel@tonic-gate 
12440Sstevel@tonic-gate 	/*
12450Sstevel@tonic-gate 	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
12460Sstevel@tonic-gate 	 * did not have a real route for pkt.
12470Sstevel@tonic-gate 	 * We want this pkt without rt installed in the mfctable to prevent
12480Sstevel@tonic-gate 	 * multiiple tries, so go ahead and put it in mfctable, it will
12490Sstevel@tonic-gate 	 * be discarded later in ip_mdq() because the child is NULL.
12500Sstevel@tonic-gate 	 */
12510Sstevel@tonic-gate 
12520Sstevel@tonic-gate 	/* Error checking, out of bounds? */
12530Sstevel@tonic-gate 	if (mfccp->mfcc_parent > MAXVIFS) {
12540Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
12550Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
12560Sstevel@tonic-gate 		return (EINVAL);
12570Sstevel@tonic-gate 	}
12580Sstevel@tonic-gate 
12590Sstevel@tonic-gate 	if ((mfccp->mfcc_parent != NO_VIF) &&
12603448Sdh155122 	    (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
12610Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
12620Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
12630Sstevel@tonic-gate 		return (EINVAL);
12640Sstevel@tonic-gate 	}
12650Sstevel@tonic-gate 
12663448Sdh155122 	if (is_mrouter_off(ipst)) {
12670Sstevel@tonic-gate 		return (EINVAL);
12680Sstevel@tonic-gate 	}
12690Sstevel@tonic-gate 
12703448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
12710Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr)];
12720Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
12730Sstevel@tonic-gate 	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
12740Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr, rt);
12750Sstevel@tonic-gate 
12760Sstevel@tonic-gate 	/* If an entry already exists, just update the fields */
12770Sstevel@tonic-gate 	if (rt) {
12783448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
12795240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
12800Sstevel@tonic-gate 			    "add_mfc: update o %x grp %x parent %x",
12810Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
12820Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
12830Sstevel@tonic-gate 			    mfccp->mfcc_parent);
12840Sstevel@tonic-gate 		}
12850Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
12860Sstevel@tonic-gate 		rt->mfc_parent = mfccp->mfcc_parent;
12870Sstevel@tonic-gate 
12883448Sdh155122 		mutex_enter(&ipst->ips_numvifs_mutex);
12893448Sdh155122 		for (i = 0; i < (int)ipst->ips_numvifs; i++)
12900Sstevel@tonic-gate 			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
12913448Sdh155122 		mutex_exit(&ipst->ips_numvifs_mutex);
12920Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
12930Sstevel@tonic-gate 
12940Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
12950Sstevel@tonic-gate 		return (0);
12960Sstevel@tonic-gate 	}
12970Sstevel@tonic-gate 
12980Sstevel@tonic-gate 	/*
12990Sstevel@tonic-gate 	 * Find the entry for which the upcall was made and update.
13000Sstevel@tonic-gate 	 */
13010Sstevel@tonic-gate 	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
13020Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
13030Sstevel@tonic-gate 		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
13040Sstevel@tonic-gate 		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
13050Sstevel@tonic-gate 		    (rt->mfc_rte != NULL) &&
13060Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
13070Sstevel@tonic-gate 			if (nstl++ != 0)
13080Sstevel@tonic-gate 				cmn_err(CE_WARN,
13090Sstevel@tonic-gate 				    "add_mfc: %s o %x g %x p %x",
13100Sstevel@tonic-gate 				    "multiple kernel entries",
13110Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
13120Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13130Sstevel@tonic-gate 				    mfccp->mfcc_parent);
13140Sstevel@tonic-gate 
13153448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
13165240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
13173448Sdh155122 				    SL_TRACE,
13180Sstevel@tonic-gate 				    "add_mfc: o %x g %x p %x",
13190Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
13200Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13210Sstevel@tonic-gate 				    mfccp->mfcc_parent);
13220Sstevel@tonic-gate 			}
13233448Sdh155122 			fill_route(rt, mfccp, ipst);
13240Sstevel@tonic-gate 
13250Sstevel@tonic-gate 			/*
13260Sstevel@tonic-gate 			 * Prevent cleanup of cache entry.
13270Sstevel@tonic-gate 			 * Timer starts in ip_mforward.
13280Sstevel@tonic-gate 			 */
13290Sstevel@tonic-gate 			if (rt->mfc_timeout_id != 0) {
13300Sstevel@tonic-gate 				timeout_id_t id;
13310Sstevel@tonic-gate 				id = rt->mfc_timeout_id;
13320Sstevel@tonic-gate 				/*
13330Sstevel@tonic-gate 				 * setting id to zero will avoid this
13340Sstevel@tonic-gate 				 * entry from being cleaned up in
13350Sstevel@tonic-gate 				 * expire_up_calls().
13360Sstevel@tonic-gate 				 */
13370Sstevel@tonic-gate 				rt->mfc_timeout_id = 0;
13380Sstevel@tonic-gate 				/*
13390Sstevel@tonic-gate 				 * dropping the lock is fine as we
13400Sstevel@tonic-gate 				 * have a refhold on the bucket.
13410Sstevel@tonic-gate 				 * so mfc cannot be freed.
13420Sstevel@tonic-gate 				 * The timeout can fire but it will see
13430Sstevel@tonic-gate 				 * that mfc_timeout_id == 0 and not cleanup.
13440Sstevel@tonic-gate 				 */
13450Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13460Sstevel@tonic-gate 				(void) untimeout(id);
13470Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
13480Sstevel@tonic-gate 			}
13490Sstevel@tonic-gate 
13500Sstevel@tonic-gate 			/*
13510Sstevel@tonic-gate 			 * Send all pkts that are queued waiting for the upcall.
13520Sstevel@tonic-gate 			 * ip_mdq param tun set to 0 -
13530Sstevel@tonic-gate 			 * the return value of ip_mdq() isn't used here,
13540Sstevel@tonic-gate 			 * so value we send doesn't matter.
13550Sstevel@tonic-gate 			 */
13560Sstevel@tonic-gate 			while (rt->mfc_rte != NULL) {
13570Sstevel@tonic-gate 				rte = rt->mfc_rte;
13580Sstevel@tonic-gate 				rt->mfc_rte = rte->rte_next;
13590Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13600Sstevel@tonic-gate 				(void) ip_mdq(rte->mp, (ipha_t *)
13610Sstevel@tonic-gate 				    rte->mp->b_rptr, rte->ill, 0, rt);
13620Sstevel@tonic-gate 				freemsg(rte->mp);
13630Sstevel@tonic-gate 				mi_free((char *)rte);
13640Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
13650Sstevel@tonic-gate 			}
13660Sstevel@tonic-gate 		}
13670Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
13680Sstevel@tonic-gate 	}
13690Sstevel@tonic-gate 
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 	/*
13720Sstevel@tonic-gate 	 * It is possible that an entry is being inserted without an upcall
13730Sstevel@tonic-gate 	 */
13740Sstevel@tonic-gate 	if (nstl == 0) {
13750Sstevel@tonic-gate 		mutex_enter(&(mfcbp->mfcb_lock));
13763448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
13775240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
13780Sstevel@tonic-gate 			    "add_mfc: no upcall o %x g %x p %x",
13790Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
13800Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13810Sstevel@tonic-gate 			    mfccp->mfcc_parent);
13820Sstevel@tonic-gate 		}
13833448Sdh155122 		if (is_mrouter_off(ipst)) {
13840Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
13850Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
13860Sstevel@tonic-gate 			return (EINVAL);
13870Sstevel@tonic-gate 		}
13880Sstevel@tonic-gate 
13890Sstevel@tonic-gate 		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
13900Sstevel@tonic-gate 
13910Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
13920Sstevel@tonic-gate 			if ((rt->mfc_origin.s_addr ==
13930Sstevel@tonic-gate 			    mfccp->mfcc_origin.s_addr) &&
13940Sstevel@tonic-gate 			    (rt->mfc_mcastgrp.s_addr ==
13955240Snordmark 			    mfccp->mfcc_mcastgrp.s_addr) &&
13965240Snordmark 			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
13973448Sdh155122 				fill_route(rt, mfccp, ipst);
13980Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13990Sstevel@tonic-gate 				break;
14000Sstevel@tonic-gate 			}
14010Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
14020Sstevel@tonic-gate 		}
14030Sstevel@tonic-gate 
14040Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
14050Sstevel@tonic-gate 		if (rt == NULL) {
14060Sstevel@tonic-gate 			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
14070Sstevel@tonic-gate 			if (rt == NULL) {
14080Sstevel@tonic-gate 				ip1dbg(("add_mfc: out of memory\n"));
14090Sstevel@tonic-gate 				mutex_exit(&mfcbp->mfcb_lock);
14100Sstevel@tonic-gate 				MFCB_REFRELE(mfcbp);
14110Sstevel@tonic-gate 				return (ENOBUFS);
14120Sstevel@tonic-gate 			}
14130Sstevel@tonic-gate 
14140Sstevel@tonic-gate 			/* Insert new entry at head of hash chain */
14150Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
14163448Sdh155122 			fill_route(rt, mfccp, ipst);
14170Sstevel@tonic-gate 
14180Sstevel@tonic-gate 			/* Link into table */
14190Sstevel@tonic-gate 			rt->mfc_next   = mfcbp->mfcb_mfc;
14200Sstevel@tonic-gate 			mfcbp->mfcb_mfc = rt;
14210Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
14220Sstevel@tonic-gate 		}
14230Sstevel@tonic-gate 		mutex_exit(&mfcbp->mfcb_lock);
14240Sstevel@tonic-gate 	}
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
14270Sstevel@tonic-gate 	return (0);
14280Sstevel@tonic-gate }
14290Sstevel@tonic-gate 
14300Sstevel@tonic-gate /*
14310Sstevel@tonic-gate  * Fills in mfc structure from mrouted mfcctl.
14320Sstevel@tonic-gate  */
14330Sstevel@tonic-gate static void
14343448Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
14350Sstevel@tonic-gate {
14360Sstevel@tonic-gate 	int i;
14370Sstevel@tonic-gate 
14380Sstevel@tonic-gate 	rt->mfc_origin		= mfccp->mfcc_origin;
14390Sstevel@tonic-gate 	rt->mfc_mcastgrp	= mfccp->mfcc_mcastgrp;
14400Sstevel@tonic-gate 	rt->mfc_parent		= mfccp->mfcc_parent;
14413448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
14423448Sdh155122 	for (i = 0; i < (int)ipst->ips_numvifs; i++) {
14430Sstevel@tonic-gate 		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
14440Sstevel@tonic-gate 	}
14453448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
14460Sstevel@tonic-gate 	/* Initialize pkt counters per src-grp */
14470Sstevel@tonic-gate 	rt->mfc_pkt_cnt	= 0;
14480Sstevel@tonic-gate 	rt->mfc_byte_cnt	= 0;
14490Sstevel@tonic-gate 	rt->mfc_wrong_if	= 0;
14500Sstevel@tonic-gate 	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;
14510Sstevel@tonic-gate 
14520Sstevel@tonic-gate }
14530Sstevel@tonic-gate 
14540Sstevel@tonic-gate static void
14550Sstevel@tonic-gate free_queue(struct mfc *mfcp)
14560Sstevel@tonic-gate {
14570Sstevel@tonic-gate 	struct rtdetq *rte0;
14580Sstevel@tonic-gate 
14590Sstevel@tonic-gate 	/*
14600Sstevel@tonic-gate 	 * Drop all queued upcall packets.
14610Sstevel@tonic-gate 	 * Free the mbuf with the pkt.
14620Sstevel@tonic-gate 	 */
14630Sstevel@tonic-gate 	while ((rte0 = mfcp->mfc_rte) != NULL) {
14640Sstevel@tonic-gate 		mfcp->mfc_rte = rte0->rte_next;
14650Sstevel@tonic-gate 		freemsg(rte0->mp);
14660Sstevel@tonic-gate 		mi_free((char *)rte0);
14670Sstevel@tonic-gate 	}
14680Sstevel@tonic-gate }
14690Sstevel@tonic-gate /*
14700Sstevel@tonic-gate  * go thorugh the hash bucket and free all the entries marked condemned.
14710Sstevel@tonic-gate  */
14720Sstevel@tonic-gate void
14730Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp)
14740Sstevel@tonic-gate {
14750Sstevel@tonic-gate 	struct mfc *current_mfcp;
14760Sstevel@tonic-gate 	struct mfc *prev_mfcp;
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate 	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14790Sstevel@tonic-gate 
14800Sstevel@tonic-gate 	while (current_mfcp != NULL) {
14810Sstevel@tonic-gate 		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
14820Sstevel@tonic-gate 			if (current_mfcp == mfcbp->mfcb_mfc) {
14830Sstevel@tonic-gate 				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
14840Sstevel@tonic-gate 				free_queue(current_mfcp);
14850Sstevel@tonic-gate 				mi_free(current_mfcp);
14860Sstevel@tonic-gate 				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14870Sstevel@tonic-gate 				continue;
14880Sstevel@tonic-gate 			}
14890Sstevel@tonic-gate 			ASSERT(prev_mfcp != NULL);
14900Sstevel@tonic-gate 			prev_mfcp->mfc_next = current_mfcp->mfc_next;
14910Sstevel@tonic-gate 			free_queue(current_mfcp);
14920Sstevel@tonic-gate 			mi_free(current_mfcp);
14930Sstevel@tonic-gate 			current_mfcp = NULL;
14940Sstevel@tonic-gate 		} else {
14950Sstevel@tonic-gate 			prev_mfcp = current_mfcp;
14960Sstevel@tonic-gate 		}
14970Sstevel@tonic-gate 
14980Sstevel@tonic-gate 		current_mfcp = prev_mfcp->mfc_next;
14990Sstevel@tonic-gate 
15000Sstevel@tonic-gate 	}
15010Sstevel@tonic-gate 	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
15020Sstevel@tonic-gate 	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
15030Sstevel@tonic-gate }
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate /*
15060Sstevel@tonic-gate  * Delete an mfc entry.
15070Sstevel@tonic-gate  */
15080Sstevel@tonic-gate static int
15093448Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
15100Sstevel@tonic-gate {
15110Sstevel@tonic-gate 	struct in_addr	origin;
15120Sstevel@tonic-gate 	struct in_addr	mcastgrp;
15135240Snordmark 	struct mfc 	*rt;
15145240Snordmark 	uint_t		hash;
15155240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
15160Sstevel@tonic-gate 
15170Sstevel@tonic-gate 	origin = mfccp->mfcc_origin;
15180Sstevel@tonic-gate 	mcastgrp = mfccp->mfcc_mcastgrp;
15190Sstevel@tonic-gate 	hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
15200Sstevel@tonic-gate 
15213448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
15225240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15230Sstevel@tonic-gate 		    "del_mfc: o %x g %x",
15240Sstevel@tonic-gate 		    ntohl(origin.s_addr),
15250Sstevel@tonic-gate 		    ntohl(mcastgrp.s_addr));
15260Sstevel@tonic-gate 	}
15270Sstevel@tonic-gate 
15283448Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 	/* Find mfc in mfctable, finds only entries without upcalls */
15313448Sdh155122 	for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
15320Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
15330Sstevel@tonic-gate 		if (origin.s_addr == rt->mfc_origin.s_addr &&
15340Sstevel@tonic-gate 		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
15350Sstevel@tonic-gate 		    rt->mfc_rte == NULL &&
15360Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
15370Sstevel@tonic-gate 			break;
15380Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
15390Sstevel@tonic-gate 	}
15400Sstevel@tonic-gate 
15410Sstevel@tonic-gate 	/*
15420Sstevel@tonic-gate 	 * Return if there was an upcall (mfc_rte != NULL,
15430Sstevel@tonic-gate 	 * or rt not in mfctable.
15440Sstevel@tonic-gate 	 */
15450Sstevel@tonic-gate 	if (rt == NULL) {
15463448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15470Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
15480Sstevel@tonic-gate 	}
15490Sstevel@tonic-gate 
15500Sstevel@tonic-gate 
15510Sstevel@tonic-gate 	/*
15520Sstevel@tonic-gate 	 * no need to hold lock as we have a reference.
15530Sstevel@tonic-gate 	 */
15543448Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
15550Sstevel@tonic-gate 	/* error checking */
15560Sstevel@tonic-gate 	if (rt->mfc_timeout_id != 0) {
15570Sstevel@tonic-gate 		ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
15580Sstevel@tonic-gate 		/*
15590Sstevel@tonic-gate 		 * Its ok to drop the lock,  the struct cannot be freed
15600Sstevel@tonic-gate 		 * since we have a ref on the hash bucket.
15610Sstevel@tonic-gate 		 */
15620Sstevel@tonic-gate 		rt->mfc_timeout_id = 0;
15630Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
15640Sstevel@tonic-gate 		(void) untimeout(rt->mfc_timeout_id);
15650Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
15660Sstevel@tonic-gate 	}
15670Sstevel@tonic-gate 
15680Sstevel@tonic-gate 	ASSERT(rt->mfc_rte == NULL);
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate 
15710Sstevel@tonic-gate 	/*
15720Sstevel@tonic-gate 	 * Delete the entry from the cache
15730Sstevel@tonic-gate 	 */
15740Sstevel@tonic-gate 	rt->mfc_marks |= MFCB_MARK_CONDEMNED;
15750Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
15760Sstevel@tonic-gate 
15773448Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 	return (0);
15800Sstevel@tonic-gate }
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate #define	TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
15830Sstevel@tonic-gate 
15840Sstevel@tonic-gate /*
15850Sstevel@tonic-gate  * IP multicast forwarding function. This function assumes that the packet
15860Sstevel@tonic-gate  * pointed to by ipha has arrived on (or is about to be sent to) the interface
15870Sstevel@tonic-gate  * pointed to by "ill", and the packet is to be relayed to other networks
15880Sstevel@tonic-gate  * that have members of the packet's destination IP multicast group.
15890Sstevel@tonic-gate  *
15900Sstevel@tonic-gate  * The packet is returned unscathed to the caller, unless it is
15910Sstevel@tonic-gate  * erroneous, in which case a -1 value tells the caller (IP)
15920Sstevel@tonic-gate  * to discard it.
15930Sstevel@tonic-gate  *
15940Sstevel@tonic-gate  * Unlike BSD, SunOS 5.x needs to return to IP info about
15950Sstevel@tonic-gate  * whether pkt came in thru a tunnel, so it can be discarded, unless
15960Sstevel@tonic-gate  * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
15970Sstevel@tonic-gate  * to be delivered.
15980Sstevel@tonic-gate  * Return values are 0 - pkt is okay and phyint
15990Sstevel@tonic-gate  *		    -1 - pkt is malformed and to be tossed
16000Sstevel@tonic-gate  *                   1 - pkt came in on tunnel
16010Sstevel@tonic-gate  */
16020Sstevel@tonic-gate int
16030Sstevel@tonic-gate ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
16040Sstevel@tonic-gate {
16050Sstevel@tonic-gate 	struct mfc 	*rt;
16060Sstevel@tonic-gate 	ipaddr_t	src, dst, tunnel_src = 0;
	/* Function-static: a single counter shared by every call. */
16070Sstevel@tonic-gate 	static int	srctun = 0;
16080Sstevel@tonic-gate 	vifi_t		vifi;
16090Sstevel@tonic-gate 	boolean_t	pim_reg_packet = B_FALSE;
16100Sstevel@tonic-gate 	struct mfcb *mfcbp;
16113448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
16125240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
16133448Sdh155122 
16143448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
16155240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16160Sstevel@tonic-gate 		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
16170Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
16180Sstevel@tonic-gate 		    ill->ill_name);
16190Sstevel@tonic-gate 	}
16200Sstevel@tonic-gate 
16210Sstevel@tonic-gate 	dst = ipha->ipha_dst;
	/*
	 * mp->b_prev carries side-band information: either the value
	 * PIM_REGISTER_MARKER, or a value that is taken as the tunnel
	 * source address (zero meaning "no tunnel").
	 */
16220Sstevel@tonic-gate 	if ((uint32_t)(uintptr_t)mp->b_prev == PIM_REGISTER_MARKER)
16230Sstevel@tonic-gate 		pim_reg_packet = B_TRUE;
16240Sstevel@tonic-gate 	else
16250Sstevel@tonic-gate 		tunnel_src = (ipaddr_t)(uintptr_t)mp->b_prev;
16260Sstevel@tonic-gate 
16270Sstevel@tonic-gate 	/*
16280Sstevel@tonic-gate 	 * Don't forward a packet with time-to-live of zero or one,
16290Sstevel@tonic-gate 	 * or a packet destined to a local-only group.
16300Sstevel@tonic-gate 	 */
16310Sstevel@tonic-gate 	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
16325240Snordmark 	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
16333448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16345240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16350Sstevel@tonic-gate 			    "ip_mforward: not forwarded ttl %d,"
16360Sstevel@tonic-gate 			    " dst 0x%x ill %s",
16370Sstevel@tonic-gate 			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
16380Sstevel@tonic-gate 		}
		/* Not an error: report only whether it came via a tunnel. */
16390Sstevel@tonic-gate 		mp->b_prev = NULL;
16400Sstevel@tonic-gate 		if (tunnel_src != 0)
16410Sstevel@tonic-gate 			return (1);
16420Sstevel@tonic-gate 		else
16430Sstevel@tonic-gate 			return (0);
16440Sstevel@tonic-gate 	}
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	if ((tunnel_src != 0) || pim_reg_packet) {
16470Sstevel@tonic-gate 		/*
16480Sstevel@tonic-gate 		 * Packet arrived over an encapsulated tunnel or via a PIM
16490Sstevel@tonic-gate 		 * register message. Both ip_mroute_decap() and pim_input()
16500Sstevel@tonic-gate 		 * encode information in mp->b_prev.
16510Sstevel@tonic-gate 		 */
16520Sstevel@tonic-gate 		mp->b_prev = NULL;
16533448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16540Sstevel@tonic-gate 			if (tunnel_src != 0) {
16555240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
16563448Sdh155122 				    SL_TRACE,
16570Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via ENCAP TUN",
16580Sstevel@tonic-gate 				    ill->ill_name);
16590Sstevel@tonic-gate 			} else if (pim_reg_packet) {
16605240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
16613448Sdh155122 				    SL_TRACE,
16620Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via"
16630Sstevel@tonic-gate 				    "  REGISTER VIF",
16640Sstevel@tonic-gate 				    ill->ill_name);
16650Sstevel@tonic-gate 			}
16660Sstevel@tonic-gate 		}
	/*
	 * The test below parses as
	 *   (hdr_len_nibble < ((IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2)) ||
	 *   (second byte following the fixed header != IPOPT_LSRR)
	 * i.e. the header is too short to hold the tunnel option, or the
	 * option found there is not a loose source route.
	 */
16670Sstevel@tonic-gate 	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
16680Sstevel@tonic-gate 	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
16690Sstevel@tonic-gate 	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
16700Sstevel@tonic-gate 		/* Packet arrived via a physical interface. */
16713448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
16725240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16730Sstevel@tonic-gate 			    "ip_mforward: ill %s arrived via PHYINT",
16740Sstevel@tonic-gate 			    ill->ill_name);
16750Sstevel@tonic-gate 		}
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 	} else {
16780Sstevel@tonic-gate 		/*
16790Sstevel@tonic-gate 		 * Packet arrived through a SRCRT tunnel.
16800Sstevel@tonic-gate 		 * Source-route tunnels are no longer supported.
16810Sstevel@tonic-gate 		 * Error message printed every 1000 times.
16820Sstevel@tonic-gate 		 */
16830Sstevel@tonic-gate 		if ((srctun++ % 1000) == 0) {
16840Sstevel@tonic-gate 			cmn_err(CE_WARN,
16850Sstevel@tonic-gate 			    "ip_mforward: received source-routed pkt from %x",
16860Sstevel@tonic-gate 			    ntohl(ipha->ipha_src));
16870Sstevel@tonic-gate 		}
16880Sstevel@tonic-gate 		return (-1);
16890Sstevel@tonic-gate 	}
16900Sstevel@tonic-gate 
16913448Sdh155122 	ipst->ips_mrtstat->mrts_fwd_in++;
16920Sstevel@tonic-gate 	src = ipha->ipha_src;
16930Sstevel@tonic-gate 
16940Sstevel@tonic-gate 	/* Find route in cache, return NULL if not there or upcalls q'ed. */
16950Sstevel@tonic-gate 
16960Sstevel@tonic-gate 	/*
16970Sstevel@tonic-gate 	 * Lock the mfctable against changes made by ip_mforward.
16980Sstevel@tonic-gate 	 * Note that only add_mfc and del_mfc can remove entries and
16990Sstevel@tonic-gate 	 * they run with exclusive access to IP. So we do not need to
17000Sstevel@tonic-gate 	 * guard against the rt being deleted, so release lock after reading.
17010Sstevel@tonic-gate 	 */
	/*
	 * NOTE(review): the lookup below runs under MFCB_REFHOLD on the
	 * bucket; mfcb_lock itself is only taken on the cache-miss path.
	 */
17020Sstevel@tonic-gate 
17033448Sdh155122 	if (is_mrouter_off(ipst))
17040Sstevel@tonic-gate 		return (-1);
17050Sstevel@tonic-gate 
17063448Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
17070Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
17080Sstevel@tonic-gate 	MFCFIND(mfcbp, src, dst, rt);
17090Sstevel@tonic-gate 
17100Sstevel@tonic-gate 	/* Entry exists, so forward if necessary */
17110Sstevel@tonic-gate 	if (rt != NULL) {
17120Sstevel@tonic-gate 		int ret = 0;
17133448Sdh155122 		ipst->ips_mrtstat->mrts_mfc_hits++;
		/*
		 * A PIM register packet is forwarded as if it had arrived on
		 * the ill of the register vif, with a tunnel argument of 0;
		 * anything else uses the arriving ill and tunnel_src.
		 */
17140Sstevel@tonic-gate 		if (pim_reg_packet) {
17153448Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
17160Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha,
17173448Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
17183448Sdh155122 			    v_ipif->ipif_ill,
17193448Sdh155122 			    0, rt);
17200Sstevel@tonic-gate 		} else {
17210Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
17220Sstevel@tonic-gate 		}
17230Sstevel@tonic-gate 
		/* The caller receives ip_mdq()'s result unchanged. */
17240Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
17250Sstevel@tonic-gate 		return (ret);
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate 		/*
17280Sstevel@tonic-gate 		 * Don't forward if we don't have a cache entry.  Mrouted will
17290Sstevel@tonic-gate 		 * always provide a cache entry in response to an upcall.
17300Sstevel@tonic-gate 		 */
17310Sstevel@tonic-gate 	} else {
17320Sstevel@tonic-gate 		/*
17330Sstevel@tonic-gate 		 * If we don't have a route for packet's origin, make a copy
17340Sstevel@tonic-gate 		 * of the packet and send message to routing daemon.
17350Sstevel@tonic-gate 		 */
17360Sstevel@tonic-gate 		struct mfc	*mfc_rt	 = NULL;
17370Sstevel@tonic-gate 		mblk_t		*mp0	 = NULL;
17380Sstevel@tonic-gate 		mblk_t		*mp_copy = NULL;
17390Sstevel@tonic-gate 		struct rtdetq	*rte	 = NULL;
17400Sstevel@tonic-gate 		struct rtdetq	*rte_m, *rte1, *prev_rte;
17410Sstevel@tonic-gate 		uint_t		hash;
17420Sstevel@tonic-gate 		int		npkts;
17430Sstevel@tonic-gate 		boolean_t	new_mfc = B_FALSE;
17443448Sdh155122 		ipst->ips_mrtstat->mrts_mfc_misses++;
17450Sstevel@tonic-gate 		/* BSD uses mrts_no_route++ */
17463448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
17475240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
17480Sstevel@tonic-gate 			    "ip_mforward: no rte ill %s src %x g %x misses %d",
17490Sstevel@tonic-gate 			    ill->ill_name, ntohl(src), ntohl(dst),
17503448Sdh155122 			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
17510Sstevel@tonic-gate 		}
17520Sstevel@tonic-gate 		/*
17530Sstevel@tonic-gate 		 * The order of the following code differs from the BSD code.
17540Sstevel@tonic-gate 		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
17550Sstevel@tonic-gate 		 * code works, so SunOS 5.x wasn't changed to conform to the
17560Sstevel@tonic-gate 		 * BSD version.
17570Sstevel@tonic-gate 		 */
17580Sstevel@tonic-gate 
17590Sstevel@tonic-gate 		/* Lock mfctable. */
		/*
		 * hash is recomputed from the same (src, dst) used for mfcbp,
		 * so ips_mfcs[hash] and *mfcbp name the same bucket; the code
		 * below uses the two spellings interchangeably.
		 */
17600Sstevel@tonic-gate 		hash = MFCHASH(src, dst);
17613448Sdh155122 		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
17620Sstevel@tonic-gate 
17630Sstevel@tonic-gate 		/*
17640Sstevel@tonic-gate 		 * If we are turning off mrouted return an error
17650Sstevel@tonic-gate 		 */
17663448Sdh155122 		if (is_mrouter_off(ipst)) {
17670Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
17680Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
17690Sstevel@tonic-gate 			return (-1);
17700Sstevel@tonic-gate 		}
17710Sstevel@tonic-gate 
17720Sstevel@tonic-gate 		/* Is there an upcall waiting for this packet? */
		/*
		 * When the loop breaks on a match, mfc_rt->mfc_mutex is left
		 * held; it is released on every later exit path.
		 */
17733448Sdh155122 		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
17740Sstevel@tonic-gate 		    mfc_rt = mfc_rt->mfc_next) {
17750Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
17763448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
17775240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
17783448Sdh155122 				    SL_TRACE,
17790Sstevel@tonic-gate 				    "ip_mforward: MFCTAB hash %d o 0x%x"
17800Sstevel@tonic-gate 				    " g 0x%x\n",
17810Sstevel@tonic-gate 				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
17820Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
17830Sstevel@tonic-gate 			}
17840Sstevel@tonic-gate 			/* There is an upcall */
17850Sstevel@tonic-gate 			if ((src == mfc_rt->mfc_origin.s_addr) &&
17860Sstevel@tonic-gate 			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
17870Sstevel@tonic-gate 			    (mfc_rt->mfc_rte != NULL) &&
17880Sstevel@tonic-gate 			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
17890Sstevel@tonic-gate 				break;
17900Sstevel@tonic-gate 			}
17910Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17920Sstevel@tonic-gate 		}
17930Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
17940Sstevel@tonic-gate 		if (mfc_rt == NULL) {
17950Sstevel@tonic-gate 			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
17960Sstevel@tonic-gate 			if (mfc_rt == NULL) {
17973448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
17980Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory "
17990Sstevel@tonic-gate 				    "for mfc, mfc_rt\n"));
18000Sstevel@tonic-gate 				goto error_return;
18010Sstevel@tonic-gate 			} else
18020Sstevel@tonic-gate 				new_mfc = B_TRUE;
18030Sstevel@tonic-gate 			/* Get resources */
18040Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
			/*
			 * mp_copy is the message later handed to the routing
			 * daemon; it is only allocated on this new-entry path.
			 */
18050Sstevel@tonic-gate 			mp_copy = copymsg(mp);
18060Sstevel@tonic-gate 			if (mp_copy == NULL) {
18073448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
18080Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory for "
18090Sstevel@tonic-gate 				    "mblk, mp_copy\n"));
18100Sstevel@tonic-gate 				goto error_return;
18110Sstevel@tonic-gate 			}
			/*
			 * From here on mfc_mutex is held on both the new-entry
			 * and the existing-entry paths.
			 */
18120Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
18130Sstevel@tonic-gate 		}
18140Sstevel@tonic-gate 		/* Get resources for rte, whether first rte or not first. */
18150Sstevel@tonic-gate 		/* Add this packet into rtdetq */
18160Sstevel@tonic-gate 		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
18170Sstevel@tonic-gate 		if (rte == NULL) {
18183448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
18190Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18200Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for"
18210Sstevel@tonic-gate 			    " rtdetq, rte\n"));
18220Sstevel@tonic-gate 			goto error_return;
18230Sstevel@tonic-gate 		}
18240Sstevel@tonic-gate 
		/* mp0 is the private copy that is queued on the mfc entry. */
18250Sstevel@tonic-gate 		mp0 = copymsg(mp);
18260Sstevel@tonic-gate 		if (mp0 == NULL) {
18273448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
18280Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
18290Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18300Sstevel@tonic-gate 			goto error_return;
18310Sstevel@tonic-gate 		}
18320Sstevel@tonic-gate 		rte->mp		= mp0;
18330Sstevel@tonic-gate 		if (pim_reg_packet) {
18343448Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
18353448Sdh155122 			rte->ill =
18363448Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
18373448Sdh155122 			    v_ipif->ipif_ill;
18380Sstevel@tonic-gate 		} else {
18390Sstevel@tonic-gate 			rte->ill = ill;
18400Sstevel@tonic-gate 		}
18410Sstevel@tonic-gate 		rte->rte_next	= NULL;
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate 		/*
18440Sstevel@tonic-gate 		 * Determine if upcall q (rtdetq) has overflowed.
18450Sstevel@tonic-gate 		 * mfc_rt->mfc_rte is null by mi_zalloc
18460Sstevel@tonic-gate 		 * if it is the first message.
18470Sstevel@tonic-gate 		 */
18480Sstevel@tonic-gate 		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
18490Sstevel@tonic-gate 		    rte_m = rte_m->rte_next)
18500Sstevel@tonic-gate 			npkts++;
18513448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
18525240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
18530Sstevel@tonic-gate 			    "ip_mforward: upcalls %d\n", npkts);
18540Sstevel@tonic-gate 		}
		/* Queue already longer than MAX_UPQ: drop this packet. */
18550Sstevel@tonic-gate 		if (npkts > MAX_UPQ) {
18563448Sdh155122 			ipst->ips_mrtstat->mrts_upq_ovflw++;
18570Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
18580Sstevel@tonic-gate 			goto error_return;
18590Sstevel@tonic-gate 		}
18600Sstevel@tonic-gate 
18610Sstevel@tonic-gate 		if (npkts == 0) {	/* first upcall */
18620Sstevel@tonic-gate 			int i = 0;
18630Sstevel@tonic-gate 			/*
18640Sstevel@tonic-gate 			 * Now finish installing the new mfc! Now that we have
18650Sstevel@tonic-gate 			 * resources!  Insert new entry at head of hash chain.
18660Sstevel@tonic-gate 			 * Use src and dst which are ipaddr_t's.
18670Sstevel@tonic-gate 			 */
18680Sstevel@tonic-gate 			mfc_rt->mfc_origin.s_addr = src;
18690Sstevel@tonic-gate 			mfc_rt->mfc_mcastgrp.s_addr = dst;
18700Sstevel@tonic-gate 
			/*
			 * Until the daemon answers, the entry has zero ttl
			 * thresholds and a parent of ALL_VIFS.
			 */
18713448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
18723448Sdh155122 			for (i = 0; i < (int)ipst->ips_numvifs; i++)
18730Sstevel@tonic-gate 				mfc_rt->mfc_ttls[i] = 0;
18743448Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
18750Sstevel@tonic-gate 			mfc_rt->mfc_parent = ALL_VIFS;
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 			/* Link into table */
18783448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
18795240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
18803448Sdh155122 				    SL_TRACE,
18810Sstevel@tonic-gate 				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
18820Sstevel@tonic-gate 				    "g 0x%x\n", hash,
18830Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_origin.s_addr),
18840Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
18850Sstevel@tonic-gate 			}
18863448Sdh155122 			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
18873448Sdh155122 			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
18880Sstevel@tonic-gate 			mfc_rt->mfc_rte = NULL;
18890Sstevel@tonic-gate 		}
18900Sstevel@tonic-gate 
18910Sstevel@tonic-gate 		/* Link in the upcall */
18920Sstevel@tonic-gate 		/* First upcall */
18930Sstevel@tonic-gate 		if (mfc_rt->mfc_rte == NULL)
18940Sstevel@tonic-gate 			mfc_rt->mfc_rte = rte;
18950Sstevel@tonic-gate 		else {
18960Sstevel@tonic-gate 			/* not the first upcall */
			/* Walk to the last element and append rte after it. */
18970Sstevel@tonic-gate 			prev_rte = mfc_rt->mfc_rte;
18980Sstevel@tonic-gate 			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
18995240Snordmark 			    prev_rte = rte1, rte1 = rte1->rte_next)
19005240Snordmark 				;
19010Sstevel@tonic-gate 			prev_rte->rte_next = rte;
19020Sstevel@tonic-gate 		}
19030Sstevel@tonic-gate 
19040Sstevel@tonic-gate 		/*
19050Sstevel@tonic-gate 		 * No upcalls waiting, this is first one, so send a message to
19060Sstevel@tonic-gate 		 * routing daemon to install a route into kernel table.
19070Sstevel@tonic-gate 		 */
19080Sstevel@tonic-gate 		if (npkts == 0) {
19090Sstevel@tonic-gate 			struct igmpmsg	*im;
19100Sstevel@tonic-gate 			/* ipha_protocol is 0, for upcall */
19110Sstevel@tonic-gate 			ASSERT(mp_copy != NULL);
			/*
			 * The igmpmsg is overlaid on the start of the copied
			 * packet, so its fields overwrite header bytes there.
			 */
19120Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
19130Sstevel@tonic-gate 			im->im_msgtype	= IGMPMSG_NOCACHE;
19140Sstevel@tonic-gate 			im->im_mbz = 0;
19153448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
19160Sstevel@tonic-gate 			if (pim_reg_packet) {
19173448Sdh155122 				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
19183448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
19190Sstevel@tonic-gate 			} else {
19200Sstevel@tonic-gate 				/*
19210Sstevel@tonic-gate 				 * XXX do we need to hold locks here ?
19220Sstevel@tonic-gate 				 */
				/*
				 * Report the first vif whose ipif sits on the
				 * arriving ill.  If none matches, im_vif is
				 * left unwritten and only the ASSERT below
				 * notices.
				 */
19233448Sdh155122 				for (vifi = 0;
19243448Sdh155122 				    vifi < ipst->ips_numvifs;
19253448Sdh155122 				    vifi++) {
19263448Sdh155122 					if (ipst->ips_vifs[vifi].v_ipif == NULL)
19270Sstevel@tonic-gate 						continue;
19283448Sdh155122 					if (ipst->ips_vifs[vifi].
19293448Sdh155122 					    v_ipif->ipif_ill == ill) {
19300Sstevel@tonic-gate 						im->im_vif = (uchar_t)vifi;
19310Sstevel@tonic-gate 						break;
19320Sstevel@tonic-gate 					}
19330Sstevel@tonic-gate 				}
19343448Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
19353448Sdh155122 				ASSERT(vifi < ipst->ips_numvifs);
19360Sstevel@tonic-gate 			}
19370Sstevel@tonic-gate 
19383448Sdh155122 			ipst->ips_mrtstat->mrts_upcalls++;
19390Sstevel@tonic-gate 			/* Timer to discard upcalls if mrouted is too slow */
19400Sstevel@tonic-gate 			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
19410Sstevel@tonic-gate 			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			/* Both locks are dropped before calling conn_recv. */
19420Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
19433448Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
19445240Snordmark 			/* Pass to RAWIP */
19455240Snordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL);
19460Sstevel@tonic-gate 		} else {
19470Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
19483448Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
			/*
			 * NOTE(review): mp_copy is only assigned on the
			 * new-entry path, where npkts is 0, so it appears to
			 * always be NULL here - confirm freemsg(NULL) is safe.
			 */
19490Sstevel@tonic-gate 			freemsg(mp_copy);
19500Sstevel@tonic-gate 		}
19510Sstevel@tonic-gate 
		/* Packet queued: report only whether it came via a tunnel. */
19520Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
19530Sstevel@tonic-gate 		if (tunnel_src != 0)
19540Sstevel@tonic-gate 			return (1);
19550Sstevel@tonic-gate 		else
19560Sstevel@tonic-gate 			return (0);
		/*
		 * Common failure exit.  Entered with mfcb_lock held and
		 * mfc_rt->mfc_mutex already released by the jumping code.
		 * Frees whatever this call allocated; an mfc is only freed
		 * when it was created here (new_mfc).
		 */
19570Sstevel@tonic-gate 	error_return:
19583448Sdh155122 		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
19590Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
19600Sstevel@tonic-gate 		if (mfc_rt != NULL && (new_mfc == B_TRUE))
19610Sstevel@tonic-gate 			mi_free((char *)mfc_rt);
19620Sstevel@tonic-gate 		if (rte != NULL)
19630Sstevel@tonic-gate 			mi_free((char *)rte);
19640Sstevel@tonic-gate 		if (mp_copy != NULL)
19650Sstevel@tonic-gate 			freemsg(mp_copy);
19660Sstevel@tonic-gate 		if (mp0 != NULL)
19670Sstevel@tonic-gate 			freemsg(mp0);
19680Sstevel@tonic-gate 		return (-1);
19690Sstevel@tonic-gate 	}
19700Sstevel@tonic-gate }
19710Sstevel@tonic-gate 
19720Sstevel@tonic-gate /*
19730Sstevel@tonic-gate  * Clean up the mfctable cache entry if upcall is not serviced.
19740Sstevel@tonic-gate  * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19750Sstevel@tonic-gate  */
19760Sstevel@tonic-gate static void
19770Sstevel@tonic-gate expire_upcalls(void *arg)
19780Sstevel@tonic-gate {
19790Sstevel@tonic-gate 	struct mfc *mfc_rt = arg;
19800Sstevel@tonic-gate 	uint_t hash;
19810Sstevel@tonic-gate 	struct mfc *prev_mfc, *mfc0;
19823448Sdh155122 	ip_stack_t	*ipst;
19835240Snordmark 	conn_t		*mrouter;
19843448Sdh155122 
19853448Sdh155122 	if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) {
19863448Sdh155122 		cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
19873448Sdh155122 		return;
19883448Sdh155122 	}
19893448Sdh155122 	ipst = mfc_rt->mfc_rte->ill->ill_ipst;
19905240Snordmark 	mrouter = ipst->ips_ip_g_mrouter;
19910Sstevel@tonic-gate 
19920Sstevel@tonic-gate 	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
19933448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
19945240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19950Sstevel@tonic-gate 		    "expire_upcalls: hash %d s %x g %x",
19960Sstevel@tonic-gate 		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
19970Sstevel@tonic-gate 		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
19980Sstevel@tonic-gate 	}
19993448Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
20000Sstevel@tonic-gate 	mutex_enter(&mfc_rt->mfc_mutex);
20010Sstevel@tonic-gate 	/*
20020Sstevel@tonic-gate 	 * if timeout has been set to zero, than the
20030Sstevel@tonic-gate 	 * entry has been filled, no need to delete it.
20040Sstevel@tonic-gate 	 */
20050Sstevel@tonic-gate 	if (mfc_rt->mfc_timeout_id == 0)
20060Sstevel@tonic-gate 		goto done;
20073448Sdh155122 	ipst->ips_mrtstat->mrts_cache_cleanups++;
20080Sstevel@tonic-gate 	mfc_rt->mfc_timeout_id = 0;
20090Sstevel@tonic-gate 
20100Sstevel@tonic-gate 	/* Determine entry to be cleaned up in cache table. */
20113448Sdh155122 	for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
20120Sstevel@tonic-gate 	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
20130Sstevel@tonic-gate 		if (mfc0 == mfc_rt)
20140Sstevel@tonic-gate 			break;
20150Sstevel@tonic-gate 
20160Sstevel@tonic-gate 	/* del_mfc takes care of gone mfcs */
20170Sstevel@tonic-gate 	ASSERT(prev_mfc != NULL);
20180Sstevel@tonic-gate 	ASSERT(mfc0 != NULL);
20190Sstevel@tonic-gate 
20200Sstevel@tonic-gate 	/*
20210Sstevel@tonic-gate 	 * Delete the entry from the cache
20220Sstevel@tonic-gate 	 */
20233448Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
20240Sstevel@tonic-gate 	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
20250Sstevel@tonic-gate 
20260Sstevel@tonic-gate 	/*
20270Sstevel@tonic-gate 	 * release_mfc will drop all queued upcall packets.
20280Sstevel@tonic-gate 	 * and will free the mbuf with the pkt, if, timing info.
20290Sstevel@tonic-gate 	 */
20300Sstevel@tonic-gate done:
20310Sstevel@tonic-gate 	mutex_exit(&mfc_rt->mfc_mutex);
20323448Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
20330Sstevel@tonic-gate }
20340Sstevel@tonic-gate 
20350Sstevel@tonic-gate /*
20360Sstevel@tonic-gate  * Packet forwarding routine once entry in the cache is made.
20370Sstevel@tonic-gate  */
20380Sstevel@tonic-gate static int
20390Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
20400Sstevel@tonic-gate     struct mfc *rt)
20410Sstevel@tonic-gate {
20420Sstevel@tonic-gate 	vifi_t vifi;
20430Sstevel@tonic-gate 	struct vif *vifp;
20440Sstevel@tonic-gate 	ipaddr_t dst = ipha->ipha_dst;
20450Sstevel@tonic-gate 	size_t  plen = msgdsize(mp);
20460Sstevel@tonic-gate 	vifi_t num_of_vifs;
20473448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
20485240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
20493448Sdh155122 
20503448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
20515240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20520Sstevel@tonic-gate 		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
20530Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
20540Sstevel@tonic-gate 		    ill->ill_name);
20550Sstevel@tonic-gate 	}
20560Sstevel@tonic-gate 
20570Sstevel@tonic-gate 	/* Macro to send packet on vif */
20580Sstevel@tonic-gate #define	MC_SEND(ipha, mp, vifp, dst) { \
20590Sstevel@tonic-gate 	if ((vifp)->v_flags & VIFF_TUNNEL) \
20600Sstevel@tonic-gate 		encap_send((ipha), (mp), (vifp), (dst)); \
20610Sstevel@tonic-gate 	else if ((vifp)->v_flags & VIFF_REGISTER) \
20620Sstevel@tonic-gate 		register_send((ipha), (mp), (vifp), (dst)); \
20630Sstevel@tonic-gate 	else \
20640Sstevel@tonic-gate 		phyint_send((ipha), (mp), (vifp), (dst)); \
20650Sstevel@tonic-gate }
20660Sstevel@tonic-gate 
20670Sstevel@tonic-gate 	vifi = rt->mfc_parent;
20680Sstevel@tonic-gate 
20690Sstevel@tonic-gate 	/*
20700Sstevel@tonic-gate 	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
20710Sstevel@tonic-gate 	 * Mrouted had no route.
20720Sstevel@tonic-gate 	 * We wanted the route installed in the mfctable to prevent multiple
20730Sstevel@tonic-gate 	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
20740Sstevel@tonic-gate 	 * NULL so we don't want to check the ill. Still needed as of Mrouted
20750Sstevel@tonic-gate 	 * 3.6.
20760Sstevel@tonic-gate 	 */
20770Sstevel@tonic-gate 	if (vifi == NO_VIF) {
20780Sstevel@tonic-gate 		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
20790Sstevel@tonic-gate 		    ill->ill_name));
20803448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
20815240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20820Sstevel@tonic-gate 			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
20830Sstevel@tonic-gate 		}
20840Sstevel@tonic-gate 		return (-1);	/* drop pkt */
20850Sstevel@tonic-gate 	}
20860Sstevel@tonic-gate 
20873448Sdh155122 	if (!lock_good_vif(&ipst->ips_vifs[vifi]))
20880Sstevel@tonic-gate 		return (-1);
20890Sstevel@tonic-gate 	/*
20900Sstevel@tonic-gate 	 * The MFC entries are not cleaned up when an ipif goes
20910Sstevel@tonic-gate 	 * away thus this code has to guard against an MFC referencing
20920Sstevel@tonic-gate 	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
20930Sstevel@tonic-gate 	 * sets the v_ipif to NULL when the ipif disappears.
20940Sstevel@tonic-gate 	 */
20953448Sdh155122 	ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
20963448Sdh155122 
20973448Sdh155122 	if (vifi >= ipst->ips_numvifs) {
20980Sstevel@tonic-gate 		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
20990Sstevel@tonic-gate 		    "%d ill %s viftable ill %s\n",
21003448Sdh155122 		    (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
21013448Sdh155122 		    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
21023448Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
21030Sstevel@tonic-gate 		return (-1);
21040Sstevel@tonic-gate 	}
21050Sstevel@tonic-gate 	/*
21060Sstevel@tonic-gate 	 * Don't forward if it didn't arrive from the parent vif for its
21070Sstevel@tonic-gate 	 * origin. But do match on the groups as we nominate only one
21080Sstevel@tonic-gate 	 * ill in the group for receiving allmulti packets.
21090Sstevel@tonic-gate 	 */
21103448Sdh155122 	if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill &&
21110Sstevel@tonic-gate 	    (ill->ill_group == NULL ||
21123448Sdh155122 	    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_group !=
21133448Sdh155122 		ill->ill_group)) ||
21143448Sdh155122 	    (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
21150Sstevel@tonic-gate 		/* Came in the wrong interface */
21160Sstevel@tonic-gate 		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
21170Sstevel@tonic-gate 			"numvifs %d ill %s viftable ill %s\n",
21183448Sdh155122 			(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
21193448Sdh155122 			ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name));
21203448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
21215240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
21220Sstevel@tonic-gate 			    "ip_mdq: arrived wrong if, vifi %d ill "
21230Sstevel@tonic-gate 			    "%s viftable ill %s\n",
21240Sstevel@tonic-gate 			    (int)vifi, ill->ill_name,
21253448Sdh155122 			    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
21260Sstevel@tonic-gate 		}
21273448Sdh155122 		ipst->ips_mrtstat->mrts_wrong_if++;
21280Sstevel@tonic-gate 		rt->mfc_wrong_if++;
21290Sstevel@tonic-gate 
21300Sstevel@tonic-gate 		/*
21310Sstevel@tonic-gate 		 * If we are doing PIM assert processing and we are forwarding
21320Sstevel@tonic-gate 		 * packets on this interface, and it is a broadcast medium
21330Sstevel@tonic-gate 		 * interface (and not a tunnel), send a message to the routing.
21340Sstevel@tonic-gate 		 *
21350Sstevel@tonic-gate 		 * We use the first ipif on the list, since it's all we have.
21360Sstevel@tonic-gate 		 * Chances are the ipif_flags are the same for ipifs on the ill.
21370Sstevel@tonic-gate 		 */
21383448Sdh155122 		if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
21390Sstevel@tonic-gate 		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
21403448Sdh155122 		    !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
21410Sstevel@tonic-gate 			mblk_t		*mp_copy;
21420Sstevel@tonic-gate 			struct igmpmsg	*im;
21430Sstevel@tonic-gate 
21440Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
21450Sstevel@tonic-gate 			mp_copy = copymsg(mp);
21460Sstevel@tonic-gate 			if (mp_copy == NULL) {
21473448Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
21480Sstevel@tonic-gate 				ip1dbg(("ip_mdq: out of memory "
21490Sstevel@tonic-gate 				    "for mblk, mp_copy\n"));
21503448Sdh155122 				unlock_good_vif(&ipst->ips_vifs[vifi]);
21510Sstevel@tonic-gate 				return (-1);
21520Sstevel@tonic-gate 			}
21530Sstevel@tonic-gate 
21540Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
21550Sstevel@tonic-gate 			im->im_msgtype = IGMPMSG_WRONGVIF;
21560Sstevel@tonic-gate 			im->im_mbz = 0;
21570Sstevel@tonic-gate 			im->im_vif = (ushort_t)vifi;
21585240Snordmark 			/* Pass to RAWIP */
21595240Snordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL);
21600Sstevel@tonic-gate 		}
21613448Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
21620Sstevel@tonic-gate 		if (tunnel_src != 0)
21630Sstevel@tonic-gate 			return (1);
21640Sstevel@tonic-gate 		else
21650Sstevel@tonic-gate 			return (0);
21660Sstevel@tonic-gate 	}
21670Sstevel@tonic-gate 	/*
21680Sstevel@tonic-gate 	 * If I sourced this packet, it counts as output, else it was input.
21690Sstevel@tonic-gate 	 */
21703448Sdh155122 	if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
21713448Sdh155122 		ipst->ips_vifs[vifi].v_pkt_out++;
21723448Sdh155122 		ipst->ips_vifs[vifi].v_bytes_out += plen;
21730Sstevel@tonic-gate 	} else {
21743448Sdh155122 		ipst->ips_vifs[vifi].v_pkt_in++;
21753448Sdh155122 		ipst->ips_vifs[vifi].v_bytes_in += plen;
21760Sstevel@tonic-gate 	}
21770Sstevel@tonic-gate 	mutex_enter(&rt->mfc_mutex);
21780Sstevel@tonic-gate 	rt->mfc_pkt_cnt++;
21790Sstevel@tonic-gate 	rt->mfc_byte_cnt += plen;
21800Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
21813448Sdh155122 	unlock_good_vif(&ipst->ips_vifs[vifi]);
21820Sstevel@tonic-gate 	/*
21830Sstevel@tonic-gate 	 * For each vif, decide if a copy of the packet should be forwarded.
21840Sstevel@tonic-gate 	 * Forward if:
21850Sstevel@tonic-gate 	 *		- the vif threshold ttl is non-zero AND
21860Sstevel@tonic-gate 	 *		- the pkt ttl exceeds the vif's threshold
21870Sstevel@tonic-gate 	 * A non-zero mfc_ttl indicates that the vif is part of
21880Sstevel@tonic-gate 	 * the output set for the mfc entry.
21890Sstevel@tonic-gate 	 */
21903448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
21913448Sdh155122 	num_of_vifs = ipst->ips_numvifs;
21923448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
21933448Sdh155122 	for (vifp = ipst->ips_vifs, vifi = 0;
21943448Sdh155122 	    vifi < num_of_vifs;
21953448Sdh155122 	    vifp++, vifi++) {
21960Sstevel@tonic-gate 		if (!lock_good_vif(vifp))
21970Sstevel@tonic-gate 			continue;
21980Sstevel@tonic-gate 		if ((rt->mfc_ttls[vifi] > 0) &&
21990Sstevel@tonic-gate 		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
22000Sstevel@tonic-gate 			/*
22010Sstevel@tonic-gate 			 * lock_good_vif should not have succedded if
22020Sstevel@tonic-gate 			 * v_ipif is null.
22030Sstevel@tonic-gate 			 */
22040Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
22050Sstevel@tonic-gate 			vifp->v_pkt_out++;
22060Sstevel@tonic-gate 			vifp->v_bytes_out += plen;
22070Sstevel@tonic-gate 			MC_SEND(ipha, mp, vifp, dst);
22083448Sdh155122 			ipst->ips_mrtstat->mrts_fwd_out++;
22090Sstevel@tonic-gate 		}
22100Sstevel@tonic-gate 		unlock_good_vif(vifp);
22110Sstevel@tonic-gate 	}
22120Sstevel@tonic-gate 	if (tunnel_src != 0)
22130Sstevel@tonic-gate 		return (1);
22140Sstevel@tonic-gate 	else
22150Sstevel@tonic-gate 		return (0);
22160Sstevel@tonic-gate }
22170Sstevel@tonic-gate 
22180Sstevel@tonic-gate /*
22190Sstevel@tonic-gate  * Send the packet on physical interface.
22200Sstevel@tonic-gate  * Caller assumes can continue to use mp on return.
22210Sstevel@tonic-gate  */
22220Sstevel@tonic-gate /* ARGSUSED */
22230Sstevel@tonic-gate static void
22240Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
22250Sstevel@tonic-gate {
22260Sstevel@tonic-gate 	mblk_t 	*mp_copy;
22273448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
22285240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
22290Sstevel@tonic-gate 
22300Sstevel@tonic-gate 	/* Make a new reference to the packet */
22310Sstevel@tonic-gate 	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
22320Sstevel@tonic-gate 	if (mp_copy == NULL) {
22333448Sdh155122 		ipst->ips_mrtstat->mrts_fwd_drop++;
22340Sstevel@tonic-gate 		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
22350Sstevel@tonic-gate 		return;
22360Sstevel@tonic-gate 	}
22370Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
22380Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
22390Sstevel@tonic-gate 	else  {
22403448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
22415240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22420Sstevel@tonic-gate 			    "phyint_send: tbf_contr rate %d "
22430Sstevel@tonic-gate 			    "vifp 0x%p mp 0x%p dst 0x%x",
22440Sstevel@tonic-gate 			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
22450Sstevel@tonic-gate 		}
22460Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
22470Sstevel@tonic-gate 	}
22480Sstevel@tonic-gate }
22490Sstevel@tonic-gate 
22500Sstevel@tonic-gate /*
22510Sstevel@tonic-gate  * Send the whole packet for REGISTER encapsulation to PIM daemon
22520Sstevel@tonic-gate  * Caller assumes it can continue to use mp on return.
22530Sstevel@tonic-gate  */
22540Sstevel@tonic-gate /* ARGSUSED */
22550Sstevel@tonic-gate static void
22560Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
22570Sstevel@tonic-gate {
22580Sstevel@tonic-gate 	struct igmpmsg	*im;
22590Sstevel@tonic-gate 	mblk_t		*mp_copy;
22600Sstevel@tonic-gate 	ipha_t		*ipha_copy;
22613448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
22625240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
22633448Sdh155122 
22643448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
22655240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22660Sstevel@tonic-gate 		    "register_send: src %x, dst %x\n",
22670Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
22680Sstevel@tonic-gate 	}
22690Sstevel@tonic-gate 
22700Sstevel@tonic-gate 	/*
22710Sstevel@tonic-gate 	 * Copy the old packet & pullup its IP header into the new mblk_t so we
22720Sstevel@tonic-gate 	 * can modify it.  Try to fill the new mblk_t since if we don't the
22730Sstevel@tonic-gate 	 * ethernet driver will.
22740Sstevel@tonic-gate 	 */
22750Sstevel@tonic-gate 	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
22760Sstevel@tonic-gate 	if (mp_copy == NULL) {
22773448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
22783448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
22795240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22800Sstevel@tonic-gate 			    "register_send: allocb failure.");
22810Sstevel@tonic-gate 		}
22820Sstevel@tonic-gate 		return;
22830Sstevel@tonic-gate 	}
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 	/*
22860Sstevel@tonic-gate 	 * Bump write pointer to account for igmpmsg being added.
22870Sstevel@tonic-gate 	 */
22880Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);
22890Sstevel@tonic-gate 
22900Sstevel@tonic-gate 	/*
22910Sstevel@tonic-gate 	 * Chain packet to new mblk_t.
22920Sstevel@tonic-gate 	 */
22930Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
22943448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
22953448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
22965240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22970Sstevel@tonic-gate 			    "register_send: copymsg failure.");
22980Sstevel@tonic-gate 		}
22990Sstevel@tonic-gate 		freeb(mp_copy);
23000Sstevel@tonic-gate 		return;
23010Sstevel@tonic-gate 	}
23020Sstevel@tonic-gate 
23030Sstevel@tonic-gate 	/*
23045240Snordmark 	 * icmp_input() asserts that IP version field is set to an
23050Sstevel@tonic-gate 	 * appropriate version. Hence, the struct igmpmsg that this really
23060Sstevel@tonic-gate 	 * becomes, needs to have the correct IP version field.
23070Sstevel@tonic-gate 	 */
23080Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
23090Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
23100Sstevel@tonic-gate 
23110Sstevel@tonic-gate 	/*
23120Sstevel@tonic-gate 	 * The kernel uses the struct igmpmsg header to encode the messages to
23130Sstevel@tonic-gate 	 * the multicast routing daemon. Fill in the fields in the header
23140Sstevel@tonic-gate 	 * starting with the message type which is IGMPMSG_WHOLEPKT
23150Sstevel@tonic-gate 	 */
23160Sstevel@tonic-gate 	im = (struct igmpmsg *)mp_copy->b_rptr;
23170Sstevel@tonic-gate 	im->im_msgtype = IGMPMSG_WHOLEPKT;
23180Sstevel@tonic-gate 	im->im_src.s_addr = ipha->ipha_src;
23190Sstevel@tonic-gate 	im->im_dst.s_addr = ipha->ipha_dst;
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate 	/*
23220Sstevel@tonic-gate 	 * Must Be Zero. This is because the struct igmpmsg is really an IP
23230Sstevel@tonic-gate 	 * header with renamed fields and the multicast routing daemon uses
23240Sstevel@tonic-gate 	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
23250Sstevel@tonic-gate 	 */
23260Sstevel@tonic-gate 	im->im_mbz = 0;
23270Sstevel@tonic-gate 
23283448Sdh155122 	++ipst->ips_mrtstat->mrts_upcalls;
23295240Snordmark 	if (!canputnext(mrouter->conn_rq)) {
23303448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_regsend_drops;
23313448Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
23325240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23330Sstevel@tonic-gate 			    "register_send: register upcall failure.");
23340Sstevel@tonic-gate 		}
23350Sstevel@tonic-gate 		freemsg(mp_copy);
23360Sstevel@tonic-gate 	} else {
23375240Snordmark 		/* Pass to RAWIP */
23385240Snordmark 		(mrouter->conn_recv)(mrouter, mp_copy, NULL);
23390Sstevel@tonic-gate 	}
23400Sstevel@tonic-gate }
23410Sstevel@tonic-gate 
23420Sstevel@tonic-gate /*
23430Sstevel@tonic-gate  * pim_validate_cksum handles verification of the checksum in the
23440Sstevel@tonic-gate  * pim header.  For PIM Register packets, the checksum is calculated
23450Sstevel@tonic-gate  * across the PIM header only.  For all other packets, the checksum
23460Sstevel@tonic-gate  * is for the PIM header and remainder of the packet.
23470Sstevel@tonic-gate  *
23480Sstevel@tonic-gate  * returns: B_TRUE, if checksum is okay.
23490Sstevel@tonic-gate  *          B_FALSE, if checksum is not valid.
23500Sstevel@tonic-gate  */
23510Sstevel@tonic-gate static boolean_t
23520Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
23530Sstevel@tonic-gate {
23540Sstevel@tonic-gate 	mblk_t *mp_dup;
23550Sstevel@tonic-gate 
23560Sstevel@tonic-gate 	if ((mp_dup = dupmsg(mp)) == NULL)
23570Sstevel@tonic-gate 		return (B_FALSE);
23580Sstevel@tonic-gate 
23590Sstevel@tonic-gate 	mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
23600Sstevel@tonic-gate 	if (pimp->pim_type == PIM_REGISTER)
23610Sstevel@tonic-gate 		mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
23620Sstevel@tonic-gate 	if (IP_CSUM(mp_dup, 0, 0)) {
23630Sstevel@tonic-gate 		freemsg(mp_dup);
23640Sstevel@tonic-gate 		return (B_FALSE);
23650Sstevel@tonic-gate 	}
23660Sstevel@tonic-gate 	freemsg(mp_dup);
23670Sstevel@tonic-gate 	return (B_TRUE);
23680Sstevel@tonic-gate }
23690Sstevel@tonic-gate 
23700Sstevel@tonic-gate /*
23710Sstevel@tonic-gate  * int
23723448Sdh155122  * pim_input(queue_t *, mblk_t *, ill_t *ill) - Process PIM protocol packets.
23730Sstevel@tonic-gate  *	IP Protocol 103. Register messages are decapsulated and sent
23740Sstevel@tonic-gate  *	onto multicast forwarding.
23750Sstevel@tonic-gate  */
23760Sstevel@tonic-gate int
23773448Sdh155122 pim_input(queue_t *q, mblk_t *mp, ill_t *ill)
23780Sstevel@tonic-gate {
23790Sstevel@tonic-gate 	ipha_t		*eip, *ip;
23800Sstevel@tonic-gate 	int		iplen, pimlen, iphlen;
23810Sstevel@tonic-gate 	struct pim	*pimp;	/* pointer to a pim struct */
23820Sstevel@tonic-gate 	uint32_t	*reghdr;
23833448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
23845240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
23850Sstevel@tonic-gate 
23860Sstevel@tonic-gate 	/*
23870Sstevel@tonic-gate 	 * Pullup the msg for PIM protocol processing.
23880Sstevel@tonic-gate 	 */
23890Sstevel@tonic-gate 	if (pullupmsg(mp, -1) == 0) {
23903448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
23910Sstevel@tonic-gate 		freemsg(mp);
23920Sstevel@tonic-gate 		return (-1);
23930Sstevel@tonic-gate 	}
23940Sstevel@tonic-gate 
23950Sstevel@tonic-gate 	ip = (ipha_t *)mp->b_rptr;
23960Sstevel@tonic-gate 	iplen = ip->ipha_length;
23970Sstevel@tonic-gate 	iphlen = IPH_HDR_LENGTH(ip);
23980Sstevel@tonic-gate 	pimlen = ntohs(iplen) - iphlen;
23990Sstevel@tonic-gate 
24000Sstevel@tonic-gate 	/*
24010Sstevel@tonic-gate 	 * Validate lengths
24020Sstevel@tonic-gate 	 */
24030Sstevel@tonic-gate 	if (pimlen < PIM_MINLEN) {
24043448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_malformed;
24053448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24065240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24070Sstevel@tonic-gate 			    "pim_input: length not at least minlen");
24080Sstevel@tonic-gate 		}
24090Sstevel@tonic-gate 		freemsg(mp);
24100Sstevel@tonic-gate 		return (-1);
24110Sstevel@tonic-gate 	}
24120Sstevel@tonic-gate 
24130Sstevel@tonic-gate 	/*
24140Sstevel@tonic-gate 	 * Point to the PIM header.
24150Sstevel@tonic-gate 	 */
24160Sstevel@tonic-gate 	pimp = (struct pim *)((caddr_t)ip + iphlen);
24170Sstevel@tonic-gate 
24180Sstevel@tonic-gate 	/*
24190Sstevel@tonic-gate 	 * Check the version number.
24200Sstevel@tonic-gate 	 */
24210Sstevel@tonic-gate 	if (pimp->pim_vers != PIM_VERSION) {
24223448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badversion;
24233448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24245240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24250Sstevel@tonic-gate 			    "pim_input: unknown version of PIM");
24260Sstevel@tonic-gate 		}
24270Sstevel@tonic-gate 		freemsg(mp);
24280Sstevel@tonic-gate 		return (-1);
24290Sstevel@tonic-gate 	}
24300Sstevel@tonic-gate 
24310Sstevel@tonic-gate 	/*
24320Sstevel@tonic-gate 	 * Validate the checksum
24330Sstevel@tonic-gate 	 */
24340Sstevel@tonic-gate 	if (!pim_validate_cksum(mp, ip, pimp)) {
24353448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
24363448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24375240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24380Sstevel@tonic-gate 			    "pim_input: invalid checksum");
24390Sstevel@tonic-gate 		}
24400Sstevel@tonic-gate 		freemsg(mp);
24410Sstevel@tonic-gate 		return (-1);
24420Sstevel@tonic-gate 	}
24430Sstevel@tonic-gate 
24440Sstevel@tonic-gate 	if (pimp->pim_type != PIM_REGISTER)
24450Sstevel@tonic-gate 		return (0);
24460Sstevel@tonic-gate 
24470Sstevel@tonic-gate 	reghdr = (uint32_t *)(pimp + 1);
24480Sstevel@tonic-gate 	eip = (ipha_t *)(reghdr + 1);
24490Sstevel@tonic-gate 
24500Sstevel@tonic-gate 	/*
24510Sstevel@tonic-gate 	 * check if the inner packet is destined to mcast group
24520Sstevel@tonic-gate 	 */
24530Sstevel@tonic-gate 	if (!CLASSD(eip->ipha_dst)) {
24543448Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badregisters;
24553448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
24565240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24570Sstevel@tonic-gate 			    "pim_input: Inner pkt not mcast .. !");
24580Sstevel@tonic-gate 		}
24590Sstevel@tonic-gate 		freemsg(mp);
24600Sstevel@tonic-gate 		return (-1);
24610Sstevel@tonic-gate 	}
24623448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
24635240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24640Sstevel@tonic-gate 		    "register from %x, to %x, len %d",
24650Sstevel@tonic-gate 		    ntohl(eip->ipha_src),
24660Sstevel@tonic-gate 		    ntohl(eip->ipha_dst),
24670Sstevel@tonic-gate 		    ntohs(eip->ipha_length));
24680Sstevel@tonic-gate 	}
24690Sstevel@tonic-gate 	/*
24700Sstevel@tonic-gate 	 * If the null register bit is not set, decapsulate
24710Sstevel@tonic-gate 	 * the packet before forwarding it.
24720Sstevel@tonic-gate 	 */
24730Sstevel@tonic-gate 	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER)) {
24740Sstevel@tonic-gate 		mblk_t *mp_copy;
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 		/* Copy the message */
24770Sstevel@tonic-gate 		if ((mp_copy = copymsg(mp)) == NULL) {
24783448Sdh155122 			++ipst->ips_mrtstat->mrts_pim_nomemory;
24790Sstevel@tonic-gate 			freemsg(mp);
24800Sstevel@tonic-gate 			return (-1);
24810Sstevel@tonic-gate 		}
24820Sstevel@tonic-gate 
24830Sstevel@tonic-gate 		/*
24840Sstevel@tonic-gate 		 * Decapsulate the packet and give it to
24850Sstevel@tonic-gate 		 * register_mforward.
24860Sstevel@tonic-gate 		 */
24870Sstevel@tonic-gate 		mp_copy->b_rptr += iphlen + sizeof (pim_t) +
24880Sstevel@tonic-gate 		    sizeof (*reghdr);
24893448Sdh155122 		if (register_mforward(q, mp_copy, ill) != 0) {
24900Sstevel@tonic-gate 			freemsg(mp);
24910Sstevel@tonic-gate 			return (-1);
24920Sstevel@tonic-gate 		}
24930Sstevel@tonic-gate 	}
24940Sstevel@tonic-gate 
24950Sstevel@tonic-gate 	/*
24960Sstevel@tonic-gate 	 * Pass all valid PIM packets up to any process(es) listening on a raw
24970Sstevel@tonic-gate 	 * PIM socket. For Solaris it is done right after pim_input() is
24980Sstevel@tonic-gate 	 * called.
24990Sstevel@tonic-gate 	 */
25000Sstevel@tonic-gate 	return (0);
25010Sstevel@tonic-gate }
25020Sstevel@tonic-gate 
25030Sstevel@tonic-gate /*
25040Sstevel@tonic-gate  * PIM sparse mode hook.  Called by pim_input after decapsulating
25050Sstevel@tonic-gate  * the packet. Loop back the packet, as if we have received it.
25060Sstevel@tonic-gate  * In pim_input() we have to check if the destination is a multicast address.
25070Sstevel@tonic-gate  */
25080Sstevel@tonic-gate /* ARGSUSED */
25090Sstevel@tonic-gate static int
25103448Sdh155122 register_mforward(queue_t *q, mblk_t *mp, ill_t *ill)
25110Sstevel@tonic-gate {
25123448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
25135240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
25143448Sdh155122 
25153448Sdh155122 	ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);
25163448Sdh155122 
25173448Sdh155122 	if (ipst->ips_ip_mrtdebug > 3) {
25180Sstevel@tonic-gate 		ipha_t *ipha;
25190Sstevel@tonic-gate 
25200Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
25215240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25220Sstevel@tonic-gate 		    "register_mforward: src %x, dst %x\n",
25230Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
25240Sstevel@tonic-gate 	}
25250Sstevel@tonic-gate 	/*
25260Sstevel@tonic-gate 	 * Need to pass in to ip_mforward() the information that the
25270Sstevel@tonic-gate 	 * packet has arrived on the register_vif. We use the solution that
25280Sstevel@tonic-gate 	 * ip_mroute_decap() employs: use mp->b_prev to pass some information
25290Sstevel@tonic-gate 	 * to ip_mforward(). Nonzero value means the packet has arrived on a
25300Sstevel@tonic-gate 	 * tunnel (ip_mroute_decap() puts the address of the other side of the
25310Sstevel@tonic-gate 	 * tunnel there.) This is safe since ip_rput() either frees the packet
25320Sstevel@tonic-gate 	 * or passes it to ip_mforward(). We use
25330Sstevel@tonic-gate 	 * PIM_REGISTER_MARKER = 0xffffffff to indicate the has arrived on the
25340Sstevel@tonic-gate 	 * register vif. If in the future we have more than one register vifs,
25350Sstevel@tonic-gate 	 * then this will need re-examination.
25360Sstevel@tonic-gate 	 */
25370Sstevel@tonic-gate 	mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER;
25383448Sdh155122 	++ipst->ips_mrtstat->mrts_pim_regforwards;
25390Sstevel@tonic-gate 	ip_rput(q, mp);
25400Sstevel@tonic-gate 	return (0);
25410Sstevel@tonic-gate }
25420Sstevel@tonic-gate 
25430Sstevel@tonic-gate /*
25440Sstevel@tonic-gate  * Send an encapsulated packet.
25450Sstevel@tonic-gate  * Caller assumes can continue to use mp when routine returns.
25460Sstevel@tonic-gate  */
25470Sstevel@tonic-gate /* ARGSUSED */
25480Sstevel@tonic-gate static void
25490Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
25500Sstevel@tonic-gate {
25510Sstevel@tonic-gate 	mblk_t 	*mp_copy;
25520Sstevel@tonic-gate 	ipha_t 	*ipha_copy;
25530Sstevel@tonic-gate 	size_t	len;
25543448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
25555240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
25563448Sdh155122 
25573448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
25585240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25593448Sdh155122 		    "encap_send: vif %ld enter",
25603448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs));
25610Sstevel@tonic-gate 	}
25620Sstevel@tonic-gate 	len = ntohs(ipha->ipha_length);
25630Sstevel@tonic-gate 
25640Sstevel@tonic-gate 	/*
25650Sstevel@tonic-gate 	 * Copy the old packet & pullup it's IP header into the
25660Sstevel@tonic-gate 	 * new mbuf so we can modify it.  Try to fill the new
25670Sstevel@tonic-gate 	 * mbuf since if we don't the ethernet driver will.
25680Sstevel@tonic-gate 	 */
25690Sstevel@tonic-gate 	mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
25700Sstevel@tonic-gate 	if (mp_copy == NULL)
25710Sstevel@tonic-gate 		return;
25720Sstevel@tonic-gate 	mp_copy->b_rptr += 32;
25730Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
25740Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
25750Sstevel@tonic-gate 		freeb(mp_copy);
25760Sstevel@tonic-gate 		return;
25770Sstevel@tonic-gate 	}
25780Sstevel@tonic-gate 
25790Sstevel@tonic-gate 	/*
25800Sstevel@tonic-gate 	 * Fill in the encapsulating IP header.
25810Sstevel@tonic-gate 	 * Remote tunnel dst in rmt_addr, from add_vif().
25820Sstevel@tonic-gate 	 */
25830Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
25840Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
25850Sstevel@tonic-gate 	ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
25860Sstevel@tonic-gate 	ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
25870Sstevel@tonic-gate 	ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
25880Sstevel@tonic-gate 	ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
25890Sstevel@tonic-gate 	ASSERT(ipha_copy->ipha_ident == 0);
25900Sstevel@tonic-gate 
25910Sstevel@tonic-gate 	/* Turn the encapsulated IP header back into a valid one. */
25920Sstevel@tonic-gate 	ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
25930Sstevel@tonic-gate 	ipha->ipha_ttl--;
25940Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = 0;
25950Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
25960Sstevel@tonic-gate 
25973448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
25985240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25990Sstevel@tonic-gate 		    "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
26000Sstevel@tonic-gate 	}
26010Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
26020Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
26030Sstevel@tonic-gate 	else
26040Sstevel@tonic-gate 		/* ipha is from the original header */
26050Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, ipha);
26060Sstevel@tonic-gate }
26070Sstevel@tonic-gate 
26080Sstevel@tonic-gate /*
26090Sstevel@tonic-gate  * De-encapsulate a packet and feed it back through IP input.
26100Sstevel@tonic-gate  * This routine is called whenever IP gets a packet with prototype
26110Sstevel@tonic-gate  * IPPROTO_ENCAP and a local destination address.
26120Sstevel@tonic-gate  */
26130Sstevel@tonic-gate void
26143448Sdh155122 ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill)
26150Sstevel@tonic-gate {
26160Sstevel@tonic-gate 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
26170Sstevel@tonic-gate 	ipha_t		*ipha_encap;
26180Sstevel@tonic-gate 	int		hlen = IPH_HDR_LENGTH(ipha);
26190Sstevel@tonic-gate 	ipaddr_t	src;
26200Sstevel@tonic-gate 	struct vif	*vifp;
26213448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
26225240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
26230Sstevel@tonic-gate 
26240Sstevel@tonic-gate 	/*
26250Sstevel@tonic-gate 	 * Dump the packet if it's not to a multicast destination or if
26260Sstevel@tonic-gate 	 * we don't have an encapsulating tunnel with the source.
26270Sstevel@tonic-gate 	 * Note:  This code assumes that the remote site IP address
26280Sstevel@tonic-gate 	 * uniquely identifies the tunnel (i.e., that this site has
26290Sstevel@tonic-gate 	 * at most one tunnel with the remote site).
26300Sstevel@tonic-gate 	 */
26310Sstevel@tonic-gate 	ipha_encap = (ipha_t *)((char *)ipha + hlen);
26320Sstevel@tonic-gate 	if (!CLASSD(ipha_encap->ipha_dst)) {
26333448Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
26340Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: bad tunnel\n"));
26350Sstevel@tonic-gate 		freemsg(mp);
26360Sstevel@tonic-gate 		return;
26370Sstevel@tonic-gate 	}
26380Sstevel@tonic-gate 	src = (ipaddr_t)ipha->ipha_src;
26393448Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
26403448Sdh155122 	if (src != ipst->ips_last_encap_src) {
26410Sstevel@tonic-gate 		struct vif *vife;
26420Sstevel@tonic-gate 
26433448Sdh155122 		vifp = ipst->ips_vifs;
26443448Sdh155122 		vife = vifp + ipst->ips_numvifs;
26453448Sdh155122 		ipst->ips_last_encap_src = src;
26463448Sdh155122 		ipst->ips_last_encap_vif = 0;
26470Sstevel@tonic-gate 		for (; vifp < vife; ++vifp) {
26480Sstevel@tonic-gate 			if (!lock_good_vif(vifp))
26490Sstevel@tonic-gate 				continue;
26500Sstevel@tonic-gate 			if (vifp->v_rmt_addr.s_addr == src) {
26510Sstevel@tonic-gate 				if (vifp->v_flags & VIFF_TUNNEL)
26523448Sdh155122 					ipst->ips_last_encap_vif = vifp;
26533448Sdh155122 				if (ipst->ips_ip_mrtdebug > 1) {
26545240Snordmark 					(void) mi_strlog(mrouter->conn_rq,
26550Sstevel@tonic-gate 					    1, SL_TRACE,
26560Sstevel@tonic-gate 					    "ip_mroute_decap: good tun "
26570Sstevel@tonic-gate 					    "vif %ld with %x",
26583448Sdh155122 					    (ptrdiff_t)(vifp - ipst->ips_vifs),
26590Sstevel@tonic-gate 					    ntohl(src));
26600Sstevel@tonic-gate 				}
26610Sstevel@tonic-gate 				unlock_good_vif(vifp);
26620Sstevel@tonic-gate 				break;
26630Sstevel@tonic-gate 			}
26640Sstevel@tonic-gate 			unlock_good_vif(vifp);
26650Sstevel@tonic-gate 		}
26660Sstevel@tonic-gate 	}
26673448Sdh155122 	if ((vifp = ipst->ips_last_encap_vif) == 0) {
26683448Sdh155122 		mutex_exit(&ipst->ips_last_encap_lock);
26693448Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
26700Sstevel@tonic-gate 		freemsg(mp);
26710Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
26723448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
26730Sstevel@tonic-gate 		return;
26740Sstevel@tonic-gate 	}
26753448Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
26760Sstevel@tonic-gate 
26770Sstevel@tonic-gate 	/*
26780Sstevel@tonic-gate 	 * Need to pass in the tunnel source to ip_mforward (so that it can
26790Sstevel@tonic-gate 	 * verify that the packet arrived over the correct vif.)  We use b_prev
26800Sstevel@tonic-gate 	 * to pass this information. This is safe since the ip_rput either
26810Sstevel@tonic-gate 	 * frees the packet or passes it to ip_mforward.
26820Sstevel@tonic-gate 	 */
26830Sstevel@tonic-gate 	mp->b_prev = (mblk_t *)(uintptr_t)src;
26840Sstevel@tonic-gate 	mp->b_rptr += hlen;
26850Sstevel@tonic-gate 	/* Feed back into ip_rput as an M_DATA. */
26860Sstevel@tonic-gate 	ip_rput(q, mp);
26870Sstevel@tonic-gate }
26880Sstevel@tonic-gate 
26890Sstevel@tonic-gate /*
26900Sstevel@tonic-gate  * Remove all records with v_ipif == ipif.  Called when an interface goes away
26910Sstevel@tonic-gate  * (stream closed).  Called as writer.
26920Sstevel@tonic-gate  */
26930Sstevel@tonic-gate void
26940Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif)
26950Sstevel@tonic-gate {
26960Sstevel@tonic-gate 	vifi_t vifi, tmp_vifi;
26970Sstevel@tonic-gate 	vifi_t num_of_vifs;
26983448Sdh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
26990Sstevel@tonic-gate 
27000Sstevel@tonic-gate 	/* Can't check vifi >= 0 since vifi_t is unsigned! */
27010Sstevel@tonic-gate 
27023448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
27033448Sdh155122 	num_of_vifs = ipst->ips_numvifs;
27043448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
27050Sstevel@tonic-gate 
27060Sstevel@tonic-gate 	for (vifi = num_of_vifs; vifi != 0; vifi--) {
27070Sstevel@tonic-gate 		tmp_vifi = vifi - 1;
27083448Sdh155122 		if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) {
27093448Sdh155122 			(void) del_vif(&tmp_vifi, NULL, NULL, ipst);
27100Sstevel@tonic-gate 		}
27110Sstevel@tonic-gate 	}
27120Sstevel@tonic-gate }
27130Sstevel@tonic-gate 
27140Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away.  Called by ill_delete.  */
27150Sstevel@tonic-gate void
27160Sstevel@tonic-gate reset_mrt_ill(ill_t *ill)
27170Sstevel@tonic-gate {
27180Sstevel@tonic-gate 	struct mfc		*rt;
27190Sstevel@tonic-gate 	struct rtdetq	*rte;
27200Sstevel@tonic-gate 	int			i;
27213448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
27225240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
27230Sstevel@tonic-gate 
27240Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
27253448Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
27263448Sdh155122 		if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
27273448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
27285240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
27293448Sdh155122 				    SL_TRACE,
27300Sstevel@tonic-gate 				    "reset_mrt_ill: mfctable [%d]", i);
27310Sstevel@tonic-gate 			}
27320Sstevel@tonic-gate 			while (rt != NULL) {
27330Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
27340Sstevel@tonic-gate 				while ((rte = rt->mfc_rte) != NULL) {
27350Sstevel@tonic-gate 					if (rte->ill == ill) {
27363448Sdh155122 						if (ipst->ips_ip_mrtdebug > 1) {
27373448Sdh155122 						(void) mi_strlog(
27385240Snordmark 						    mrouter->conn_rq,
27393448Sdh155122 						    1, SL_TRACE,
27403448Sdh155122 						    "reset_mrt_ill: "
2741*7240Srh87107 						    "ill 0x%p", (void *)ill);
27420Sstevel@tonic-gate 						}
27430Sstevel@tonic-gate 						rt->mfc_rte = rte->rte_next;
27440Sstevel@tonic-gate 						freemsg(rte->mp);
27450Sstevel@tonic-gate 						mi_free((char *)rte);
27460Sstevel@tonic-gate 					}
27470Sstevel@tonic-gate 				}
27480Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
27490Sstevel@tonic-gate 				rt = rt->mfc_next;
27500Sstevel@tonic-gate 			}
27510Sstevel@tonic-gate 		}
27523448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
27530Sstevel@tonic-gate 	}
27540Sstevel@tonic-gate }
27550Sstevel@tonic-gate 
27560Sstevel@tonic-gate /*
27570Sstevel@tonic-gate  * Token bucket filter module.
27580Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
27590Sstevel@tonic-gate  */
27600Sstevel@tonic-gate static void
27610Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
27620Sstevel@tonic-gate {
27630Sstevel@tonic-gate 	size_t 	p_len =  msgdsize(mp);
27640Sstevel@tonic-gate 	struct tbf	*t    = vifp->v_tbf;
27650Sstevel@tonic-gate 	timeout_id_t id = 0;
27663448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
27675240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
27680Sstevel@tonic-gate 
27690Sstevel@tonic-gate 	/* Drop if packet is too large */
27700Sstevel@tonic-gate 	if (p_len > MAX_BKT_SIZE) {
27713448Sdh155122 		ipst->ips_mrtstat->mrts_pkt2large++;
27720Sstevel@tonic-gate 		freemsg(mp);
27730Sstevel@tonic-gate 		return;
27740Sstevel@tonic-gate 	}
27753448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
27765240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
27770Sstevel@tonic-gate 		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
27783448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
27790Sstevel@tonic-gate 		    ntohl(ipha->ipha_dst));
27800Sstevel@tonic-gate 	}
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
27830Sstevel@tonic-gate 
27840Sstevel@tonic-gate 	tbf_update_tokens(vifp);
27850Sstevel@tonic-gate 
27860Sstevel@tonic-gate 	/*
27870Sstevel@tonic-gate 	 * If there are enough tokens,
27880Sstevel@tonic-gate 	 * and the queue is empty, send this packet out.
27890Sstevel@tonic-gate 	 */
27903448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
27915240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
27920Sstevel@tonic-gate 		    "tbf_control: vif %ld, TOKENS  %d, pkt len  %lu, qlen  %d",
27933448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
27940Sstevel@tonic-gate 		    t->tbf_q_len);
27950Sstevel@tonic-gate 	}
27960Sstevel@tonic-gate 	/* No packets are queued */
27970Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
27980Sstevel@tonic-gate 		/* queue empty, send packet if enough tokens */
27990Sstevel@tonic-gate 		if (p_len <= t->tbf_n_tok) {
28000Sstevel@tonic-gate 			t->tbf_n_tok -= p_len;
28010Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
28020Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
28030Sstevel@tonic-gate 			return;
28040Sstevel@tonic-gate 		} else {
28050Sstevel@tonic-gate 			/* Queue packet and timeout till later */
28060Sstevel@tonic-gate 			tbf_queue(vifp, mp);
28070Sstevel@tonic-gate 			ASSERT(vifp->v_timeout_id == 0);
28080Sstevel@tonic-gate 			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
28090Sstevel@tonic-gate 			    TBF_REPROCESS);
28100Sstevel@tonic-gate 		}
28110Sstevel@tonic-gate 	} else if (t->tbf_q_len < t->tbf_max_q_len) {
28120Sstevel@tonic-gate 		/* Finite queue length, so queue pkts and process queue */
28130Sstevel@tonic-gate 		tbf_queue(vifp, mp);
28140Sstevel@tonic-gate 		tbf_process_q(vifp);
28150Sstevel@tonic-gate 	} else {
28160Sstevel@tonic-gate 		/* Check that we have UDP header with IP header */
28170Sstevel@tonic-gate 		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
28185240Snordmark 		    sizeof (struct udphdr);
28190Sstevel@tonic-gate 
28200Sstevel@tonic-gate 		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
28210Sstevel@tonic-gate 			if (!pullupmsg(mp, hdr_length)) {
28220Sstevel@tonic-gate 				freemsg(mp);
28230Sstevel@tonic-gate 				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
28240Sstevel@tonic-gate 				    "vif %ld src 0x%x dst 0x%x\n",
28253448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs),
28260Sstevel@tonic-gate 				    ntohl(ipha->ipha_src),
28270Sstevel@tonic-gate 				    ntohl(ipha->ipha_dst)));
28280Sstevel@tonic-gate 				mutex_exit(&vifp->v_tbf->tbf_lock);
28290Sstevel@tonic-gate 				return;
28300Sstevel@tonic-gate 			} else
28310Sstevel@tonic-gate 				/* Have to reassign ipha after pullupmsg */
28320Sstevel@tonic-gate 				ipha = (ipha_t *)mp->b_rptr;
28330Sstevel@tonic-gate 		}
28340Sstevel@tonic-gate 		/*
28350Sstevel@tonic-gate 		 * Queue length too much,
28360Sstevel@tonic-gate 		 * try to selectively dq, or queue and process
28370Sstevel@tonic-gate 		 */
28380Sstevel@tonic-gate 		if (!tbf_dq_sel(vifp, ipha)) {
28393448Sdh155122 			ipst->ips_mrtstat->mrts_q_overflow++;
28400Sstevel@tonic-gate 			freemsg(mp);
28410Sstevel@tonic-gate 		} else {
28420Sstevel@tonic-gate 			tbf_queue(vifp, mp);
28430Sstevel@tonic-gate 			tbf_process_q(vifp);
28440Sstevel@tonic-gate 		}
28450Sstevel@tonic-gate 	}
28460Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
28470Sstevel@tonic-gate 		id = vifp->v_timeout_id;
28480Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
28490Sstevel@tonic-gate 	}
28500Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
28510Sstevel@tonic-gate 	if (id != 0)
28520Sstevel@tonic-gate 		(void) untimeout(id);
28530Sstevel@tonic-gate }
28540Sstevel@tonic-gate 
28550Sstevel@tonic-gate /*
28560Sstevel@tonic-gate  * Adds a packet to the tbf queue at the interface.
28570Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
28580Sstevel@tonic-gate  */
28590Sstevel@tonic-gate static void
28600Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp)
28610Sstevel@tonic-gate {
28620Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
28633448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
28645240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
28653448Sdh155122 
28663448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
28675240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28683448Sdh155122 		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
28690Sstevel@tonic-gate 	}
28700Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
28710Sstevel@tonic-gate 
28720Sstevel@tonic-gate 	if (t->tbf_t == NULL) {
28730Sstevel@tonic-gate 		/* Queue was empty */
28740Sstevel@tonic-gate 		t->tbf_q = mp;
28750Sstevel@tonic-gate 	} else {
28760Sstevel@tonic-gate 		/* Insert at tail */
28770Sstevel@tonic-gate 		t->tbf_t->b_next = mp;
28780Sstevel@tonic-gate 	}
28790Sstevel@tonic-gate 	/* set new tail pointer */
28800Sstevel@tonic-gate 	t->tbf_t = mp;
28810Sstevel@tonic-gate 
28820Sstevel@tonic-gate 	mp->b_next = mp->b_prev = NULL;
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate 	t->tbf_q_len++;
28850Sstevel@tonic-gate }
28860Sstevel@tonic-gate 
28870Sstevel@tonic-gate /*
28880Sstevel@tonic-gate  * Process the queue at the vif interface.
28890Sstevel@tonic-gate  * Drops the tbf_lock when sending packets.
28900Sstevel@tonic-gate  *
28910Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
28920Sstevel@tonic-gate  */
28930Sstevel@tonic-gate static void
28940Sstevel@tonic-gate tbf_process_q(struct vif *vifp)
28950Sstevel@tonic-gate {
28960Sstevel@tonic-gate 	mblk_t	*mp;
28970Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
28980Sstevel@tonic-gate 	size_t	len;
28993448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29005240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29013448Sdh155122 
29023448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
29035240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29040Sstevel@tonic-gate 		    "tbf_process_q 1: vif %ld qlen = %d",
29053448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len);
29060Sstevel@tonic-gate 	}
29070Sstevel@tonic-gate 
29080Sstevel@tonic-gate 	/*
29090Sstevel@tonic-gate 	 * Loop through the queue at the interface and send
29100Sstevel@tonic-gate 	 * as many packets as possible.
29110Sstevel@tonic-gate 	 */
29120Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29130Sstevel@tonic-gate 
29140Sstevel@tonic-gate 	while (t->tbf_q_len > 0) {
29150Sstevel@tonic-gate 		mp = t->tbf_q;
29160Sstevel@tonic-gate 		len = (size_t)msgdsize(mp); /* length of ip pkt */
29170Sstevel@tonic-gate 
29180Sstevel@tonic-gate 		/* Determine if the packet can be sent */
29190Sstevel@tonic-gate 		if (len <= t->tbf_n_tok) {
29200Sstevel@tonic-gate 			/*
29210Sstevel@tonic-gate 			 * If so, reduce no. of tokens, dequeue the packet,
29220Sstevel@tonic-gate 			 * send the packet.
29230Sstevel@tonic-gate 			 */
29240Sstevel@tonic-gate 			t->tbf_n_tok -= len;
29250Sstevel@tonic-gate 
29260Sstevel@tonic-gate 			t->tbf_q = mp->b_next;
29270Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
29280Sstevel@tonic-gate 				t->tbf_t = NULL;
29290Sstevel@tonic-gate 			}
29300Sstevel@tonic-gate 			mp->b_next = NULL;
29310Sstevel@tonic-gate 			/* Exit mutex before sending packet, then re-enter */
29320Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
29330Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
29340Sstevel@tonic-gate 			mutex_enter(&t->tbf_lock);
29350Sstevel@tonic-gate 		} else
29360Sstevel@tonic-gate 			break;
29370Sstevel@tonic-gate 	}
29380Sstevel@tonic-gate }
29390Sstevel@tonic-gate 
29400Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer.  */
29410Sstevel@tonic-gate static void
29420Sstevel@tonic-gate tbf_reprocess_q(void *arg)
29430Sstevel@tonic-gate {
29440Sstevel@tonic-gate 	struct vif *vifp = arg;
29453448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29465240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29470Sstevel@tonic-gate 
29480Sstevel@tonic-gate 	mutex_enter(&vifp->v_tbf->tbf_lock);
29490Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
29500Sstevel@tonic-gate 	tbf_update_tokens(vifp);
29510Sstevel@tonic-gate 
29520Sstevel@tonic-gate 	tbf_process_q(vifp);
29530Sstevel@tonic-gate 
29540Sstevel@tonic-gate 	if (vifp->v_tbf->tbf_q_len > 0) {
29550Sstevel@tonic-gate 		vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
29560Sstevel@tonic-gate 		    TBF_REPROCESS);
29570Sstevel@tonic-gate 	}
29580Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
29590Sstevel@tonic-gate 
29603448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
29615240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29620Sstevel@tonic-gate 		    "tbf_reprcess_q: vif %ld timeout id = %p",
29633448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id);
29640Sstevel@tonic-gate 	}
29650Sstevel@tonic-gate }
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate /*
29680Sstevel@tonic-gate  * Function that will selectively discard a member of the tbf queue,
29690Sstevel@tonic-gate  * based on the precedence value and the priority.
29700Sstevel@tonic-gate  *
29710Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
29720Sstevel@tonic-gate  */
29730Sstevel@tonic-gate static int
29740Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
29750Sstevel@tonic-gate {
29760Sstevel@tonic-gate 	uint_t		p;
29770Sstevel@tonic-gate 	struct tbf		*t = vifp->v_tbf;
29780Sstevel@tonic-gate 	mblk_t		**np;
29790Sstevel@tonic-gate 	mblk_t		*last, *mp;
29803448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29815240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29823448Sdh155122 
29833448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
29845240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29850Sstevel@tonic-gate 		    "dq_sel: vif %ld dst 0x%x",
29863448Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst));
29870Sstevel@tonic-gate 	}
29880Sstevel@tonic-gate 
29890Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29900Sstevel@tonic-gate 	p = priority(vifp, ipha);
29910Sstevel@tonic-gate 
29920Sstevel@tonic-gate 	np = &t->tbf_q;
29930Sstevel@tonic-gate 	last = NULL;
29940Sstevel@tonic-gate 	while ((mp = *np) != NULL) {
29950Sstevel@tonic-gate 		if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) {
29960Sstevel@tonic-gate 			*np = mp->b_next;
29970Sstevel@tonic-gate 			/* If removing the last packet, fix the tail pointer */
29980Sstevel@tonic-gate 			if (mp == t->tbf_t)
29990Sstevel@tonic-gate 				t->tbf_t = last;
30000Sstevel@tonic-gate 			mp->b_prev = mp->b_next = NULL;
30010Sstevel@tonic-gate 			freemsg(mp);
30020Sstevel@tonic-gate 			/*
30030Sstevel@tonic-gate 			 * It's impossible for the queue to be empty, but
30040Sstevel@tonic-gate 			 * we check anyway.
30050Sstevel@tonic-gate 			 */
30060Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
30070Sstevel@tonic-gate 				t->tbf_t = NULL;
30080Sstevel@tonic-gate 			}
30093448Sdh155122 			ipst->ips_mrtstat->mrts_drop_sel++;
30100Sstevel@tonic-gate 			return (1);
30110Sstevel@tonic-gate 		}
30120Sstevel@tonic-gate 		np = &mp->b_next;
30130Sstevel@tonic-gate 		last = mp;
30140Sstevel@tonic-gate 	}
30150Sstevel@tonic-gate 	return (0);
30160Sstevel@tonic-gate }
30170Sstevel@tonic-gate 
30180Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint.  */
30190Sstevel@tonic-gate static void
30200Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp)
30210Sstevel@tonic-gate {
30220Sstevel@tonic-gate 	ipif_t  *ipif;
30233448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
30245240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
30250Sstevel@tonic-gate 
30260Sstevel@tonic-gate 	/* If encap tunnel options */
30270Sstevel@tonic-gate 	if (vifp->v_flags & VIFF_TUNNEL)  {
30283448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
30295240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30300Sstevel@tonic-gate 			    "tbf_send_pkt: ENCAP tunnel vif %ld",
30313448Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs));
30320Sstevel@tonic-gate 		}
30330Sstevel@tonic-gate 
30340Sstevel@tonic-gate 		/*
30350Sstevel@tonic-gate 		 * Feed into ip_wput which will set the ident field and
30360Sstevel@tonic-gate 		 * checksum the encapsulating header.
30370Sstevel@tonic-gate 		 * BSD gets the cached route vifp->v_route from ip_output()
30380Sstevel@tonic-gate 		 * to speed up route table lookups. Not necessary in SunOS 5.x.
30390Sstevel@tonic-gate 		 */
30400Sstevel@tonic-gate 		put(vifp->v_ipif->ipif_wq, mp);
30410Sstevel@tonic-gate 		return;
30420Sstevel@tonic-gate 
30430Sstevel@tonic-gate 		/* phyint */
30440Sstevel@tonic-gate 	} else {
30450Sstevel@tonic-gate 		/* Need to loop back to members on the outgoing interface. */
30460Sstevel@tonic-gate 		ipha_t  *ipha;
30470Sstevel@tonic-gate 		ipaddr_t    dst;
30480Sstevel@tonic-gate 		ipha  = (ipha_t *)mp->b_rptr;
30490Sstevel@tonic-gate 		dst  = ipha->ipha_dst;
30500Sstevel@tonic-gate 		ipif = vifp->v_ipif;
30510Sstevel@tonic-gate 
30520Sstevel@tonic-gate 		mutex_enter(&ipif->ipif_ill->ill_lock);
30530Sstevel@tonic-gate 		if (ilm_lookup_ipif(ipif, dst) != NULL) {
30540Sstevel@tonic-gate 			/*
30550Sstevel@tonic-gate 			 * The packet is not yet reassembled, thus we need to
30560Sstevel@tonic-gate 			 * pass it to ip_rput_local for checksum verification
30570Sstevel@tonic-gate 			 * and reassembly (and fanout the user stream).
30580Sstevel@tonic-gate 			 */
30590Sstevel@tonic-gate 			mblk_t 	*mp_loop;
30600Sstevel@tonic-gate 			ire_t	*ire;
30610Sstevel@tonic-gate 
30620Sstevel@tonic-gate 			mutex_exit(&ipif->ipif_ill->ill_lock);
30633448Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
30645240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
30653448Sdh155122 				    SL_TRACE,
30660Sstevel@tonic-gate 				    "tbf_send_pkt: loopback vif %ld",
30673448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs));
30680Sstevel@tonic-gate 			}
30690Sstevel@tonic-gate 			mp_loop = copymsg(mp);
30700Sstevel@tonic-gate 			ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL,
30713448Sdh155122 			    ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst);
30720Sstevel@tonic-gate 
30730Sstevel@tonic-gate 			if (mp_loop != NULL && ire != NULL) {
30740Sstevel@tonic-gate 				IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop,
30750Sstevel@tonic-gate 				    ((ipha_t *)mp_loop->b_rptr),
30760Sstevel@tonic-gate 				    ire, (ill_t *)ipif->ipif_rq->q_ptr);
30770Sstevel@tonic-gate 			} else {
30780Sstevel@tonic-gate 				/* Either copymsg failed or no ire */
30795240Snordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
30803448Sdh155122 				    SL_TRACE,
30810Sstevel@tonic-gate 				    "tbf_send_pkt: mp_loop 0x%p, ire 0x%p "
3082*7240Srh87107 				    "vif %ld\n", (void *)mp_loop, (void *)ire,
30833448Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs));
30840Sstevel@tonic-gate 			}
30850Sstevel@tonic-gate 			if (ire != NULL)
30860Sstevel@tonic-gate 				ire_refrele(ire);
30870Sstevel@tonic-gate 		} else {
30880Sstevel@tonic-gate 			mutex_exit(&ipif->ipif_ill->ill_lock);
30890Sstevel@tonic-gate 		}
30903448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
30915240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30920Sstevel@tonic-gate 			    "tbf_send_pkt: phyint forward  vif %ld dst = 0x%x",
30933448Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst));
30940Sstevel@tonic-gate 		}
30950Sstevel@tonic-gate 		ip_rput_forward_multicast(dst, mp, ipif);
30960Sstevel@tonic-gate 	}
30970Sstevel@tonic-gate }
30980Sstevel@tonic-gate 
30990Sstevel@tonic-gate /*
31000Sstevel@tonic-gate  * Determine the current time and then the elapsed time (between the last time
31010Sstevel@tonic-gate  * and time now).  Update the no. of tokens in the bucket.
31020Sstevel@tonic-gate  */
31030Sstevel@tonic-gate static void
31040Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp)
31050Sstevel@tonic-gate {
31060Sstevel@tonic-gate 	timespec_t	tp;
31070Sstevel@tonic-gate 	hrtime_t	tm;
31080Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
31093448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
31105240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
31110Sstevel@tonic-gate 
31120Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
31130Sstevel@tonic-gate 
31140Sstevel@tonic-gate 	/* Time in secs and nsecs, rate limit in kbits/sec */
31150Sstevel@tonic-gate 	gethrestime(&tp);
31160Sstevel@tonic-gate 
31170Sstevel@tonic-gate 	/*LINTED*/
31180Sstevel@tonic-gate 	TV_DELTA(tp, t->tbf_last_pkt_t, tm);
31190Sstevel@tonic-gate 
31200Sstevel@tonic-gate 	/*
31210Sstevel@tonic-gate 	 * This formula is actually
31220Sstevel@tonic-gate 	 * "time in seconds" * "bytes/second".  Scaled for nsec.
31230Sstevel@tonic-gate 	 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
31240Sstevel@tonic-gate 	 *
31250Sstevel@tonic-gate 	 * The (1000/1024) was introduced in add_vif to optimize
31260Sstevel@tonic-gate 	 * this divide into a shift.
31270Sstevel@tonic-gate 	 */
31280Sstevel@tonic-gate 	t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
31290Sstevel@tonic-gate 	t->tbf_last_pkt_t = tp;
31300Sstevel@tonic-gate 
31310Sstevel@tonic-gate 	if (t->tbf_n_tok > MAX_BKT_SIZE)
31320Sstevel@tonic-gate 		t->tbf_n_tok = MAX_BKT_SIZE;
31333448Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
31345240Snordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
31350Sstevel@tonic-gate 		    "tbf_update_tok: tm %lld tok %d vif %ld",
31363448Sdh155122 		    tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs));
31370Sstevel@tonic-gate 	}
31380Sstevel@tonic-gate }
31390Sstevel@tonic-gate 
31400Sstevel@tonic-gate /*
31410Sstevel@tonic-gate  * Priority currently is based on port nos.
31420Sstevel@tonic-gate  * Different forwarding mechanisms have different ways
31430Sstevel@tonic-gate  * of obtaining the port no. Hence, the vif must be
31440Sstevel@tonic-gate  * given along with the packet itself.
31450Sstevel@tonic-gate  *
31460Sstevel@tonic-gate  */
31470Sstevel@tonic-gate static int
31480Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha)
31490Sstevel@tonic-gate {
31500Sstevel@tonic-gate 	int prio;
31513448Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
31525240Snordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
31530Sstevel@tonic-gate 
31540Sstevel@tonic-gate 	/* Temporary hack; may add general packet classifier some day */
31550Sstevel@tonic-gate 
31560Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock));
31570Sstevel@tonic-gate 
31580Sstevel@tonic-gate 	/*
31590Sstevel@tonic-gate 	 * The UDP port space is divided up into four priority ranges:
31600Sstevel@tonic-gate 	 * [0, 16384)	: unclassified - lowest priority
31610Sstevel@tonic-gate 	 * [16384, 32768)	: audio - highest priority
31620Sstevel@tonic-gate 	 * [32768, 49152)	: whiteboard - medium priority
31630Sstevel@tonic-gate 	 * [49152, 65536)	: video - low priority
31640Sstevel@tonic-gate 	 */
31650Sstevel@tonic-gate 
31660Sstevel@tonic-gate 	if (ipha->ipha_protocol == IPPROTO_UDP) {
31670Sstevel@tonic-gate 		struct udphdr *udp =
31680Sstevel@tonic-gate 		    (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha));
31690Sstevel@tonic-gate 		switch (ntohs(udp->uh_dport) & 0xc000) {
31700Sstevel@tonic-gate 		case 0x4000:
31710Sstevel@tonic-gate 			prio = 70;
31720Sstevel@tonic-gate 			break;
31730Sstevel@tonic-gate 		case 0x8000:
31740Sstevel@tonic-gate 			prio = 60;
31750Sstevel@tonic-gate 			break;
31760Sstevel@tonic-gate 		case 0xc000:
31770Sstevel@tonic-gate 			prio = 55;
31780Sstevel@tonic-gate 			break;
31790Sstevel@tonic-gate 		default:
31800Sstevel@tonic-gate 			prio = 50;
31810Sstevel@tonic-gate 			break;
31820Sstevel@tonic-gate 		}
31833448Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
31845240Snordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
31850Sstevel@tonic-gate 			    "priority: port %x prio %d\n",
31860Sstevel@tonic-gate 			    ntohs(udp->uh_dport), prio);
31870Sstevel@tonic-gate 		}
31880Sstevel@tonic-gate 	} else
31890Sstevel@tonic-gate 		prio = 50;  /* default priority */
31900Sstevel@tonic-gate 	return (prio);
31910Sstevel@tonic-gate }
31920Sstevel@tonic-gate 
31930Sstevel@tonic-gate /*
31940Sstevel@tonic-gate  * End of token bucket filter modifications
31950Sstevel@tonic-gate  */
31960Sstevel@tonic-gate 
31970Sstevel@tonic-gate 
31980Sstevel@tonic-gate 
31990Sstevel@tonic-gate /*
32000Sstevel@tonic-gate  * Produces data for netstat -M.
32010Sstevel@tonic-gate  */
32020Sstevel@tonic-gate int
32033448Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst)
32040Sstevel@tonic-gate {
32053448Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
32063448Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
32073448Sdh155122 	if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat,
32083448Sdh155122 		sizeof (struct mrtstat))) {
32090Sstevel@tonic-gate 		ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
32103448Sdh155122 		    (size_t)sizeof (struct mrtstat)));
32110Sstevel@tonic-gate 		return (0);
32120Sstevel@tonic-gate 	}
32130Sstevel@tonic-gate 	return (1);
32140Sstevel@tonic-gate }
32150Sstevel@tonic-gate 
32160Sstevel@tonic-gate /*
32170Sstevel@tonic-gate  * Sends info for SNMP's MIB.
32180Sstevel@tonic-gate  */
32190Sstevel@tonic-gate int
32203448Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst)
32210Sstevel@tonic-gate {
32220Sstevel@tonic-gate 	struct vifctl 	vi;
32230Sstevel@tonic-gate 	vifi_t		vifi;
32240Sstevel@tonic-gate 
32253448Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
32263448Sdh155122 	for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) {
32273448Sdh155122 		if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0)
32280Sstevel@tonic-gate 			continue;
32290Sstevel@tonic-gate 		/*
32300Sstevel@tonic-gate 		 * No locks here, an approximation is fine.
32310Sstevel@tonic-gate 		 */
32320Sstevel@tonic-gate 		vi.vifc_vifi = vifi;
32333448Sdh155122 		vi.vifc_flags = ipst->ips_vifs[vifi].v_flags;
32343448Sdh155122 		vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold;
32353448Sdh155122 		vi.vifc_rate_limit	= ipst->ips_vifs[vifi].v_rate_limit;
32363448Sdh155122 		vi.vifc_lcl_addr	= ipst->ips_vifs[vifi].v_lcl_addr;
32373448Sdh155122 		vi.vifc_rmt_addr	= ipst->ips_vifs[vifi].v_rmt_addr;
32383448Sdh155122 		vi.vifc_pkt_in		= ipst->ips_vifs[vifi].v_pkt_in;
32393448Sdh155122 		vi.vifc_pkt_out		= ipst->ips_vifs[vifi].v_pkt_out;
32400Sstevel@tonic-gate 
32410Sstevel@tonic-gate 		if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) {
32420Sstevel@tonic-gate 			ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
32430Sstevel@tonic-gate 			    (size_t)sizeof (vi)));
32440Sstevel@tonic-gate 			return (0);
32450Sstevel@tonic-gate 		}
32460Sstevel@tonic-gate 	}
32473448Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
32480Sstevel@tonic-gate 	return (1);
32490Sstevel@tonic-gate }
32500Sstevel@tonic-gate 
32510Sstevel@tonic-gate /*
32520Sstevel@tonic-gate  * Called by ip_snmp_get to send up multicast routing table.
32530Sstevel@tonic-gate  */
32540Sstevel@tonic-gate int
32553448Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst)
32560Sstevel@tonic-gate {
32570Sstevel@tonic-gate 	int			i, j;
32580Sstevel@tonic-gate 	struct mfc		*rt;
32590Sstevel@tonic-gate 	struct mfcctl	mfcc;
32600Sstevel@tonic-gate 
32610Sstevel@tonic-gate 	/*
32620Sstevel@tonic-gate 	 * Make sure multicast has not been turned off.
32630Sstevel@tonic-gate 	 */
32643448Sdh155122 	if (is_mrouter_off(ipst))
32650Sstevel@tonic-gate 		return (1);
32660Sstevel@tonic-gate 
32670Sstevel@tonic-gate 	/* Loop over all hash buckets and their chains */
32680Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
32693448Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
32703448Sdh155122 		for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) {
32710Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
32720Sstevel@tonic-gate 			if (rt->mfc_rte != NULL ||
32730Sstevel@tonic-gate 			    (rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
32740Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
32750Sstevel@tonic-gate 				continue;
32760Sstevel@tonic-gate 			}
32770Sstevel@tonic-gate 			mfcc.mfcc_origin = rt->mfc_origin;
32780Sstevel@tonic-gate 			mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp;
32790Sstevel@tonic-gate 			mfcc.mfcc_parent = rt->mfc_parent;
32800Sstevel@tonic-gate 			mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt;
32813448Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
32823448Sdh155122 			for (j = 0; j < (int)ipst->ips_numvifs; j++)
32830Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = rt->mfc_ttls[j];
32843448Sdh155122 			for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++)
32850Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = 0;
32863448Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
32870Sstevel@tonic-gate 
32880Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
32890Sstevel@tonic-gate 			if (!snmp_append_data(mp, (char *)&mfcc,
32900Sstevel@tonic-gate 			    sizeof (mfcc))) {
32913448Sdh155122 				MFCB_REFRELE(&ipst->ips_mfcs[i]);
32920Sstevel@tonic-gate 				ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
32930Sstevel@tonic-gate 				    (size_t)sizeof (mfcc)));
32940Sstevel@tonic-gate 				return (0);
32950Sstevel@tonic-gate 			}
32960Sstevel@tonic-gate 		}
32973448Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
32980Sstevel@tonic-gate 	}
32990Sstevel@tonic-gate 	return (1);
33000Sstevel@tonic-gate }
3301