xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_mroute.c (revision e82490700e19f1b8a2cef6102f4726144d281988)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
221eee170aSErik Nordmark  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
2715c07adcSJohn Levon  * Copyright (c) 2018, Joyent, Inc.
28*e8249070SRobert Mustacchi  * Copyright 2024 Oxide Computer Company
2915c07adcSJohn Levon  */
3015c07adcSJohn Levon 
3115c07adcSJohn Levon /*
327c478bd9Sstevel@tonic-gate  * Procedures for the kernel part of DVMRP,
337c478bd9Sstevel@tonic-gate  * a Distance-Vector Multicast Routing Protocol.
347c478bd9Sstevel@tonic-gate  * (See RFC-1075)
357c478bd9Sstevel@tonic-gate  * Written by David Waitzman, BBN Labs, August 1988.
367c478bd9Sstevel@tonic-gate  * Modified by Steve Deering, Stanford, February 1989.
377c478bd9Sstevel@tonic-gate  * Modified by Mark J. Steiglitz, Stanford, May, 1991
387c478bd9Sstevel@tonic-gate  * Modified by Van Jacobson, LBL, January 1993
397c478bd9Sstevel@tonic-gate  * Modified by Ajit Thyagarajan, PARC, August 1993
407c478bd9Sstevel@tonic-gate  * Modified by Bill Fenner, PARC, April 1995
417c478bd9Sstevel@tonic-gate  *
427c478bd9Sstevel@tonic-gate  * MROUTING 3.5
437c478bd9Sstevel@tonic-gate  */
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate /*
467c478bd9Sstevel@tonic-gate  * TODO
477c478bd9Sstevel@tonic-gate  * - function pointer field in vif, void *vif_sendit()
487c478bd9Sstevel@tonic-gate  */
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate #include <sys/types.h>
517c478bd9Sstevel@tonic-gate #include <sys/stream.h>
527c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
537c478bd9Sstevel@tonic-gate #include <sys/strlog.h>
547c478bd9Sstevel@tonic-gate #include <sys/systm.h>
557c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
567c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
577c478bd9Sstevel@tonic-gate #include <sys/zone.h>
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate #include <sys/param.h>
607c478bd9Sstevel@tonic-gate #include <sys/socket.h>
617c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
627c478bd9Sstevel@tonic-gate #include <sys/debug.h>
637c478bd9Sstevel@tonic-gate #include <net/if.h>
647c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
657c478bd9Sstevel@tonic-gate #include <netinet/in.h>
667c478bd9Sstevel@tonic-gate #include <net/if_dl.h>
677c478bd9Sstevel@tonic-gate 
68bd670b35SErik Nordmark #include <inet/ipsec_impl.h>
697c478bd9Sstevel@tonic-gate #include <inet/common.h>
707c478bd9Sstevel@tonic-gate #include <inet/mi.h>
717c478bd9Sstevel@tonic-gate #include <inet/nd.h>
726e91bba0SGirish Moodalbail #include <inet/tunables.h>
737c478bd9Sstevel@tonic-gate #include <inet/mib2.h>
747c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
757c478bd9Sstevel@tonic-gate #include <inet/ip.h>
767c478bd9Sstevel@tonic-gate #include <inet/snmpcom.h>
777c478bd9Sstevel@tonic-gate 
787c478bd9Sstevel@tonic-gate #include <netinet/igmp.h>
797c478bd9Sstevel@tonic-gate #include <netinet/igmp_var.h>
807c478bd9Sstevel@tonic-gate #include <netinet/udp.h>
817c478bd9Sstevel@tonic-gate #include <netinet/ip_mroute.h>
827c478bd9Sstevel@tonic-gate #include <inet/ip_multi.h>
837c478bd9Sstevel@tonic-gate #include <inet/ip_ire.h>
84bd670b35SErik Nordmark #include <inet/ip_ndp.h>
857c478bd9Sstevel@tonic-gate #include <inet/ip_if.h>
867c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate #include <netinet/pim.h>
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate /*
927c478bd9Sstevel@tonic-gate  * MT Design:
937c478bd9Sstevel@tonic-gate  *
947c478bd9Sstevel@tonic-gate  * There are three main data structures viftable, mfctable and tbftable that
957c478bd9Sstevel@tonic-gate  * need to be protected against MT races.
967c478bd9Sstevel@tonic-gate  *
 * viftable is a fixed length array of vif structs. There is no lock to protect
 * the whole array, instead each struct is protected by its own individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines the
 * current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use.  When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. See
 * lock_good_vif() and unlock_good_vif().
1107c478bd9Sstevel@tonic-gate  *
1117c478bd9Sstevel@tonic-gate  * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
1127c478bd9Sstevel@tonic-gate  * of the vif struct.
1137c478bd9Sstevel@tonic-gate  *
1147c478bd9Sstevel@tonic-gate  * tbftable is also a fixed length array of tbf structs and is only accessed
1157c478bd9Sstevel@tonic-gate  * via v_tbf.  It is protected by its own lock tbf_lock.
1167c478bd9Sstevel@tonic-gate  *
 * Lock Ordering is
 * v_lock --> tbf_lock
 * v_lock --> ill_lock
1207c478bd9Sstevel@tonic-gate  *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
 * protect the struct elements.
1267c478bd9Sstevel@tonic-gate  *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
1327c478bd9Sstevel@tonic-gate  *
1337c478bd9Sstevel@tonic-gate  * Locking Hierarchy:
1347c478bd9Sstevel@tonic-gate  * The bucket lock should be acquired before the mfc struct lock.
1357c478bd9Sstevel@tonic-gate  * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
1367c478bd9Sstevel@tonic-gate  * operations on the bucket struct.
1377c478bd9Sstevel@tonic-gate  *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
1407c478bd9Sstevel@tonic-gate  *
 * The statistics are not currently protected by a lock,
 * causing the stats to be approximate, not exact.
1437c478bd9Sstevel@tonic-gate  */
1447c478bd9Sstevel@tonic-gate 
/*
 * Marker value meaning "no vif". It is defined as MAXVIFS; note that the vif
 * table is allocated with MAXVIFS + 1 entries in ip_mrouter_stack_init().
 */
#define	NO_VIF	MAXVIFS		/* from mrouted, no route for src */
1467c478bd9Sstevel@tonic-gate 
1477c478bd9Sstevel@tonic-gate /*
1487c478bd9Sstevel@tonic-gate  * Timeouts:
1497c478bd9Sstevel@tonic-gate  *	Upcall timeouts - BSD uses boolean_t mfc->expire and
1507c478bd9Sstevel@tonic-gate  *	nexpire[MFCTBLSIZE], the number of times expire has been called.
1517c478bd9Sstevel@tonic-gate  *	SunOS 5.x uses mfc->timeout for each mfc.
1527c478bd9Sstevel@tonic-gate  *	Some Unixes are limited in the number of simultaneous timeouts
1537c478bd9Sstevel@tonic-gate  *	that can be run, SunOS 5.x does not have this restriction.
1547c478bd9Sstevel@tonic-gate  */
1557c478bd9Sstevel@tonic-gate 
/*
 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE,
 * which with the values below is 6 * (hz/4) ticks, i.e. about 1.5 seconds.
 */
#define		EXPIRE_TIMEOUT	(hz/4)	/* 4x / second	*/
#define		UPCALL_EXPIRE	6	/* number of timeouts	*/
1637c478bd9Sstevel@tonic-gate 
/*
 * Hash a (source, group) address pair into an mfc table index: each address
 * is folded with itself shifted right by 10 and by 20 bits, the two results
 * are XORed together, and MFCHASHMOD reduces the value to the table size.
 */
#define	MFCHASH(a, g) \
	MFCHASHMOD((a) ^ ((a) >> 10) ^ ((a) >> 20) ^ \
	(g) ^ ((g) >> 10) ^ ((g) >> 20))
1697c478bd9Sstevel@tonic-gate 
/*
 * Token bucket filter reprocessing interval, in clock ticks. Presumably the
 * delay used when scheduling tbf_reprocess_q() -- confirm against its callers.
 */
#define			TBF_REPROCESS	(hz / 100)	/* 100x /second	*/

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff
1747c478bd9Sstevel@tonic-gate 
1757c478bd9Sstevel@tonic-gate /* Function declarations */
176f4b3ec61Sdh155122 static int	add_mfc(struct mfcctl *, ip_stack_t *);
177bd670b35SErik Nordmark static int	add_vif(struct vifctl *, conn_t *, ip_stack_t *);
178f4b3ec61Sdh155122 static int	del_mfc(struct mfcctl *, ip_stack_t *);
179bd670b35SErik Nordmark static int	del_vif(vifi_t *, ip_stack_t *);
1807c478bd9Sstevel@tonic-gate static void	del_vifp(struct vif *);
1817c478bd9Sstevel@tonic-gate static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1827c478bd9Sstevel@tonic-gate static void	expire_upcalls(void *);
183f4b3ec61Sdh155122 static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
184f4b3ec61Sdh155122 static void	free_queue(struct mfc *);
185f4b3ec61Sdh155122 static int	get_assert(uchar_t *, ip_stack_t *);
186f4b3ec61Sdh155122 static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
187f4b3ec61Sdh155122 static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
1887c478bd9Sstevel@tonic-gate static int	get_version(uchar_t *);
189f4b3ec61Sdh155122 static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
1907c478bd9Sstevel@tonic-gate static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
1917c478bd9Sstevel@tonic-gate 		    ipaddr_t, struct mfc *);
192fc80c0dfSnordmark static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
1937c478bd9Sstevel@tonic-gate static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
194bd670b35SErik Nordmark static int	register_mforward(mblk_t *, ip_recv_attr_t *);
1957c478bd9Sstevel@tonic-gate static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
196f4b3ec61Sdh155122 static int	set_assert(int *, ip_stack_t *);
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate /*
1997c478bd9Sstevel@tonic-gate  * Token Bucket Filter functions
2007c478bd9Sstevel@tonic-gate  */
2017c478bd9Sstevel@tonic-gate static int  priority(struct vif *, ipha_t *);
2027c478bd9Sstevel@tonic-gate static void tbf_control(struct vif *, mblk_t *, ipha_t *);
2037c478bd9Sstevel@tonic-gate static int  tbf_dq_sel(struct vif *, ipha_t *);
2047c478bd9Sstevel@tonic-gate static void tbf_process_q(struct vif *);
2057c478bd9Sstevel@tonic-gate static void tbf_queue(struct vif *, mblk_t *);
2067c478bd9Sstevel@tonic-gate static void tbf_reprocess_q(void *);
2077c478bd9Sstevel@tonic-gate static void tbf_send_packet(struct vif *, mblk_t *);
2087c478bd9Sstevel@tonic-gate static void tbf_update_tokens(struct vif *);
2097c478bd9Sstevel@tonic-gate static void release_mfc(struct mfcb *);
2107c478bd9Sstevel@tonic-gate 
211f4b3ec61Sdh155122 static boolean_t is_mrouter_off(ip_stack_t *);
2127c478bd9Sstevel@tonic-gate /*
2137c478bd9Sstevel@tonic-gate  * Encapsulation packets
2147c478bd9Sstevel@tonic-gate  */
2157c478bd9Sstevel@tonic-gate 
#define	ENCAP_TTL	64	/* TTL used in the prototype header below */

/*
 * prototype IP hdr for encapsulated packets
 *
 * The initializer is positional, so the values follow the member order of
 * ipha_t. Members after the checksum are not listed and are therefore
 * zero-initialized; presumably they are the source and destination addresses
 * and are filled in per packet by the sender -- confirm in encap_send().
 */
static ipha_t multicast_encap_iphdr = {
	IP_SIMPLE_HDR_VERSION,		/* presumably version + hdr length */
	0,				/* tos */
	sizeof (ipha_t),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, IPPROTO_ENCAP,	/* ttl, protocol */
	0,				/* checksum */
};
2287c478bd9Sstevel@tonic-gate 
/*
 * Rate limit for assert notification messages, in nsec
 * (3000000000 nsec = 3 seconds).
 */
#define	ASSERT_MSG_TIME		3000000000
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate 
/*
 * Take a reference on a vif: v_refcnt is incremented while holding v_lock.
 * The argument is evaluated more than once.
 */
#define	VIF_REFHOLD(vifp) {			\
	mutex_enter(&(vifp)->v_lock);		\
	++(vifp)->v_refcnt;			\
	mutex_exit(&(vifp)->v_lock);		\
}
2407c478bd9Sstevel@tonic-gate 
/*
 * Drop a reference on a vif whose v_lock is already held by the caller.
 * If this was the last reference and the vif is marked VIF_MARK_CONDEMNED,
 * the vif is handed to del_vifp() (which presumably drops v_lock itself);
 * otherwise v_lock is released here.
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	if (--(vifp)->v_refcnt != 0 ||				\
	    !((vifp)->v_marks & VIF_MARK_CONDEMNED)) {		\
		mutex_exit(&(vifp)->v_lock);			\
	} else {						\
		del_vifp(vifp);					\
	}							\
}
2507c478bd9Sstevel@tonic-gate 
/*
 * Drop a reference on a vif. Takes v_lock and then performs exactly the
 * locked release: the count is decremented, a condemned vif whose count
 * reached zero goes to del_vifp(), and in every other case v_lock is
 * released again.
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	VIF_REFRELE_LOCKED(vifp);				\
}
2617c478bd9Sstevel@tonic-gate 
/*
 * Register a walker on an mfc hash bucket: mfcb_refcnt is incremented under
 * mfcb_lock. The ASSERT catches the counter wrapping around to zero.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	++(mfcb)->mfcb_refcnt;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}
2687c478bd9Sstevel@tonic-gate 
/*
 * Unregister a walker from an mfc hash bucket. When the last walker leaves
 * a bucket marked MFCB_MARK_CONDEMNED, release_mfc() is called with
 * mfcb_lock still held; the lock is dropped afterwards in all cases.
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	(mfcb)->mfcb_refcnt--;					\
	if ((mfcb)->mfcb_refcnt == 0 &&				\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED) != 0) {	\
		release_mfc(mfcb);				\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}
2787c478bd9Sstevel@tonic-gate 
/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls (mfc_rte != NULL) and entries that are
 * marked condemned.
 * Type of service parameter to be added in the future!
 *
 *	mfcbp	pointer to the hash bucket (struct mfcb) to search
 *	o, g	origin and group addresses to match against s_addr
 *	rt	lvalue of type struct mfc *; set to the first matching entry,
 *		or to NULL when the bucket holds no usable match
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as &table[i] or (a | b) can be passed safely. The o and g arguments are
 * evaluated once per chain entry, so they must not have side effects.
 * The macro does no locking itself.
 */
#define	MFCFIND(mfcbp, o, g, rt) { \
	struct mfc *_mb_rt; \
 \
	(rt) = NULL; \
	for (_mb_rt = (mfcbp)->mfcb_mfc; _mb_rt != NULL; \
	    _mb_rt = _mb_rt->mfc_next) { \
		if ((_mb_rt->mfc_origin.s_addr == (o)) && \
		    (_mb_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_mb_rt->mfc_rte == NULL) && \
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
			(rt) = _mb_rt; \
			break; \
		} \
	} \
}
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate /*
3027c478bd9Sstevel@tonic-gate  * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
3037c478bd9Sstevel@tonic-gate  * are inefficient. We use gethrestime() which returns a timespec_t with
3047c478bd9Sstevel@tonic-gate  * sec and nsec, the resolution is machine dependent.
3057c478bd9Sstevel@tonic-gate  * The following 2 macros have been changed to use nsec instead of usec.
3067c478bd9Sstevel@tonic-gate  */
/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 *
 * TV_DELTA(a, b, delta) stores a - b, in nanoseconds, into delta, where a
 * and b have tv_sec and tv_nsec members. Delta should be a hrtime_t.
 *
 * The seconds difference is kept in a 64-bit variable and the general case
 * is multiplied in 64 bits: an int multiplication by 1000000000 overflows
 * as soon as the difference exceeds two seconds.
 */
#define	TV_DELTA(a, b, delta) { \
	int64_t xxs; \
 \
	(delta) = (a).tv_nsec - (b).tv_nsec; \
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
		switch (xxs) { \
		case 2: \
		    (delta) += 1000000000; \
		    /*FALLTHROUGH*/ \
		case 1: \
		    (delta) += 1000000000; \
		    break; \
		default: \
		    (delta) += (1000000000LL * xxs); \
		} \
	} \
}
3297c478bd9Sstevel@tonic-gate 
/*
 * True when time a is earlier than time b: either a has fewer seconds, or
 * the seconds do not exceed b's and a has fewer nanoseconds.
 */
#define	TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	((a).tv_sec <= (b).tv_sec && (a).tv_nsec < (b).tv_nsec))
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate /*
3347c478bd9Sstevel@tonic-gate  * Handle MRT setsockopt commands to modify the multicast routing tables.
3357c478bd9Sstevel@tonic-gate  */
3367c478bd9Sstevel@tonic-gate int
ip_mrouter_set(int cmd,conn_t * connp,int checkonly,uchar_t * data,int datalen)337bd670b35SErik Nordmark ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data,
338bd670b35SErik Nordmark     int datalen)
3397c478bd9Sstevel@tonic-gate {
340fc80c0dfSnordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
341f4b3ec61Sdh155122 
342f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
343fc80c0dfSnordmark 	if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) {
344f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3457c478bd9Sstevel@tonic-gate 		return (EACCES);
3467c478bd9Sstevel@tonic-gate 	}
347f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3487c478bd9Sstevel@tonic-gate 
3497c478bd9Sstevel@tonic-gate 	if (checkonly) {
3507c478bd9Sstevel@tonic-gate 		/*
3517c478bd9Sstevel@tonic-gate 		 * do not do operation, just pretend to - new T_CHECK
3527c478bd9Sstevel@tonic-gate 		 * Note: Even routines further on can probably fail but
3537c478bd9Sstevel@tonic-gate 		 * this T_CHECK stuff is only to please XTI so it not
3547c478bd9Sstevel@tonic-gate 		 * necessary to be perfect.
3557c478bd9Sstevel@tonic-gate 		 */
3567c478bd9Sstevel@tonic-gate 		switch (cmd) {
3577c478bd9Sstevel@tonic-gate 		case MRT_INIT:
3587c478bd9Sstevel@tonic-gate 		case MRT_DONE:
3597c478bd9Sstevel@tonic-gate 		case MRT_ADD_VIF:
3607c478bd9Sstevel@tonic-gate 		case MRT_DEL_VIF:
3617c478bd9Sstevel@tonic-gate 		case MRT_ADD_MFC:
3627c478bd9Sstevel@tonic-gate 		case MRT_DEL_MFC:
3637c478bd9Sstevel@tonic-gate 		case MRT_ASSERT:
3647c478bd9Sstevel@tonic-gate 			return (0);
3657c478bd9Sstevel@tonic-gate 		default:
3667c478bd9Sstevel@tonic-gate 			return (EOPNOTSUPP);
3677c478bd9Sstevel@tonic-gate 		}
3687c478bd9Sstevel@tonic-gate 	}
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate 	/*
3717c478bd9Sstevel@tonic-gate 	 * make sure no command is issued after multicast routing has been
3727c478bd9Sstevel@tonic-gate 	 * turned off.
3737c478bd9Sstevel@tonic-gate 	 */
3747c478bd9Sstevel@tonic-gate 	if (cmd != MRT_INIT && cmd != MRT_DONE) {
375f4b3ec61Sdh155122 		if (is_mrouter_off(ipst))
3767c478bd9Sstevel@tonic-gate 			return (EINVAL);
3777c478bd9Sstevel@tonic-gate 	}
3787c478bd9Sstevel@tonic-gate 
3797c478bd9Sstevel@tonic-gate 	switch (cmd) {
380fc80c0dfSnordmark 	case MRT_INIT:	return (ip_mrouter_init(connp, data, datalen, ipst));
381bd670b35SErik Nordmark 	case MRT_DONE:	return (ip_mrouter_done(ipst));
382bd670b35SErik Nordmark 	case MRT_ADD_VIF:  return (add_vif((struct vifctl *)data, connp, ipst));
383bd670b35SErik Nordmark 	case MRT_DEL_VIF:  return (del_vif((vifi_t *)data, ipst));
384f4b3ec61Sdh155122 	case MRT_ADD_MFC:  return (add_mfc((struct mfcctl *)data, ipst));
385f4b3ec61Sdh155122 	case MRT_DEL_MFC:  return (del_mfc((struct mfcctl *)data, ipst));
386f4b3ec61Sdh155122 	case MRT_ASSERT:   return (set_assert((int *)data, ipst));
3877c478bd9Sstevel@tonic-gate 	default:	   return (EOPNOTSUPP);
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate }
3907c478bd9Sstevel@tonic-gate 
3917c478bd9Sstevel@tonic-gate /*
3927c478bd9Sstevel@tonic-gate  * Handle MRT getsockopt commands
3937c478bd9Sstevel@tonic-gate  */
3947c478bd9Sstevel@tonic-gate int
ip_mrouter_get(int cmd,conn_t * connp,uchar_t * data)395bd670b35SErik Nordmark ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data)
3967c478bd9Sstevel@tonic-gate {
397fc80c0dfSnordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
398f4b3ec61Sdh155122 
399fc80c0dfSnordmark 	if (connp != ipst->ips_ip_g_mrouter)
4007c478bd9Sstevel@tonic-gate 		return (EACCES);
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate 	switch (cmd) {
4037c478bd9Sstevel@tonic-gate 	case MRT_VERSION:	return (get_version((uchar_t *)data));
404f4b3ec61Sdh155122 	case MRT_ASSERT:	return (get_assert((uchar_t *)data, ipst));
4057c478bd9Sstevel@tonic-gate 	default:		return (EOPNOTSUPP);
4067c478bd9Sstevel@tonic-gate 	}
4077c478bd9Sstevel@tonic-gate }
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate /*
4107c478bd9Sstevel@tonic-gate  * Handle ioctl commands to obtain information from the cache.
4117c478bd9Sstevel@tonic-gate  * Called with shared access to IP. These are read_only ioctls.
4127c478bd9Sstevel@tonic-gate  */
4137c478bd9Sstevel@tonic-gate /* ARGSUSED */
4147c478bd9Sstevel@tonic-gate int
mrt_ioctl(ipif_t * ipif,sin_t * sin,queue_t * q,mblk_t * mp,ip_ioctl_cmd_t * ipip,void * if_req)4157c478bd9Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
4167c478bd9Sstevel@tonic-gate     ip_ioctl_cmd_t *ipip, void *if_req)
4177c478bd9Sstevel@tonic-gate {
4187c478bd9Sstevel@tonic-gate 	mblk_t	*mp1;
4197c478bd9Sstevel@tonic-gate 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
420fc80c0dfSnordmark 	conn_t		*connp = Q_TO_CONN(q);
421fc80c0dfSnordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate 	/* Existence verified in ip_wput_nondata */
4247c478bd9Sstevel@tonic-gate 	mp1 = mp->b_cont->b_cont;
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 	switch (iocp->ioc_cmd) {
4277c478bd9Sstevel@tonic-gate 	case (SIOCGETVIFCNT):
428f4b3ec61Sdh155122 		return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst));
4297c478bd9Sstevel@tonic-gate 	case (SIOCGETSGCNT):
430f4b3ec61Sdh155122 		return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst));
4317c478bd9Sstevel@tonic-gate 	case (SIOCGETLSGCNT):
432f4b3ec61Sdh155122 		return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst));
4337c478bd9Sstevel@tonic-gate 	default:
4347c478bd9Sstevel@tonic-gate 		return (EINVAL);
4357c478bd9Sstevel@tonic-gate 	}
4367c478bd9Sstevel@tonic-gate }
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate /*
4397c478bd9Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4407c478bd9Sstevel@tonic-gate  */
4417c478bd9Sstevel@tonic-gate static int
get_sg_cnt(struct sioc_sg_req * req,ip_stack_t * ipst)442f4b3ec61Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
4437c478bd9Sstevel@tonic-gate {
4447c478bd9Sstevel@tonic-gate 	struct mfc *rt;
4457c478bd9Sstevel@tonic-gate 	struct mfcb *mfcbp;
4467c478bd9Sstevel@tonic-gate 
447f4b3ec61Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];
4487c478bd9Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
4497c478bd9Sstevel@tonic-gate 	MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt);
4507c478bd9Sstevel@tonic-gate 
4517c478bd9Sstevel@tonic-gate 	if (rt != NULL) {
4527c478bd9Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
4537c478bd9Sstevel@tonic-gate 		req->pktcnt   = rt->mfc_pkt_cnt;
4547c478bd9Sstevel@tonic-gate 		req->bytecnt  = rt->mfc_byte_cnt;
4557c478bd9Sstevel@tonic-gate 		req->wrong_if = rt->mfc_wrong_if;
4567c478bd9Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
4577c478bd9Sstevel@tonic-gate 	} else
4587c478bd9Sstevel@tonic-gate 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
4617c478bd9Sstevel@tonic-gate 	return (0);
4627c478bd9Sstevel@tonic-gate }
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate /*
4657c478bd9Sstevel@tonic-gate  * Returns the packet, byte, rpf-failure count for the source, group provided.
4667c478bd9Sstevel@tonic-gate  * Uses larger counters and IPv6 addresses.
4677c478bd9Sstevel@tonic-gate  */
4687c478bd9Sstevel@tonic-gate /* ARGSUSED XXX until implemented */
4697c478bd9Sstevel@tonic-gate static int
get_lsg_cnt(struct sioc_lsg_req * req,ip_stack_t * ipst)470f4b3ec61Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
4717c478bd9Sstevel@tonic-gate {
4727c478bd9Sstevel@tonic-gate 	/* XXX TODO SIOCGETLSGCNT */
4737c478bd9Sstevel@tonic-gate 	return (ENXIO);
4747c478bd9Sstevel@tonic-gate }
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate /*
4777c478bd9Sstevel@tonic-gate  * Returns the input and output packet and byte counts on the vif provided.
4787c478bd9Sstevel@tonic-gate  */
4797c478bd9Sstevel@tonic-gate static int
get_vif_cnt(struct sioc_vif_req * req,ip_stack_t * ipst)480f4b3ec61Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
4817c478bd9Sstevel@tonic-gate {
4827c478bd9Sstevel@tonic-gate 	vifi_t vifi = req->vifi;
4837c478bd9Sstevel@tonic-gate 
484f4b3ec61Sdh155122 	if (vifi >= ipst->ips_numvifs)
4857c478bd9Sstevel@tonic-gate 		return (EINVAL);
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate 	/*
4887c478bd9Sstevel@tonic-gate 	 * No locks here, an approximation is fine.
4897c478bd9Sstevel@tonic-gate 	 */
490f4b3ec61Sdh155122 	req->icount = ipst->ips_vifs[vifi].v_pkt_in;
491f4b3ec61Sdh155122 	req->ocount = ipst->ips_vifs[vifi].v_pkt_out;
492f4b3ec61Sdh155122 	req->ibytes = ipst->ips_vifs[vifi].v_bytes_in;
493f4b3ec61Sdh155122 	req->obytes = ipst->ips_vifs[vifi].v_bytes_out;
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	return (0);
4967c478bd9Sstevel@tonic-gate }
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate static int
get_version(uchar_t * data)4997c478bd9Sstevel@tonic-gate get_version(uchar_t *data)
5007c478bd9Sstevel@tonic-gate {
5017c478bd9Sstevel@tonic-gate 	int *v = (int *)data;
5027c478bd9Sstevel@tonic-gate 
5037c478bd9Sstevel@tonic-gate 	*v = 0x0305;	/* XXX !!!! */
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	return (0);
5067c478bd9Sstevel@tonic-gate }
5077c478bd9Sstevel@tonic-gate 
5087c478bd9Sstevel@tonic-gate /*
5097c478bd9Sstevel@tonic-gate  * Set PIM assert processing global.
5107c478bd9Sstevel@tonic-gate  */
5117c478bd9Sstevel@tonic-gate static int
set_assert(int * i,ip_stack_t * ipst)512f4b3ec61Sdh155122 set_assert(int *i, ip_stack_t *ipst)
5137c478bd9Sstevel@tonic-gate {
5147c478bd9Sstevel@tonic-gate 	if ((*i != 1) && (*i != 0))
5157c478bd9Sstevel@tonic-gate 		return (EINVAL);
5167c478bd9Sstevel@tonic-gate 
517f4b3ec61Sdh155122 	ipst->ips_pim_assert = *i;
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate 	return (0);
5207c478bd9Sstevel@tonic-gate }
5217c478bd9Sstevel@tonic-gate 
5227c478bd9Sstevel@tonic-gate /*
5237c478bd9Sstevel@tonic-gate  * Get PIM assert processing global.
5247c478bd9Sstevel@tonic-gate  */
5257c478bd9Sstevel@tonic-gate static int
get_assert(uchar_t * data,ip_stack_t * ipst)526f4b3ec61Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst)
5277c478bd9Sstevel@tonic-gate {
5287c478bd9Sstevel@tonic-gate 	int *i = (int *)data;
5297c478bd9Sstevel@tonic-gate 
530f4b3ec61Sdh155122 	*i = ipst->ips_pim_assert;
5317c478bd9Sstevel@tonic-gate 
5327c478bd9Sstevel@tonic-gate 	return (0);
5337c478bd9Sstevel@tonic-gate }
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate /*
5367c478bd9Sstevel@tonic-gate  * Enable multicast routing.
5377c478bd9Sstevel@tonic-gate  */
5387c478bd9Sstevel@tonic-gate static int
ip_mrouter_init(conn_t * connp,uchar_t * data,int datalen,ip_stack_t * ipst)539fc80c0dfSnordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
5407c478bd9Sstevel@tonic-gate {
5417c478bd9Sstevel@tonic-gate 	int	*v;
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate 	if (data == NULL || (datalen != sizeof (int)))
5447c478bd9Sstevel@tonic-gate 		return (ENOPROTOOPT);
5457c478bd9Sstevel@tonic-gate 
5467c478bd9Sstevel@tonic-gate 	v = (int *)data;
5477c478bd9Sstevel@tonic-gate 	if (*v != 1)
5487c478bd9Sstevel@tonic-gate 		return (ENOPROTOOPT);
5497c478bd9Sstevel@tonic-gate 
550f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
551f4b3ec61Sdh155122 	if (ipst->ips_ip_g_mrouter != NULL) {
552f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5537c478bd9Sstevel@tonic-gate 		return (EADDRINUSE);
5547c478bd9Sstevel@tonic-gate 	}
5557c478bd9Sstevel@tonic-gate 
556fc80c0dfSnordmark 	/*
557fc80c0dfSnordmark 	 * MRT_INIT should only be allowed for RAW sockets, but we double
558fc80c0dfSnordmark 	 * check.
559fc80c0dfSnordmark 	 */
560fc80c0dfSnordmark 	if (!IPCL_IS_RAWIP(connp)) {
561fc80c0dfSnordmark 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
562fc80c0dfSnordmark 		return (EINVAL);
563fc80c0dfSnordmark 	}
564fc80c0dfSnordmark 
565fc80c0dfSnordmark 	ipst->ips_ip_g_mrouter = connp;
5667c478bd9Sstevel@tonic-gate 	connp->conn_multi_router = 1;
5677c478bd9Sstevel@tonic-gate 	/* In order for tunnels to work we have to turn ip_g_forward on */
568f4b3ec61Sdh155122 	if (!WE_ARE_FORWARDING(ipst)) {
569f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
570fc80c0dfSnordmark 			(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
5717c478bd9Sstevel@tonic-gate 			    "ip_mrouter_init: turning on forwarding");
5727c478bd9Sstevel@tonic-gate 		}
5736e91bba0SGirish Moodalbail 		ipst->ips_saved_ip_forwarding = ipst->ips_ip_forwarding;
5746e91bba0SGirish Moodalbail 		ipst->ips_ip_forwarding = IP_FORWARD_ALWAYS;
5757c478bd9Sstevel@tonic-gate 	}
5767c478bd9Sstevel@tonic-gate 
577f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5787c478bd9Sstevel@tonic-gate 	return (0);
5797c478bd9Sstevel@tonic-gate }
5807c478bd9Sstevel@tonic-gate 
581f4b3ec61Sdh155122 void
ip_mrouter_stack_init(ip_stack_t * ipst)582f4b3ec61Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst)
583f4b3ec61Sdh155122 {
584f4b3ec61Sdh155122 	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);
585f4b3ec61Sdh155122 
586f4b3ec61Sdh155122 	ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
587f4b3ec61Sdh155122 	    KM_SLEEP);
588f4b3ec61Sdh155122 	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
589f4b3ec61Sdh155122 	/*
590f4b3ec61Sdh155122 	 * mfctable:
591f4b3ec61Sdh155122 	 * Includes all mfcs, including waiting upcalls.
592f4b3ec61Sdh155122 	 * Multiple mfcs per bucket.
593f4b3ec61Sdh155122 	 */
594f4b3ec61Sdh155122 	ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
595f4b3ec61Sdh155122 	    KM_SLEEP);
596f4b3ec61Sdh155122 	/*
597f4b3ec61Sdh155122 	 * Define the token bucket filter structures.
598f4b3ec61Sdh155122 	 * tbftable -> each vif has one of these for storing info.
599f4b3ec61Sdh155122 	 */
600f4b3ec61Sdh155122 	ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);
601f4b3ec61Sdh155122 
602f4b3ec61Sdh155122 	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);
603f4b3ec61Sdh155122 
604f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
605f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
606f4b3ec61Sdh155122 }
607f4b3ec61Sdh155122 
6087c478bd9Sstevel@tonic-gate /*
6097c478bd9Sstevel@tonic-gate  * Disable multicast routing.
6107c478bd9Sstevel@tonic-gate  * Didn't use global timeout_val (BSD version), instead check the mfctable.
6117c478bd9Sstevel@tonic-gate  */
6127c478bd9Sstevel@tonic-gate int
ip_mrouter_done(ip_stack_t * ipst)613bd670b35SErik Nordmark ip_mrouter_done(ip_stack_t *ipst)
6147c478bd9Sstevel@tonic-gate {
615fc80c0dfSnordmark 	conn_t		*mrouter;
6167c478bd9Sstevel@tonic-gate 	vifi_t		vifi;
6177c478bd9Sstevel@tonic-gate 	struct mfc	*mfc_rt;
6187c478bd9Sstevel@tonic-gate 	int		i;
6197c478bd9Sstevel@tonic-gate 
620f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
621f4b3ec61Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
622f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6237c478bd9Sstevel@tonic-gate 		return (EINVAL);
6247c478bd9Sstevel@tonic-gate 	}
6257c478bd9Sstevel@tonic-gate 
626fc80c0dfSnordmark 	mrouter = ipst->ips_ip_g_mrouter;
6277c478bd9Sstevel@tonic-gate 
6286e91bba0SGirish Moodalbail 	if (ipst->ips_saved_ip_forwarding != -1) {
629f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
630fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
6317c478bd9Sstevel@tonic-gate 			    "ip_mrouter_done: turning off forwarding");
6327c478bd9Sstevel@tonic-gate 		}
6336e91bba0SGirish Moodalbail 		ipst->ips_ip_forwarding = ipst->ips_saved_ip_forwarding;
6346e91bba0SGirish Moodalbail 		ipst->ips_saved_ip_forwarding = -1;
6357c478bd9Sstevel@tonic-gate 	}
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	/*
6387c478bd9Sstevel@tonic-gate 	 * Always clear cache when vifs change.
639f4b3ec61Sdh155122 	 * No need to get ipst->ips_last_encap_lock since we are running as
640f4b3ec61Sdh155122 	 * a writer.
6417c478bd9Sstevel@tonic-gate 	 */
642f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
643f4b3ec61Sdh155122 	ipst->ips_last_encap_src = 0;
644f4b3ec61Sdh155122 	ipst->ips_last_encap_vif = NULL;
645f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
646fc80c0dfSnordmark 	mrouter->conn_multi_router = 0;
6477c478bd9Sstevel@tonic-gate 
648f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate 	/*
6517c478bd9Sstevel@tonic-gate 	 * For each phyint in use,
6527c478bd9Sstevel@tonic-gate 	 * disable promiscuous reception of all IP multicasts.
6537c478bd9Sstevel@tonic-gate 	 */
6547c478bd9Sstevel@tonic-gate 	for (vifi = 0; vifi < MAXVIFS; vifi++) {
655f4b3ec61Sdh155122 		struct vif *vifp = ipst->ips_vifs + vifi;
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
6587c478bd9Sstevel@tonic-gate 		/*
6597c478bd9Sstevel@tonic-gate 		 * if the vif is active mark it condemned.
6607c478bd9Sstevel@tonic-gate 		 */
6617c478bd9Sstevel@tonic-gate 		if (vifp->v_marks & VIF_MARK_GOOD) {
6627c478bd9Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
6637c478bd9Sstevel@tonic-gate 			ipif_refhold(vifp->v_ipif);
6647c478bd9Sstevel@tonic-gate 			/* Phyint only */
6657c478bd9Sstevel@tonic-gate 			if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
6667c478bd9Sstevel@tonic-gate 				ipif_t *ipif = vifp->v_ipif;
667bd670b35SErik Nordmark 				ilm_t *ilm = vifp->v_ilm;
6687c478bd9Sstevel@tonic-gate 
669bd670b35SErik Nordmark 				vifp->v_ilm = NULL;
6707c478bd9Sstevel@tonic-gate 				vifp->v_marks &= ~VIF_MARK_GOOD;
6717c478bd9Sstevel@tonic-gate 				vifp->v_marks |= VIF_MARK_CONDEMNED;
6727c478bd9Sstevel@tonic-gate 
673bd670b35SErik Nordmark 				mutex_exit(&(vifp)->v_lock);
674bd670b35SErik Nordmark 				if (ilm != NULL) {
675bd670b35SErik Nordmark 					ill_t *ill = ipif->ipif_ill;
676bd670b35SErik Nordmark 
677bd670b35SErik Nordmark 					(void) ip_delmulti(ilm);
678bd670b35SErik Nordmark 					ASSERT(ill->ill_mrouter_cnt > 0);
679bd670b35SErik Nordmark 					atomic_dec_32(&ill->ill_mrouter_cnt);
6807c478bd9Sstevel@tonic-gate 				}
6817c478bd9Sstevel@tonic-gate 				mutex_enter(&vifp->v_lock);
6827c478bd9Sstevel@tonic-gate 			}
6837ba7860fSErik Nordmark 			ipif_refrele(vifp->v_ipif);
6847c478bd9Sstevel@tonic-gate 			/*
6857c478bd9Sstevel@tonic-gate 			 * decreases the refcnt added in add_vif.
6867c478bd9Sstevel@tonic-gate 			 * and release v_lock.
6877c478bd9Sstevel@tonic-gate 			 */
6887c478bd9Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
6897c478bd9Sstevel@tonic-gate 		} else {
6907c478bd9Sstevel@tonic-gate 			mutex_exit(&vifp->v_lock);
6917c478bd9Sstevel@tonic-gate 			continue;
6927c478bd9Sstevel@tonic-gate 		}
6937c478bd9Sstevel@tonic-gate 	}
6947c478bd9Sstevel@tonic-gate 
695f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
696f4b3ec61Sdh155122 	ipst->ips_numvifs = 0;
697f4b3ec61Sdh155122 	ipst->ips_pim_assert = 0;
698f4b3ec61Sdh155122 	ipst->ips_reg_vif_num = ALL_VIFS;
699f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
7007c478bd9Sstevel@tonic-gate 
7017c478bd9Sstevel@tonic-gate 	/*
7027c478bd9Sstevel@tonic-gate 	 * Free upcall msgs.
7037c478bd9Sstevel@tonic-gate 	 * Go through mfctable and stop any outstanding upcall
7047c478bd9Sstevel@tonic-gate 	 * timeouts remaining on mfcs.
7057c478bd9Sstevel@tonic-gate 	 */
7067c478bd9Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
707f4b3ec61Sdh155122 		mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
708f4b3ec61Sdh155122 		ipst->ips_mfcs[i].mfcb_refcnt++;
709f4b3ec61Sdh155122 		ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
710f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
711f4b3ec61Sdh155122 		mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
7127c478bd9Sstevel@tonic-gate 		while (mfc_rt) {
7137c478bd9Sstevel@tonic-gate 			/* Free upcalls */
7147c478bd9Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
7157c478bd9Sstevel@tonic-gate 			if (mfc_rt->mfc_rte != NULL) {
7167c478bd9Sstevel@tonic-gate 				if (mfc_rt->mfc_timeout_id != 0) {
7177c478bd9Sstevel@tonic-gate 					/*
7187c478bd9Sstevel@tonic-gate 					 * OK to drop the lock as we have
7197c478bd9Sstevel@tonic-gate 					 * a refcnt on the bucket. timeout
7207c478bd9Sstevel@tonic-gate 					 * can fire but it will see that
7217c478bd9Sstevel@tonic-gate 					 * mfc_timeout_id == 0 and not do
7227c478bd9Sstevel@tonic-gate 					 * anything. see expire_upcalls().
7237c478bd9Sstevel@tonic-gate 					 */
7247c478bd9Sstevel@tonic-gate 					mfc_rt->mfc_timeout_id = 0;
7257c478bd9Sstevel@tonic-gate 					mutex_exit(&mfc_rt->mfc_mutex);
7267c478bd9Sstevel@tonic-gate 					(void) untimeout(
7277c478bd9Sstevel@tonic-gate 					    mfc_rt->mfc_timeout_id);
7287c478bd9Sstevel@tonic-gate 					mfc_rt->mfc_timeout_id = 0;
7297c478bd9Sstevel@tonic-gate 					mutex_enter(&mfc_rt->mfc_mutex);
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate 					/*
7327c478bd9Sstevel@tonic-gate 					 * all queued upcall packets
7337c478bd9Sstevel@tonic-gate 					 * and mblk will be freed in
7347c478bd9Sstevel@tonic-gate 					 * release_mfc().
7357c478bd9Sstevel@tonic-gate 					 */
7367c478bd9Sstevel@tonic-gate 				}
7377c478bd9Sstevel@tonic-gate 			}
7387c478bd9Sstevel@tonic-gate 
7397c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
7407c478bd9Sstevel@tonic-gate 
7417c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
7427c478bd9Sstevel@tonic-gate 			mfc_rt = mfc_rt->mfc_next;
7437c478bd9Sstevel@tonic-gate 		}
744f4b3ec61Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
7457c478bd9Sstevel@tonic-gate 	}
7467c478bd9Sstevel@tonic-gate 
747f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
748f4b3ec61Sdh155122 	ipst->ips_ip_g_mrouter = NULL;
749f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7507c478bd9Sstevel@tonic-gate 	return (0);
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
753f4b3ec61Sdh155122 void
ip_mrouter_stack_destroy(ip_stack_t * ipst)754f4b3ec61Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst)
755f4b3ec61Sdh155122 {
756f4b3ec61Sdh155122 	struct mfcb *mfcbp;
757f4b3ec61Sdh155122 	struct mfc  *rt;
758f4b3ec61Sdh155122 	int i;
759f4b3ec61Sdh155122 
760f4b3ec61Sdh155122 	for (i = 0; i < MFCTBLSIZ; i++) {
761f4b3ec61Sdh155122 		mfcbp = &ipst->ips_mfcs[i];
762f4b3ec61Sdh155122 
763f4b3ec61Sdh155122 		while ((rt = mfcbp->mfcb_mfc) != NULL) {
764f4b3ec61Sdh155122 			(void) printf("ip_mrouter_stack_destroy: free for %d\n",
765f4b3ec61Sdh155122 			    i);
766f4b3ec61Sdh155122 
767f4b3ec61Sdh155122 			mfcbp->mfcb_mfc = rt->mfc_next;
768f4b3ec61Sdh155122 			free_queue(rt);
769f4b3ec61Sdh155122 			mi_free(rt);
770f4b3ec61Sdh155122 		}
771f4b3ec61Sdh155122 	}
772f4b3ec61Sdh155122 	kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
773f4b3ec61Sdh155122 	ipst->ips_vifs = NULL;
774f4b3ec61Sdh155122 	kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
775f4b3ec61Sdh155122 	ipst->ips_mrtstat = NULL;
776f4b3ec61Sdh155122 	kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
777f4b3ec61Sdh155122 	ipst->ips_mfcs = NULL;
778f4b3ec61Sdh155122 	kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
779f4b3ec61Sdh155122 	ipst->ips_tbfs = NULL;
780f4b3ec61Sdh155122 
781f4b3ec61Sdh155122 	mutex_destroy(&ipst->ips_last_encap_lock);
782f4b3ec61Sdh155122 	mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
783f4b3ec61Sdh155122 }
784f4b3ec61Sdh155122 
7857c478bd9Sstevel@tonic-gate static boolean_t
is_mrouter_off(ip_stack_t * ipst)786f4b3ec61Sdh155122 is_mrouter_off(ip_stack_t *ipst)
7877c478bd9Sstevel@tonic-gate {
788fc80c0dfSnordmark 	conn_t	*mrouter;
7897c478bd9Sstevel@tonic-gate 
790f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
791f4b3ec61Sdh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
792f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7937c478bd9Sstevel@tonic-gate 		return (B_TRUE);
7947c478bd9Sstevel@tonic-gate 	}
7957c478bd9Sstevel@tonic-gate 
796fc80c0dfSnordmark 	mrouter = ipst->ips_ip_g_mrouter;
797fc80c0dfSnordmark 	if (mrouter->conn_multi_router == 0) {
798f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7997c478bd9Sstevel@tonic-gate 		return (B_TRUE);
8007c478bd9Sstevel@tonic-gate 	}
801f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
8027c478bd9Sstevel@tonic-gate 	return (B_FALSE);
8037c478bd9Sstevel@tonic-gate }
8047c478bd9Sstevel@tonic-gate 
8057c478bd9Sstevel@tonic-gate static void
unlock_good_vif(struct vif * vifp)8067c478bd9Sstevel@tonic-gate unlock_good_vif(struct vif *vifp)
8077c478bd9Sstevel@tonic-gate {
8087c478bd9Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8097c478bd9Sstevel@tonic-gate 	ipif_refrele(vifp->v_ipif);
8107c478bd9Sstevel@tonic-gate 	VIF_REFRELE(vifp);
8117c478bd9Sstevel@tonic-gate }
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate static boolean_t
lock_good_vif(struct vif * vifp)8147c478bd9Sstevel@tonic-gate lock_good_vif(struct vif *vifp)
8157c478bd9Sstevel@tonic-gate {
8167c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8177c478bd9Sstevel@tonic-gate 	if (!(vifp->v_marks & VIF_MARK_GOOD)) {
8187c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8197c478bd9Sstevel@tonic-gate 		return (B_FALSE);
8207c478bd9Sstevel@tonic-gate 	}
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	ASSERT(vifp->v_ipif != NULL);
8237c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
8247c478bd9Sstevel@tonic-gate 	if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
8257c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8267c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8277c478bd9Sstevel@tonic-gate 		return (B_FALSE);
8287c478bd9Sstevel@tonic-gate 	}
8297c478bd9Sstevel@tonic-gate 	ipif_refhold_locked(vifp->v_ipif);
8307c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8317c478bd9Sstevel@tonic-gate 	vifp->v_refcnt++;
8327c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
8337c478bd9Sstevel@tonic-gate 	return (B_TRUE);
8347c478bd9Sstevel@tonic-gate }
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate /*
8377c478bd9Sstevel@tonic-gate  * Add a vif to the vif table.
8387c478bd9Sstevel@tonic-gate  */
8397c478bd9Sstevel@tonic-gate static int
add_vif(struct vifctl * vifcp,conn_t * connp,ip_stack_t * ipst)840bd670b35SErik Nordmark add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst)
8417c478bd9Sstevel@tonic-gate {
842f4b3ec61Sdh155122 	struct vif	*vifp = ipst->ips_vifs + vifcp->vifc_vifi;
8437c478bd9Sstevel@tonic-gate 	ipif_t		*ipif;
844bd670b35SErik Nordmark 	int		error = 0;
845f4b3ec61Sdh155122 	struct tbf	*v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
846fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
847bd670b35SErik Nordmark 	ilm_t		*ilm;
848bd670b35SErik Nordmark 	ill_t		*ill;
8497c478bd9Sstevel@tonic-gate 
8507c478bd9Sstevel@tonic-gate 	ASSERT(connp != NULL);
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 	if (vifcp->vifc_vifi >= MAXVIFS)
8537c478bd9Sstevel@tonic-gate 		return (EINVAL);
8547c478bd9Sstevel@tonic-gate 
855f4b3ec61Sdh155122 	if (is_mrouter_off(ipst))
8567c478bd9Sstevel@tonic-gate 		return (EINVAL);
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
8597c478bd9Sstevel@tonic-gate 	/*
8607c478bd9Sstevel@tonic-gate 	 * Viftable entry should be 0.
8617c478bd9Sstevel@tonic-gate 	 * if v_marks == 0 but v_refcnt != 0 means struct is being
8627c478bd9Sstevel@tonic-gate 	 * initialized.
8637c478bd9Sstevel@tonic-gate 	 *
8647c478bd9Sstevel@tonic-gate 	 * Also note that it is very unlikely that we will get a MRT_ADD_VIF
8657c478bd9Sstevel@tonic-gate 	 * request while the delete is in progress, mrouted only sends add
8667c478bd9Sstevel@tonic-gate 	 * requests when a new interface is added and the new interface cannot
8677c478bd9Sstevel@tonic-gate 	 * have the same vifi as an existing interface. We make sure that
8687c478bd9Sstevel@tonic-gate 	 * ill_delete will block till the vif is deleted by adding a refcnt
8697c478bd9Sstevel@tonic-gate 	 * to ipif in del_vif().
8707c478bd9Sstevel@tonic-gate 	 */
8717c478bd9Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr != 0 ||
8727c478bd9Sstevel@tonic-gate 	    vifp->v_marks != 0 ||
8737c478bd9Sstevel@tonic-gate 	    vifp->v_refcnt != 0) {
8747c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8757c478bd9Sstevel@tonic-gate 		return (EADDRINUSE);
8767c478bd9Sstevel@tonic-gate 	}
8777c478bd9Sstevel@tonic-gate 
8787c478bd9Sstevel@tonic-gate 	/* Incoming vif should not be 0 */
8797c478bd9Sstevel@tonic-gate 	if (vifcp->vifc_lcl_addr.s_addr == 0) {
8807c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
8817c478bd9Sstevel@tonic-gate 		return (EINVAL);
8827c478bd9Sstevel@tonic-gate 	}
8837c478bd9Sstevel@tonic-gate 
8847c478bd9Sstevel@tonic-gate 	vifp->v_refcnt++;
8857c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
8867c478bd9Sstevel@tonic-gate 	/* Find the interface with the local address */
8877c478bd9Sstevel@tonic-gate 	ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
888bd670b35SErik Nordmark 	    IPCL_ZONEID(connp), ipst);
8897c478bd9Sstevel@tonic-gate 	if (ipif == NULL) {
8907c478bd9Sstevel@tonic-gate 		VIF_REFRELE(vifp);
8917c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 
894f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
895fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
8967c478bd9Sstevel@tonic-gate 		    "add_vif: src 0x%x enter",
8977c478bd9Sstevel@tonic-gate 		    vifcp->vifc_lcl_addr.s_addr);
8987c478bd9Sstevel@tonic-gate 	}
8997c478bd9Sstevel@tonic-gate 
9007c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
9017c478bd9Sstevel@tonic-gate 	/*
9027c478bd9Sstevel@tonic-gate 	 * Always clear cache when vifs change.
9037c478bd9Sstevel@tonic-gate 	 * Needed to ensure that src isn't left over from before vif was added.
9047c478bd9Sstevel@tonic-gate 	 * No need to get last_encap_lock, since we are running as a writer.
9057c478bd9Sstevel@tonic-gate 	 */
9067c478bd9Sstevel@tonic-gate 
907f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
908f4b3ec61Sdh155122 	ipst->ips_last_encap_src = 0;
909f4b3ec61Sdh155122 	ipst->ips_last_encap_vif = NULL;
910f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
9137c478bd9Sstevel@tonic-gate 		if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
9147c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
9157c478bd9Sstevel@tonic-gate 			    "add_vif: source route tunnels not supported\n");
9167c478bd9Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9177c478bd9Sstevel@tonic-gate 			ipif_refrele(ipif);
9187c478bd9Sstevel@tonic-gate 			return (EOPNOTSUPP);
9197c478bd9Sstevel@tonic-gate 		}
9207c478bd9Sstevel@tonic-gate 		vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
9217c478bd9Sstevel@tonic-gate 
9227c478bd9Sstevel@tonic-gate 	} else {
9237c478bd9Sstevel@tonic-gate 		/* Phyint or Register vif */
9247c478bd9Sstevel@tonic-gate 		if (vifcp->vifc_flags & VIFF_REGISTER) {
9257c478bd9Sstevel@tonic-gate 			/*
9267c478bd9Sstevel@tonic-gate 			 * Note: Since all IPPROTO_IP level options (including
9277c478bd9Sstevel@tonic-gate 			 * MRT_ADD_VIF) are done exclusively via
9287c478bd9Sstevel@tonic-gate 			 * ip_optmgmt_writer(), a lock is not necessary to
9297c478bd9Sstevel@tonic-gate 			 * protect reg_vif_num.
9307c478bd9Sstevel@tonic-gate 			 */
931f4b3ec61Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
932f4b3ec61Sdh155122 			if (ipst->ips_reg_vif_num == ALL_VIFS) {
933f4b3ec61Sdh155122 				ipst->ips_reg_vif_num = vifcp->vifc_vifi;
934f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9357c478bd9Sstevel@tonic-gate 			} else {
936f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9377c478bd9Sstevel@tonic-gate 				VIF_REFRELE_LOCKED(vifp);
9387c478bd9Sstevel@tonic-gate 				ipif_refrele(ipif);
9397c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
9407c478bd9Sstevel@tonic-gate 			}
9417c478bd9Sstevel@tonic-gate 		}
9427c478bd9Sstevel@tonic-gate 
9437c478bd9Sstevel@tonic-gate 		/* Make sure the interface supports multicast */
9447c478bd9Sstevel@tonic-gate 		if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
9457c478bd9Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9467c478bd9Sstevel@tonic-gate 			ipif_refrele(ipif);
9477c478bd9Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
948f4b3ec61Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
949f4b3ec61Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
950f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9517c478bd9Sstevel@tonic-gate 			}
9527c478bd9Sstevel@tonic-gate 			return (EOPNOTSUPP);
9537c478bd9Sstevel@tonic-gate 		}
9547c478bd9Sstevel@tonic-gate 		/* Enable promiscuous reception of all IP mcasts from the if */
9557c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
956bd670b35SErik Nordmark 
957bd670b35SErik Nordmark 		ill = ipif->ipif_ill;
958bd670b35SErik Nordmark 		if (IS_UNDER_IPMP(ill))
959bd670b35SErik Nordmark 			ill = ipmp_ill_hold_ipmp_ill(ill);
960bd670b35SErik Nordmark 
961bd670b35SErik Nordmark 		if (ill == NULL) {
962bd670b35SErik Nordmark 			ilm = NULL;
963bd670b35SErik Nordmark 		} else {
964bd670b35SErik Nordmark 			ilm = ip_addmulti(&ipv6_all_zeros, ill,
965bd670b35SErik Nordmark 			    ipif->ipif_zoneid, &error);
966bd670b35SErik Nordmark 			if (ilm != NULL)
967bd670b35SErik Nordmark 				atomic_inc_32(&ill->ill_mrouter_cnt);
968bd670b35SErik Nordmark 			if (IS_UNDER_IPMP(ipif->ipif_ill)) {
969bd670b35SErik Nordmark 				ill_refrele(ill);
970bd670b35SErik Nordmark 				ill = ipif->ipif_ill;
971bd670b35SErik Nordmark 			}
972bd670b35SErik Nordmark 		}
973bd670b35SErik Nordmark 
9747c478bd9Sstevel@tonic-gate 		mutex_enter(&vifp->v_lock);
9757c478bd9Sstevel@tonic-gate 		/*
9767c478bd9Sstevel@tonic-gate 		 * since we released the lock lets make sure that
9777c478bd9Sstevel@tonic-gate 		 * ip_mrouter_done() has not been called.
9787c478bd9Sstevel@tonic-gate 		 */
979bd670b35SErik Nordmark 		if (ilm == NULL || is_mrouter_off(ipst)) {
980bd670b35SErik Nordmark 			if (ilm != NULL) {
981bd670b35SErik Nordmark 				(void) ip_delmulti(ilm);
982bd670b35SErik Nordmark 				ASSERT(ill->ill_mrouter_cnt > 0);
983bd670b35SErik Nordmark 				atomic_dec_32(&ill->ill_mrouter_cnt);
984bd670b35SErik Nordmark 			}
9857c478bd9Sstevel@tonic-gate 			if (vifcp->vifc_flags & VIFF_REGISTER) {
986f4b3ec61Sdh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
987f4b3ec61Sdh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
988f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
9897c478bd9Sstevel@tonic-gate 			}
9907c478bd9Sstevel@tonic-gate 			VIF_REFRELE_LOCKED(vifp);
9917c478bd9Sstevel@tonic-gate 			ipif_refrele(ipif);
9927c478bd9Sstevel@tonic-gate 			return (error?error:EINVAL);
9937c478bd9Sstevel@tonic-gate 		}
994bd670b35SErik Nordmark 		vifp->v_ilm = ilm;
9957c478bd9Sstevel@tonic-gate 	}
9967c478bd9Sstevel@tonic-gate 	/* Define parameters for the tbf structure */
9977c478bd9Sstevel@tonic-gate 	vifp->v_tbf = v_tbf;
9987c478bd9Sstevel@tonic-gate 	gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
9997c478bd9Sstevel@tonic-gate 	vifp->v_tbf->tbf_n_tok = 0;
10007c478bd9Sstevel@tonic-gate 	vifp->v_tbf->tbf_q_len = 0;
10017c478bd9Sstevel@tonic-gate 	vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
10027c478bd9Sstevel@tonic-gate 	vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	vifp->v_flags = vifcp->vifc_flags;
10057c478bd9Sstevel@tonic-gate 	vifp->v_threshold = vifcp->vifc_threshold;
10067c478bd9Sstevel@tonic-gate 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
10077c478bd9Sstevel@tonic-gate 	vifp->v_ipif = ipif;
10087c478bd9Sstevel@tonic-gate 	ipif_refrele(ipif);
10097c478bd9Sstevel@tonic-gate 	/* Scaling up here, allows division by 1024 in critical code.	*/
10107c478bd9Sstevel@tonic-gate 	vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
10117c478bd9Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
10127c478bd9Sstevel@tonic-gate 	/* initialize per vif pkt counters */
10137c478bd9Sstevel@tonic-gate 	vifp->v_pkt_in = 0;
10147c478bd9Sstevel@tonic-gate 	vifp->v_pkt_out = 0;
10157c478bd9Sstevel@tonic-gate 	vifp->v_bytes_in = 0;
10167c478bd9Sstevel@tonic-gate 	vifp->v_bytes_out = 0;
10177c478bd9Sstevel@tonic-gate 	mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
10187c478bd9Sstevel@tonic-gate 
10197c478bd9Sstevel@tonic-gate 	/* Adjust numvifs up, if the vifi is higher than numvifs */
1020f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
1021f4b3ec61Sdh155122 	if (ipst->ips_numvifs <= vifcp->vifc_vifi)
1022f4b3ec61Sdh155122 		ipst->ips_numvifs = vifcp->vifc_vifi + 1;
1023f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
10247c478bd9Sstevel@tonic-gate 
1025f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1026fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10277c478bd9Sstevel@tonic-gate 		    "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
10287c478bd9Sstevel@tonic-gate 		    vifcp->vifc_vifi,
10297c478bd9Sstevel@tonic-gate 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
10307c478bd9Sstevel@tonic-gate 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
10317c478bd9Sstevel@tonic-gate 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
10327c478bd9Sstevel@tonic-gate 		    vifcp->vifc_threshold, vifcp->vifc_rate_limit);
10337c478bd9Sstevel@tonic-gate 	}
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	vifp->v_marks = VIF_MARK_GOOD;
10367c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_lock);
10377c478bd9Sstevel@tonic-gate 	return (0);
10387c478bd9Sstevel@tonic-gate }
10397c478bd9Sstevel@tonic-gate 
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate /* Delete a vif from the vif table. */
10427c478bd9Sstevel@tonic-gate static void
del_vifp(struct vif * vifp)10437c478bd9Sstevel@tonic-gate del_vifp(struct vif *vifp)
10447c478bd9Sstevel@tonic-gate {
10457c478bd9Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
10467c478bd9Sstevel@tonic-gate 	mblk_t  *mp0;
10477c478bd9Sstevel@tonic-gate 	vifi_t  vifi;
1048f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
1049fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate 	ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
10527c478bd9Sstevel@tonic-gate 	ASSERT(t != NULL);
10537c478bd9Sstevel@tonic-gate 
1054f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1055fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10567c478bd9Sstevel@tonic-gate 		    "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
10577c478bd9Sstevel@tonic-gate 	}
10587c478bd9Sstevel@tonic-gate 
10597c478bd9Sstevel@tonic-gate 	if (vifp->v_timeout_id != 0) {
10607c478bd9Sstevel@tonic-gate 		(void) untimeout(vifp->v_timeout_id);
10617c478bd9Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
10627c478bd9Sstevel@tonic-gate 	}
10637c478bd9Sstevel@tonic-gate 
10647c478bd9Sstevel@tonic-gate 	/*
10657c478bd9Sstevel@tonic-gate 	 * Free packets queued at the interface.
10667c478bd9Sstevel@tonic-gate 	 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
10677c478bd9Sstevel@tonic-gate 	 */
10687c478bd9Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
10697c478bd9Sstevel@tonic-gate 	while (t->tbf_q != NULL) {
10707c478bd9Sstevel@tonic-gate 		mp0 = t->tbf_q;
10717c478bd9Sstevel@tonic-gate 		t->tbf_q = t->tbf_q->b_next;
10727c478bd9Sstevel@tonic-gate 		mp0->b_prev = mp0->b_next = NULL;
10737c478bd9Sstevel@tonic-gate 		freemsg(mp0);
10747c478bd9Sstevel@tonic-gate 	}
10757c478bd9Sstevel@tonic-gate 	mutex_exit(&t->tbf_lock);
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate 	/*
10787c478bd9Sstevel@tonic-gate 	 * Always clear cache when vifs change.
10797c478bd9Sstevel@tonic-gate 	 * No need to get last_encap_lock since we are running as a writer.
10807c478bd9Sstevel@tonic-gate 	 */
1081f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
1082f4b3ec61Sdh155122 	if (vifp == ipst->ips_last_encap_vif) {
1083f4b3ec61Sdh155122 		ipst->ips_last_encap_vif = NULL;
1084f4b3ec61Sdh155122 		ipst->ips_last_encap_src = 0;
10857c478bd9Sstevel@tonic-gate 	}
1086f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate 	mutex_destroy(&t->tbf_lock);
10897c478bd9Sstevel@tonic-gate 
10907c478bd9Sstevel@tonic-gate 	bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
10917c478bd9Sstevel@tonic-gate 
10927c478bd9Sstevel@tonic-gate 	/* Adjust numvifs down */
1093f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
1094f4b3ec61Sdh155122 	for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */
1095f4b3ec61Sdh155122 		if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
10967c478bd9Sstevel@tonic-gate 			break;
1097f4b3ec61Sdh155122 	ipst->ips_numvifs = vifi;
1098f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
10997c478bd9Sstevel@tonic-gate 
11007c478bd9Sstevel@tonic-gate 	bzero(vifp, sizeof (*vifp));
11017c478bd9Sstevel@tonic-gate }
11027c478bd9Sstevel@tonic-gate 
11037c478bd9Sstevel@tonic-gate static int
del_vif(vifi_t * vifip,ip_stack_t * ipst)1104bd670b35SErik Nordmark del_vif(vifi_t *vifip, ip_stack_t *ipst)
11057c478bd9Sstevel@tonic-gate {
1106f4b3ec61Sdh155122 	struct vif	*vifp = ipst->ips_vifs + *vifip;
11077c478bd9Sstevel@tonic-gate 
1108f4b3ec61Sdh155122 	if (*vifip >= ipst->ips_numvifs)
11097c478bd9Sstevel@tonic-gate 		return (EINVAL);
11107c478bd9Sstevel@tonic-gate 
11117c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_lock);
11127c478bd9Sstevel@tonic-gate 	/*
11137c478bd9Sstevel@tonic-gate 	 * Not initialized
11147c478bd9Sstevel@tonic-gate 	 * Here we are not looking at the vif that is being initialized
11157c478bd9Sstevel@tonic-gate 	 * i.e vifp->v_marks == 0 and refcnt > 0.
11167c478bd9Sstevel@tonic-gate 	 */
11177c478bd9Sstevel@tonic-gate 	if (vifp->v_lcl_addr.s_addr == 0 ||
11187c478bd9Sstevel@tonic-gate 	    !(vifp->v_marks & VIF_MARK_GOOD)) {
11197c478bd9Sstevel@tonic-gate 		mutex_exit(&vifp->v_lock);
11207c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
11217c478bd9Sstevel@tonic-gate 	}
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate 	/* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
11247c478bd9Sstevel@tonic-gate 	vifp->v_marks &= ~VIF_MARK_GOOD;
11257c478bd9Sstevel@tonic-gate 	vifp->v_marks |= VIF_MARK_CONDEMNED;
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate 	/* Phyint only */
11287c478bd9Sstevel@tonic-gate 	if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
11297c478bd9Sstevel@tonic-gate 		ipif_t *ipif = vifp->v_ipif;
1130bd670b35SErik Nordmark 		ilm_t *ilm = vifp->v_ilm;
1131bd670b35SErik Nordmark 
1132bd670b35SErik Nordmark 		vifp->v_ilm = NULL;
1133bd670b35SErik Nordmark 
11347c478bd9Sstevel@tonic-gate 		ASSERT(ipif != NULL);
11357c478bd9Sstevel@tonic-gate 		/*
11367c478bd9Sstevel@tonic-gate 		 * should be OK to drop the lock as we
11377c478bd9Sstevel@tonic-gate 		 * have marked this as CONDEMNED.
11387c478bd9Sstevel@tonic-gate 		 */
11397c478bd9Sstevel@tonic-gate 		mutex_exit(&(vifp)->v_lock);
1140bd670b35SErik Nordmark 		if (ilm != NULL) {
1141bd670b35SErik Nordmark 			(void) ip_delmulti(ilm);
1142bd670b35SErik Nordmark 			ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0);
1143bd670b35SErik Nordmark 			atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt);
1144bd670b35SErik Nordmark 		}
11457c478bd9Sstevel@tonic-gate 		mutex_enter(&(vifp)->v_lock);
11467c478bd9Sstevel@tonic-gate 	}
11477c478bd9Sstevel@tonic-gate 
1148bd670b35SErik Nordmark 	if (vifp->v_flags & VIFF_REGISTER) {
1149bd670b35SErik Nordmark 		mutex_enter(&ipst->ips_numvifs_mutex);
1150bd670b35SErik Nordmark 		ipst->ips_reg_vif_num = ALL_VIFS;
1151bd670b35SErik Nordmark 		mutex_exit(&ipst->ips_numvifs_mutex);
1152bd670b35SErik Nordmark 	}
1153bd670b35SErik Nordmark 
11547c478bd9Sstevel@tonic-gate 	/*
11557c478bd9Sstevel@tonic-gate 	 * decreases the refcnt added in add_vif.
11567c478bd9Sstevel@tonic-gate 	 */
11577c478bd9Sstevel@tonic-gate 	VIF_REFRELE_LOCKED(vifp);
11587c478bd9Sstevel@tonic-gate 	return (0);
11597c478bd9Sstevel@tonic-gate }
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate /*
11627c478bd9Sstevel@tonic-gate  * Add an mfc entry.
11637c478bd9Sstevel@tonic-gate  */
11647c478bd9Sstevel@tonic-gate static int
add_mfc(struct mfcctl * mfccp,ip_stack_t * ipst)1165f4b3ec61Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
11667c478bd9Sstevel@tonic-gate {
11677c478bd9Sstevel@tonic-gate 	struct mfc *rt;
11687c478bd9Sstevel@tonic-gate 	struct rtdetq *rte;
11697c478bd9Sstevel@tonic-gate 	ushort_t nstl;
11707c478bd9Sstevel@tonic-gate 	int i;
11717c478bd9Sstevel@tonic-gate 	struct mfcb *mfcbp;
1172fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 	/*
11757c478bd9Sstevel@tonic-gate 	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
11767c478bd9Sstevel@tonic-gate 	 * did not have a real route for pkt.
11777c478bd9Sstevel@tonic-gate 	 * We want this pkt without rt installed in the mfctable to prevent
11787c478bd9Sstevel@tonic-gate 	 * multiiple tries, so go ahead and put it in mfctable, it will
11797c478bd9Sstevel@tonic-gate 	 * be discarded later in ip_mdq() because the child is NULL.
11807c478bd9Sstevel@tonic-gate 	 */
11817c478bd9Sstevel@tonic-gate 
11827c478bd9Sstevel@tonic-gate 	/* Error checking, out of bounds? */
11837c478bd9Sstevel@tonic-gate 	if (mfccp->mfcc_parent > MAXVIFS) {
11847c478bd9Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
11857c478bd9Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
11867c478bd9Sstevel@tonic-gate 		return (EINVAL);
11877c478bd9Sstevel@tonic-gate 	}
11887c478bd9Sstevel@tonic-gate 
11897c478bd9Sstevel@tonic-gate 	if ((mfccp->mfcc_parent != NO_VIF) &&
1190f4b3ec61Sdh155122 	    (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
11917c478bd9Sstevel@tonic-gate 		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
11927c478bd9Sstevel@tonic-gate 		    (int)mfccp->mfcc_parent));
11937c478bd9Sstevel@tonic-gate 		return (EINVAL);
11947c478bd9Sstevel@tonic-gate 	}
11957c478bd9Sstevel@tonic-gate 
1196f4b3ec61Sdh155122 	if (is_mrouter_off(ipst)) {
11977c478bd9Sstevel@tonic-gate 		return (EINVAL);
11987c478bd9Sstevel@tonic-gate 	}
11997c478bd9Sstevel@tonic-gate 
1200f4b3ec61Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
12017c478bd9Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr)];
12027c478bd9Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
12037c478bd9Sstevel@tonic-gate 	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
12047c478bd9Sstevel@tonic-gate 	    mfccp->mfcc_mcastgrp.s_addr, rt);
12057c478bd9Sstevel@tonic-gate 
12067c478bd9Sstevel@tonic-gate 	/* If an entry already exists, just update the fields */
12077c478bd9Sstevel@tonic-gate 	if (rt) {
1208f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1209fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
12107c478bd9Sstevel@tonic-gate 			    "add_mfc: update o %x grp %x parent %x",
12117c478bd9Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
12127c478bd9Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
12137c478bd9Sstevel@tonic-gate 			    mfccp->mfcc_parent);
12147c478bd9Sstevel@tonic-gate 		}
12157c478bd9Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
12167c478bd9Sstevel@tonic-gate 		rt->mfc_parent = mfccp->mfcc_parent;
12177c478bd9Sstevel@tonic-gate 
1218f4b3ec61Sdh155122 		mutex_enter(&ipst->ips_numvifs_mutex);
1219f4b3ec61Sdh155122 		for (i = 0; i < (int)ipst->ips_numvifs; i++)
12207c478bd9Sstevel@tonic-gate 			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
1221f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_numvifs_mutex);
12227c478bd9Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
12237c478bd9Sstevel@tonic-gate 
12247c478bd9Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
12257c478bd9Sstevel@tonic-gate 		return (0);
12267c478bd9Sstevel@tonic-gate 	}
12277c478bd9Sstevel@tonic-gate 
12287c478bd9Sstevel@tonic-gate 	/*
12297c478bd9Sstevel@tonic-gate 	 * Find the entry for which the upcall was made and update.
12307c478bd9Sstevel@tonic-gate 	 */
12317c478bd9Sstevel@tonic-gate 	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
12327c478bd9Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
12337c478bd9Sstevel@tonic-gate 		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
12347c478bd9Sstevel@tonic-gate 		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
12357c478bd9Sstevel@tonic-gate 		    (rt->mfc_rte != NULL) &&
12367c478bd9Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
12377c478bd9Sstevel@tonic-gate 			if (nstl++ != 0)
12387c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN,
12397c478bd9Sstevel@tonic-gate 				    "add_mfc: %s o %x g %x p %x",
12407c478bd9Sstevel@tonic-gate 				    "multiple kernel entries",
12417c478bd9Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
12427c478bd9Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
12437c478bd9Sstevel@tonic-gate 				    mfccp->mfcc_parent);
12447c478bd9Sstevel@tonic-gate 
1245f4b3ec61Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
1246fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
1247f4b3ec61Sdh155122 				    SL_TRACE,
12487c478bd9Sstevel@tonic-gate 				    "add_mfc: o %x g %x p %x",
12497c478bd9Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_origin.s_addr),
12507c478bd9Sstevel@tonic-gate 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
12517c478bd9Sstevel@tonic-gate 				    mfccp->mfcc_parent);
12527c478bd9Sstevel@tonic-gate 			}
1253f4b3ec61Sdh155122 			fill_route(rt, mfccp, ipst);
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 			/*
12567c478bd9Sstevel@tonic-gate 			 * Prevent cleanup of cache entry.
12577c478bd9Sstevel@tonic-gate 			 * Timer starts in ip_mforward.
12587c478bd9Sstevel@tonic-gate 			 */
12597c478bd9Sstevel@tonic-gate 			if (rt->mfc_timeout_id != 0) {
12607c478bd9Sstevel@tonic-gate 				timeout_id_t id;
12617c478bd9Sstevel@tonic-gate 				id = rt->mfc_timeout_id;
12627c478bd9Sstevel@tonic-gate 				/*
12637c478bd9Sstevel@tonic-gate 				 * setting id to zero will avoid this
12647c478bd9Sstevel@tonic-gate 				 * entry from being cleaned up in
12657c478bd9Sstevel@tonic-gate 				 * expire_up_calls().
12667c478bd9Sstevel@tonic-gate 				 */
12677c478bd9Sstevel@tonic-gate 				rt->mfc_timeout_id = 0;
12687c478bd9Sstevel@tonic-gate 				/*
12697c478bd9Sstevel@tonic-gate 				 * dropping the lock is fine as we
12707c478bd9Sstevel@tonic-gate 				 * have a refhold on the bucket.
12717c478bd9Sstevel@tonic-gate 				 * so mfc cannot be freed.
12727c478bd9Sstevel@tonic-gate 				 * The timeout can fire but it will see
12737c478bd9Sstevel@tonic-gate 				 * that mfc_timeout_id == 0 and not cleanup.
12747c478bd9Sstevel@tonic-gate 				 */
12757c478bd9Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
12767c478bd9Sstevel@tonic-gate 				(void) untimeout(id);
12777c478bd9Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
12787c478bd9Sstevel@tonic-gate 			}
12797c478bd9Sstevel@tonic-gate 
12807c478bd9Sstevel@tonic-gate 			/*
12817c478bd9Sstevel@tonic-gate 			 * Send all pkts that are queued waiting for the upcall.
12827c478bd9Sstevel@tonic-gate 			 * ip_mdq param tun set to 0 -
12837c478bd9Sstevel@tonic-gate 			 * the return value of ip_mdq() isn't used here,
12847c478bd9Sstevel@tonic-gate 			 * so value we send doesn't matter.
12857c478bd9Sstevel@tonic-gate 			 */
12867c478bd9Sstevel@tonic-gate 			while (rt->mfc_rte != NULL) {
12877c478bd9Sstevel@tonic-gate 				rte = rt->mfc_rte;
12887c478bd9Sstevel@tonic-gate 				rt->mfc_rte = rte->rte_next;
12897c478bd9Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
12907c478bd9Sstevel@tonic-gate 				(void) ip_mdq(rte->mp, (ipha_t *)
12917c478bd9Sstevel@tonic-gate 				    rte->mp->b_rptr, rte->ill, 0, rt);
12927c478bd9Sstevel@tonic-gate 				freemsg(rte->mp);
12937c478bd9Sstevel@tonic-gate 				mi_free((char *)rte);
12947c478bd9Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
12957c478bd9Sstevel@tonic-gate 			}
12967c478bd9Sstevel@tonic-gate 		}
12977c478bd9Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
12987c478bd9Sstevel@tonic-gate 	}
12997c478bd9Sstevel@tonic-gate 
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate 	/*
13027c478bd9Sstevel@tonic-gate 	 * It is possible that an entry is being inserted without an upcall
13037c478bd9Sstevel@tonic-gate 	 */
13047c478bd9Sstevel@tonic-gate 	if (nstl == 0) {
13057c478bd9Sstevel@tonic-gate 		mutex_enter(&(mfcbp->mfcb_lock));
1306f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1307fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
13087c478bd9Sstevel@tonic-gate 			    "add_mfc: no upcall o %x g %x p %x",
13097c478bd9Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_origin.s_addr),
13107c478bd9Sstevel@tonic-gate 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
13117c478bd9Sstevel@tonic-gate 			    mfccp->mfcc_parent);
13127c478bd9Sstevel@tonic-gate 		}
1313f4b3ec61Sdh155122 		if (is_mrouter_off(ipst)) {
13147c478bd9Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
13157c478bd9Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
13167c478bd9Sstevel@tonic-gate 			return (EINVAL);
13177c478bd9Sstevel@tonic-gate 		}
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate 		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
13207c478bd9Sstevel@tonic-gate 
13217c478bd9Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
13227c478bd9Sstevel@tonic-gate 			if ((rt->mfc_origin.s_addr ==
13237c478bd9Sstevel@tonic-gate 			    mfccp->mfcc_origin.s_addr) &&
13247c478bd9Sstevel@tonic-gate 			    (rt->mfc_mcastgrp.s_addr ==
13257c478bd9Sstevel@tonic-gate 			    mfccp->mfcc_mcastgrp.s_addr) &&
13267c478bd9Sstevel@tonic-gate 			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
1327f4b3ec61Sdh155122 				fill_route(rt, mfccp, ipst);
13287c478bd9Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
13297c478bd9Sstevel@tonic-gate 				break;
13307c478bd9Sstevel@tonic-gate 			}
13317c478bd9Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
13327c478bd9Sstevel@tonic-gate 		}
13337c478bd9Sstevel@tonic-gate 
13347c478bd9Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
13357c478bd9Sstevel@tonic-gate 		if (rt == NULL) {
13367c478bd9Sstevel@tonic-gate 			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
13377c478bd9Sstevel@tonic-gate 			if (rt == NULL) {
13387c478bd9Sstevel@tonic-gate 				ip1dbg(("add_mfc: out of memory\n"));
13397c478bd9Sstevel@tonic-gate 				mutex_exit(&mfcbp->mfcb_lock);
13407c478bd9Sstevel@tonic-gate 				MFCB_REFRELE(mfcbp);
13417c478bd9Sstevel@tonic-gate 				return (ENOBUFS);
13427c478bd9Sstevel@tonic-gate 			}
13437c478bd9Sstevel@tonic-gate 
13447c478bd9Sstevel@tonic-gate 			/* Insert new entry at head of hash chain */
13457c478bd9Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
1346f4b3ec61Sdh155122 			fill_route(rt, mfccp, ipst);
13477c478bd9Sstevel@tonic-gate 
13487c478bd9Sstevel@tonic-gate 			/* Link into table */
13497c478bd9Sstevel@tonic-gate 			rt->mfc_next   = mfcbp->mfcb_mfc;
13507c478bd9Sstevel@tonic-gate 			mfcbp->mfcb_mfc = rt;
13517c478bd9Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
13527c478bd9Sstevel@tonic-gate 		}
13537c478bd9Sstevel@tonic-gate 		mutex_exit(&mfcbp->mfcb_lock);
13547c478bd9Sstevel@tonic-gate 	}
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	MFCB_REFRELE(mfcbp);
13577c478bd9Sstevel@tonic-gate 	return (0);
13587c478bd9Sstevel@tonic-gate }
13597c478bd9Sstevel@tonic-gate 
13607c478bd9Sstevel@tonic-gate /*
13617c478bd9Sstevel@tonic-gate  * Fills in mfc structure from mrouted mfcctl.
13627c478bd9Sstevel@tonic-gate  */
13637c478bd9Sstevel@tonic-gate static void
fill_route(struct mfc * rt,struct mfcctl * mfccp,ip_stack_t * ipst)1364f4b3ec61Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
13657c478bd9Sstevel@tonic-gate {
13667c478bd9Sstevel@tonic-gate 	int i;
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate 	rt->mfc_origin		= mfccp->mfcc_origin;
13697c478bd9Sstevel@tonic-gate 	rt->mfc_mcastgrp	= mfccp->mfcc_mcastgrp;
13707c478bd9Sstevel@tonic-gate 	rt->mfc_parent		= mfccp->mfcc_parent;
1371f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
1372f4b3ec61Sdh155122 	for (i = 0; i < (int)ipst->ips_numvifs; i++) {
13737c478bd9Sstevel@tonic-gate 		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
13747c478bd9Sstevel@tonic-gate 	}
1375f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
13767c478bd9Sstevel@tonic-gate 	/* Initialize pkt counters per src-grp */
13777c478bd9Sstevel@tonic-gate 	rt->mfc_pkt_cnt	= 0;
13787c478bd9Sstevel@tonic-gate 	rt->mfc_byte_cnt	= 0;
13797c478bd9Sstevel@tonic-gate 	rt->mfc_wrong_if	= 0;
13807c478bd9Sstevel@tonic-gate 	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate }
13837c478bd9Sstevel@tonic-gate 
13847c478bd9Sstevel@tonic-gate static void
free_queue(struct mfc * mfcp)13857c478bd9Sstevel@tonic-gate free_queue(struct mfc *mfcp)
13867c478bd9Sstevel@tonic-gate {
13877c478bd9Sstevel@tonic-gate 	struct rtdetq *rte0;
13887c478bd9Sstevel@tonic-gate 
13897c478bd9Sstevel@tonic-gate 	/*
13907c478bd9Sstevel@tonic-gate 	 * Drop all queued upcall packets.
13917c478bd9Sstevel@tonic-gate 	 * Free the mbuf with the pkt.
13927c478bd9Sstevel@tonic-gate 	 */
13937c478bd9Sstevel@tonic-gate 	while ((rte0 = mfcp->mfc_rte) != NULL) {
13947c478bd9Sstevel@tonic-gate 		mfcp->mfc_rte = rte0->rte_next;
13957c478bd9Sstevel@tonic-gate 		freemsg(rte0->mp);
13967c478bd9Sstevel@tonic-gate 		mi_free((char *)rte0);
13977c478bd9Sstevel@tonic-gate 	}
13987c478bd9Sstevel@tonic-gate }
13997c478bd9Sstevel@tonic-gate /*
14007c478bd9Sstevel@tonic-gate  * go thorugh the hash bucket and free all the entries marked condemned.
14017c478bd9Sstevel@tonic-gate  */
14027c478bd9Sstevel@tonic-gate void
release_mfc(struct mfcb * mfcbp)14037c478bd9Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp)
14047c478bd9Sstevel@tonic-gate {
14057c478bd9Sstevel@tonic-gate 	struct mfc *current_mfcp;
14067c478bd9Sstevel@tonic-gate 	struct mfc *prev_mfcp;
14077c478bd9Sstevel@tonic-gate 
14087c478bd9Sstevel@tonic-gate 	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14097c478bd9Sstevel@tonic-gate 
14107c478bd9Sstevel@tonic-gate 	while (current_mfcp != NULL) {
14117c478bd9Sstevel@tonic-gate 		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
14127c478bd9Sstevel@tonic-gate 			if (current_mfcp == mfcbp->mfcb_mfc) {
14137c478bd9Sstevel@tonic-gate 				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
14147c478bd9Sstevel@tonic-gate 				free_queue(current_mfcp);
14157c478bd9Sstevel@tonic-gate 				mi_free(current_mfcp);
14167c478bd9Sstevel@tonic-gate 				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14177c478bd9Sstevel@tonic-gate 				continue;
14187c478bd9Sstevel@tonic-gate 			}
14197c478bd9Sstevel@tonic-gate 			ASSERT(prev_mfcp != NULL);
14207c478bd9Sstevel@tonic-gate 			prev_mfcp->mfc_next = current_mfcp->mfc_next;
14217c478bd9Sstevel@tonic-gate 			free_queue(current_mfcp);
14227c478bd9Sstevel@tonic-gate 			mi_free(current_mfcp);
14237c478bd9Sstevel@tonic-gate 			current_mfcp = NULL;
14247c478bd9Sstevel@tonic-gate 		} else {
14257c478bd9Sstevel@tonic-gate 			prev_mfcp = current_mfcp;
14267c478bd9Sstevel@tonic-gate 		}
14277c478bd9Sstevel@tonic-gate 
14287c478bd9Sstevel@tonic-gate 		current_mfcp = prev_mfcp->mfc_next;
14297c478bd9Sstevel@tonic-gate 
14307c478bd9Sstevel@tonic-gate 	}
14317c478bd9Sstevel@tonic-gate 	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
14327c478bd9Sstevel@tonic-gate 	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
14337c478bd9Sstevel@tonic-gate }
14347c478bd9Sstevel@tonic-gate 
14357c478bd9Sstevel@tonic-gate /*
14367c478bd9Sstevel@tonic-gate  * Delete an mfc entry.
14377c478bd9Sstevel@tonic-gate  */
14387c478bd9Sstevel@tonic-gate static int
del_mfc(struct mfcctl * mfccp,ip_stack_t * ipst)1439f4b3ec61Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
14407c478bd9Sstevel@tonic-gate {
14417c478bd9Sstevel@tonic-gate 	struct in_addr	origin;
14427c478bd9Sstevel@tonic-gate 	struct in_addr	mcastgrp;
14437c478bd9Sstevel@tonic-gate 	struct mfc	*rt;
14447c478bd9Sstevel@tonic-gate 	uint_t		hash;
1445fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
14467c478bd9Sstevel@tonic-gate 
14477c478bd9Sstevel@tonic-gate 	origin = mfccp->mfcc_origin;
14487c478bd9Sstevel@tonic-gate 	mcastgrp = mfccp->mfcc_mcastgrp;
14497c478bd9Sstevel@tonic-gate 	hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
14507c478bd9Sstevel@tonic-gate 
1451f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1452fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
14537c478bd9Sstevel@tonic-gate 		    "del_mfc: o %x g %x",
14547c478bd9Sstevel@tonic-gate 		    ntohl(origin.s_addr),
14557c478bd9Sstevel@tonic-gate 		    ntohl(mcastgrp.s_addr));
14567c478bd9Sstevel@tonic-gate 	}
14577c478bd9Sstevel@tonic-gate 
1458f4b3ec61Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate 	/* Find mfc in mfctable, finds only entries without upcalls */
1461f4b3ec61Sdh155122 	for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
14627c478bd9Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
14637c478bd9Sstevel@tonic-gate 		if (origin.s_addr == rt->mfc_origin.s_addr &&
14647c478bd9Sstevel@tonic-gate 		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
14657c478bd9Sstevel@tonic-gate 		    rt->mfc_rte == NULL &&
14667c478bd9Sstevel@tonic-gate 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
14677c478bd9Sstevel@tonic-gate 			break;
14687c478bd9Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
14697c478bd9Sstevel@tonic-gate 	}
14707c478bd9Sstevel@tonic-gate 
14717c478bd9Sstevel@tonic-gate 	/*
14727c478bd9Sstevel@tonic-gate 	 * Return if there was an upcall (mfc_rte != NULL,
14737c478bd9Sstevel@tonic-gate 	 * or rt not in mfctable.
14747c478bd9Sstevel@tonic-gate 	 */
14757c478bd9Sstevel@tonic-gate 	if (rt == NULL) {
1476f4b3ec61Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[hash]);
14777c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
14787c478bd9Sstevel@tonic-gate 	}
14797c478bd9Sstevel@tonic-gate 
14807c478bd9Sstevel@tonic-gate 
14817c478bd9Sstevel@tonic-gate 	/*
14827c478bd9Sstevel@tonic-gate 	 * no need to hold lock as we have a reference.
14837c478bd9Sstevel@tonic-gate 	 */
1484f4b3ec61Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
14857c478bd9Sstevel@tonic-gate 	/* error checking */
14867c478bd9Sstevel@tonic-gate 	if (rt->mfc_timeout_id != 0) {
14877c478bd9Sstevel@tonic-gate 		ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
14887c478bd9Sstevel@tonic-gate 		/*
14897c478bd9Sstevel@tonic-gate 		 * Its ok to drop the lock,  the struct cannot be freed
14907c478bd9Sstevel@tonic-gate 		 * since we have a ref on the hash bucket.
14917c478bd9Sstevel@tonic-gate 		 */
14927c478bd9Sstevel@tonic-gate 		rt->mfc_timeout_id = 0;
14937c478bd9Sstevel@tonic-gate 		mutex_exit(&rt->mfc_mutex);
14947c478bd9Sstevel@tonic-gate 		(void) untimeout(rt->mfc_timeout_id);
14957c478bd9Sstevel@tonic-gate 		mutex_enter(&rt->mfc_mutex);
14967c478bd9Sstevel@tonic-gate 	}
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	ASSERT(rt->mfc_rte == NULL);
14997c478bd9Sstevel@tonic-gate 
15007c478bd9Sstevel@tonic-gate 
15017c478bd9Sstevel@tonic-gate 	/*
15027c478bd9Sstevel@tonic-gate 	 * Delete the entry from the cache
15037c478bd9Sstevel@tonic-gate 	 */
15047c478bd9Sstevel@tonic-gate 	rt->mfc_marks |= MFCB_MARK_CONDEMNED;
15057c478bd9Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
15067c478bd9Sstevel@tonic-gate 
1507f4b3ec61Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15087c478bd9Sstevel@tonic-gate 
15097c478bd9Sstevel@tonic-gate 	return (0);
15107c478bd9Sstevel@tonic-gate }
15117c478bd9Sstevel@tonic-gate 
15127c478bd9Sstevel@tonic-gate #define	TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
15137c478bd9Sstevel@tonic-gate 
15147c478bd9Sstevel@tonic-gate /*
15157c478bd9Sstevel@tonic-gate  * IP multicast forwarding function. This function assumes that the packet
15167c478bd9Sstevel@tonic-gate  * pointed to by ipha has arrived on (or is about to be sent to) the interface
15177c478bd9Sstevel@tonic-gate  * pointed to by "ill", and the packet is to be relayed to other networks
15187c478bd9Sstevel@tonic-gate  * that have members of the packet's destination IP multicast group.
15197c478bd9Sstevel@tonic-gate  *
15207c478bd9Sstevel@tonic-gate  * The packet is returned unscathed to the caller, unless it is
15217c478bd9Sstevel@tonic-gate  * erroneous, in which case a -1 value tells the caller (IP)
15227c478bd9Sstevel@tonic-gate  * to discard it.
15237c478bd9Sstevel@tonic-gate  *
15247c478bd9Sstevel@tonic-gate  * Unlike BSD, SunOS 5.x needs to return to IP info about
15257c478bd9Sstevel@tonic-gate  * whether pkt came in thru a tunnel, so it can be discarded, unless
15267c478bd9Sstevel@tonic-gate  * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
15277c478bd9Sstevel@tonic-gate  * to be delivered.
15287c478bd9Sstevel@tonic-gate  * Return values are 0 - pkt is okay and phyint
15297c478bd9Sstevel@tonic-gate  *		    -1 - pkt is malformed and to be tossed
15307c478bd9Sstevel@tonic-gate  *                   1 - pkt came in on tunnel
15317c478bd9Sstevel@tonic-gate  */
15327c478bd9Sstevel@tonic-gate int
ip_mforward(mblk_t * mp,ip_recv_attr_t * ira)1533bd670b35SErik Nordmark ip_mforward(mblk_t *mp, ip_recv_attr_t *ira)
15347c478bd9Sstevel@tonic-gate {
1535bd670b35SErik Nordmark 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
1536bd670b35SErik Nordmark 	ill_t		*ill = ira->ira_ill;
15377c478bd9Sstevel@tonic-gate 	struct mfc	*rt;
15387c478bd9Sstevel@tonic-gate 	ipaddr_t	src, dst, tunnel_src = 0;
15397c478bd9Sstevel@tonic-gate 	static int	srctun = 0;
15407c478bd9Sstevel@tonic-gate 	vifi_t		vifi;
15417c478bd9Sstevel@tonic-gate 	boolean_t	pim_reg_packet = B_FALSE;
15427c478bd9Sstevel@tonic-gate 	struct mfcb	*mfcbp;
1543f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
1544fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
1545bd670b35SErik Nordmark 	ill_t		*rill = ira->ira_rill;
1546bd670b35SErik Nordmark 
1547bd670b35SErik Nordmark 	ASSERT(ira->ira_pktlen == msgdsize(mp));
15487c478bd9Sstevel@tonic-gate 
1549f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1550fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15517c478bd9Sstevel@tonic-gate 		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
15527c478bd9Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
15537c478bd9Sstevel@tonic-gate 		    ill->ill_name);
15547c478bd9Sstevel@tonic-gate 	}
15557c478bd9Sstevel@tonic-gate 
15567c478bd9Sstevel@tonic-gate 	dst = ipha->ipha_dst;
1557bd670b35SErik Nordmark 	if (ira->ira_flags & IRAF_PIM_REGISTER)
15587c478bd9Sstevel@tonic-gate 		pim_reg_packet = B_TRUE;
1559bd670b35SErik Nordmark 	else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET)
1560bd670b35SErik Nordmark 		tunnel_src = ira->ira_mroute_tunnel;
15617c478bd9Sstevel@tonic-gate 
15627c478bd9Sstevel@tonic-gate 	/*
15637c478bd9Sstevel@tonic-gate 	 * Don't forward a packet with time-to-live of zero or one,
15647c478bd9Sstevel@tonic-gate 	 * or a packet destined to a local-only group.
15657c478bd9Sstevel@tonic-gate 	 */
15667c478bd9Sstevel@tonic-gate 	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
15677c478bd9Sstevel@tonic-gate 	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
1568f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1569fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15707c478bd9Sstevel@tonic-gate 			    "ip_mforward: not forwarded ttl %d,"
15717c478bd9Sstevel@tonic-gate 			    " dst 0x%x ill %s",
15727c478bd9Sstevel@tonic-gate 			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
15737c478bd9Sstevel@tonic-gate 		}
15747c478bd9Sstevel@tonic-gate 		if (tunnel_src != 0)
15757c478bd9Sstevel@tonic-gate 			return (1);
15767c478bd9Sstevel@tonic-gate 		else
15777c478bd9Sstevel@tonic-gate 			return (0);
15787c478bd9Sstevel@tonic-gate 	}
15797c478bd9Sstevel@tonic-gate 
15807c478bd9Sstevel@tonic-gate 	if ((tunnel_src != 0) || pim_reg_packet) {
15817c478bd9Sstevel@tonic-gate 		/*
15827c478bd9Sstevel@tonic-gate 		 * Packet arrived over an encapsulated tunnel or via a PIM
1583bd670b35SErik Nordmark 		 * register message.
15847c478bd9Sstevel@tonic-gate 		 */
1585f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
15867c478bd9Sstevel@tonic-gate 			if (tunnel_src != 0) {
1587fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
1588f4b3ec61Sdh155122 				    SL_TRACE,
15897c478bd9Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via ENCAP TUN",
15907c478bd9Sstevel@tonic-gate 				    ill->ill_name);
15917c478bd9Sstevel@tonic-gate 			} else if (pim_reg_packet) {
1592fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
1593f4b3ec61Sdh155122 				    SL_TRACE,
15947c478bd9Sstevel@tonic-gate 				    "ip_mforward: ill %s arrived via"
15957c478bd9Sstevel@tonic-gate 				    "  REGISTER VIF",
15967c478bd9Sstevel@tonic-gate 				    ill->ill_name);
15977c478bd9Sstevel@tonic-gate 			}
15987c478bd9Sstevel@tonic-gate 		}
15997c478bd9Sstevel@tonic-gate 	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
16007c478bd9Sstevel@tonic-gate 	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
16017c478bd9Sstevel@tonic-gate 	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
16027c478bd9Sstevel@tonic-gate 		/* Packet arrived via a physical interface. */
1603f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1604fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16057c478bd9Sstevel@tonic-gate 			    "ip_mforward: ill %s arrived via PHYINT",
16067c478bd9Sstevel@tonic-gate 			    ill->ill_name);
16077c478bd9Sstevel@tonic-gate 		}
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate 	} else {
16107c478bd9Sstevel@tonic-gate 		/*
16117c478bd9Sstevel@tonic-gate 		 * Packet arrived through a SRCRT tunnel.
16127c478bd9Sstevel@tonic-gate 		 * Source-route tunnels are no longer supported.
16137c478bd9Sstevel@tonic-gate 		 * Error message printed every 1000 times.
16147c478bd9Sstevel@tonic-gate 		 */
16157c478bd9Sstevel@tonic-gate 		if ((srctun++ % 1000) == 0) {
16167c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
16177c478bd9Sstevel@tonic-gate 			    "ip_mforward: received source-routed pkt from %x",
16187c478bd9Sstevel@tonic-gate 			    ntohl(ipha->ipha_src));
16197c478bd9Sstevel@tonic-gate 		}
16207c478bd9Sstevel@tonic-gate 		return (-1);
16217c478bd9Sstevel@tonic-gate 	}
16227c478bd9Sstevel@tonic-gate 
1623f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_fwd_in++;
16247c478bd9Sstevel@tonic-gate 	src = ipha->ipha_src;
16257c478bd9Sstevel@tonic-gate 
16267c478bd9Sstevel@tonic-gate 	/* Find route in cache, return NULL if not there or upcalls q'ed. */
16277c478bd9Sstevel@tonic-gate 
16287c478bd9Sstevel@tonic-gate 	/*
16297c478bd9Sstevel@tonic-gate 	 * Lock the mfctable against changes made by ip_mforward.
16307c478bd9Sstevel@tonic-gate 	 * Note that only add_mfc and del_mfc can remove entries and
16317c478bd9Sstevel@tonic-gate 	 * they run with exclusive access to IP. So we do not need to
16327c478bd9Sstevel@tonic-gate 	 * guard against the rt being deleted, so release lock after reading.
16337c478bd9Sstevel@tonic-gate 	 */
16347c478bd9Sstevel@tonic-gate 
1635f4b3ec61Sdh155122 	if (is_mrouter_off(ipst))
16367c478bd9Sstevel@tonic-gate 		return (-1);
16377c478bd9Sstevel@tonic-gate 
1638f4b3ec61Sdh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
16397c478bd9Sstevel@tonic-gate 	MFCB_REFHOLD(mfcbp);
16407c478bd9Sstevel@tonic-gate 	MFCFIND(mfcbp, src, dst, rt);
16417c478bd9Sstevel@tonic-gate 
16427c478bd9Sstevel@tonic-gate 	/* Entry exists, so forward if necessary */
16437c478bd9Sstevel@tonic-gate 	if (rt != NULL) {
16447c478bd9Sstevel@tonic-gate 		int ret = 0;
1645f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_mfc_hits++;
16467c478bd9Sstevel@tonic-gate 		if (pim_reg_packet) {
1647f4b3ec61Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
16487c478bd9Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha,
1649f4b3ec61Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
1650f4b3ec61Sdh155122 			    v_ipif->ipif_ill,
1651f4b3ec61Sdh155122 			    0, rt);
16527c478bd9Sstevel@tonic-gate 		} else {
16537c478bd9Sstevel@tonic-gate 			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
16547c478bd9Sstevel@tonic-gate 		}
16557c478bd9Sstevel@tonic-gate 
16567c478bd9Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
16577c478bd9Sstevel@tonic-gate 		return (ret);
16587c478bd9Sstevel@tonic-gate 
16597c478bd9Sstevel@tonic-gate 		/*
16607c478bd9Sstevel@tonic-gate 		 * Don't forward if we don't have a cache entry.  Mrouted will
16617c478bd9Sstevel@tonic-gate 		 * always provide a cache entry in response to an upcall.
16627c478bd9Sstevel@tonic-gate 		 */
16637c478bd9Sstevel@tonic-gate 	} else {
16647c478bd9Sstevel@tonic-gate 		/*
16657c478bd9Sstevel@tonic-gate 		 * If we don't have a route for packet's origin, make a copy
16667c478bd9Sstevel@tonic-gate 		 * of the packet and send message to routing daemon.
16677c478bd9Sstevel@tonic-gate 		 */
16687c478bd9Sstevel@tonic-gate 		struct mfc	*mfc_rt	 = NULL;
16697c478bd9Sstevel@tonic-gate 		mblk_t		*mp0	 = NULL;
16707c478bd9Sstevel@tonic-gate 		mblk_t		*mp_copy = NULL;
16717c478bd9Sstevel@tonic-gate 		struct rtdetq	*rte	 = NULL;
16727c478bd9Sstevel@tonic-gate 		struct rtdetq	*rte_m, *rte1, *prev_rte;
16737c478bd9Sstevel@tonic-gate 		uint_t		hash;
16747c478bd9Sstevel@tonic-gate 		int		npkts;
16757c478bd9Sstevel@tonic-gate 		boolean_t	new_mfc = B_FALSE;
1676f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_mfc_misses++;
16777c478bd9Sstevel@tonic-gate 		/* BSD uses mrts_no_route++ */
1678f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1679fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16807c478bd9Sstevel@tonic-gate 			    "ip_mforward: no rte ill %s src %x g %x misses %d",
16817c478bd9Sstevel@tonic-gate 			    ill->ill_name, ntohl(src), ntohl(dst),
1682f4b3ec61Sdh155122 			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
16837c478bd9Sstevel@tonic-gate 		}
16847c478bd9Sstevel@tonic-gate 		/*
16857c478bd9Sstevel@tonic-gate 		 * The order of the following code differs from the BSD code.
16867c478bd9Sstevel@tonic-gate 		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
16877c478bd9Sstevel@tonic-gate 		 * code works, so SunOS 5.x wasn't changed to conform to the
16887c478bd9Sstevel@tonic-gate 		 * BSD version.
16897c478bd9Sstevel@tonic-gate 		 */
16907c478bd9Sstevel@tonic-gate 
16917c478bd9Sstevel@tonic-gate 		/* Lock mfctable. */
16927c478bd9Sstevel@tonic-gate 		hash = MFCHASH(src, dst);
1693f4b3ec61Sdh155122 		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 		/*
16967c478bd9Sstevel@tonic-gate 		 * If we are turning off mrouted return an error
16977c478bd9Sstevel@tonic-gate 		 */
1698f4b3ec61Sdh155122 		if (is_mrouter_off(ipst)) {
16997c478bd9Sstevel@tonic-gate 			mutex_exit(&mfcbp->mfcb_lock);
17007c478bd9Sstevel@tonic-gate 			MFCB_REFRELE(mfcbp);
17017c478bd9Sstevel@tonic-gate 			return (-1);
17027c478bd9Sstevel@tonic-gate 		}
17037c478bd9Sstevel@tonic-gate 
17047c478bd9Sstevel@tonic-gate 		/* Is there an upcall waiting for this packet? */
1705f4b3ec61Sdh155122 		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
17067c478bd9Sstevel@tonic-gate 		    mfc_rt = mfc_rt->mfc_next) {
17077c478bd9Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
1708f4b3ec61Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
1709fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
1710f4b3ec61Sdh155122 				    SL_TRACE,
17117c478bd9Sstevel@tonic-gate 				    "ip_mforward: MFCTAB hash %d o 0x%x"
17127c478bd9Sstevel@tonic-gate 				    " g 0x%x\n",
17137c478bd9Sstevel@tonic-gate 				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
17147c478bd9Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
17157c478bd9Sstevel@tonic-gate 			}
17167c478bd9Sstevel@tonic-gate 			/* There is an upcall */
17177c478bd9Sstevel@tonic-gate 			if ((src == mfc_rt->mfc_origin.s_addr) &&
17187c478bd9Sstevel@tonic-gate 			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
17197c478bd9Sstevel@tonic-gate 			    (mfc_rt->mfc_rte != NULL) &&
17207c478bd9Sstevel@tonic-gate 			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
17217c478bd9Sstevel@tonic-gate 				break;
17227c478bd9Sstevel@tonic-gate 			}
17237c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17247c478bd9Sstevel@tonic-gate 		}
17257c478bd9Sstevel@tonic-gate 		/* No upcall, so make a new entry into mfctable */
17267c478bd9Sstevel@tonic-gate 		if (mfc_rt == NULL) {
17277c478bd9Sstevel@tonic-gate 			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
17287c478bd9Sstevel@tonic-gate 			if (mfc_rt == NULL) {
1729f4b3ec61Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
17307c478bd9Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory "
17317c478bd9Sstevel@tonic-gate 				    "for mfc, mfc_rt\n"));
17327c478bd9Sstevel@tonic-gate 				goto error_return;
17337c478bd9Sstevel@tonic-gate 			} else
17347c478bd9Sstevel@tonic-gate 				new_mfc = B_TRUE;
17357c478bd9Sstevel@tonic-gate 			/* Get resources */
17367c478bd9Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
17377c478bd9Sstevel@tonic-gate 			mp_copy = copymsg(mp);
17387c478bd9Sstevel@tonic-gate 			if (mp_copy == NULL) {
1739f4b3ec61Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
17407c478bd9Sstevel@tonic-gate 				ip1dbg(("ip_mforward: out of memory for "
17417c478bd9Sstevel@tonic-gate 				    "mblk, mp_copy\n"));
17427c478bd9Sstevel@tonic-gate 				goto error_return;
17437c478bd9Sstevel@tonic-gate 			}
17447c478bd9Sstevel@tonic-gate 			mutex_enter(&mfc_rt->mfc_mutex);
17457c478bd9Sstevel@tonic-gate 		}
17467c478bd9Sstevel@tonic-gate 		/* Get resources for rte, whether first rte or not first. */
17477c478bd9Sstevel@tonic-gate 		/* Add this packet into rtdetq */
17487c478bd9Sstevel@tonic-gate 		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
17497c478bd9Sstevel@tonic-gate 		if (rte == NULL) {
1750f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
17517c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17527c478bd9Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for"
17537c478bd9Sstevel@tonic-gate 			    " rtdetq, rte\n"));
17547c478bd9Sstevel@tonic-gate 			goto error_return;
17557c478bd9Sstevel@tonic-gate 		}
17567c478bd9Sstevel@tonic-gate 
17577c478bd9Sstevel@tonic-gate 		mp0 = copymsg(mp);
17587c478bd9Sstevel@tonic-gate 		if (mp0 == NULL) {
1759f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
17607c478bd9Sstevel@tonic-gate 			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
17617c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17627c478bd9Sstevel@tonic-gate 			goto error_return;
17637c478bd9Sstevel@tonic-gate 		}
17647c478bd9Sstevel@tonic-gate 		rte->mp		= mp0;
17657c478bd9Sstevel@tonic-gate 		if (pim_reg_packet) {
1766f4b3ec61Sdh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
1767f4b3ec61Sdh155122 			rte->ill =
1768f4b3ec61Sdh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
1769f4b3ec61Sdh155122 			    v_ipif->ipif_ill;
17707c478bd9Sstevel@tonic-gate 		} else {
17717c478bd9Sstevel@tonic-gate 			rte->ill = ill;
17727c478bd9Sstevel@tonic-gate 		}
17737c478bd9Sstevel@tonic-gate 		rte->rte_next	= NULL;
17747c478bd9Sstevel@tonic-gate 
17757c478bd9Sstevel@tonic-gate 		/*
17767c478bd9Sstevel@tonic-gate 		 * Determine if upcall q (rtdetq) has overflowed.
17777c478bd9Sstevel@tonic-gate 		 * mfc_rt->mfc_rte is null by mi_zalloc
17787c478bd9Sstevel@tonic-gate 		 * if it is the first message.
17797c478bd9Sstevel@tonic-gate 		 */
17807c478bd9Sstevel@tonic-gate 		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
17817c478bd9Sstevel@tonic-gate 		    rte_m = rte_m->rte_next)
17827c478bd9Sstevel@tonic-gate 			npkts++;
1783f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
1784fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
17857c478bd9Sstevel@tonic-gate 			    "ip_mforward: upcalls %d\n", npkts);
17867c478bd9Sstevel@tonic-gate 		}
17877c478bd9Sstevel@tonic-gate 		if (npkts > MAX_UPQ) {
1788f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_upq_ovflw++;
17897c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
17907c478bd9Sstevel@tonic-gate 			goto error_return;
17917c478bd9Sstevel@tonic-gate 		}
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 		if (npkts == 0) {	/* first upcall */
17947c478bd9Sstevel@tonic-gate 			int i = 0;
17957c478bd9Sstevel@tonic-gate 			/*
17967c478bd9Sstevel@tonic-gate 			 * Now finish installing the new mfc! Now that we have
17977c478bd9Sstevel@tonic-gate 			 * resources!  Insert new entry at head of hash chain.
17987c478bd9Sstevel@tonic-gate 			 * Use src and dst which are ipaddr_t's.
17997c478bd9Sstevel@tonic-gate 			 */
18007c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_origin.s_addr = src;
18017c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_mcastgrp.s_addr = dst;
18027c478bd9Sstevel@tonic-gate 
1803f4b3ec61Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
1804f4b3ec61Sdh155122 			for (i = 0; i < (int)ipst->ips_numvifs; i++)
18057c478bd9Sstevel@tonic-gate 				mfc_rt->mfc_ttls[i] = 0;
1806f4b3ec61Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
18077c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_parent = ALL_VIFS;
18087c478bd9Sstevel@tonic-gate 
18097c478bd9Sstevel@tonic-gate 			/* Link into table */
1810f4b3ec61Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
1811fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
1812f4b3ec61Sdh155122 				    SL_TRACE,
18137c478bd9Sstevel@tonic-gate 				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
18147c478bd9Sstevel@tonic-gate 				    "g 0x%x\n", hash,
18157c478bd9Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_origin.s_addr),
18167c478bd9Sstevel@tonic-gate 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
18177c478bd9Sstevel@tonic-gate 			}
1818f4b3ec61Sdh155122 			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
1819f4b3ec61Sdh155122 			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
18207c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_rte = NULL;
18217c478bd9Sstevel@tonic-gate 		}
18227c478bd9Sstevel@tonic-gate 
18237c478bd9Sstevel@tonic-gate 		/* Link in the upcall */
18247c478bd9Sstevel@tonic-gate 		/* First upcall */
18257c478bd9Sstevel@tonic-gate 		if (mfc_rt->mfc_rte == NULL)
18267c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_rte = rte;
18277c478bd9Sstevel@tonic-gate 		else {
18287c478bd9Sstevel@tonic-gate 			/* not the first upcall */
18297c478bd9Sstevel@tonic-gate 			prev_rte = mfc_rt->mfc_rte;
18307c478bd9Sstevel@tonic-gate 			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
1831fc80c0dfSnordmark 			    prev_rte = rte1, rte1 = rte1->rte_next)
1832fc80c0dfSnordmark 				;
18337c478bd9Sstevel@tonic-gate 			prev_rte->rte_next = rte;
18347c478bd9Sstevel@tonic-gate 		}
18357c478bd9Sstevel@tonic-gate 
18367c478bd9Sstevel@tonic-gate 		/*
18377c478bd9Sstevel@tonic-gate 		 * No upcalls waiting, this is first one, so send a message to
18387c478bd9Sstevel@tonic-gate 		 * routing daemon to install a route into kernel table.
18397c478bd9Sstevel@tonic-gate 		 */
18407c478bd9Sstevel@tonic-gate 		if (npkts == 0) {
18417c478bd9Sstevel@tonic-gate 			struct igmpmsg	*im;
18427c478bd9Sstevel@tonic-gate 			/* ipha_protocol is 0, for upcall */
18437c478bd9Sstevel@tonic-gate 			ASSERT(mp_copy != NULL);
18447c478bd9Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
18457c478bd9Sstevel@tonic-gate 			im->im_msgtype	= IGMPMSG_NOCACHE;
18467c478bd9Sstevel@tonic-gate 			im->im_mbz = 0;
1847f4b3ec61Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
18487c478bd9Sstevel@tonic-gate 			if (pim_reg_packet) {
1849f4b3ec61Sdh155122 				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
1850f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
18517c478bd9Sstevel@tonic-gate 			} else {
18527c478bd9Sstevel@tonic-gate 				/*
18537c478bd9Sstevel@tonic-gate 				 * XXX do we need to hold locks here ?
18547c478bd9Sstevel@tonic-gate 				 */
1855f4b3ec61Sdh155122 				for (vifi = 0;
1856f4b3ec61Sdh155122 				    vifi < ipst->ips_numvifs;
1857f4b3ec61Sdh155122 				    vifi++) {
1858f4b3ec61Sdh155122 					if (ipst->ips_vifs[vifi].v_ipif == NULL)
18597c478bd9Sstevel@tonic-gate 						continue;
1860f4b3ec61Sdh155122 					if (ipst->ips_vifs[vifi].
1861f4b3ec61Sdh155122 					    v_ipif->ipif_ill == ill) {
18627c478bd9Sstevel@tonic-gate 						im->im_vif = (uchar_t)vifi;
18637c478bd9Sstevel@tonic-gate 						break;
18647c478bd9Sstevel@tonic-gate 					}
18657c478bd9Sstevel@tonic-gate 				}
1866f4b3ec61Sdh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
1867f4b3ec61Sdh155122 				ASSERT(vifi < ipst->ips_numvifs);
18687c478bd9Sstevel@tonic-gate 			}
18697c478bd9Sstevel@tonic-gate 
1870f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_upcalls++;
18717c478bd9Sstevel@tonic-gate 			/* Timer to discard upcalls if mrouted is too slow */
18727c478bd9Sstevel@tonic-gate 			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
18737c478bd9Sstevel@tonic-gate 			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
18747c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
1875f4b3ec61Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
1876fc80c0dfSnordmark 			/* Pass to RAWIP */
1877bd670b35SErik Nordmark 			ira->ira_ill = ira->ira_rill = NULL;
1878bd670b35SErik Nordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL, ira);
1879bd670b35SErik Nordmark 			ira->ira_ill = ill;
1880bd670b35SErik Nordmark 			ira->ira_rill = rill;
18817c478bd9Sstevel@tonic-gate 		} else {
18827c478bd9Sstevel@tonic-gate 			mutex_exit(&mfc_rt->mfc_mutex);
1883f4b3ec61Sdh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
1884bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1885bd670b35SErik Nordmark 			ip_drop_input("ip_mforward - upcall already waiting",
1886bd670b35SErik Nordmark 			    mp_copy, ill);
18877c478bd9Sstevel@tonic-gate 			freemsg(mp_copy);
18887c478bd9Sstevel@tonic-gate 		}
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
18917c478bd9Sstevel@tonic-gate 		if (tunnel_src != 0)
18927c478bd9Sstevel@tonic-gate 			return (1);
18937c478bd9Sstevel@tonic-gate 		else
18947c478bd9Sstevel@tonic-gate 			return (0);
18957c478bd9Sstevel@tonic-gate 	error_return:
1896f4b3ec61Sdh155122 		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
18977c478bd9Sstevel@tonic-gate 		MFCB_REFRELE(mfcbp);
18987c478bd9Sstevel@tonic-gate 		if (mfc_rt != NULL && (new_mfc == B_TRUE))
18997c478bd9Sstevel@tonic-gate 			mi_free((char *)mfc_rt);
19007c478bd9Sstevel@tonic-gate 		if (rte != NULL)
19017c478bd9Sstevel@tonic-gate 			mi_free((char *)rte);
1902bd670b35SErik Nordmark 		if (mp_copy != NULL) {
1903bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1904bd670b35SErik Nordmark 			ip_drop_input("ip_mforward error", mp_copy, ill);
19057c478bd9Sstevel@tonic-gate 			freemsg(mp_copy);
1906bd670b35SErik Nordmark 		}
19077c478bd9Sstevel@tonic-gate 		if (mp0 != NULL)
19087c478bd9Sstevel@tonic-gate 			freemsg(mp0);
19097c478bd9Sstevel@tonic-gate 		return (-1);
19107c478bd9Sstevel@tonic-gate 	}
19117c478bd9Sstevel@tonic-gate }
19127c478bd9Sstevel@tonic-gate 
19137c478bd9Sstevel@tonic-gate /*
19147c478bd9Sstevel@tonic-gate  * Clean up the mfctable cache entry if upcall is not serviced.
19157c478bd9Sstevel@tonic-gate  * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19167c478bd9Sstevel@tonic-gate  */
19177c478bd9Sstevel@tonic-gate static void
expire_upcalls(void * arg)19187c478bd9Sstevel@tonic-gate expire_upcalls(void *arg)
19197c478bd9Sstevel@tonic-gate {
19207c478bd9Sstevel@tonic-gate 	struct mfc *mfc_rt = arg;
19217c478bd9Sstevel@tonic-gate 	uint_t hash;
19227c478bd9Sstevel@tonic-gate 	struct mfc *prev_mfc, *mfc0;
1923f4b3ec61Sdh155122 	ip_stack_t	*ipst;
1924fc80c0dfSnordmark 	conn_t		*mrouter;
1925f4b3ec61Sdh155122 
1926f4b3ec61Sdh155122 	if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) {
1927f4b3ec61Sdh155122 		cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
1928f4b3ec61Sdh155122 		return;
1929f4b3ec61Sdh155122 	}
1930f4b3ec61Sdh155122 	ipst = mfc_rt->mfc_rte->ill->ill_ipst;
1931fc80c0dfSnordmark 	mrouter = ipst->ips_ip_g_mrouter;
19327c478bd9Sstevel@tonic-gate 
19337c478bd9Sstevel@tonic-gate 	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
1934f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1935fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19367c478bd9Sstevel@tonic-gate 		    "expire_upcalls: hash %d s %x g %x",
19377c478bd9Sstevel@tonic-gate 		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
19387c478bd9Sstevel@tonic-gate 		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
19397c478bd9Sstevel@tonic-gate 	}
1940f4b3ec61Sdh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
19417c478bd9Sstevel@tonic-gate 	mutex_enter(&mfc_rt->mfc_mutex);
19427c478bd9Sstevel@tonic-gate 	/*
19437c478bd9Sstevel@tonic-gate 	 * if timeout has been set to zero, than the
19447c478bd9Sstevel@tonic-gate 	 * entry has been filled, no need to delete it.
19457c478bd9Sstevel@tonic-gate 	 */
19467c478bd9Sstevel@tonic-gate 	if (mfc_rt->mfc_timeout_id == 0)
19477c478bd9Sstevel@tonic-gate 		goto done;
1948f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_cache_cleanups++;
19497c478bd9Sstevel@tonic-gate 	mfc_rt->mfc_timeout_id = 0;
19507c478bd9Sstevel@tonic-gate 
19517c478bd9Sstevel@tonic-gate 	/* Determine entry to be cleaned up in cache table. */
1952f4b3ec61Sdh155122 	for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
19537c478bd9Sstevel@tonic-gate 	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
19547c478bd9Sstevel@tonic-gate 		if (mfc0 == mfc_rt)
19557c478bd9Sstevel@tonic-gate 			break;
19567c478bd9Sstevel@tonic-gate 
19577c478bd9Sstevel@tonic-gate 	/* del_mfc takes care of gone mfcs */
19587c478bd9Sstevel@tonic-gate 	ASSERT(prev_mfc != NULL);
19597c478bd9Sstevel@tonic-gate 	ASSERT(mfc0 != NULL);
19607c478bd9Sstevel@tonic-gate 
19617c478bd9Sstevel@tonic-gate 	/*
19627c478bd9Sstevel@tonic-gate 	 * Delete the entry from the cache
19637c478bd9Sstevel@tonic-gate 	 */
1964f4b3ec61Sdh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
19657c478bd9Sstevel@tonic-gate 	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	/*
19687c478bd9Sstevel@tonic-gate 	 * release_mfc will drop all queued upcall packets.
19697c478bd9Sstevel@tonic-gate 	 * and will free the mbuf with the pkt, if, timing info.
19707c478bd9Sstevel@tonic-gate 	 */
19717c478bd9Sstevel@tonic-gate done:
19727c478bd9Sstevel@tonic-gate 	mutex_exit(&mfc_rt->mfc_mutex);
1973f4b3ec61Sdh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
19747c478bd9Sstevel@tonic-gate }
19757c478bd9Sstevel@tonic-gate 
19767c478bd9Sstevel@tonic-gate /*
19777c478bd9Sstevel@tonic-gate  * Packet forwarding routine once entry in the cache is made.
19787c478bd9Sstevel@tonic-gate  */
19797c478bd9Sstevel@tonic-gate static int
ip_mdq(mblk_t * mp,ipha_t * ipha,ill_t * ill,ipaddr_t tunnel_src,struct mfc * rt)19807c478bd9Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
19817c478bd9Sstevel@tonic-gate     struct mfc *rt)
19827c478bd9Sstevel@tonic-gate {
19837c478bd9Sstevel@tonic-gate 	vifi_t vifi;
19847c478bd9Sstevel@tonic-gate 	struct vif *vifp;
19857c478bd9Sstevel@tonic-gate 	ipaddr_t dst = ipha->ipha_dst;
19867c478bd9Sstevel@tonic-gate 	size_t  plen = msgdsize(mp);
19877c478bd9Sstevel@tonic-gate 	vifi_t num_of_vifs;
1988f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
1989fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
1990bd670b35SErik Nordmark 	ip_recv_attr_t	iras;
19917c478bd9Sstevel@tonic-gate 
1992f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
1993fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19947c478bd9Sstevel@tonic-gate 		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
19957c478bd9Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
19967c478bd9Sstevel@tonic-gate 		    ill->ill_name);
19977c478bd9Sstevel@tonic-gate 	}
19987c478bd9Sstevel@tonic-gate 
19997c478bd9Sstevel@tonic-gate 	/* Macro to send packet on vif */
20007c478bd9Sstevel@tonic-gate #define	MC_SEND(ipha, mp, vifp, dst) { \
20017c478bd9Sstevel@tonic-gate 	if ((vifp)->v_flags & VIFF_TUNNEL) \
20027c478bd9Sstevel@tonic-gate 		encap_send((ipha), (mp), (vifp), (dst)); \
20037c478bd9Sstevel@tonic-gate 	else if ((vifp)->v_flags & VIFF_REGISTER) \
20047c478bd9Sstevel@tonic-gate 		register_send((ipha), (mp), (vifp), (dst)); \
20057c478bd9Sstevel@tonic-gate 	else \
20067c478bd9Sstevel@tonic-gate 		phyint_send((ipha), (mp), (vifp), (dst)); \
20077c478bd9Sstevel@tonic-gate }
20087c478bd9Sstevel@tonic-gate 
20097c478bd9Sstevel@tonic-gate 	vifi = rt->mfc_parent;
20107c478bd9Sstevel@tonic-gate 
20117c478bd9Sstevel@tonic-gate 	/*
20127c478bd9Sstevel@tonic-gate 	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
20137c478bd9Sstevel@tonic-gate 	 * Mrouted had no route.
20147c478bd9Sstevel@tonic-gate 	 * We wanted the route installed in the mfctable to prevent multiple
20157c478bd9Sstevel@tonic-gate 	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
20167c478bd9Sstevel@tonic-gate 	 * NULL so we don't want to check the ill. Still needed as of Mrouted
20177c478bd9Sstevel@tonic-gate 	 * 3.6.
20187c478bd9Sstevel@tonic-gate 	 */
20197c478bd9Sstevel@tonic-gate 	if (vifi == NO_VIF) {
20207c478bd9Sstevel@tonic-gate 		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
20217c478bd9Sstevel@tonic-gate 		    ill->ill_name));
2022f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2023fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20247c478bd9Sstevel@tonic-gate 			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
20257c478bd9Sstevel@tonic-gate 		}
20267c478bd9Sstevel@tonic-gate 		return (-1);	/* drop pkt */
20277c478bd9Sstevel@tonic-gate 	}
20287c478bd9Sstevel@tonic-gate 
2029f4b3ec61Sdh155122 	if (!lock_good_vif(&ipst->ips_vifs[vifi]))
20307c478bd9Sstevel@tonic-gate 		return (-1);
20317c478bd9Sstevel@tonic-gate 	/*
20327c478bd9Sstevel@tonic-gate 	 * The MFC entries are not cleaned up when an ipif goes
20337c478bd9Sstevel@tonic-gate 	 * away thus this code has to guard against an MFC referencing
20347c478bd9Sstevel@tonic-gate 	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
20357c478bd9Sstevel@tonic-gate 	 * sets the v_ipif to NULL when the ipif disappears.
20367c478bd9Sstevel@tonic-gate 	 */
2037f4b3ec61Sdh155122 	ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
20387c478bd9Sstevel@tonic-gate 
2039f4b3ec61Sdh155122 	if (vifi >= ipst->ips_numvifs) {
20407c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
20417c478bd9Sstevel@tonic-gate 		    "%d ill %s viftable ill %s\n",
2042f4b3ec61Sdh155122 		    (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
2043f4b3ec61Sdh155122 		    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
2044f4b3ec61Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
20457c478bd9Sstevel@tonic-gate 		return (-1);
20467c478bd9Sstevel@tonic-gate 	}
20477c478bd9Sstevel@tonic-gate 	/*
20487c478bd9Sstevel@tonic-gate 	 * Don't forward if it didn't arrive from the parent vif for its
2049e11c3f44Smeem 	 * origin.
20507c478bd9Sstevel@tonic-gate 	 */
2051bd670b35SErik Nordmark 	if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) ||
2052f4b3ec61Sdh155122 	    (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
20537c478bd9Sstevel@tonic-gate 		/* Came in the wrong interface */
20547c478bd9Sstevel@tonic-gate 		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
20557c478bd9Sstevel@tonic-gate 			"numvifs %d ill %s viftable ill %s\n",
2056f4b3ec61Sdh155122 			(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
2057bd670b35SErik Nordmark 			ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name));
2058f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2059fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20607c478bd9Sstevel@tonic-gate 			    "ip_mdq: arrived wrong if, vifi %d ill "
20617c478bd9Sstevel@tonic-gate 			    "%s viftable ill %s\n",
2062bd670b35SErik Nordmark 			    (int)vifi, ill->ill_name,
2063bd670b35SErik Nordmark 			    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
20647c478bd9Sstevel@tonic-gate 		}
2065f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_wrong_if++;
20667c478bd9Sstevel@tonic-gate 		rt->mfc_wrong_if++;
20677c478bd9Sstevel@tonic-gate 
20687c478bd9Sstevel@tonic-gate 		/*
20697c478bd9Sstevel@tonic-gate 		 * If we are doing PIM assert processing and we are forwarding
20707c478bd9Sstevel@tonic-gate 		 * packets on this interface, and it is a broadcast medium
20717c478bd9Sstevel@tonic-gate 		 * interface (and not a tunnel), send a message to the routing.
20727c478bd9Sstevel@tonic-gate 		 *
20737c478bd9Sstevel@tonic-gate 		 * We use the first ipif on the list, since it's all we have.
20747c478bd9Sstevel@tonic-gate 		 * Chances are the ipif_flags are the same for ipifs on the ill.
20757c478bd9Sstevel@tonic-gate 		 */
2076f4b3ec61Sdh155122 		if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
20777c478bd9Sstevel@tonic-gate 		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
2078f4b3ec61Sdh155122 		    !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
20797c478bd9Sstevel@tonic-gate 			mblk_t		*mp_copy;
20807c478bd9Sstevel@tonic-gate 			struct igmpmsg	*im;
20817c478bd9Sstevel@tonic-gate 
20827c478bd9Sstevel@tonic-gate 			/* TODO could copy header and dup rest */
20837c478bd9Sstevel@tonic-gate 			mp_copy = copymsg(mp);
20847c478bd9Sstevel@tonic-gate 			if (mp_copy == NULL) {
2085f4b3ec61Sdh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
20867c478bd9Sstevel@tonic-gate 				ip1dbg(("ip_mdq: out of memory "
20877c478bd9Sstevel@tonic-gate 				    "for mblk, mp_copy\n"));
2088f4b3ec61Sdh155122 				unlock_good_vif(&ipst->ips_vifs[vifi]);
20897c478bd9Sstevel@tonic-gate 				return (-1);
20907c478bd9Sstevel@tonic-gate 			}
20917c478bd9Sstevel@tonic-gate 
20927c478bd9Sstevel@tonic-gate 			im = (struct igmpmsg *)mp_copy->b_rptr;
20937c478bd9Sstevel@tonic-gate 			im->im_msgtype = IGMPMSG_WRONGVIF;
20947c478bd9Sstevel@tonic-gate 			im->im_mbz = 0;
20957c478bd9Sstevel@tonic-gate 			im->im_vif = (ushort_t)vifi;
2096fc80c0dfSnordmark 			/* Pass to RAWIP */
2097bd670b35SErik Nordmark 
2098bd670b35SErik Nordmark 			bzero(&iras, sizeof (iras));
2099bd670b35SErik Nordmark 			iras.ira_flags = IRAF_IS_IPV4;
2100bd670b35SErik Nordmark 			iras.ira_ip_hdr_length =
2101bd670b35SErik Nordmark 			    IPH_HDR_LENGTH(mp_copy->b_rptr);
2102bd670b35SErik Nordmark 			iras.ira_pktlen = msgdsize(mp_copy);
2103*e8249070SRobert Mustacchi 			iras.ira_ttl = ipha->ipha_ttl;
2104bd670b35SErik Nordmark 			(mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
2105bd670b35SErik Nordmark 			ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
21067c478bd9Sstevel@tonic-gate 		}
2107f4b3ec61Sdh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
21087c478bd9Sstevel@tonic-gate 		if (tunnel_src != 0)
21097c478bd9Sstevel@tonic-gate 			return (1);
21107c478bd9Sstevel@tonic-gate 		else
21117c478bd9Sstevel@tonic-gate 			return (0);
21127c478bd9Sstevel@tonic-gate 	}
21137c478bd9Sstevel@tonic-gate 	/*
21147c478bd9Sstevel@tonic-gate 	 * If I sourced this packet, it counts as output, else it was input.
21157c478bd9Sstevel@tonic-gate 	 */
2116f4b3ec61Sdh155122 	if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
2117f4b3ec61Sdh155122 		ipst->ips_vifs[vifi].v_pkt_out++;
2118f4b3ec61Sdh155122 		ipst->ips_vifs[vifi].v_bytes_out += plen;
21197c478bd9Sstevel@tonic-gate 	} else {
2120f4b3ec61Sdh155122 		ipst->ips_vifs[vifi].v_pkt_in++;
2121f4b3ec61Sdh155122 		ipst->ips_vifs[vifi].v_bytes_in += plen;
21227c478bd9Sstevel@tonic-gate 	}
21237c478bd9Sstevel@tonic-gate 	mutex_enter(&rt->mfc_mutex);
21247c478bd9Sstevel@tonic-gate 	rt->mfc_pkt_cnt++;
21257c478bd9Sstevel@tonic-gate 	rt->mfc_byte_cnt += plen;
21267c478bd9Sstevel@tonic-gate 	mutex_exit(&rt->mfc_mutex);
2127f4b3ec61Sdh155122 	unlock_good_vif(&ipst->ips_vifs[vifi]);
21287c478bd9Sstevel@tonic-gate 	/*
21297c478bd9Sstevel@tonic-gate 	 * For each vif, decide if a copy of the packet should be forwarded.
21307c478bd9Sstevel@tonic-gate 	 * Forward if:
21317c478bd9Sstevel@tonic-gate 	 *		- the vif threshold ttl is non-zero AND
21327c478bd9Sstevel@tonic-gate 	 *		- the pkt ttl exceeds the vif's threshold
21337c478bd9Sstevel@tonic-gate 	 * A non-zero mfc_ttl indicates that the vif is part of
21347c478bd9Sstevel@tonic-gate 	 * the output set for the mfc entry.
21357c478bd9Sstevel@tonic-gate 	 */
2136f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
2137f4b3ec61Sdh155122 	num_of_vifs = ipst->ips_numvifs;
2138f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
2139f4b3ec61Sdh155122 	for (vifp = ipst->ips_vifs, vifi = 0;
2140f4b3ec61Sdh155122 	    vifi < num_of_vifs;
2141f4b3ec61Sdh155122 	    vifp++, vifi++) {
21427c478bd9Sstevel@tonic-gate 		if (!lock_good_vif(vifp))
21437c478bd9Sstevel@tonic-gate 			continue;
21447c478bd9Sstevel@tonic-gate 		if ((rt->mfc_ttls[vifi] > 0) &&
21457c478bd9Sstevel@tonic-gate 		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
21467c478bd9Sstevel@tonic-gate 			/*
21477c478bd9Sstevel@tonic-gate 			 * lock_good_vif should not have succedded if
21487c478bd9Sstevel@tonic-gate 			 * v_ipif is null.
21497c478bd9Sstevel@tonic-gate 			 */
21507c478bd9Sstevel@tonic-gate 			ASSERT(vifp->v_ipif != NULL);
21517c478bd9Sstevel@tonic-gate 			vifp->v_pkt_out++;
21527c478bd9Sstevel@tonic-gate 			vifp->v_bytes_out += plen;
21537c478bd9Sstevel@tonic-gate 			MC_SEND(ipha, mp, vifp, dst);
2154f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_fwd_out++;
21557c478bd9Sstevel@tonic-gate 		}
21567c478bd9Sstevel@tonic-gate 		unlock_good_vif(vifp);
21577c478bd9Sstevel@tonic-gate 	}
21587c478bd9Sstevel@tonic-gate 	if (tunnel_src != 0)
21597c478bd9Sstevel@tonic-gate 		return (1);
21607c478bd9Sstevel@tonic-gate 	else
21617c478bd9Sstevel@tonic-gate 		return (0);
21627c478bd9Sstevel@tonic-gate }
21637c478bd9Sstevel@tonic-gate 
21647c478bd9Sstevel@tonic-gate /*
21657c478bd9Sstevel@tonic-gate  * Send the packet on physical interface.
21667c478bd9Sstevel@tonic-gate  * Caller assumes can continue to use mp on return.
21677c478bd9Sstevel@tonic-gate  */
21687c478bd9Sstevel@tonic-gate /* ARGSUSED */
21697c478bd9Sstevel@tonic-gate static void
phyint_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)21707c478bd9Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
21717c478bd9Sstevel@tonic-gate {
21727c478bd9Sstevel@tonic-gate 	mblk_t	*mp_copy;
2173f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
2174fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
21757c478bd9Sstevel@tonic-gate 
21767c478bd9Sstevel@tonic-gate 	/* Make a new reference to the packet */
21777c478bd9Sstevel@tonic-gate 	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
21787c478bd9Sstevel@tonic-gate 	if (mp_copy == NULL) {
2179f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_fwd_drop++;
21807c478bd9Sstevel@tonic-gate 		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
21817c478bd9Sstevel@tonic-gate 		return;
21827c478bd9Sstevel@tonic-gate 	}
21837c478bd9Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
21847c478bd9Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
21857c478bd9Sstevel@tonic-gate 	else  {
2186f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2187fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
21887c478bd9Sstevel@tonic-gate 			    "phyint_send: tbf_contr rate %d "
21897c478bd9Sstevel@tonic-gate 			    "vifp 0x%p mp 0x%p dst 0x%x",
21907c478bd9Sstevel@tonic-gate 			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
21917c478bd9Sstevel@tonic-gate 		}
21927c478bd9Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
21937c478bd9Sstevel@tonic-gate 	}
21947c478bd9Sstevel@tonic-gate }
21957c478bd9Sstevel@tonic-gate 
21967c478bd9Sstevel@tonic-gate /*
21977c478bd9Sstevel@tonic-gate  * Send the whole packet for REGISTER encapsulation to PIM daemon
21987c478bd9Sstevel@tonic-gate  * Caller assumes it can continue to use mp on return.
21997c478bd9Sstevel@tonic-gate  */
22007c478bd9Sstevel@tonic-gate /* ARGSUSED */
22017c478bd9Sstevel@tonic-gate static void
register_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)22027c478bd9Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
22037c478bd9Sstevel@tonic-gate {
22047c478bd9Sstevel@tonic-gate 	struct igmpmsg	*im;
22057c478bd9Sstevel@tonic-gate 	mblk_t		*mp_copy;
22067c478bd9Sstevel@tonic-gate 	ipha_t		*ipha_copy;
2207bd670b35SErik Nordmark 	ill_t		*ill = vifp->v_ipif->ipif_ill;
2208bd670b35SErik Nordmark 	ip_stack_t	*ipst = ill->ill_ipst;
2209fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
2210bd670b35SErik Nordmark 	ip_recv_attr_t	iras;
22117c478bd9Sstevel@tonic-gate 
2212f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2213fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22147c478bd9Sstevel@tonic-gate 		    "register_send: src %x, dst %x\n",
22157c478bd9Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
22167c478bd9Sstevel@tonic-gate 	}
22177c478bd9Sstevel@tonic-gate 
22187c478bd9Sstevel@tonic-gate 	/*
22197c478bd9Sstevel@tonic-gate 	 * Copy the old packet & pullup its IP header into the new mblk_t so we
22207c478bd9Sstevel@tonic-gate 	 * can modify it.  Try to fill the new mblk_t since if we don't the
22217c478bd9Sstevel@tonic-gate 	 * ethernet driver will.
22227c478bd9Sstevel@tonic-gate 	 */
22237c478bd9Sstevel@tonic-gate 	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
22247c478bd9Sstevel@tonic-gate 	if (mp_copy == NULL) {
2225f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
2226f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
2227fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22287c478bd9Sstevel@tonic-gate 			    "register_send: allocb failure.");
22297c478bd9Sstevel@tonic-gate 		}
22307c478bd9Sstevel@tonic-gate 		return;
22317c478bd9Sstevel@tonic-gate 	}
22327c478bd9Sstevel@tonic-gate 
22337c478bd9Sstevel@tonic-gate 	/*
22347c478bd9Sstevel@tonic-gate 	 * Bump write pointer to account for igmpmsg being added.
22357c478bd9Sstevel@tonic-gate 	 */
22367c478bd9Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);
22377c478bd9Sstevel@tonic-gate 
22387c478bd9Sstevel@tonic-gate 	/*
22397c478bd9Sstevel@tonic-gate 	 * Chain packet to new mblk_t.
22407c478bd9Sstevel@tonic-gate 	 */
22417c478bd9Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
2242f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
2243f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
2244fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22457c478bd9Sstevel@tonic-gate 			    "register_send: copymsg failure.");
22467c478bd9Sstevel@tonic-gate 		}
22477c478bd9Sstevel@tonic-gate 		freeb(mp_copy);
22487c478bd9Sstevel@tonic-gate 		return;
22497c478bd9Sstevel@tonic-gate 	}
22507c478bd9Sstevel@tonic-gate 
22517c478bd9Sstevel@tonic-gate 	/*
2252fc80c0dfSnordmark 	 * icmp_input() asserts that IP version field is set to an
22537c478bd9Sstevel@tonic-gate 	 * appropriate version. Hence, the struct igmpmsg that this really
22547c478bd9Sstevel@tonic-gate 	 * becomes, needs to have the correct IP version field.
22557c478bd9Sstevel@tonic-gate 	 */
22567c478bd9Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
22577c478bd9Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 	/*
22607c478bd9Sstevel@tonic-gate 	 * The kernel uses the struct igmpmsg header to encode the messages to
22617c478bd9Sstevel@tonic-gate 	 * the multicast routing daemon. Fill in the fields in the header
22627c478bd9Sstevel@tonic-gate 	 * starting with the message type which is IGMPMSG_WHOLEPKT
22637c478bd9Sstevel@tonic-gate 	 */
22647c478bd9Sstevel@tonic-gate 	im = (struct igmpmsg *)mp_copy->b_rptr;
22657c478bd9Sstevel@tonic-gate 	im->im_msgtype = IGMPMSG_WHOLEPKT;
22667c478bd9Sstevel@tonic-gate 	im->im_src.s_addr = ipha->ipha_src;
22677c478bd9Sstevel@tonic-gate 	im->im_dst.s_addr = ipha->ipha_dst;
22687c478bd9Sstevel@tonic-gate 
22697c478bd9Sstevel@tonic-gate 	/*
22707c478bd9Sstevel@tonic-gate 	 * Must Be Zero. This is because the struct igmpmsg is really an IP
22717c478bd9Sstevel@tonic-gate 	 * header with renamed fields and the multicast routing daemon uses
22727c478bd9Sstevel@tonic-gate 	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
22737c478bd9Sstevel@tonic-gate 	 */
22747c478bd9Sstevel@tonic-gate 	im->im_mbz = 0;
22757c478bd9Sstevel@tonic-gate 
2276f4b3ec61Sdh155122 	++ipst->ips_mrtstat->mrts_upcalls;
2277bd670b35SErik Nordmark 	if (IPCL_IS_NONSTR(mrouter) ? mrouter->conn_flow_cntrld :
2278bd670b35SErik Nordmark 	    !canputnext(mrouter->conn_rq)) {
2279f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_regsend_drops;
2280f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 3) {
2281fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22827c478bd9Sstevel@tonic-gate 			    "register_send: register upcall failure.");
22837c478bd9Sstevel@tonic-gate 		}
2284bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2285bd670b35SErik Nordmark 		ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill);
22867c478bd9Sstevel@tonic-gate 		freemsg(mp_copy);
22877c478bd9Sstevel@tonic-gate 	} else {
2288fc80c0dfSnordmark 		/* Pass to RAWIP */
2289bd670b35SErik Nordmark 		bzero(&iras, sizeof (iras));
2290bd670b35SErik Nordmark 		iras.ira_flags = IRAF_IS_IPV4;
2291bd670b35SErik Nordmark 		iras.ira_ip_hdr_length = sizeof (ipha_t);
2292bd670b35SErik Nordmark 		iras.ira_pktlen = msgdsize(mp_copy);
2293*e8249070SRobert Mustacchi 		iras.ira_ttl = ipha->ipha_ttl;
2294bd670b35SErik Nordmark 		(mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
2295bd670b35SErik Nordmark 		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
22967c478bd9Sstevel@tonic-gate 	}
22977c478bd9Sstevel@tonic-gate }
22987c478bd9Sstevel@tonic-gate 
22997c478bd9Sstevel@tonic-gate /*
23007c478bd9Sstevel@tonic-gate  * pim_validate_cksum handles verification of the checksum in the
23017c478bd9Sstevel@tonic-gate  * pim header.  For PIM Register packets, the checksum is calculated
23027c478bd9Sstevel@tonic-gate  * across the PIM header only.  For all other packets, the checksum
23037c478bd9Sstevel@tonic-gate  * is for the PIM header and remainder of the packet.
23047c478bd9Sstevel@tonic-gate  *
23057c478bd9Sstevel@tonic-gate  * returns: B_TRUE, if checksum is okay.
23067c478bd9Sstevel@tonic-gate  *          B_FALSE, if checksum is not valid.
23077c478bd9Sstevel@tonic-gate  */
23087c478bd9Sstevel@tonic-gate static boolean_t
pim_validate_cksum(mblk_t * mp,ipha_t * ip,struct pim * pimp)23097c478bd9Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
23107c478bd9Sstevel@tonic-gate {
23117c478bd9Sstevel@tonic-gate 	mblk_t *mp_dup;
23127c478bd9Sstevel@tonic-gate 
23137c478bd9Sstevel@tonic-gate 	if ((mp_dup = dupmsg(mp)) == NULL)
23147c478bd9Sstevel@tonic-gate 		return (B_FALSE);
23157c478bd9Sstevel@tonic-gate 
23167c478bd9Sstevel@tonic-gate 	mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
23177c478bd9Sstevel@tonic-gate 	if (pimp->pim_type == PIM_REGISTER)
23187c478bd9Sstevel@tonic-gate 		mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
23197c478bd9Sstevel@tonic-gate 	if (IP_CSUM(mp_dup, 0, 0)) {
23207c478bd9Sstevel@tonic-gate 		freemsg(mp_dup);
23217c478bd9Sstevel@tonic-gate 		return (B_FALSE);
23227c478bd9Sstevel@tonic-gate 	}
23237c478bd9Sstevel@tonic-gate 	freemsg(mp_dup);
23247c478bd9Sstevel@tonic-gate 	return (B_TRUE);
23257c478bd9Sstevel@tonic-gate }
23267c478bd9Sstevel@tonic-gate 
23277c478bd9Sstevel@tonic-gate /*
2328bd670b35SErik Nordmark  * Process PIM protocol packets i.e. IP Protocol 103.
2329bd670b35SErik Nordmark  * Register messages are decapsulated and sent onto multicast forwarding.
2330bd670b35SErik Nordmark  *
2331bd670b35SErik Nordmark  * Return NULL for a bad packet that is discarded here.
2332bd670b35SErik Nordmark  * Return mp if the message is OK and should be handed to "raw" receivers.
2333bd670b35SErik Nordmark  * Callers of pim_input() may need to reinitialize variables that were copied
2334bd670b35SErik Nordmark  * from the mblk as this calls pullupmsg().
23357c478bd9Sstevel@tonic-gate  */
2336bd670b35SErik Nordmark mblk_t *
pim_input(mblk_t * mp,ip_recv_attr_t * ira)2337bd670b35SErik Nordmark pim_input(mblk_t *mp, ip_recv_attr_t *ira)
23387c478bd9Sstevel@tonic-gate {
23397c478bd9Sstevel@tonic-gate 	ipha_t		*eip, *ip;
23407c478bd9Sstevel@tonic-gate 	int		iplen, pimlen, iphlen;
23417c478bd9Sstevel@tonic-gate 	struct pim	*pimp;	/* pointer to a pim struct */
23427c478bd9Sstevel@tonic-gate 	uint32_t	*reghdr;
2343bd670b35SErik Nordmark 	ill_t		*ill = ira->ira_ill;
2344f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
2345fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
23467c478bd9Sstevel@tonic-gate 
23477c478bd9Sstevel@tonic-gate 	/*
23487c478bd9Sstevel@tonic-gate 	 * Pullup the msg for PIM protocol processing.
23497c478bd9Sstevel@tonic-gate 	 */
23507c478bd9Sstevel@tonic-gate 	if (pullupmsg(mp, -1) == 0) {
2351f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
2352bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2353bd670b35SErik Nordmark 		ip_drop_input("mrts_pim_nomemory", mp, ill);
23547c478bd9Sstevel@tonic-gate 		freemsg(mp);
2355bd670b35SErik Nordmark 		return (NULL);
23567c478bd9Sstevel@tonic-gate 	}
23577c478bd9Sstevel@tonic-gate 
23587c478bd9Sstevel@tonic-gate 	ip = (ipha_t *)mp->b_rptr;
23597c478bd9Sstevel@tonic-gate 	iplen = ip->ipha_length;
23607c478bd9Sstevel@tonic-gate 	iphlen = IPH_HDR_LENGTH(ip);
23617c478bd9Sstevel@tonic-gate 	pimlen = ntohs(iplen) - iphlen;
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 	/*
23647c478bd9Sstevel@tonic-gate 	 * Validate lengths
23657c478bd9Sstevel@tonic-gate 	 */
23667c478bd9Sstevel@tonic-gate 	if (pimlen < PIM_MINLEN) {
2367f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_malformed;
2368f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2369fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23707c478bd9Sstevel@tonic-gate 			    "pim_input: length not at least minlen");
23717c478bd9Sstevel@tonic-gate 		}
2372bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2373bd670b35SErik Nordmark 		ip_drop_input("mrts_pim_malformed", mp, ill);
23747c478bd9Sstevel@tonic-gate 		freemsg(mp);
2375bd670b35SErik Nordmark 		return (NULL);
23767c478bd9Sstevel@tonic-gate 	}
23777c478bd9Sstevel@tonic-gate 
23787c478bd9Sstevel@tonic-gate 	/*
23797c478bd9Sstevel@tonic-gate 	 * Point to the PIM header.
23807c478bd9Sstevel@tonic-gate 	 */
23817c478bd9Sstevel@tonic-gate 	pimp = (struct pim *)((caddr_t)ip + iphlen);
23827c478bd9Sstevel@tonic-gate 
23837c478bd9Sstevel@tonic-gate 	/*
23847c478bd9Sstevel@tonic-gate 	 * Check the version number.
23857c478bd9Sstevel@tonic-gate 	 */
23867c478bd9Sstevel@tonic-gate 	if (pimp->pim_vers != PIM_VERSION) {
2387f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badversion;
2388f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2389fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23907c478bd9Sstevel@tonic-gate 			    "pim_input: unknown version of PIM");
23917c478bd9Sstevel@tonic-gate 		}
2392bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2393bd670b35SErik Nordmark 		ip_drop_input("mrts_pim_badversion", mp, ill);
23947c478bd9Sstevel@tonic-gate 		freemsg(mp);
2395bd670b35SErik Nordmark 		return (NULL);
23967c478bd9Sstevel@tonic-gate 	}
23977c478bd9Sstevel@tonic-gate 
23987c478bd9Sstevel@tonic-gate 	/*
23997c478bd9Sstevel@tonic-gate 	 * Validate the checksum
24007c478bd9Sstevel@tonic-gate 	 */
24017c478bd9Sstevel@tonic-gate 	if (!pim_validate_cksum(mp, ip, pimp)) {
2402f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
2403f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2404fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24057c478bd9Sstevel@tonic-gate 			    "pim_input: invalid checksum");
24067c478bd9Sstevel@tonic-gate 		}
2407bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2408bd670b35SErik Nordmark 		ip_drop_input("pim_rcv_badcsum", mp, ill);
24097c478bd9Sstevel@tonic-gate 		freemsg(mp);
2410bd670b35SErik Nordmark 		return (NULL);
24117c478bd9Sstevel@tonic-gate 	}
24127c478bd9Sstevel@tonic-gate 
24137c478bd9Sstevel@tonic-gate 	if (pimp->pim_type != PIM_REGISTER)
2414bd670b35SErik Nordmark 		return (mp);
24157c478bd9Sstevel@tonic-gate 
24167c478bd9Sstevel@tonic-gate 	reghdr = (uint32_t *)(pimp + 1);
24177c478bd9Sstevel@tonic-gate 	eip = (ipha_t *)(reghdr + 1);
24187c478bd9Sstevel@tonic-gate 
24197c478bd9Sstevel@tonic-gate 	/*
24207c478bd9Sstevel@tonic-gate 	 * check if the inner packet is destined to mcast group
24217c478bd9Sstevel@tonic-gate 	 */
24227c478bd9Sstevel@tonic-gate 	if (!CLASSD(eip->ipha_dst)) {
2423f4b3ec61Sdh155122 		++ipst->ips_mrtstat->mrts_pim_badregisters;
2424f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
2425fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24267c478bd9Sstevel@tonic-gate 			    "pim_input: Inner pkt not mcast .. !");
24277c478bd9Sstevel@tonic-gate 		}
2428bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2429bd670b35SErik Nordmark 		ip_drop_input("mrts_pim_badregisters", mp, ill);
24307c478bd9Sstevel@tonic-gate 		freemsg(mp);
2431bd670b35SErik Nordmark 		return (NULL);
24327c478bd9Sstevel@tonic-gate 	}
2433f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2434fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24357c478bd9Sstevel@tonic-gate 		    "register from %x, to %x, len %d",
24367c478bd9Sstevel@tonic-gate 		    ntohl(eip->ipha_src),
24377c478bd9Sstevel@tonic-gate 		    ntohl(eip->ipha_dst),
24387c478bd9Sstevel@tonic-gate 		    ntohs(eip->ipha_length));
24397c478bd9Sstevel@tonic-gate 	}
24407c478bd9Sstevel@tonic-gate 	/*
24417c478bd9Sstevel@tonic-gate 	 * If the null register bit is not set, decapsulate
24427c478bd9Sstevel@tonic-gate 	 * the packet before forwarding it.
2443bd670b35SErik Nordmark 	 * Avoid this in no register vif
24447c478bd9Sstevel@tonic-gate 	 */
2445bd670b35SErik Nordmark 	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) &&
2446bd670b35SErik Nordmark 	    ipst->ips_reg_vif_num != ALL_VIFS) {
24477c478bd9Sstevel@tonic-gate 		mblk_t *mp_copy;
2448bd670b35SErik Nordmark 		uint_t saved_pktlen;
24497c478bd9Sstevel@tonic-gate 
24507c478bd9Sstevel@tonic-gate 		/* Copy the message */
24517c478bd9Sstevel@tonic-gate 		if ((mp_copy = copymsg(mp)) == NULL) {
2452f4b3ec61Sdh155122 			++ipst->ips_mrtstat->mrts_pim_nomemory;
2453bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2454bd670b35SErik Nordmark 			ip_drop_input("mrts_pim_nomemory", mp, ill);
24557c478bd9Sstevel@tonic-gate 			freemsg(mp);
2456bd670b35SErik Nordmark 			return (NULL);
24577c478bd9Sstevel@tonic-gate 		}
24587c478bd9Sstevel@tonic-gate 
24597c478bd9Sstevel@tonic-gate 		/*
24607c478bd9Sstevel@tonic-gate 		 * Decapsulate the packet and give it to
24617c478bd9Sstevel@tonic-gate 		 * register_mforward.
24627c478bd9Sstevel@tonic-gate 		 */
2463bd670b35SErik Nordmark 		mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr);
2464bd670b35SErik Nordmark 		saved_pktlen = ira->ira_pktlen;
2465bd670b35SErik Nordmark 		ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr);
2466bd670b35SErik Nordmark 		if (register_mforward(mp_copy, ira) != 0) {
2467bd670b35SErik Nordmark 			/* register_mforward already called ip_drop_input */
24687c478bd9Sstevel@tonic-gate 			freemsg(mp);
2469bd670b35SErik Nordmark 			ira->ira_pktlen = saved_pktlen;
2470bd670b35SErik Nordmark 			return (NULL);
24717c478bd9Sstevel@tonic-gate 		}
2472bd670b35SErik Nordmark 		ira->ira_pktlen = saved_pktlen;
24737c478bd9Sstevel@tonic-gate 	}
24747c478bd9Sstevel@tonic-gate 
24757c478bd9Sstevel@tonic-gate 	/*
24767c478bd9Sstevel@tonic-gate 	 * Pass all valid PIM packets up to any process(es) listening on a raw
24777c478bd9Sstevel@tonic-gate 	 * PIM socket. For Solaris it is done right after pim_input() is
24787c478bd9Sstevel@tonic-gate 	 * called.
24797c478bd9Sstevel@tonic-gate 	 */
2480bd670b35SErik Nordmark 	return (mp);
24817c478bd9Sstevel@tonic-gate }
24827c478bd9Sstevel@tonic-gate 
24837c478bd9Sstevel@tonic-gate /*
24847c478bd9Sstevel@tonic-gate  * PIM sparse mode hook.  Called by pim_input after decapsulating
24857c478bd9Sstevel@tonic-gate  * the packet. Loop back the packet, as if we have received it.
24867c478bd9Sstevel@tonic-gate  * In pim_input() we have to check if the destination is a multicast address.
24877c478bd9Sstevel@tonic-gate  */
24887c478bd9Sstevel@tonic-gate static int
register_mforward(mblk_t * mp,ip_recv_attr_t * ira)2489bd670b35SErik Nordmark register_mforward(mblk_t *mp, ip_recv_attr_t *ira)
24907c478bd9Sstevel@tonic-gate {
2491bd670b35SErik Nordmark 	ire_t		*ire;
2492bd670b35SErik Nordmark 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
2493bd670b35SErik Nordmark 	ill_t		*ill = ira->ira_ill;
2494f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
2495fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
24967c478bd9Sstevel@tonic-gate 
2497f4b3ec61Sdh155122 	ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);
2498f4b3ec61Sdh155122 
2499f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 3) {
2500fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25017c478bd9Sstevel@tonic-gate 		    "register_mforward: src %x, dst %x\n",
25027c478bd9Sstevel@tonic-gate 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
25037c478bd9Sstevel@tonic-gate 	}
25047c478bd9Sstevel@tonic-gate 	/*
25057c478bd9Sstevel@tonic-gate 	 * Need to pass in to ip_mforward() the information that the
2506bd670b35SErik Nordmark 	 * packet has arrived on the register_vif. We mark it with
2507bd670b35SErik Nordmark 	 * the IRAF_PIM_REGISTER attribute.
2508bd670b35SErik Nordmark 	 * pim_input verified that the (inner) destination is multicast,
2509bd670b35SErik Nordmark 	 * hence we skip the generic code in ip_input.
25107c478bd9Sstevel@tonic-gate 	 */
2511bd670b35SErik Nordmark 	ira->ira_flags |= IRAF_PIM_REGISTER;
2512f4b3ec61Sdh155122 	++ipst->ips_mrtstat->mrts_pim_regforwards;
2513bd670b35SErik Nordmark 
2514bd670b35SErik Nordmark 	if (!CLASSD(ipha->ipha_dst)) {
2515bd670b35SErik Nordmark 		ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES,
25169e3469d3SErik Nordmark 		    ira->ira_tsl, MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst,
25179e3469d3SErik Nordmark 		    NULL, NULL, NULL);
2518bd670b35SErik Nordmark 	} else {
2519bd670b35SErik Nordmark 		ire = ire_multicast(ill);
2520bd670b35SErik Nordmark 	}
2521bd670b35SErik Nordmark 	ASSERT(ire != NULL);
2522bd670b35SErik Nordmark 	/* Normally this will return the IRE_MULTICAST */
2523bd670b35SErik Nordmark 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2524bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2525bd670b35SErik Nordmark 		ip_drop_input("mrts_pim RTF_REJECT", mp, ill);
2526bd670b35SErik Nordmark 		freemsg(mp);
2527bd670b35SErik Nordmark 		ire_refrele(ire);
2528bd670b35SErik Nordmark 		return (-1);
2529bd670b35SErik Nordmark 	}
2530bd670b35SErik Nordmark 	ASSERT(ire->ire_type & IRE_MULTICAST);
2531bd670b35SErik Nordmark 	(*ire->ire_recvfn)(ire, mp, ipha, ira);
2532bd670b35SErik Nordmark 	ire_refrele(ire);
2533bd670b35SErik Nordmark 
25347c478bd9Sstevel@tonic-gate 	return (0);
25357c478bd9Sstevel@tonic-gate }
25367c478bd9Sstevel@tonic-gate 
25377c478bd9Sstevel@tonic-gate /*
25387c478bd9Sstevel@tonic-gate  * Send an encapsulated packet.
25397c478bd9Sstevel@tonic-gate  * Caller assumes can continue to use mp when routine returns.
25407c478bd9Sstevel@tonic-gate  */
25417c478bd9Sstevel@tonic-gate /* ARGSUSED */
25427c478bd9Sstevel@tonic-gate static void
encap_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)25437c478bd9Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
25447c478bd9Sstevel@tonic-gate {
25457c478bd9Sstevel@tonic-gate 	mblk_t	*mp_copy;
25467c478bd9Sstevel@tonic-gate 	ipha_t	*ipha_copy;
25477c478bd9Sstevel@tonic-gate 	size_t	len;
2548f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
2549fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
25507c478bd9Sstevel@tonic-gate 
2551f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2552fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
2553f4b3ec61Sdh155122 		    "encap_send: vif %ld enter",
2554f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs));
25557c478bd9Sstevel@tonic-gate 	}
25567c478bd9Sstevel@tonic-gate 	len = ntohs(ipha->ipha_length);
25577c478bd9Sstevel@tonic-gate 
25587c478bd9Sstevel@tonic-gate 	/*
25597c478bd9Sstevel@tonic-gate 	 * Copy the old packet & pullup it's IP header into the
25607c478bd9Sstevel@tonic-gate 	 * new mbuf so we can modify it.  Try to fill the new
25617c478bd9Sstevel@tonic-gate 	 * mbuf since if we don't the ethernet driver will.
25627c478bd9Sstevel@tonic-gate 	 */
25637c478bd9Sstevel@tonic-gate 	mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
25647c478bd9Sstevel@tonic-gate 	if (mp_copy == NULL)
25657c478bd9Sstevel@tonic-gate 		return;
25667c478bd9Sstevel@tonic-gate 	mp_copy->b_rptr += 32;
25677c478bd9Sstevel@tonic-gate 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
25687c478bd9Sstevel@tonic-gate 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
25697c478bd9Sstevel@tonic-gate 		freeb(mp_copy);
25707c478bd9Sstevel@tonic-gate 		return;
25717c478bd9Sstevel@tonic-gate 	}
25727c478bd9Sstevel@tonic-gate 
25737c478bd9Sstevel@tonic-gate 	/*
25747c478bd9Sstevel@tonic-gate 	 * Fill in the encapsulating IP header.
25757c478bd9Sstevel@tonic-gate 	 * Remote tunnel dst in rmt_addr, from add_vif().
25767c478bd9Sstevel@tonic-gate 	 */
25777c478bd9Sstevel@tonic-gate 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
25787c478bd9Sstevel@tonic-gate 	*ipha_copy = multicast_encap_iphdr;
25797c478bd9Sstevel@tonic-gate 	ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
25807c478bd9Sstevel@tonic-gate 	ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
25817c478bd9Sstevel@tonic-gate 	ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
25827c478bd9Sstevel@tonic-gate 	ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
25837c478bd9Sstevel@tonic-gate 	ASSERT(ipha_copy->ipha_ident == 0);
25847c478bd9Sstevel@tonic-gate 
25857c478bd9Sstevel@tonic-gate 	/* Turn the encapsulated IP header back into a valid one. */
25867c478bd9Sstevel@tonic-gate 	ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
25877c478bd9Sstevel@tonic-gate 	ipha->ipha_ttl--;
25887c478bd9Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = 0;
25897c478bd9Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
25907c478bd9Sstevel@tonic-gate 
2591bd670b35SErik Nordmark 	ipha_copy->ipha_ttl = ipha->ipha_ttl;
2592bd670b35SErik Nordmark 
2593f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2594fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25957c478bd9Sstevel@tonic-gate 		    "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
25967c478bd9Sstevel@tonic-gate 	}
25977c478bd9Sstevel@tonic-gate 	if (vifp->v_rate_limit <= 0)
25987c478bd9Sstevel@tonic-gate 		tbf_send_packet(vifp, mp_copy);
25997c478bd9Sstevel@tonic-gate 	else
26007c478bd9Sstevel@tonic-gate 		/* ipha is from the original header */
26017c478bd9Sstevel@tonic-gate 		tbf_control(vifp, mp_copy, ipha);
26027c478bd9Sstevel@tonic-gate }
26037c478bd9Sstevel@tonic-gate 
26047c478bd9Sstevel@tonic-gate /*
2605bd670b35SErik Nordmark  * De-encapsulate a packet and feed it back through IP input if it
2606bd670b35SErik Nordmark  * matches one of our multicast tunnels.
2607bd670b35SErik Nordmark  *
26087c478bd9Sstevel@tonic-gate  * This routine is called whenever IP gets a packet with prototype
2609bd670b35SErik Nordmark  * IPPROTO_ENCAP and a local destination address and the packet didn't
2610bd670b35SErik Nordmark  * match one of our configured IP-in-IP tunnels.
26117c478bd9Sstevel@tonic-gate  */
26127c478bd9Sstevel@tonic-gate void
ip_mroute_decap(mblk_t * mp,ip_recv_attr_t * ira)2613bd670b35SErik Nordmark ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira)
26147c478bd9Sstevel@tonic-gate {
26157c478bd9Sstevel@tonic-gate 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
26167c478bd9Sstevel@tonic-gate 	ipha_t		*ipha_encap;
26177c478bd9Sstevel@tonic-gate 	int		hlen = IPH_HDR_LENGTH(ipha);
2618bd670b35SErik Nordmark 	int		hlen_encap;
26197c478bd9Sstevel@tonic-gate 	ipaddr_t	src;
26207c478bd9Sstevel@tonic-gate 	struct vif	*vifp;
2621bd670b35SErik Nordmark 	ire_t		*ire;
2622bd670b35SErik Nordmark 	ill_t		*ill = ira->ira_ill;
2623f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
2624fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
26257c478bd9Sstevel@tonic-gate 
2626bd670b35SErik Nordmark 	/* Make sure we have all of the inner header */
2627bd670b35SErik Nordmark 	ipha_encap = (ipha_t *)((char *)ipha + hlen);
2628bd670b35SErik Nordmark 	if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) {
2629bd670b35SErik Nordmark 		ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira);
2630bd670b35SErik Nordmark 		if (ipha == NULL) {
2631bd670b35SErik Nordmark 			ipst->ips_mrtstat->mrts_bad_tunnel++;
2632bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2633bd670b35SErik Nordmark 			ip_drop_input("ip_mroute_decap: too short", mp, ill);
2634bd670b35SErik Nordmark 			freemsg(mp);
2635bd670b35SErik Nordmark 			return;
2636bd670b35SErik Nordmark 		}
2637bd670b35SErik Nordmark 		ipha_encap = (ipha_t *)((char *)ipha + hlen);
2638bd670b35SErik Nordmark 	}
2639bd670b35SErik Nordmark 	hlen_encap = IPH_HDR_LENGTH(ipha_encap);
2640bd670b35SErik Nordmark 	if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) {
2641bd670b35SErik Nordmark 		ipha = ip_pullup(mp, hlen + hlen_encap, ira);
2642bd670b35SErik Nordmark 		if (ipha == NULL) {
2643bd670b35SErik Nordmark 			ipst->ips_mrtstat->mrts_bad_tunnel++;
2644bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2645bd670b35SErik Nordmark 			ip_drop_input("ip_mroute_decap: too short", mp, ill);
2646bd670b35SErik Nordmark 			freemsg(mp);
2647bd670b35SErik Nordmark 			return;
2648bd670b35SErik Nordmark 		}
2649bd670b35SErik Nordmark 		ipha_encap = (ipha_t *)((char *)ipha + hlen);
2650bd670b35SErik Nordmark 	}
2651bd670b35SErik Nordmark 
26527c478bd9Sstevel@tonic-gate 	/*
26537c478bd9Sstevel@tonic-gate 	 * Dump the packet if it's not to a multicast destination or if
26547c478bd9Sstevel@tonic-gate 	 * we don't have an encapsulating tunnel with the source.
26557c478bd9Sstevel@tonic-gate 	 * Note:  This code assumes that the remote site IP address
26567c478bd9Sstevel@tonic-gate 	 * uniquely identifies the tunnel (i.e., that this site has
26577c478bd9Sstevel@tonic-gate 	 * at most one tunnel with the remote site).
26587c478bd9Sstevel@tonic-gate 	 */
26597c478bd9Sstevel@tonic-gate 	if (!CLASSD(ipha_encap->ipha_dst)) {
2660f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
26617c478bd9Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: bad tunnel\n"));
2662bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2663bd670b35SErik Nordmark 		ip_drop_input("mrts_bad_tunnel", mp, ill);
26647c478bd9Sstevel@tonic-gate 		freemsg(mp);
26657c478bd9Sstevel@tonic-gate 		return;
26667c478bd9Sstevel@tonic-gate 	}
26677c478bd9Sstevel@tonic-gate 	src = (ipaddr_t)ipha->ipha_src;
2668f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_last_encap_lock);
2669f4b3ec61Sdh155122 	if (src != ipst->ips_last_encap_src) {
26707c478bd9Sstevel@tonic-gate 		struct vif *vife;
26717c478bd9Sstevel@tonic-gate 
2672f4b3ec61Sdh155122 		vifp = ipst->ips_vifs;
2673f4b3ec61Sdh155122 		vife = vifp + ipst->ips_numvifs;
2674f4b3ec61Sdh155122 		ipst->ips_last_encap_src = src;
2675f4b3ec61Sdh155122 		ipst->ips_last_encap_vif = 0;
26767c478bd9Sstevel@tonic-gate 		for (; vifp < vife; ++vifp) {
26777c478bd9Sstevel@tonic-gate 			if (!lock_good_vif(vifp))
26787c478bd9Sstevel@tonic-gate 				continue;
26797c478bd9Sstevel@tonic-gate 			if (vifp->v_rmt_addr.s_addr == src) {
26807c478bd9Sstevel@tonic-gate 				if (vifp->v_flags & VIFF_TUNNEL)
2681f4b3ec61Sdh155122 					ipst->ips_last_encap_vif = vifp;
2682f4b3ec61Sdh155122 				if (ipst->ips_ip_mrtdebug > 1) {
2683fc80c0dfSnordmark 					(void) mi_strlog(mrouter->conn_rq,
26847c478bd9Sstevel@tonic-gate 					    1, SL_TRACE,
26857c478bd9Sstevel@tonic-gate 					    "ip_mroute_decap: good tun "
26867c478bd9Sstevel@tonic-gate 					    "vif %ld with %x",
2687f4b3ec61Sdh155122 					    (ptrdiff_t)(vifp - ipst->ips_vifs),
26887c478bd9Sstevel@tonic-gate 					    ntohl(src));
26897c478bd9Sstevel@tonic-gate 				}
26907c478bd9Sstevel@tonic-gate 				unlock_good_vif(vifp);
26917c478bd9Sstevel@tonic-gate 				break;
26927c478bd9Sstevel@tonic-gate 			}
26937c478bd9Sstevel@tonic-gate 			unlock_good_vif(vifp);
26947c478bd9Sstevel@tonic-gate 		}
26957c478bd9Sstevel@tonic-gate 	}
2696f4b3ec61Sdh155122 	if ((vifp = ipst->ips_last_encap_vif) == 0) {
2697f4b3ec61Sdh155122 		mutex_exit(&ipst->ips_last_encap_lock);
2698f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
2699bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2700bd670b35SErik Nordmark 		ip_drop_input("mrts_bad_tunnel", mp, ill);
27017c478bd9Sstevel@tonic-gate 		freemsg(mp);
27027c478bd9Sstevel@tonic-gate 		ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
2703f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
27047c478bd9Sstevel@tonic-gate 		return;
27057c478bd9Sstevel@tonic-gate 	}
2706f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_last_encap_lock);
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate 	/*
27097c478bd9Sstevel@tonic-gate 	 * Need to pass in the tunnel source to ip_mforward (so that it can
2710bd670b35SErik Nordmark 	 * verify that the packet arrived over the correct vif.)
27117c478bd9Sstevel@tonic-gate 	 */
2712bd670b35SErik Nordmark 	ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET;
2713bd670b35SErik Nordmark 	ira->ira_mroute_tunnel = src;
27147c478bd9Sstevel@tonic-gate 	mp->b_rptr += hlen;
2715bd670b35SErik Nordmark 	ira->ira_pktlen -= hlen;
2716bd670b35SErik Nordmark 	ira->ira_ip_hdr_length = hlen_encap;
2717bd670b35SErik Nordmark 
2718bd670b35SErik Nordmark 	/*
2719bd670b35SErik Nordmark 	 * We don't redo any of the filtering in ill_input_full_v4 and we
2720bd670b35SErik Nordmark 	 * have checked that all of ipha_encap and any IP options are
2721bd670b35SErik Nordmark 	 * pulled up. Hence we call ire_recv_multicast_v4 directly.
2722bd670b35SErik Nordmark 	 * However, we have to check for RSVP as in ip_input_full_v4
2723bd670b35SErik Nordmark 	 * and if so we pass it to ire_recv_broadcast_v4 for local delivery
2724bd670b35SErik Nordmark 	 * to the rsvpd.
2725bd670b35SErik Nordmark 	 */
2726bd670b35SErik Nordmark 	if (ipha_encap->ipha_protocol == IPPROTO_RSVP &&
2727bd670b35SErik Nordmark 	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
2728bd670b35SErik Nordmark 		ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill,
2729bd670b35SErik Nordmark 		    ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR,
27309e3469d3SErik Nordmark 		    IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL);
2731bd670b35SErik Nordmark 	} else {
2732bd670b35SErik Nordmark 		ire = ire_multicast(ill);
2733bd670b35SErik Nordmark 	}
2734bd670b35SErik Nordmark 	ASSERT(ire != NULL);
2735bd670b35SErik Nordmark 	/* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */
2736bd670b35SErik Nordmark 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2737bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2738bd670b35SErik Nordmark 		ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill);
2739bd670b35SErik Nordmark 		freemsg(mp);
2740bd670b35SErik Nordmark 		ire_refrele(ire);
2741bd670b35SErik Nordmark 		return;
2742bd670b35SErik Nordmark 	}
2743bd670b35SErik Nordmark 	ire->ire_ib_pkt_count++;
2744bd670b35SErik Nordmark 	ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST));
2745bd670b35SErik Nordmark 	(*ire->ire_recvfn)(ire, mp, ipha_encap, ira);
2746bd670b35SErik Nordmark 	ire_refrele(ire);
27477c478bd9Sstevel@tonic-gate }
27487c478bd9Sstevel@tonic-gate 
27497c478bd9Sstevel@tonic-gate /*
27507c478bd9Sstevel@tonic-gate  * Remove all records with v_ipif == ipif.  Called when an interface goes away
27517c478bd9Sstevel@tonic-gate  * (stream closed).  Called as writer.
27527c478bd9Sstevel@tonic-gate  */
27537c478bd9Sstevel@tonic-gate void
reset_mrt_vif_ipif(ipif_t * ipif)27547c478bd9Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif)
27557c478bd9Sstevel@tonic-gate {
27567c478bd9Sstevel@tonic-gate 	vifi_t vifi, tmp_vifi;
27577c478bd9Sstevel@tonic-gate 	vifi_t num_of_vifs;
2758f4b3ec61Sdh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
27597c478bd9Sstevel@tonic-gate 
27607c478bd9Sstevel@tonic-gate 	/* Can't check vifi >= 0 since vifi_t is unsigned! */
27617c478bd9Sstevel@tonic-gate 
2762f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
2763f4b3ec61Sdh155122 	num_of_vifs = ipst->ips_numvifs;
2764f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
27657c478bd9Sstevel@tonic-gate 
27667c478bd9Sstevel@tonic-gate 	for (vifi = num_of_vifs; vifi != 0; vifi--) {
27677c478bd9Sstevel@tonic-gate 		tmp_vifi = vifi - 1;
2768f4b3ec61Sdh155122 		if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) {
2769bd670b35SErik Nordmark 			(void) del_vif(&tmp_vifi, ipst);
27707c478bd9Sstevel@tonic-gate 		}
27717c478bd9Sstevel@tonic-gate 	}
27727c478bd9Sstevel@tonic-gate }
27737c478bd9Sstevel@tonic-gate 
27747c478bd9Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away.  Called by ill_delete.  */
27757c478bd9Sstevel@tonic-gate void
reset_mrt_ill(ill_t * ill)27767c478bd9Sstevel@tonic-gate reset_mrt_ill(ill_t *ill)
27777c478bd9Sstevel@tonic-gate {
27787c478bd9Sstevel@tonic-gate 	struct mfc	*rt;
27797c478bd9Sstevel@tonic-gate 	struct rtdetq	*rte;
27807c478bd9Sstevel@tonic-gate 	int		i;
2781f4b3ec61Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
2782fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
2783bd670b35SErik Nordmark 	timeout_id_t	id;
27847c478bd9Sstevel@tonic-gate 
27857c478bd9Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
2786f4b3ec61Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
2787f4b3ec61Sdh155122 		if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
2788f4b3ec61Sdh155122 			if (ipst->ips_ip_mrtdebug > 1) {
2789fc80c0dfSnordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
2790f4b3ec61Sdh155122 				    SL_TRACE,
27917c478bd9Sstevel@tonic-gate 				    "reset_mrt_ill: mfctable [%d]", i);
27927c478bd9Sstevel@tonic-gate 			}
27937c478bd9Sstevel@tonic-gate 			while (rt != NULL) {
27947c478bd9Sstevel@tonic-gate 				mutex_enter(&rt->mfc_mutex);
27957c478bd9Sstevel@tonic-gate 				while ((rte = rt->mfc_rte) != NULL) {
2796bd670b35SErik Nordmark 					if (rte->ill == ill &&
2797bd670b35SErik Nordmark 					    (id = rt->mfc_timeout_id) != 0) {
2798bd670b35SErik Nordmark 						/*
2799bd670b35SErik Nordmark 						 * Its ok to drop the lock,  the
2800bd670b35SErik Nordmark 						 * struct cannot be freed since
2801bd670b35SErik Nordmark 						 * we have a ref on the hash
2802bd670b35SErik Nordmark 						 * bucket.
2803bd670b35SErik Nordmark 						 */
2804bd670b35SErik Nordmark 						mutex_exit(&rt->mfc_mutex);
2805bd670b35SErik Nordmark 						(void) untimeout(id);
2806bd670b35SErik Nordmark 						mutex_enter(&rt->mfc_mutex);
2807bd670b35SErik Nordmark 					}
28087c478bd9Sstevel@tonic-gate 					if (rte->ill == ill) {
2809f4b3ec61Sdh155122 						if (ipst->ips_ip_mrtdebug > 1) {
28107c478bd9Sstevel@tonic-gate 						(void) mi_strlog(
2811fc80c0dfSnordmark 						    mrouter->conn_rq,
28127c478bd9Sstevel@tonic-gate 						    1, SL_TRACE,
28137c478bd9Sstevel@tonic-gate 						    "reset_mrt_ill: "
2814903a11ebSrh87107 						    "ill 0x%p", (void *)ill);
28157c478bd9Sstevel@tonic-gate 						}
28167c478bd9Sstevel@tonic-gate 						rt->mfc_rte = rte->rte_next;
28177c478bd9Sstevel@tonic-gate 						freemsg(rte->mp);
28187c478bd9Sstevel@tonic-gate 						mi_free((char *)rte);
28197c478bd9Sstevel@tonic-gate 					}
28207c478bd9Sstevel@tonic-gate 				}
28217c478bd9Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
28227c478bd9Sstevel@tonic-gate 				rt = rt->mfc_next;
28237c478bd9Sstevel@tonic-gate 			}
28247c478bd9Sstevel@tonic-gate 		}
2825f4b3ec61Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
28267c478bd9Sstevel@tonic-gate 	}
28277c478bd9Sstevel@tonic-gate }
28287c478bd9Sstevel@tonic-gate 
28297c478bd9Sstevel@tonic-gate /*
28307c478bd9Sstevel@tonic-gate  * Token bucket filter module.
28317c478bd9Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
28327c478bd9Sstevel@tonic-gate  */
28337c478bd9Sstevel@tonic-gate static void
tbf_control(struct vif * vifp,mblk_t * mp,ipha_t * ipha)28347c478bd9Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
28357c478bd9Sstevel@tonic-gate {
28367c478bd9Sstevel@tonic-gate 	size_t	p_len =  msgdsize(mp);
28377c478bd9Sstevel@tonic-gate 	struct tbf	*t    = vifp->v_tbf;
28387c478bd9Sstevel@tonic-gate 	timeout_id_t id = 0;
2839bd670b35SErik Nordmark 	ill_t		*ill = vifp->v_ipif->ipif_ill;
2840bd670b35SErik Nordmark 	ip_stack_t	*ipst = ill->ill_ipst;
2841fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
28427c478bd9Sstevel@tonic-gate 
28437c478bd9Sstevel@tonic-gate 	/* Drop if packet is too large */
28447c478bd9Sstevel@tonic-gate 	if (p_len > MAX_BKT_SIZE) {
2845f4b3ec61Sdh155122 		ipst->ips_mrtstat->mrts_pkt2large++;
2846bd670b35SErik Nordmark 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2847bd670b35SErik Nordmark 		ip_drop_output("tbf_control - too large", mp, ill);
28487c478bd9Sstevel@tonic-gate 		freemsg(mp);
28497c478bd9Sstevel@tonic-gate 		return;
28507c478bd9Sstevel@tonic-gate 	}
2851f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2852fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28537c478bd9Sstevel@tonic-gate 		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
2854f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
28557c478bd9Sstevel@tonic-gate 		    ntohl(ipha->ipha_dst));
28567c478bd9Sstevel@tonic-gate 	}
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 	mutex_enter(&t->tbf_lock);
28597c478bd9Sstevel@tonic-gate 
28607c478bd9Sstevel@tonic-gate 	tbf_update_tokens(vifp);
28617c478bd9Sstevel@tonic-gate 
28627c478bd9Sstevel@tonic-gate 	/*
28637c478bd9Sstevel@tonic-gate 	 * If there are enough tokens,
28647c478bd9Sstevel@tonic-gate 	 * and the queue is empty, send this packet out.
28657c478bd9Sstevel@tonic-gate 	 */
2866f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2867fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28687c478bd9Sstevel@tonic-gate 		    "tbf_control: vif %ld, TOKENS  %d, pkt len  %lu, qlen  %d",
2869f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
28707c478bd9Sstevel@tonic-gate 		    t->tbf_q_len);
28717c478bd9Sstevel@tonic-gate 	}
28727c478bd9Sstevel@tonic-gate 	/* No packets are queued */
28737c478bd9Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
28747c478bd9Sstevel@tonic-gate 		/* queue empty, send packet if enough tokens */
28757c478bd9Sstevel@tonic-gate 		if (p_len <= t->tbf_n_tok) {
28767c478bd9Sstevel@tonic-gate 			t->tbf_n_tok -= p_len;
28777c478bd9Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
28787c478bd9Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
28797c478bd9Sstevel@tonic-gate 			return;
28807c478bd9Sstevel@tonic-gate 		} else {
28817c478bd9Sstevel@tonic-gate 			/* Queue packet and timeout till later */
28827c478bd9Sstevel@tonic-gate 			tbf_queue(vifp, mp);
28837c478bd9Sstevel@tonic-gate 			ASSERT(vifp->v_timeout_id == 0);
28847c478bd9Sstevel@tonic-gate 			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
28857c478bd9Sstevel@tonic-gate 			    TBF_REPROCESS);
28867c478bd9Sstevel@tonic-gate 		}
28877c478bd9Sstevel@tonic-gate 	} else if (t->tbf_q_len < t->tbf_max_q_len) {
28887c478bd9Sstevel@tonic-gate 		/* Finite queue length, so queue pkts and process queue */
28897c478bd9Sstevel@tonic-gate 		tbf_queue(vifp, mp);
28907c478bd9Sstevel@tonic-gate 		tbf_process_q(vifp);
28917c478bd9Sstevel@tonic-gate 	} else {
28927c478bd9Sstevel@tonic-gate 		/* Check that we have UDP header with IP header */
28937c478bd9Sstevel@tonic-gate 		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
28947c478bd9Sstevel@tonic-gate 		    sizeof (struct udphdr);
28957c478bd9Sstevel@tonic-gate 
28967c478bd9Sstevel@tonic-gate 		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
28977c478bd9Sstevel@tonic-gate 			if (!pullupmsg(mp, hdr_length)) {
2898bd670b35SErik Nordmark 				BUMP_MIB(ill->ill_ip_mib,
2899bd670b35SErik Nordmark 				    ipIfStatsOutDiscards);
2900bd670b35SErik Nordmark 				ip_drop_output("tbf_control - pullup", mp, ill);
29017c478bd9Sstevel@tonic-gate 				freemsg(mp);
29027c478bd9Sstevel@tonic-gate 				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
29037c478bd9Sstevel@tonic-gate 				    "vif %ld src 0x%x dst 0x%x\n",
2904f4b3ec61Sdh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs),
29057c478bd9Sstevel@tonic-gate 				    ntohl(ipha->ipha_src),
29067c478bd9Sstevel@tonic-gate 				    ntohl(ipha->ipha_dst)));
29077c478bd9Sstevel@tonic-gate 				mutex_exit(&vifp->v_tbf->tbf_lock);
29087c478bd9Sstevel@tonic-gate 				return;
29097c478bd9Sstevel@tonic-gate 			} else
29107c478bd9Sstevel@tonic-gate 				/* Have to reassign ipha after pullupmsg */
29117c478bd9Sstevel@tonic-gate 				ipha = (ipha_t *)mp->b_rptr;
29127c478bd9Sstevel@tonic-gate 		}
29137c478bd9Sstevel@tonic-gate 		/*
29147c478bd9Sstevel@tonic-gate 		 * Queue length too much,
29157c478bd9Sstevel@tonic-gate 		 * try to selectively dq, or queue and process
29167c478bd9Sstevel@tonic-gate 		 */
29177c478bd9Sstevel@tonic-gate 		if (!tbf_dq_sel(vifp, ipha)) {
2918f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_q_overflow++;
2919bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2920bd670b35SErik Nordmark 			ip_drop_output("mrts_q_overflow", mp, ill);
29217c478bd9Sstevel@tonic-gate 			freemsg(mp);
29227c478bd9Sstevel@tonic-gate 		} else {
29237c478bd9Sstevel@tonic-gate 			tbf_queue(vifp, mp);
29247c478bd9Sstevel@tonic-gate 			tbf_process_q(vifp);
29257c478bd9Sstevel@tonic-gate 		}
29267c478bd9Sstevel@tonic-gate 	}
29277c478bd9Sstevel@tonic-gate 	if (t->tbf_q_len == 0) {
29287c478bd9Sstevel@tonic-gate 		id = vifp->v_timeout_id;
29297c478bd9Sstevel@tonic-gate 		vifp->v_timeout_id = 0;
29307c478bd9Sstevel@tonic-gate 	}
29317c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
29327c478bd9Sstevel@tonic-gate 	if (id != 0)
29337c478bd9Sstevel@tonic-gate 		(void) untimeout(id);
29347c478bd9Sstevel@tonic-gate }
29357c478bd9Sstevel@tonic-gate 
29367c478bd9Sstevel@tonic-gate /*
29377c478bd9Sstevel@tonic-gate  * Adds a packet to the tbf queue at the interface.
29387c478bd9Sstevel@tonic-gate  * The ipha is for mcastgrp destination for phyint and encap.
29397c478bd9Sstevel@tonic-gate  */
29407c478bd9Sstevel@tonic-gate static void
tbf_queue(struct vif * vifp,mblk_t * mp)29417c478bd9Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp)
29427c478bd9Sstevel@tonic-gate {
29437c478bd9Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
2944f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
2945fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29467c478bd9Sstevel@tonic-gate 
2947f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2948fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
2949f4b3ec61Sdh155122 		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
29507c478bd9Sstevel@tonic-gate 	}
29517c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29527c478bd9Sstevel@tonic-gate 
29537c478bd9Sstevel@tonic-gate 	if (t->tbf_t == NULL) {
29547c478bd9Sstevel@tonic-gate 		/* Queue was empty */
29557c478bd9Sstevel@tonic-gate 		t->tbf_q = mp;
29567c478bd9Sstevel@tonic-gate 	} else {
29577c478bd9Sstevel@tonic-gate 		/* Insert at tail */
29587c478bd9Sstevel@tonic-gate 		t->tbf_t->b_next = mp;
29597c478bd9Sstevel@tonic-gate 	}
29607c478bd9Sstevel@tonic-gate 	/* set new tail pointer */
29617c478bd9Sstevel@tonic-gate 	t->tbf_t = mp;
29627c478bd9Sstevel@tonic-gate 
29637c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = NULL;
29647c478bd9Sstevel@tonic-gate 
29657c478bd9Sstevel@tonic-gate 	t->tbf_q_len++;
29667c478bd9Sstevel@tonic-gate }
29677c478bd9Sstevel@tonic-gate 
29687c478bd9Sstevel@tonic-gate /*
29697c478bd9Sstevel@tonic-gate  * Process the queue at the vif interface.
29707c478bd9Sstevel@tonic-gate  * Drops the tbf_lock when sending packets.
29717c478bd9Sstevel@tonic-gate  *
29727c478bd9Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
29737c478bd9Sstevel@tonic-gate  */
29747c478bd9Sstevel@tonic-gate static void
tbf_process_q(struct vif * vifp)29757c478bd9Sstevel@tonic-gate tbf_process_q(struct vif *vifp)
29767c478bd9Sstevel@tonic-gate {
29777c478bd9Sstevel@tonic-gate 	mblk_t	*mp;
29787c478bd9Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
29797c478bd9Sstevel@tonic-gate 	size_t	len;
2980f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
2981fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
29827c478bd9Sstevel@tonic-gate 
2983f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
2984fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29857c478bd9Sstevel@tonic-gate 		    "tbf_process_q 1: vif %ld qlen = %d",
2986f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len);
29877c478bd9Sstevel@tonic-gate 	}
29887c478bd9Sstevel@tonic-gate 
29897c478bd9Sstevel@tonic-gate 	/*
29907c478bd9Sstevel@tonic-gate 	 * Loop through the queue at the interface and send
29917c478bd9Sstevel@tonic-gate 	 * as many packets as possible.
29927c478bd9Sstevel@tonic-gate 	 */
29937c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
29947c478bd9Sstevel@tonic-gate 
29957c478bd9Sstevel@tonic-gate 	while (t->tbf_q_len > 0) {
29967c478bd9Sstevel@tonic-gate 		mp = t->tbf_q;
29977c478bd9Sstevel@tonic-gate 		len = (size_t)msgdsize(mp); /* length of ip pkt */
29987c478bd9Sstevel@tonic-gate 
29997c478bd9Sstevel@tonic-gate 		/* Determine if the packet can be sent */
30007c478bd9Sstevel@tonic-gate 		if (len <= t->tbf_n_tok) {
30017c478bd9Sstevel@tonic-gate 			/*
30027c478bd9Sstevel@tonic-gate 			 * If so, reduce no. of tokens, dequeue the packet,
30037c478bd9Sstevel@tonic-gate 			 * send the packet.
30047c478bd9Sstevel@tonic-gate 			 */
30057c478bd9Sstevel@tonic-gate 			t->tbf_n_tok -= len;
30067c478bd9Sstevel@tonic-gate 
30077c478bd9Sstevel@tonic-gate 			t->tbf_q = mp->b_next;
30087c478bd9Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
30097c478bd9Sstevel@tonic-gate 				t->tbf_t = NULL;
30107c478bd9Sstevel@tonic-gate 			}
30117c478bd9Sstevel@tonic-gate 			mp->b_next = NULL;
30127c478bd9Sstevel@tonic-gate 			/* Exit mutex before sending packet, then re-enter */
30137c478bd9Sstevel@tonic-gate 			mutex_exit(&t->tbf_lock);
30147c478bd9Sstevel@tonic-gate 			tbf_send_packet(vifp, mp);
30157c478bd9Sstevel@tonic-gate 			mutex_enter(&t->tbf_lock);
30167c478bd9Sstevel@tonic-gate 		} else
30177c478bd9Sstevel@tonic-gate 			break;
30187c478bd9Sstevel@tonic-gate 	}
30197c478bd9Sstevel@tonic-gate }
30207c478bd9Sstevel@tonic-gate 
30217c478bd9Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer.  */
30227c478bd9Sstevel@tonic-gate static void
tbf_reprocess_q(void * arg)30237c478bd9Sstevel@tonic-gate tbf_reprocess_q(void *arg)
30247c478bd9Sstevel@tonic-gate {
30257c478bd9Sstevel@tonic-gate 	struct vif *vifp = arg;
3026f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
3027fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
30287c478bd9Sstevel@tonic-gate 
30297c478bd9Sstevel@tonic-gate 	mutex_enter(&vifp->v_tbf->tbf_lock);
30307c478bd9Sstevel@tonic-gate 	vifp->v_timeout_id = 0;
30317c478bd9Sstevel@tonic-gate 	tbf_update_tokens(vifp);
30327c478bd9Sstevel@tonic-gate 
30337c478bd9Sstevel@tonic-gate 	tbf_process_q(vifp);
30347c478bd9Sstevel@tonic-gate 
30357c478bd9Sstevel@tonic-gate 	if (vifp->v_tbf->tbf_q_len > 0) {
30367c478bd9Sstevel@tonic-gate 		vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
30377c478bd9Sstevel@tonic-gate 		    TBF_REPROCESS);
30387c478bd9Sstevel@tonic-gate 	}
30397c478bd9Sstevel@tonic-gate 	mutex_exit(&vifp->v_tbf->tbf_lock);
30407c478bd9Sstevel@tonic-gate 
3041f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
3042fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30437c478bd9Sstevel@tonic-gate 		    "tbf_reprcess_q: vif %ld timeout id = %p",
3044f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id);
30457c478bd9Sstevel@tonic-gate 	}
30467c478bd9Sstevel@tonic-gate }
30477c478bd9Sstevel@tonic-gate 
30487c478bd9Sstevel@tonic-gate /*
30497c478bd9Sstevel@tonic-gate  * Function that will selectively discard a member of the tbf queue,
30507c478bd9Sstevel@tonic-gate  * based on the precedence value and the priority.
30517c478bd9Sstevel@tonic-gate  *
30527c478bd9Sstevel@tonic-gate  * NOTE : The caller should quntimeout if the queue length is 0.
30537c478bd9Sstevel@tonic-gate  */
30547c478bd9Sstevel@tonic-gate static int
tbf_dq_sel(struct vif * vifp,ipha_t * ipha)30557c478bd9Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
30567c478bd9Sstevel@tonic-gate {
30577c478bd9Sstevel@tonic-gate 	uint_t		p;
30587c478bd9Sstevel@tonic-gate 	struct tbf		*t = vifp->v_tbf;
30597c478bd9Sstevel@tonic-gate 	mblk_t		**np;
30607c478bd9Sstevel@tonic-gate 	mblk_t		*last, *mp;
3061bd670b35SErik Nordmark 	ill_t		*ill = vifp->v_ipif->ipif_ill;
3062bd670b35SErik Nordmark 	ip_stack_t	*ipst = ill->ill_ipst;
3063fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
30647c478bd9Sstevel@tonic-gate 
3065f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
3066fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30677c478bd9Sstevel@tonic-gate 		    "dq_sel: vif %ld dst 0x%x",
3068f4b3ec61Sdh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst));
30697c478bd9Sstevel@tonic-gate 	}
30707c478bd9Sstevel@tonic-gate 
30717c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
30727c478bd9Sstevel@tonic-gate 	p = priority(vifp, ipha);
30737c478bd9Sstevel@tonic-gate 
30747c478bd9Sstevel@tonic-gate 	np = &t->tbf_q;
30757c478bd9Sstevel@tonic-gate 	last = NULL;
30767c478bd9Sstevel@tonic-gate 	while ((mp = *np) != NULL) {
30777c478bd9Sstevel@tonic-gate 		if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) {
30787c478bd9Sstevel@tonic-gate 			*np = mp->b_next;
30797c478bd9Sstevel@tonic-gate 			/* If removing the last packet, fix the tail pointer */
30807c478bd9Sstevel@tonic-gate 			if (mp == t->tbf_t)
30817c478bd9Sstevel@tonic-gate 				t->tbf_t = last;
30827c478bd9Sstevel@tonic-gate 			mp->b_prev = mp->b_next = NULL;
3083bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3084bd670b35SErik Nordmark 			ip_drop_output("tbf_dq_send", mp, ill);
30857c478bd9Sstevel@tonic-gate 			freemsg(mp);
30867c478bd9Sstevel@tonic-gate 			/*
30877c478bd9Sstevel@tonic-gate 			 * It's impossible for the queue to be empty, but
30887c478bd9Sstevel@tonic-gate 			 * we check anyway.
30897c478bd9Sstevel@tonic-gate 			 */
30907c478bd9Sstevel@tonic-gate 			if (--t->tbf_q_len == 0) {
30917c478bd9Sstevel@tonic-gate 				t->tbf_t = NULL;
30927c478bd9Sstevel@tonic-gate 			}
3093f4b3ec61Sdh155122 			ipst->ips_mrtstat->mrts_drop_sel++;
30947c478bd9Sstevel@tonic-gate 			return (1);
30957c478bd9Sstevel@tonic-gate 		}
30967c478bd9Sstevel@tonic-gate 		np = &mp->b_next;
30977c478bd9Sstevel@tonic-gate 		last = mp;
30987c478bd9Sstevel@tonic-gate 	}
30997c478bd9Sstevel@tonic-gate 	return (0);
31007c478bd9Sstevel@tonic-gate }
31017c478bd9Sstevel@tonic-gate 
31027c478bd9Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint.  */
31037c478bd9Sstevel@tonic-gate static void
tbf_send_packet(struct vif * vifp,mblk_t * mp)31047c478bd9Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp)
31057c478bd9Sstevel@tonic-gate {
3106bd670b35SErik Nordmark 	ipif_t		*ipif = vifp->v_ipif;
3107bd670b35SErik Nordmark 	ill_t		*ill = ipif->ipif_ill;
3108bd670b35SErik Nordmark 	ip_stack_t	*ipst = ill->ill_ipst;
3109fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
3110bd670b35SErik Nordmark 	ipha_t		*ipha;
31117c478bd9Sstevel@tonic-gate 
3112bd670b35SErik Nordmark 	ipha = (ipha_t *)mp->b_rptr;
31137c478bd9Sstevel@tonic-gate 	/* If encap tunnel options */
31147c478bd9Sstevel@tonic-gate 	if (vifp->v_flags & VIFF_TUNNEL)  {
3115bd670b35SErik Nordmark 		ip_xmit_attr_t	ixas;
3116bd670b35SErik Nordmark 
3117f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
3118fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
3119bd670b35SErik Nordmark 			    "tbf_send_packet: ENCAP tunnel vif %ld",
3120f4b3ec61Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs));
31217c478bd9Sstevel@tonic-gate 		}
3122bd670b35SErik Nordmark 		bzero(&ixas, sizeof (ixas));
312344b099c4SSowmini Varadhan 		ixas.ixa_flags =
312444b099c4SSowmini Varadhan 		    IXAF_IS_IPV4 | IXAF_NO_TTL_CHANGE | IXAF_VERIFY_SOURCE;
3125bd670b35SErik Nordmark 		ixas.ixa_ipst = ipst;
3126bd670b35SErik Nordmark 		ixas.ixa_ifindex = 0;
3127bd670b35SErik Nordmark 		ixas.ixa_cred = kcred;
3128bd670b35SErik Nordmark 		ixas.ixa_cpid = NOPID;
3129bd670b35SErik Nordmark 		ixas.ixa_tsl = NULL;
3130bd670b35SErik Nordmark 		ixas.ixa_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */
3131bd670b35SErik Nordmark 		ixas.ixa_pktlen = ntohs(ipha->ipha_length);
3132bd670b35SErik Nordmark 		ixas.ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha);
31337c478bd9Sstevel@tonic-gate 
31347c478bd9Sstevel@tonic-gate 		/*
3135bd670b35SErik Nordmark 		 * Feed into ip_output_simple which will set the ident field
3136bd670b35SErik Nordmark 		 * and checksum the encapsulating header.
31377c478bd9Sstevel@tonic-gate 		 * BSD gets the cached route vifp->v_route from ip_output()
31387c478bd9Sstevel@tonic-gate 		 * to speed up route table lookups. Not necessary in SunOS 5.x.
3139bd670b35SErik Nordmark 		 * One could make multicast forwarding faster by putting an
3140bd670b35SErik Nordmark 		 * ip_xmit_attr_t in each vif thereby caching the ire/nce.
31417c478bd9Sstevel@tonic-gate 		 */
3142bd670b35SErik Nordmark 		(void) ip_output_simple(mp, &ixas);
3143bd670b35SErik Nordmark 		ixa_cleanup(&ixas);
31447c478bd9Sstevel@tonic-gate 		return;
31457c478bd9Sstevel@tonic-gate 
31467c478bd9Sstevel@tonic-gate 		/* phyint */
31477c478bd9Sstevel@tonic-gate 	} else {
31487c478bd9Sstevel@tonic-gate 		/* Need to loop back to members on the outgoing interface. */
31497c478bd9Sstevel@tonic-gate 		ipaddr_t	dst;
3150bd670b35SErik Nordmark 		ip_recv_attr_t	iras;
3151bd670b35SErik Nordmark 		nce_t		*nce;
3152bd670b35SErik Nordmark 
3153bd670b35SErik Nordmark 		bzero(&iras, sizeof (iras));
3154bd670b35SErik Nordmark 		iras.ira_flags = IRAF_IS_IPV4;
3155bd670b35SErik Nordmark 		iras.ira_ill = iras.ira_rill = ill;
3156bd670b35SErik Nordmark 		iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
3157bd670b35SErik Nordmark 		iras.ira_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */
3158bd670b35SErik Nordmark 		iras.ira_pktlen = ntohs(ipha->ipha_length);
3159bd670b35SErik Nordmark 		iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
3160bd670b35SErik Nordmark 
31617c478bd9Sstevel@tonic-gate 		dst = ipha->ipha_dst;
3162bd670b35SErik Nordmark 		if (ill_hasmembers_v4(ill, dst)) {
3163bd670b35SErik Nordmark 			iras.ira_flags |= IRAF_LOOPBACK_COPY;
31647c478bd9Sstevel@tonic-gate 		}
3165f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
3166fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
31677c478bd9Sstevel@tonic-gate 			    "tbf_send_pkt: phyint forward  vif %ld dst = 0x%x",
3168f4b3ec61Sdh155122 			    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst));
31697c478bd9Sstevel@tonic-gate 		}
3170bd670b35SErik Nordmark 		/*
3171bd670b35SErik Nordmark 		 * Find an NCE which matches the nexthop.
3172bd670b35SErik Nordmark 		 * For a pt-pt interface we use the other end of the pt-pt
3173bd670b35SErik Nordmark 		 * link.
3174bd670b35SErik Nordmark 		 */
3175bd670b35SErik Nordmark 		if (ipif->ipif_flags & IPIF_POINTOPOINT) {
3176bd670b35SErik Nordmark 			dst = ipif->ipif_pp_dst_addr;
3177bd670b35SErik Nordmark 			nce = arp_nce_init(ill, dst, ill->ill_net_type);
3178bd670b35SErik Nordmark 		} else {
3179bd670b35SErik Nordmark 			nce = arp_nce_init(ill, dst, IRE_MULTICAST);
3180bd670b35SErik Nordmark 		}
3181bd670b35SErik Nordmark 		if (nce == NULL) {
3182bd670b35SErik Nordmark 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3183bd670b35SErik Nordmark 			ip_drop_output("tbf_send_packet - no nce", mp, ill);
3184bd670b35SErik Nordmark 			freemsg(mp);
3185bd670b35SErik Nordmark 			return;
3186bd670b35SErik Nordmark 		}
3187bd670b35SErik Nordmark 
3188bd670b35SErik Nordmark 		/*
3189bd670b35SErik Nordmark 		 * We don't remeber the incoming ill. Thus we
3190bd670b35SErik Nordmark 		 * pretend the  packet arrived on the outbound ill. This means
3191bd670b35SErik Nordmark 		 * statistics for input errors will be increased on the wrong
3192bd670b35SErik Nordmark 		 * ill but that isn't a big deal.
3193bd670b35SErik Nordmark 		 */
31941eee170aSErik Nordmark 		ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mc_mtu,
31951eee170aSErik Nordmark 		    0);
3196bd670b35SErik Nordmark 		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
3197bd670b35SErik Nordmark 
3198bd670b35SErik Nordmark 		nce_refrele(nce);
31997c478bd9Sstevel@tonic-gate 	}
32007c478bd9Sstevel@tonic-gate }
32017c478bd9Sstevel@tonic-gate 
32027c478bd9Sstevel@tonic-gate /*
32037c478bd9Sstevel@tonic-gate  * Determine the current time and then the elapsed time (between the last time
32047c478bd9Sstevel@tonic-gate  * and time now).  Update the no. of tokens in the bucket.
32057c478bd9Sstevel@tonic-gate  */
32067c478bd9Sstevel@tonic-gate static void
tbf_update_tokens(struct vif * vifp)32077c478bd9Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp)
32087c478bd9Sstevel@tonic-gate {
32097c478bd9Sstevel@tonic-gate 	timespec_t	tp;
32107c478bd9Sstevel@tonic-gate 	hrtime_t	tm;
32117c478bd9Sstevel@tonic-gate 	struct tbf	*t = vifp->v_tbf;
3212f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
3213fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
32147c478bd9Sstevel@tonic-gate 
32157c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&t->tbf_lock));
32167c478bd9Sstevel@tonic-gate 
32177c478bd9Sstevel@tonic-gate 	/* Time in secs and nsecs, rate limit in kbits/sec */
32187c478bd9Sstevel@tonic-gate 	gethrestime(&tp);
32197c478bd9Sstevel@tonic-gate 
32207c478bd9Sstevel@tonic-gate 	/*LINTED*/
32217c478bd9Sstevel@tonic-gate 	TV_DELTA(tp, t->tbf_last_pkt_t, tm);
32227c478bd9Sstevel@tonic-gate 
32237c478bd9Sstevel@tonic-gate 	/*
32247c478bd9Sstevel@tonic-gate 	 * This formula is actually
32257c478bd9Sstevel@tonic-gate 	 * "time in seconds" * "bytes/second".  Scaled for nsec.
32267c478bd9Sstevel@tonic-gate 	 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
32277c478bd9Sstevel@tonic-gate 	 *
32287c478bd9Sstevel@tonic-gate 	 * The (1000/1024) was introduced in add_vif to optimize
32297c478bd9Sstevel@tonic-gate 	 * this divide into a shift.
32307c478bd9Sstevel@tonic-gate 	 */
32317c478bd9Sstevel@tonic-gate 	t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
32327c478bd9Sstevel@tonic-gate 	t->tbf_last_pkt_t = tp;
32337c478bd9Sstevel@tonic-gate 
32347c478bd9Sstevel@tonic-gate 	if (t->tbf_n_tok > MAX_BKT_SIZE)
32357c478bd9Sstevel@tonic-gate 		t->tbf_n_tok = MAX_BKT_SIZE;
3236f4b3ec61Sdh155122 	if (ipst->ips_ip_mrtdebug > 1) {
3237fc80c0dfSnordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
32387c478bd9Sstevel@tonic-gate 		    "tbf_update_tok: tm %lld tok %d vif %ld",
3239f4b3ec61Sdh155122 		    tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs));
32407c478bd9Sstevel@tonic-gate 	}
32417c478bd9Sstevel@tonic-gate }
32427c478bd9Sstevel@tonic-gate 
32437c478bd9Sstevel@tonic-gate /*
32447c478bd9Sstevel@tonic-gate  * Priority currently is based on port nos.
32457c478bd9Sstevel@tonic-gate  * Different forwarding mechanisms have different ways
32467c478bd9Sstevel@tonic-gate  * of obtaining the port no. Hence, the vif must be
32477c478bd9Sstevel@tonic-gate  * given along with the packet itself.
32487c478bd9Sstevel@tonic-gate  *
32497c478bd9Sstevel@tonic-gate  */
32507c478bd9Sstevel@tonic-gate static int
priority(struct vif * vifp,ipha_t * ipha)32517c478bd9Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha)
32527c478bd9Sstevel@tonic-gate {
32537c478bd9Sstevel@tonic-gate 	int prio;
3254f4b3ec61Sdh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
3255fc80c0dfSnordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
32567c478bd9Sstevel@tonic-gate 
32577c478bd9Sstevel@tonic-gate 	/* Temporary hack; may add general packet classifier some day */
32587c478bd9Sstevel@tonic-gate 
32597c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock));
32607c478bd9Sstevel@tonic-gate 
32617c478bd9Sstevel@tonic-gate 	/*
32627c478bd9Sstevel@tonic-gate 	 * The UDP port space is divided up into four priority ranges:
32637c478bd9Sstevel@tonic-gate 	 * [0, 16384)	: unclassified - lowest priority
32647c478bd9Sstevel@tonic-gate 	 * [16384, 32768)	: audio - highest priority
32657c478bd9Sstevel@tonic-gate 	 * [32768, 49152)	: whiteboard - medium priority
32667c478bd9Sstevel@tonic-gate 	 * [49152, 65536)	: video - low priority
32677c478bd9Sstevel@tonic-gate 	 */
32687c478bd9Sstevel@tonic-gate 
32697c478bd9Sstevel@tonic-gate 	if (ipha->ipha_protocol == IPPROTO_UDP) {
32707c478bd9Sstevel@tonic-gate 		struct udphdr *udp =
32717c478bd9Sstevel@tonic-gate 		    (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha));
32727c478bd9Sstevel@tonic-gate 		switch (ntohs(udp->uh_dport) & 0xc000) {
32737c478bd9Sstevel@tonic-gate 		case 0x4000:
32747c478bd9Sstevel@tonic-gate 			prio = 70;
32757c478bd9Sstevel@tonic-gate 			break;
32767c478bd9Sstevel@tonic-gate 		case 0x8000:
32777c478bd9Sstevel@tonic-gate 			prio = 60;
32787c478bd9Sstevel@tonic-gate 			break;
32797c478bd9Sstevel@tonic-gate 		case 0xc000:
32807c478bd9Sstevel@tonic-gate 			prio = 55;
32817c478bd9Sstevel@tonic-gate 			break;
32827c478bd9Sstevel@tonic-gate 		default:
32837c478bd9Sstevel@tonic-gate 			prio = 50;
32847c478bd9Sstevel@tonic-gate 			break;
32857c478bd9Sstevel@tonic-gate 		}
3286f4b3ec61Sdh155122 		if (ipst->ips_ip_mrtdebug > 1) {
3287fc80c0dfSnordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
32887c478bd9Sstevel@tonic-gate 			    "priority: port %x prio %d\n",
32897c478bd9Sstevel@tonic-gate 			    ntohs(udp->uh_dport), prio);
32907c478bd9Sstevel@tonic-gate 		}
32917c478bd9Sstevel@tonic-gate 	} else
32927c478bd9Sstevel@tonic-gate 		prio = 50;  /* default priority */
32937c478bd9Sstevel@tonic-gate 	return (prio);
32947c478bd9Sstevel@tonic-gate }
32957c478bd9Sstevel@tonic-gate 
32967c478bd9Sstevel@tonic-gate /*
32977c478bd9Sstevel@tonic-gate  * End of token bucket filter modifications
32987c478bd9Sstevel@tonic-gate  */
32997c478bd9Sstevel@tonic-gate 
33007c478bd9Sstevel@tonic-gate 
33017c478bd9Sstevel@tonic-gate 
33027c478bd9Sstevel@tonic-gate /*
33037c478bd9Sstevel@tonic-gate  * Produces data for netstat -M.
33047c478bd9Sstevel@tonic-gate  */
33057c478bd9Sstevel@tonic-gate int
ip_mroute_stats(mblk_t * mp,ip_stack_t * ipst)3306f4b3ec61Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst)
33077c478bd9Sstevel@tonic-gate {
3308f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
3309f4b3ec61Sdh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
3310f4b3ec61Sdh155122 	if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat,
3311f4b3ec61Sdh155122 		sizeof (struct mrtstat))) {
33127c478bd9Sstevel@tonic-gate 		ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
3313f4b3ec61Sdh155122 		    (size_t)sizeof (struct mrtstat)));
33147c478bd9Sstevel@tonic-gate 		return (0);
33157c478bd9Sstevel@tonic-gate 	}
33167c478bd9Sstevel@tonic-gate 	return (1);
33177c478bd9Sstevel@tonic-gate }
33187c478bd9Sstevel@tonic-gate 
33197c478bd9Sstevel@tonic-gate /*
33207c478bd9Sstevel@tonic-gate  * Sends info for SNMP's MIB.
33217c478bd9Sstevel@tonic-gate  */
33227c478bd9Sstevel@tonic-gate int
ip_mroute_vif(mblk_t * mp,ip_stack_t * ipst)3323f4b3ec61Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst)
33247c478bd9Sstevel@tonic-gate {
33257c478bd9Sstevel@tonic-gate 	struct vifctl	vi;
33267c478bd9Sstevel@tonic-gate 	vifi_t		vifi;
33277c478bd9Sstevel@tonic-gate 
3328f4b3ec61Sdh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
3329f4b3ec61Sdh155122 	for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) {
3330f4b3ec61Sdh155122 		if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0)
33317c478bd9Sstevel@tonic-gate 			continue;
33327c478bd9Sstevel@tonic-gate 		/*
33337c478bd9Sstevel@tonic-gate 		 * No locks here, an approximation is fine.
33347c478bd9Sstevel@tonic-gate 		 */
33357c478bd9Sstevel@tonic-gate 		vi.vifc_vifi = vifi;
3336f4b3ec61Sdh155122 		vi.vifc_flags = ipst->ips_vifs[vifi].v_flags;
3337f4b3ec61Sdh155122 		vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold;
3338f4b3ec61Sdh155122 		vi.vifc_rate_limit	= ipst->ips_vifs[vifi].v_rate_limit;
3339f4b3ec61Sdh155122 		vi.vifc_lcl_addr	= ipst->ips_vifs[vifi].v_lcl_addr;
3340f4b3ec61Sdh155122 		vi.vifc_rmt_addr	= ipst->ips_vifs[vifi].v_rmt_addr;
3341f4b3ec61Sdh155122 		vi.vifc_pkt_in		= ipst->ips_vifs[vifi].v_pkt_in;
3342f4b3ec61Sdh155122 		vi.vifc_pkt_out		= ipst->ips_vifs[vifi].v_pkt_out;
33437c478bd9Sstevel@tonic-gate 
33447c478bd9Sstevel@tonic-gate 		if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) {
33457c478bd9Sstevel@tonic-gate 			ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
33467c478bd9Sstevel@tonic-gate 			    (size_t)sizeof (vi)));
3347ee07f6e7SSowmini Varadhan 			mutex_exit(&ipst->ips_numvifs_mutex);
33487c478bd9Sstevel@tonic-gate 			return (0);
33497c478bd9Sstevel@tonic-gate 		}
33507c478bd9Sstevel@tonic-gate 	}
3351f4b3ec61Sdh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
33527c478bd9Sstevel@tonic-gate 	return (1);
33537c478bd9Sstevel@tonic-gate }
33547c478bd9Sstevel@tonic-gate 
33557c478bd9Sstevel@tonic-gate /*
33567c478bd9Sstevel@tonic-gate  * Called by ip_snmp_get to send up multicast routing table.
33577c478bd9Sstevel@tonic-gate  */
33587c478bd9Sstevel@tonic-gate int
ip_mroute_mrt(mblk_t * mp,ip_stack_t * ipst)3359f4b3ec61Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst)
33607c478bd9Sstevel@tonic-gate {
33617c478bd9Sstevel@tonic-gate 	int			i, j;
33627c478bd9Sstevel@tonic-gate 	struct mfc		*rt;
33637c478bd9Sstevel@tonic-gate 	struct mfcctl	mfcc;
33647c478bd9Sstevel@tonic-gate 
33657c478bd9Sstevel@tonic-gate 	/*
33667c478bd9Sstevel@tonic-gate 	 * Make sure multicast has not been turned off.
33677c478bd9Sstevel@tonic-gate 	 */
3368f4b3ec61Sdh155122 	if (is_mrouter_off(ipst))
33697c478bd9Sstevel@tonic-gate 		return (1);
33707c478bd9Sstevel@tonic-gate 
33717c478bd9Sstevel@tonic-gate 	/* Loop over all hash buckets and their chains */
33727c478bd9Sstevel@tonic-gate 	for (i = 0; i < MFCTBLSIZ; i++) {
3373f4b3ec61Sdh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
3374f4b3ec61Sdh155122 		for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) {
33757c478bd9Sstevel@tonic-gate 			mutex_enter(&rt->mfc_mutex);
33767c478bd9Sstevel@tonic-gate 			if (rt->mfc_rte != NULL ||
33777c478bd9Sstevel@tonic-gate 			    (rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
33787c478bd9Sstevel@tonic-gate 				mutex_exit(&rt->mfc_mutex);
33797c478bd9Sstevel@tonic-gate 				continue;
33807c478bd9Sstevel@tonic-gate 			}
33817c478bd9Sstevel@tonic-gate 			mfcc.mfcc_origin = rt->mfc_origin;
33827c478bd9Sstevel@tonic-gate 			mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp;
33837c478bd9Sstevel@tonic-gate 			mfcc.mfcc_parent = rt->mfc_parent;
33847c478bd9Sstevel@tonic-gate 			mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt;
3385f4b3ec61Sdh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
3386f4b3ec61Sdh155122 			for (j = 0; j < (int)ipst->ips_numvifs; j++)
33877c478bd9Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = rt->mfc_ttls[j];
3388f4b3ec61Sdh155122 			for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++)
33897c478bd9Sstevel@tonic-gate 				mfcc.mfcc_ttls[j] = 0;
3390f4b3ec61Sdh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
33917c478bd9Sstevel@tonic-gate 
33927c478bd9Sstevel@tonic-gate 			mutex_exit(&rt->mfc_mutex);
33937c478bd9Sstevel@tonic-gate 			if (!snmp_append_data(mp, (char *)&mfcc,
33947c478bd9Sstevel@tonic-gate 			    sizeof (mfcc))) {
3395f4b3ec61Sdh155122 				MFCB_REFRELE(&ipst->ips_mfcs[i]);
33967c478bd9Sstevel@tonic-gate 				ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
33977c478bd9Sstevel@tonic-gate 				    (size_t)sizeof (mfcc)));
33987c478bd9Sstevel@tonic-gate 				return (0);
33997c478bd9Sstevel@tonic-gate 			}
34007c478bd9Sstevel@tonic-gate 		}
3401f4b3ec61Sdh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
34027c478bd9Sstevel@tonic-gate 	}
34037c478bd9Sstevel@tonic-gate 	return (1);
34047c478bd9Sstevel@tonic-gate }
3405