10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51676Sjpk * Common Development and Distribution License (the "License").
61676Sjpk * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*13123SErik.Nordmark@Sun.COM * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */
250Sstevel@tonic-gate
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * Procedures for the kernel part of DVMRP,
280Sstevel@tonic-gate * a Distance-Vector Multicast Routing Protocol.
290Sstevel@tonic-gate * (See RFC-1075)
300Sstevel@tonic-gate * Written by David Waitzman, BBN Labs, August 1988.
310Sstevel@tonic-gate * Modified by Steve Deering, Stanford, February 1989.
320Sstevel@tonic-gate * Modified by Mark J. Steiglitz, Stanford, May, 1991
330Sstevel@tonic-gate * Modified by Van Jacobson, LBL, January 1993
340Sstevel@tonic-gate * Modified by Ajit Thyagarajan, PARC, August 1993
350Sstevel@tonic-gate * Modified by Bill Fenner, PARC, April 1995
360Sstevel@tonic-gate *
370Sstevel@tonic-gate * MROUTING 3.5
380Sstevel@tonic-gate */
390Sstevel@tonic-gate
400Sstevel@tonic-gate /*
410Sstevel@tonic-gate * TODO
420Sstevel@tonic-gate * - function pointer field in vif, void *vif_sendit()
430Sstevel@tonic-gate */
440Sstevel@tonic-gate
450Sstevel@tonic-gate #include <sys/types.h>
460Sstevel@tonic-gate #include <sys/stream.h>
470Sstevel@tonic-gate #include <sys/stropts.h>
480Sstevel@tonic-gate #include <sys/strlog.h>
490Sstevel@tonic-gate #include <sys/systm.h>
500Sstevel@tonic-gate #include <sys/ddi.h>
510Sstevel@tonic-gate #include <sys/cmn_err.h>
520Sstevel@tonic-gate #include <sys/zone.h>
530Sstevel@tonic-gate
540Sstevel@tonic-gate #include <sys/param.h>
550Sstevel@tonic-gate #include <sys/socket.h>
560Sstevel@tonic-gate #include <sys/vtrace.h>
570Sstevel@tonic-gate #include <sys/debug.h>
580Sstevel@tonic-gate #include <net/if.h>
590Sstevel@tonic-gate #include <sys/sockio.h>
600Sstevel@tonic-gate #include <netinet/in.h>
610Sstevel@tonic-gate #include <net/if_dl.h>
620Sstevel@tonic-gate
6311042SErik.Nordmark@Sun.COM #include <inet/ipsec_impl.h>
640Sstevel@tonic-gate #include <inet/common.h>
650Sstevel@tonic-gate #include <inet/mi.h>
660Sstevel@tonic-gate #include <inet/nd.h>
6712016SGirish.Moodalbail@Sun.COM #include <inet/tunables.h>
680Sstevel@tonic-gate #include <inet/mib2.h>
690Sstevel@tonic-gate #include <netinet/ip6.h>
700Sstevel@tonic-gate #include <inet/ip.h>
710Sstevel@tonic-gate #include <inet/snmpcom.h>
720Sstevel@tonic-gate
730Sstevel@tonic-gate #include <netinet/igmp.h>
740Sstevel@tonic-gate #include <netinet/igmp_var.h>
750Sstevel@tonic-gate #include <netinet/udp.h>
760Sstevel@tonic-gate #include <netinet/ip_mroute.h>
770Sstevel@tonic-gate #include <inet/ip_multi.h>
780Sstevel@tonic-gate #include <inet/ip_ire.h>
7911042SErik.Nordmark@Sun.COM #include <inet/ip_ndp.h>
800Sstevel@tonic-gate #include <inet/ip_if.h>
810Sstevel@tonic-gate #include <inet/ipclassifier.h>
820Sstevel@tonic-gate
830Sstevel@tonic-gate #include <netinet/pim.h>
840Sstevel@tonic-gate
850Sstevel@tonic-gate
860Sstevel@tonic-gate /*
870Sstevel@tonic-gate * MT Design:
880Sstevel@tonic-gate *
890Sstevel@tonic-gate * There are three main data structures viftable, mfctable and tbftable that
900Sstevel@tonic-gate * need to be protected against MT races.
910Sstevel@tonic-gate *
 * viftable is a fixed length array of vif structs. There is no lock to protect
 * the whole array, instead each struct is protected by its own individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines the
 * current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that the vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use. When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
1020Sstevel@tonic-gate * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
1030Sstevel@tonic-gate * from going away a refhold is put on the ipif before using it. see
1040Sstevel@tonic-gate * lock_good_vif() and unlock_good_vif().
1050Sstevel@tonic-gate *
1060Sstevel@tonic-gate * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
1070Sstevel@tonic-gate * of the vif struct.
1080Sstevel@tonic-gate *
1090Sstevel@tonic-gate * tbftable is also a fixed length array of tbf structs and is only accessed
1100Sstevel@tonic-gate * via v_tbf. It is protected by its own lock tbf_lock.
1110Sstevel@tonic-gate *
1120Sstevel@tonic-gate * Lock Ordering is
1130Sstevel@tonic-gate * v_lock --> tbf_lock
 *	v_lock --> ill_lock
1150Sstevel@tonic-gate *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
1170Sstevel@tonic-gate * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
1180Sstevel@tonic-gate * it also maintains a state. These fields are protected by a lock (mfcb_lock).
1190Sstevel@tonic-gate * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
1200Sstevel@tonic-gate * protect the struct elements.
1210Sstevel@tonic-gate *
1220Sstevel@tonic-gate * mfc structs are dynamically allocated and are singly linked
1230Sstevel@tonic-gate * at the head of the chain. When an mfc structure is to be deleted
1240Sstevel@tonic-gate * it is marked condemned and so is the state in the bucket struct.
1250Sstevel@tonic-gate * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
1270Sstevel@tonic-gate *
1280Sstevel@tonic-gate * Locking Hierarchy:
1290Sstevel@tonic-gate * The bucket lock should be acquired before the mfc struct lock.
1300Sstevel@tonic-gate * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
1310Sstevel@tonic-gate * operations on the bucket struct.
1320Sstevel@tonic-gate *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock,
 * causing the stats to be approximate, not exact.
1380Sstevel@tonic-gate */
1390Sstevel@tonic-gate
/* Value used by mrouted to mean "no route for this source". */
#define	NO_VIF		MAXVIFS

/*
 * Timeouts:
 *	Upcall timeouts - BSD keeps a boolean_t mfc->expire together with
 *	nexpire[MFCTBLSIZE], the number of times expire has been called.
 *	SunOS 5.x keeps a separate mfc->timeout in every mfc instead.
 *	Some Unixes limit how many timeouts may be outstanding at once;
 *	SunOS 5.x has no such restriction.
 */

/*
 * In BSD, EXPIRE_TIMEOUT is the period at which expire_upcalls() runs and
 * UPCALL_EXPIRE is the number of such timeouts that must elapse before a
 * particular upcall expires, so the time until expiration is
 * EXPIRE_TIMEOUT * UPCALL_EXPIRE.
 */
#define	EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
#define	UPCALL_EXPIRE	6		/* number of timeouts */
1580Sstevel@tonic-gate
/*
 * Hash a (source, group) address pair; the result is used to index the
 * mfc bucket table. Both arguments are expanded more than once, so they
 * must not have side effects.
 */
#define	MFCHASH(a, g)	MFCHASHMOD((a) ^ ((a) >> 10) ^ ((a) >> 20) ^ \
			    (g) ^ ((g) >> 10) ^ ((g) >> 20))
1640Sstevel@tonic-gate
/* Token bucket filter reprocessing period: 100 times per second. */
#define	TBF_REPROCESS		(hz / 100)

/* Marks a PIM packet that arrived on a Register interface. */
#define	PIM_REGISTER_MARKER	0xffffffff
1690Sstevel@tonic-gate
1700Sstevel@tonic-gate /* Function declarations */
1713448Sdh155122 static int add_mfc(struct mfcctl *, ip_stack_t *);
17211042SErik.Nordmark@Sun.COM static int add_vif(struct vifctl *, conn_t *, ip_stack_t *);
1733448Sdh155122 static int del_mfc(struct mfcctl *, ip_stack_t *);
17411042SErik.Nordmark@Sun.COM static int del_vif(vifi_t *, ip_stack_t *);
1750Sstevel@tonic-gate static void del_vifp(struct vif *);
1760Sstevel@tonic-gate static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1770Sstevel@tonic-gate static void expire_upcalls(void *);
1783448Sdh155122 static void fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
1793448Sdh155122 static void free_queue(struct mfc *);
1803448Sdh155122 static int get_assert(uchar_t *, ip_stack_t *);
1813448Sdh155122 static int get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
1823448Sdh155122 static int get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
1830Sstevel@tonic-gate static int get_version(uchar_t *);
1843448Sdh155122 static int get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
1850Sstevel@tonic-gate static int ip_mdq(mblk_t *, ipha_t *, ill_t *,
1860Sstevel@tonic-gate ipaddr_t, struct mfc *);
1875240Snordmark static int ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
1880Sstevel@tonic-gate static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
18911042SErik.Nordmark@Sun.COM static int register_mforward(mblk_t *, ip_recv_attr_t *);
1900Sstevel@tonic-gate static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
1913448Sdh155122 static int set_assert(int *, ip_stack_t *);
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate /*
1940Sstevel@tonic-gate * Token Bucket Filter functions
1950Sstevel@tonic-gate */
1960Sstevel@tonic-gate static int priority(struct vif *, ipha_t *);
1970Sstevel@tonic-gate static void tbf_control(struct vif *, mblk_t *, ipha_t *);
1980Sstevel@tonic-gate static int tbf_dq_sel(struct vif *, ipha_t *);
1990Sstevel@tonic-gate static void tbf_process_q(struct vif *);
2000Sstevel@tonic-gate static void tbf_queue(struct vif *, mblk_t *);
2010Sstevel@tonic-gate static void tbf_reprocess_q(void *);
2020Sstevel@tonic-gate static void tbf_send_packet(struct vif *, mblk_t *);
2030Sstevel@tonic-gate static void tbf_update_tokens(struct vif *);
2040Sstevel@tonic-gate static void release_mfc(struct mfcb *);
2050Sstevel@tonic-gate
2063448Sdh155122 static boolean_t is_mrouter_off(ip_stack_t *);
2070Sstevel@tonic-gate /*
2080Sstevel@tonic-gate * Encapsulation packets
2090Sstevel@tonic-gate */
2100Sstevel@tonic-gate
2110Sstevel@tonic-gate #define ENCAP_TTL 64
2120Sstevel@tonic-gate
2130Sstevel@tonic-gate /* prototype IP hdr for encapsulated packets */
2140Sstevel@tonic-gate static ipha_t multicast_encap_iphdr = {
2150Sstevel@tonic-gate IP_SIMPLE_HDR_VERSION,
2160Sstevel@tonic-gate 0, /* tos */
2170Sstevel@tonic-gate sizeof (ipha_t), /* total length */
2180Sstevel@tonic-gate 0, /* id */
2190Sstevel@tonic-gate 0, /* frag offset */
2200Sstevel@tonic-gate ENCAP_TTL, IPPROTO_ENCAP,
2210Sstevel@tonic-gate 0, /* checksum */
2220Sstevel@tonic-gate };
2230Sstevel@tonic-gate
/*
 * Assert notification messages are rate limited to one per this many
 * nanoseconds.
 */
#define	ASSERT_MSG_TIME		3000000000
2280Sstevel@tonic-gate
2290Sstevel@tonic-gate
/*
 * Take a reference on a vif; v_lock is acquired and dropped internally.
 */
#define	VIF_REFHOLD(vifp) {				\
	mutex_enter(&(vifp)->v_lock);			\
	++(vifp)->v_refcnt;				\
	mutex_exit(&(vifp)->v_lock);			\
}

/*
 * Drop a reference on a vif whose v_lock is already held by the caller.
 * When the last reference goes away on a condemned vif the vif is handed
 * to del_vifp() (which presumably releases v_lock itself -- confirm
 * against del_vifp); otherwise v_lock is released here.
 */
#define	VIF_REFRELE_LOCKED(vifp) {			\
	if (--(vifp)->v_refcnt == 0 &&			\
	    ((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
		del_vifp(vifp);				\
	} else {					\
		mutex_exit(&(vifp)->v_lock);		\
	}						\
}

/*
 * Drop a reference on a vif: acquire v_lock, then proceed exactly as
 * VIF_REFRELE_LOCKED does.
 */
#define	VIF_REFRELE(vifp) {				\
	mutex_enter(&(vifp)->v_lock);			\
	VIF_REFRELE_LOCKED(vifp);			\
}
2560Sstevel@tonic-gate
/*
 * Register a walker on an mfc bucket. The ASSERT catches the reference
 * count wrapping around to zero.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	++(mfcb)->mfcb_refcnt;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}

/*
 * Unregister a walker from an mfc bucket. If this was the last walker and
 * the bucket is marked condemned, release_mfc() is called with mfcb_lock
 * still held.
 */
#define	MFCB_REFRELE(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	(mfcb)->mfcb_refcnt--;				\
	if ((mfcb)->mfcb_refcnt == 0 &&			\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED))	\
		release_mfc(mfcb);			\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}
2730Sstevel@tonic-gate
/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls (mfc_rte != NULL) and entries that are
 * marked condemned.
 * Type of service parameter to be added in the future!
 *
 *	mfcbp	pointer to the hash bucket (struct mfcb) to search
 *	o	origin address, compared against mfc_origin.s_addr
 *	g	group address, compared against mfc_mcastgrp.s_addr
 *	rt	lvalue that receives the matching struct mfc *, or NULL
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as "a | b" or "&bucket" can be passed safely. "o" and "g" are
 * evaluated once per chain entry and must not have side effects.
 * This macro takes no locks itself.
 */
#define	MFCFIND(mfcbp, o, g, rt) {					\
	struct mfc *_mb_rt;						\
									\
	(rt) = NULL;							\
	for (_mb_rt = (mfcbp)->mfcb_mfc; _mb_rt != NULL;		\
	    _mb_rt = _mb_rt->mfc_next) {				\
		if (_mb_rt->mfc_origin.s_addr == (o) &&			\
		    _mb_rt->mfc_mcastgrp.s_addr == (g) &&		\
		    _mb_rt->mfc_rte == NULL &&				\
		    !(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {	\
			(rt) = _mb_rt;					\
			break;						\
		}							\
	}								\
}
2950Sstevel@tonic-gate
2960Sstevel@tonic-gate /*
2970Sstevel@tonic-gate * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
2980Sstevel@tonic-gate * are inefficient. We use gethrestime() which returns a timespec_t with
2990Sstevel@tonic-gate * sec and nsec, the resolution is machine dependent.
3000Sstevel@tonic-gate * The following 2 macros have been changed to use nsec instead of usec.
3010Sstevel@tonic-gate */
/*
 * Macros to compute elapsed time.
 * Borrowed from Van Jacobson's scheduling code.
 *
 * TV_DELTA(a, b, delta) stores a - b, in nanoseconds, into delta.
 * a and b are structures with tv_sec and tv_nsec members; delta should be
 * a hrtime_t (it must be a 64-bit lvalue to hold differences of more than
 * about two seconds).
 *
 * The seconds difference is computed and scaled in 64-bit arithmetic.
 * Multiplying it by 1000000000 as an int overflows as soon as the two
 * times are three or more seconds apart.
 */
#define	TV_DELTA(a, b, delta) {						\
	long long tvd_secs = (long long)(a).tv_sec - (b).tv_sec;	\
									\
	(delta) = (a).tv_nsec - (b).tv_nsec;				\
	(delta) += 1000000000LL * tvd_secs;				\
}

/*
 * TV_LT(a, b) is non-zero when time a is strictly earlier than time b.
 */
#define	TV_LT(a, b)	((a).tv_sec < (b).tv_sec ||			\
	((a).tv_sec == (b).tv_sec && (a).tv_nsec < (b).tv_nsec))
3270Sstevel@tonic-gate
3280Sstevel@tonic-gate /*
3290Sstevel@tonic-gate * Handle MRT setsockopt commands to modify the multicast routing tables.
3300Sstevel@tonic-gate */
3310Sstevel@tonic-gate int
ip_mrouter_set(int cmd,conn_t * connp,int checkonly,uchar_t * data,int datalen)33211042SErik.Nordmark@Sun.COM ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data,
33311042SErik.Nordmark@Sun.COM int datalen)
3340Sstevel@tonic-gate {
3355240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3363448Sdh155122
3373448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
3385240Snordmark if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) {
3393448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3400Sstevel@tonic-gate return (EACCES);
3410Sstevel@tonic-gate }
3423448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
3430Sstevel@tonic-gate
3440Sstevel@tonic-gate if (checkonly) {
3450Sstevel@tonic-gate /*
3460Sstevel@tonic-gate * do not do operation, just pretend to - new T_CHECK
3470Sstevel@tonic-gate * Note: Even routines further on can probably fail but
3480Sstevel@tonic-gate * this T_CHECK stuff is only to please XTI so it not
3490Sstevel@tonic-gate * necessary to be perfect.
3500Sstevel@tonic-gate */
3510Sstevel@tonic-gate switch (cmd) {
3520Sstevel@tonic-gate case MRT_INIT:
3530Sstevel@tonic-gate case MRT_DONE:
3540Sstevel@tonic-gate case MRT_ADD_VIF:
3550Sstevel@tonic-gate case MRT_DEL_VIF:
3560Sstevel@tonic-gate case MRT_ADD_MFC:
3570Sstevel@tonic-gate case MRT_DEL_MFC:
3580Sstevel@tonic-gate case MRT_ASSERT:
3595240Snordmark return (0);
3600Sstevel@tonic-gate default:
3615240Snordmark return (EOPNOTSUPP);
3620Sstevel@tonic-gate }
3630Sstevel@tonic-gate }
3640Sstevel@tonic-gate
3650Sstevel@tonic-gate /*
3660Sstevel@tonic-gate * make sure no command is issued after multicast routing has been
3670Sstevel@tonic-gate * turned off.
3680Sstevel@tonic-gate */
3690Sstevel@tonic-gate if (cmd != MRT_INIT && cmd != MRT_DONE) {
3703448Sdh155122 if (is_mrouter_off(ipst))
3710Sstevel@tonic-gate return (EINVAL);
3720Sstevel@tonic-gate }
3730Sstevel@tonic-gate
3740Sstevel@tonic-gate switch (cmd) {
3755240Snordmark case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst));
37611042SErik.Nordmark@Sun.COM case MRT_DONE: return (ip_mrouter_done(ipst));
37711042SErik.Nordmark@Sun.COM case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, ipst));
37811042SErik.Nordmark@Sun.COM case MRT_DEL_VIF: return (del_vif((vifi_t *)data, ipst));
3793448Sdh155122 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst));
3803448Sdh155122 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst));
3813448Sdh155122 case MRT_ASSERT: return (set_assert((int *)data, ipst));
3820Sstevel@tonic-gate default: return (EOPNOTSUPP);
3830Sstevel@tonic-gate }
3840Sstevel@tonic-gate }
3850Sstevel@tonic-gate
3860Sstevel@tonic-gate /*
3870Sstevel@tonic-gate * Handle MRT getsockopt commands
3880Sstevel@tonic-gate */
3890Sstevel@tonic-gate int
ip_mrouter_get(int cmd,conn_t * connp,uchar_t * data)39011042SErik.Nordmark@Sun.COM ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data)
3910Sstevel@tonic-gate {
3925240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3935240Snordmark
3945240Snordmark if (connp != ipst->ips_ip_g_mrouter)
3950Sstevel@tonic-gate return (EACCES);
3960Sstevel@tonic-gate
3970Sstevel@tonic-gate switch (cmd) {
3980Sstevel@tonic-gate case MRT_VERSION: return (get_version((uchar_t *)data));
3993448Sdh155122 case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst));
4000Sstevel@tonic-gate default: return (EOPNOTSUPP);
4010Sstevel@tonic-gate }
4020Sstevel@tonic-gate }
4030Sstevel@tonic-gate
4040Sstevel@tonic-gate /*
4050Sstevel@tonic-gate * Handle ioctl commands to obtain information from the cache.
4060Sstevel@tonic-gate * Called with shared access to IP. These are read_only ioctls.
4070Sstevel@tonic-gate */
4080Sstevel@tonic-gate /* ARGSUSED */
4090Sstevel@tonic-gate int
mrt_ioctl(ipif_t * ipif,sin_t * sin,queue_t * q,mblk_t * mp,ip_ioctl_cmd_t * ipip,void * if_req)4100Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
4110Sstevel@tonic-gate ip_ioctl_cmd_t *ipip, void *if_req)
4120Sstevel@tonic-gate {
4130Sstevel@tonic-gate mblk_t *mp1;
4140Sstevel@tonic-gate struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4155240Snordmark conn_t *connp = Q_TO_CONN(q);
4165240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4170Sstevel@tonic-gate
4180Sstevel@tonic-gate /* Existence verified in ip_wput_nondata */
4190Sstevel@tonic-gate mp1 = mp->b_cont->b_cont;
4200Sstevel@tonic-gate
4210Sstevel@tonic-gate switch (iocp->ioc_cmd) {
4220Sstevel@tonic-gate case (SIOCGETVIFCNT):
4233448Sdh155122 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst));
4240Sstevel@tonic-gate case (SIOCGETSGCNT):
4253448Sdh155122 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst));
4260Sstevel@tonic-gate case (SIOCGETLSGCNT):
4273448Sdh155122 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst));
4280Sstevel@tonic-gate default:
4290Sstevel@tonic-gate return (EINVAL);
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate }
4320Sstevel@tonic-gate
4330Sstevel@tonic-gate /*
4340Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided.
4350Sstevel@tonic-gate */
4360Sstevel@tonic-gate static int
get_sg_cnt(struct sioc_sg_req * req,ip_stack_t * ipst)4373448Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
4380Sstevel@tonic-gate {
4390Sstevel@tonic-gate struct mfc *rt;
4400Sstevel@tonic-gate struct mfcb *mfcbp;
4410Sstevel@tonic-gate
4423448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];
4430Sstevel@tonic-gate MFCB_REFHOLD(mfcbp);
4440Sstevel@tonic-gate MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt);
4450Sstevel@tonic-gate
4460Sstevel@tonic-gate if (rt != NULL) {
4470Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
4480Sstevel@tonic-gate req->pktcnt = rt->mfc_pkt_cnt;
4490Sstevel@tonic-gate req->bytecnt = rt->mfc_byte_cnt;
4500Sstevel@tonic-gate req->wrong_if = rt->mfc_wrong_if;
4510Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
4520Sstevel@tonic-gate } else
4530Sstevel@tonic-gate req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
4540Sstevel@tonic-gate
4550Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
4560Sstevel@tonic-gate return (0);
4570Sstevel@tonic-gate }
4580Sstevel@tonic-gate
4590Sstevel@tonic-gate /*
4600Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided.
4610Sstevel@tonic-gate * Uses larger counters and IPv6 addresses.
4620Sstevel@tonic-gate */
4630Sstevel@tonic-gate /* ARGSUSED XXX until implemented */
4640Sstevel@tonic-gate static int
get_lsg_cnt(struct sioc_lsg_req * req,ip_stack_t * ipst)4653448Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
4660Sstevel@tonic-gate {
4670Sstevel@tonic-gate /* XXX TODO SIOCGETLSGCNT */
4680Sstevel@tonic-gate return (ENXIO);
4690Sstevel@tonic-gate }
4700Sstevel@tonic-gate
4710Sstevel@tonic-gate /*
4720Sstevel@tonic-gate * Returns the input and output packet and byte counts on the vif provided.
4730Sstevel@tonic-gate */
4740Sstevel@tonic-gate static int
get_vif_cnt(struct sioc_vif_req * req,ip_stack_t * ipst)4753448Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
4760Sstevel@tonic-gate {
4770Sstevel@tonic-gate vifi_t vifi = req->vifi;
4780Sstevel@tonic-gate
4793448Sdh155122 if (vifi >= ipst->ips_numvifs)
4800Sstevel@tonic-gate return (EINVAL);
4810Sstevel@tonic-gate
4820Sstevel@tonic-gate /*
4830Sstevel@tonic-gate * No locks here, an approximation is fine.
4840Sstevel@tonic-gate */
4853448Sdh155122 req->icount = ipst->ips_vifs[vifi].v_pkt_in;
4863448Sdh155122 req->ocount = ipst->ips_vifs[vifi].v_pkt_out;
4873448Sdh155122 req->ibytes = ipst->ips_vifs[vifi].v_bytes_in;
4883448Sdh155122 req->obytes = ipst->ips_vifs[vifi].v_bytes_out;
4890Sstevel@tonic-gate
4900Sstevel@tonic-gate return (0);
4910Sstevel@tonic-gate }
4920Sstevel@tonic-gate
4930Sstevel@tonic-gate static int
get_version(uchar_t * data)4940Sstevel@tonic-gate get_version(uchar_t *data)
4950Sstevel@tonic-gate {
4960Sstevel@tonic-gate int *v = (int *)data;
4970Sstevel@tonic-gate
4980Sstevel@tonic-gate *v = 0x0305; /* XXX !!!! */
4990Sstevel@tonic-gate
5000Sstevel@tonic-gate return (0);
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate
5030Sstevel@tonic-gate /*
5040Sstevel@tonic-gate * Set PIM assert processing global.
5050Sstevel@tonic-gate */
5060Sstevel@tonic-gate static int
set_assert(int * i,ip_stack_t * ipst)5073448Sdh155122 set_assert(int *i, ip_stack_t *ipst)
5080Sstevel@tonic-gate {
5090Sstevel@tonic-gate if ((*i != 1) && (*i != 0))
5100Sstevel@tonic-gate return (EINVAL);
5110Sstevel@tonic-gate
5123448Sdh155122 ipst->ips_pim_assert = *i;
5130Sstevel@tonic-gate
5140Sstevel@tonic-gate return (0);
5150Sstevel@tonic-gate }
5160Sstevel@tonic-gate
5170Sstevel@tonic-gate /*
5180Sstevel@tonic-gate * Get PIM assert processing global.
5190Sstevel@tonic-gate */
5200Sstevel@tonic-gate static int
get_assert(uchar_t * data,ip_stack_t * ipst)5213448Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst)
5220Sstevel@tonic-gate {
5230Sstevel@tonic-gate int *i = (int *)data;
5240Sstevel@tonic-gate
5253448Sdh155122 *i = ipst->ips_pim_assert;
5260Sstevel@tonic-gate
5270Sstevel@tonic-gate return (0);
5280Sstevel@tonic-gate }
5290Sstevel@tonic-gate
5300Sstevel@tonic-gate /*
5310Sstevel@tonic-gate * Enable multicast routing.
5320Sstevel@tonic-gate */
5330Sstevel@tonic-gate static int
ip_mrouter_init(conn_t * connp,uchar_t * data,int datalen,ip_stack_t * ipst)5345240Snordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
5350Sstevel@tonic-gate {
5360Sstevel@tonic-gate int *v;
5370Sstevel@tonic-gate
5380Sstevel@tonic-gate if (data == NULL || (datalen != sizeof (int)))
5390Sstevel@tonic-gate return (ENOPROTOOPT);
5400Sstevel@tonic-gate
5410Sstevel@tonic-gate v = (int *)data;
5420Sstevel@tonic-gate if (*v != 1)
5430Sstevel@tonic-gate return (ENOPROTOOPT);
5440Sstevel@tonic-gate
5453448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
5463448Sdh155122 if (ipst->ips_ip_g_mrouter != NULL) {
5473448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5480Sstevel@tonic-gate return (EADDRINUSE);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate
5515240Snordmark /*
5525240Snordmark * MRT_INIT should only be allowed for RAW sockets, but we double
5535240Snordmark * check.
5545240Snordmark */
5555240Snordmark if (!IPCL_IS_RAWIP(connp)) {
5565240Snordmark mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5575240Snordmark return (EINVAL);
5585240Snordmark }
5595240Snordmark
5605240Snordmark ipst->ips_ip_g_mrouter = connp;
5610Sstevel@tonic-gate connp->conn_multi_router = 1;
5620Sstevel@tonic-gate /* In order for tunnels to work we have to turn ip_g_forward on */
5633448Sdh155122 if (!WE_ARE_FORWARDING(ipst)) {
5643448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
5655240Snordmark (void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
5660Sstevel@tonic-gate "ip_mrouter_init: turning on forwarding");
5670Sstevel@tonic-gate }
56812016SGirish.Moodalbail@Sun.COM ipst->ips_saved_ip_forwarding = ipst->ips_ip_forwarding;
56912016SGirish.Moodalbail@Sun.COM ipst->ips_ip_forwarding = IP_FORWARD_ALWAYS;
5700Sstevel@tonic-gate }
5710Sstevel@tonic-gate
5723448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
5730Sstevel@tonic-gate return (0);
5740Sstevel@tonic-gate }
5750Sstevel@tonic-gate
5763448Sdh155122 void
ip_mrouter_stack_init(ip_stack_t * ipst)5773448Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst)
5783448Sdh155122 {
5793448Sdh155122 mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);
5803448Sdh155122
5813448Sdh155122 ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
5823448Sdh155122 KM_SLEEP);
5833448Sdh155122 ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
5843448Sdh155122 /*
5853448Sdh155122 * mfctable:
5863448Sdh155122 * Includes all mfcs, including waiting upcalls.
5873448Sdh155122 * Multiple mfcs per bucket.
5883448Sdh155122 */
5893448Sdh155122 ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
5903448Sdh155122 KM_SLEEP);
5913448Sdh155122 /*
5923448Sdh155122 * Define the token bucket filter structures.
5933448Sdh155122 * tbftable -> each vif has one of these for storing info.
5943448Sdh155122 */
5953448Sdh155122 ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);
5963448Sdh155122
5973448Sdh155122 mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);
5983448Sdh155122
5993448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
6003448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
6013448Sdh155122 }
6023448Sdh155122
6030Sstevel@tonic-gate /*
6040Sstevel@tonic-gate * Disable multicast routing.
6050Sstevel@tonic-gate * Didn't use global timeout_val (BSD version), instead check the mfctable.
6060Sstevel@tonic-gate */
6070Sstevel@tonic-gate int
ip_mrouter_done(ip_stack_t * ipst)60811042SErik.Nordmark@Sun.COM ip_mrouter_done(ip_stack_t *ipst)
6090Sstevel@tonic-gate {
6105240Snordmark conn_t *mrouter;
6110Sstevel@tonic-gate vifi_t vifi;
6120Sstevel@tonic-gate struct mfc *mfc_rt;
6130Sstevel@tonic-gate int i;
6140Sstevel@tonic-gate
6153448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
6163448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) {
6173448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6180Sstevel@tonic-gate return (EINVAL);
6190Sstevel@tonic-gate }
6200Sstevel@tonic-gate
6215240Snordmark mrouter = ipst->ips_ip_g_mrouter;
6223448Sdh155122
62312016SGirish.Moodalbail@Sun.COM if (ipst->ips_saved_ip_forwarding != -1) {
6243448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
6255240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
6260Sstevel@tonic-gate "ip_mrouter_done: turning off forwarding");
6270Sstevel@tonic-gate }
62812016SGirish.Moodalbail@Sun.COM ipst->ips_ip_forwarding = ipst->ips_saved_ip_forwarding;
62912016SGirish.Moodalbail@Sun.COM ipst->ips_saved_ip_forwarding = -1;
6300Sstevel@tonic-gate }
6310Sstevel@tonic-gate
6320Sstevel@tonic-gate /*
6330Sstevel@tonic-gate * Always clear cache when vifs change.
6343448Sdh155122 * No need to get ipst->ips_last_encap_lock since we are running as
6353448Sdh155122 * a writer.
6360Sstevel@tonic-gate */
6373448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock);
6383448Sdh155122 ipst->ips_last_encap_src = 0;
6393448Sdh155122 ipst->ips_last_encap_vif = NULL;
6403448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock);
6415240Snordmark mrouter->conn_multi_router = 0;
6420Sstevel@tonic-gate
6433448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
6440Sstevel@tonic-gate
6450Sstevel@tonic-gate /*
6460Sstevel@tonic-gate * For each phyint in use,
6470Sstevel@tonic-gate * disable promiscuous reception of all IP multicasts.
6480Sstevel@tonic-gate */
6490Sstevel@tonic-gate for (vifi = 0; vifi < MAXVIFS; vifi++) {
6503448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifi;
6510Sstevel@tonic-gate
6520Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
6530Sstevel@tonic-gate /*
6540Sstevel@tonic-gate * if the vif is active mark it condemned.
6550Sstevel@tonic-gate */
6560Sstevel@tonic-gate if (vifp->v_marks & VIF_MARK_GOOD) {
6570Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL);
6580Sstevel@tonic-gate ipif_refhold(vifp->v_ipif);
6590Sstevel@tonic-gate /* Phyint only */
6600Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
6610Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif;
66211042SErik.Nordmark@Sun.COM ilm_t *ilm = vifp->v_ilm;
66311042SErik.Nordmark@Sun.COM
66411042SErik.Nordmark@Sun.COM vifp->v_ilm = NULL;
66511042SErik.Nordmark@Sun.COM vifp->v_marks &= ~VIF_MARK_GOOD;
66611042SErik.Nordmark@Sun.COM vifp->v_marks |= VIF_MARK_CONDEMNED;
66711042SErik.Nordmark@Sun.COM
66811042SErik.Nordmark@Sun.COM mutex_exit(&(vifp)->v_lock);
66911042SErik.Nordmark@Sun.COM if (ilm != NULL) {
67011042SErik.Nordmark@Sun.COM ill_t *ill = ipif->ipif_ill;
67111042SErik.Nordmark@Sun.COM
67211042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm);
67311042SErik.Nordmark@Sun.COM ASSERT(ill->ill_mrouter_cnt > 0);
67411042SErik.Nordmark@Sun.COM atomic_dec_32(&ill->ill_mrouter_cnt);
6750Sstevel@tonic-gate }
6760Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
6770Sstevel@tonic-gate }
67810495SErik.Nordmark@Sun.COM ipif_refrele(vifp->v_ipif);
6790Sstevel@tonic-gate /*
6800Sstevel@tonic-gate * decreases the refcnt added in add_vif.
6810Sstevel@tonic-gate * and release v_lock.
6820Sstevel@tonic-gate */
6830Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
6840Sstevel@tonic-gate } else {
6850Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
6860Sstevel@tonic-gate continue;
6870Sstevel@tonic-gate }
6880Sstevel@tonic-gate }
6890Sstevel@tonic-gate
6903448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
6913448Sdh155122 ipst->ips_numvifs = 0;
6923448Sdh155122 ipst->ips_pim_assert = 0;
6933448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS;
6943448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
6950Sstevel@tonic-gate
6960Sstevel@tonic-gate /*
6970Sstevel@tonic-gate * Free upcall msgs.
6980Sstevel@tonic-gate * Go through mfctable and stop any outstanding upcall
6990Sstevel@tonic-gate * timeouts remaining on mfcs.
7000Sstevel@tonic-gate */
7010Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) {
7023448Sdh155122 mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
7033448Sdh155122 ipst->ips_mfcs[i].mfcb_refcnt++;
7043448Sdh155122 ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
7053448Sdh155122 mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
7063448Sdh155122 mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
7070Sstevel@tonic-gate while (mfc_rt) {
7080Sstevel@tonic-gate /* Free upcalls */
7090Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex);
7100Sstevel@tonic-gate if (mfc_rt->mfc_rte != NULL) {
7110Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id != 0) {
7120Sstevel@tonic-gate /*
7130Sstevel@tonic-gate * OK to drop the lock as we have
7140Sstevel@tonic-gate * a refcnt on the bucket. timeout
7150Sstevel@tonic-gate * can fire but it will see that
7160Sstevel@tonic-gate * mfc_timeout_id == 0 and not do
7170Sstevel@tonic-gate * anything. see expire_upcalls().
7180Sstevel@tonic-gate */
7190Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0;
7200Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
7210Sstevel@tonic-gate (void) untimeout(
7220Sstevel@tonic-gate mfc_rt->mfc_timeout_id);
7230Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0;
7240Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex);
7250Sstevel@tonic-gate
7260Sstevel@tonic-gate /*
7270Sstevel@tonic-gate * all queued upcall packets
7280Sstevel@tonic-gate * and mblk will be freed in
7290Sstevel@tonic-gate * release_mfc().
7300Sstevel@tonic-gate */
7310Sstevel@tonic-gate }
7320Sstevel@tonic-gate }
7330Sstevel@tonic-gate
7340Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
7350Sstevel@tonic-gate
7360Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
7370Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next;
7380Sstevel@tonic-gate }
7393448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]);
7400Sstevel@tonic-gate }
7410Sstevel@tonic-gate
7423448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
7433448Sdh155122 ipst->ips_ip_g_mrouter = NULL;
7443448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7450Sstevel@tonic-gate return (0);
7460Sstevel@tonic-gate }
7470Sstevel@tonic-gate
7483448Sdh155122 void
ip_mrouter_stack_destroy(ip_stack_t * ipst)7493448Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst)
7503448Sdh155122 {
7513448Sdh155122 struct mfcb *mfcbp;
7523448Sdh155122 struct mfc *rt;
7533448Sdh155122 int i;
7543448Sdh155122
7553448Sdh155122 for (i = 0; i < MFCTBLSIZ; i++) {
7563448Sdh155122 mfcbp = &ipst->ips_mfcs[i];
7573448Sdh155122
7583448Sdh155122 while ((rt = mfcbp->mfcb_mfc) != NULL) {
7593448Sdh155122 (void) printf("ip_mrouter_stack_destroy: free for %d\n",
7603448Sdh155122 i);
7613448Sdh155122
7623448Sdh155122 mfcbp->mfcb_mfc = rt->mfc_next;
7633448Sdh155122 free_queue(rt);
7643448Sdh155122 mi_free(rt);
7653448Sdh155122 }
7663448Sdh155122 }
7673448Sdh155122 kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
7683448Sdh155122 ipst->ips_vifs = NULL;
7693448Sdh155122 kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
7703448Sdh155122 ipst->ips_mrtstat = NULL;
7713448Sdh155122 kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
7723448Sdh155122 ipst->ips_mfcs = NULL;
7733448Sdh155122 kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
7743448Sdh155122 ipst->ips_tbfs = NULL;
7753448Sdh155122
7763448Sdh155122 mutex_destroy(&ipst->ips_last_encap_lock);
7773448Sdh155122 mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
7783448Sdh155122 }
7793448Sdh155122
7800Sstevel@tonic-gate static boolean_t
is_mrouter_off(ip_stack_t * ipst)7813448Sdh155122 is_mrouter_off(ip_stack_t *ipst)
7820Sstevel@tonic-gate {
7835240Snordmark conn_t *mrouter;
7840Sstevel@tonic-gate
7853448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
7863448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) {
7873448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7880Sstevel@tonic-gate return (B_TRUE);
7890Sstevel@tonic-gate }
7900Sstevel@tonic-gate
7915240Snordmark mrouter = ipst->ips_ip_g_mrouter;
7925240Snordmark if (mrouter->conn_multi_router == 0) {
7933448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7940Sstevel@tonic-gate return (B_TRUE);
7950Sstevel@tonic-gate }
7963448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
7970Sstevel@tonic-gate return (B_FALSE);
7980Sstevel@tonic-gate }
7990Sstevel@tonic-gate
8000Sstevel@tonic-gate static void
unlock_good_vif(struct vif * vifp)8010Sstevel@tonic-gate unlock_good_vif(struct vif *vifp)
8020Sstevel@tonic-gate {
8030Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL);
8040Sstevel@tonic-gate ipif_refrele(vifp->v_ipif);
8050Sstevel@tonic-gate VIF_REFRELE(vifp);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate
8080Sstevel@tonic-gate static boolean_t
lock_good_vif(struct vif * vifp)8090Sstevel@tonic-gate lock_good_vif(struct vif *vifp)
8100Sstevel@tonic-gate {
8110Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
8120Sstevel@tonic-gate if (!(vifp->v_marks & VIF_MARK_GOOD)) {
8130Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8140Sstevel@tonic-gate return (B_FALSE);
8150Sstevel@tonic-gate }
8160Sstevel@tonic-gate
8170Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL);
8180Sstevel@tonic-gate mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
8190Sstevel@tonic-gate if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
8200Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8210Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8220Sstevel@tonic-gate return (B_FALSE);
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate ipif_refhold_locked(vifp->v_ipif);
8250Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
8260Sstevel@tonic-gate vifp->v_refcnt++;
8270Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8280Sstevel@tonic-gate return (B_TRUE);
8290Sstevel@tonic-gate }
8300Sstevel@tonic-gate
8310Sstevel@tonic-gate /*
8320Sstevel@tonic-gate * Add a vif to the vif table.
8330Sstevel@tonic-gate */
8340Sstevel@tonic-gate static int
add_vif(struct vifctl * vifcp,conn_t * connp,ip_stack_t * ipst)83511042SErik.Nordmark@Sun.COM add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst)
8360Sstevel@tonic-gate {
8373448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi;
8380Sstevel@tonic-gate ipif_t *ipif;
83911042SErik.Nordmark@Sun.COM int error = 0;
8403448Sdh155122 struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
8415240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
84211042SErik.Nordmark@Sun.COM ilm_t *ilm;
84311042SErik.Nordmark@Sun.COM ill_t *ill;
8440Sstevel@tonic-gate
8450Sstevel@tonic-gate ASSERT(connp != NULL);
8460Sstevel@tonic-gate
8470Sstevel@tonic-gate if (vifcp->vifc_vifi >= MAXVIFS)
8480Sstevel@tonic-gate return (EINVAL);
8490Sstevel@tonic-gate
8503448Sdh155122 if (is_mrouter_off(ipst))
8510Sstevel@tonic-gate return (EINVAL);
8520Sstevel@tonic-gate
8530Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
8540Sstevel@tonic-gate /*
8550Sstevel@tonic-gate * Viftable entry should be 0.
8560Sstevel@tonic-gate * if v_marks == 0 but v_refcnt != 0 means struct is being
8570Sstevel@tonic-gate * initialized.
8580Sstevel@tonic-gate *
8590Sstevel@tonic-gate * Also note that it is very unlikely that we will get a MRT_ADD_VIF
8600Sstevel@tonic-gate * request while the delete is in progress, mrouted only sends add
8610Sstevel@tonic-gate * requests when a new interface is added and the new interface cannot
8620Sstevel@tonic-gate * have the same vifi as an existing interface. We make sure that
8630Sstevel@tonic-gate * ill_delete will block till the vif is deleted by adding a refcnt
8640Sstevel@tonic-gate * to ipif in del_vif().
8650Sstevel@tonic-gate */
8660Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr != 0 ||
8670Sstevel@tonic-gate vifp->v_marks != 0 ||
8680Sstevel@tonic-gate vifp->v_refcnt != 0) {
8690Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8700Sstevel@tonic-gate return (EADDRINUSE);
8710Sstevel@tonic-gate }
8720Sstevel@tonic-gate
8730Sstevel@tonic-gate /* Incoming vif should not be 0 */
8740Sstevel@tonic-gate if (vifcp->vifc_lcl_addr.s_addr == 0) {
8750Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8760Sstevel@tonic-gate return (EINVAL);
8770Sstevel@tonic-gate }
8780Sstevel@tonic-gate
8790Sstevel@tonic-gate vifp->v_refcnt++;
8800Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
8810Sstevel@tonic-gate /* Find the interface with the local address */
8820Sstevel@tonic-gate ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
88311042SErik.Nordmark@Sun.COM IPCL_ZONEID(connp), ipst);
8840Sstevel@tonic-gate if (ipif == NULL) {
8850Sstevel@tonic-gate VIF_REFRELE(vifp);
8860Sstevel@tonic-gate return (EADDRNOTAVAIL);
8870Sstevel@tonic-gate }
8880Sstevel@tonic-gate
8893448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
8905240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
8910Sstevel@tonic-gate "add_vif: src 0x%x enter",
8920Sstevel@tonic-gate vifcp->vifc_lcl_addr.s_addr);
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate
8950Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
8960Sstevel@tonic-gate /*
8970Sstevel@tonic-gate * Always clear cache when vifs change.
8980Sstevel@tonic-gate * Needed to ensure that src isn't left over from before vif was added.
8990Sstevel@tonic-gate * No need to get last_encap_lock, since we are running as a writer.
9000Sstevel@tonic-gate */
9010Sstevel@tonic-gate
9023448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock);
9033448Sdh155122 ipst->ips_last_encap_src = 0;
9043448Sdh155122 ipst->ips_last_encap_vif = NULL;
9053448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock);
9060Sstevel@tonic-gate
9070Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_TUNNEL) {
9080Sstevel@tonic-gate if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
9090Sstevel@tonic-gate cmn_err(CE_WARN,
9100Sstevel@tonic-gate "add_vif: source route tunnels not supported\n");
9110Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
9120Sstevel@tonic-gate ipif_refrele(ipif);
9130Sstevel@tonic-gate return (EOPNOTSUPP);
9140Sstevel@tonic-gate }
9150Sstevel@tonic-gate vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
9160Sstevel@tonic-gate
9170Sstevel@tonic-gate } else {
9180Sstevel@tonic-gate /* Phyint or Register vif */
9190Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) {
9200Sstevel@tonic-gate /*
9210Sstevel@tonic-gate * Note: Since all IPPROTO_IP level options (including
9220Sstevel@tonic-gate * MRT_ADD_VIF) are done exclusively via
9230Sstevel@tonic-gate * ip_optmgmt_writer(), a lock is not necessary to
9240Sstevel@tonic-gate * protect reg_vif_num.
9250Sstevel@tonic-gate */
9263448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
9273448Sdh155122 if (ipst->ips_reg_vif_num == ALL_VIFS) {
9283448Sdh155122 ipst->ips_reg_vif_num = vifcp->vifc_vifi;
9293448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
9300Sstevel@tonic-gate } else {
9313448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
9320Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
9330Sstevel@tonic-gate ipif_refrele(ipif);
9340Sstevel@tonic-gate return (EADDRINUSE);
9350Sstevel@tonic-gate }
9360Sstevel@tonic-gate }
9370Sstevel@tonic-gate
9380Sstevel@tonic-gate /* Make sure the interface supports multicast */
9390Sstevel@tonic-gate if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
9400Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
9410Sstevel@tonic-gate ipif_refrele(ipif);
9420Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) {
9433448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
9443448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS;
9453448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
9460Sstevel@tonic-gate }
9470Sstevel@tonic-gate return (EOPNOTSUPP);
9480Sstevel@tonic-gate }
9490Sstevel@tonic-gate /* Enable promiscuous reception of all IP mcasts from the if */
9500Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
95111042SErik.Nordmark@Sun.COM
95211042SErik.Nordmark@Sun.COM ill = ipif->ipif_ill;
95311042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ill))
95411042SErik.Nordmark@Sun.COM ill = ipmp_ill_hold_ipmp_ill(ill);
95511042SErik.Nordmark@Sun.COM
95611042SErik.Nordmark@Sun.COM if (ill == NULL) {
95711042SErik.Nordmark@Sun.COM ilm = NULL;
95811042SErik.Nordmark@Sun.COM } else {
95911042SErik.Nordmark@Sun.COM ilm = ip_addmulti(&ipv6_all_zeros, ill,
96011042SErik.Nordmark@Sun.COM ipif->ipif_zoneid, &error);
96111042SErik.Nordmark@Sun.COM if (ilm != NULL)
96211042SErik.Nordmark@Sun.COM atomic_inc_32(&ill->ill_mrouter_cnt);
96311042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ipif->ipif_ill)) {
96411042SErik.Nordmark@Sun.COM ill_refrele(ill);
96511042SErik.Nordmark@Sun.COM ill = ipif->ipif_ill;
96611042SErik.Nordmark@Sun.COM }
96711042SErik.Nordmark@Sun.COM }
96811042SErik.Nordmark@Sun.COM
9690Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
9700Sstevel@tonic-gate /*
9710Sstevel@tonic-gate * since we released the lock lets make sure that
9720Sstevel@tonic-gate * ip_mrouter_done() has not been called.
9730Sstevel@tonic-gate */
97411042SErik.Nordmark@Sun.COM if (ilm == NULL || is_mrouter_off(ipst)) {
97511042SErik.Nordmark@Sun.COM if (ilm != NULL) {
97611042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm);
97711042SErik.Nordmark@Sun.COM ASSERT(ill->ill_mrouter_cnt > 0);
97811042SErik.Nordmark@Sun.COM atomic_dec_32(&ill->ill_mrouter_cnt);
97911042SErik.Nordmark@Sun.COM }
9800Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) {
9813448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
9823448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS;
9833448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
9840Sstevel@tonic-gate }
9850Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
9860Sstevel@tonic-gate ipif_refrele(ipif);
9870Sstevel@tonic-gate return (error?error:EINVAL);
9880Sstevel@tonic-gate }
98911042SErik.Nordmark@Sun.COM vifp->v_ilm = ilm;
9900Sstevel@tonic-gate }
9910Sstevel@tonic-gate /* Define parameters for the tbf structure */
9920Sstevel@tonic-gate vifp->v_tbf = v_tbf;
9930Sstevel@tonic-gate gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
9940Sstevel@tonic-gate vifp->v_tbf->tbf_n_tok = 0;
9950Sstevel@tonic-gate vifp->v_tbf->tbf_q_len = 0;
9960Sstevel@tonic-gate vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
9970Sstevel@tonic-gate vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
9980Sstevel@tonic-gate
9990Sstevel@tonic-gate vifp->v_flags = vifcp->vifc_flags;
10000Sstevel@tonic-gate vifp->v_threshold = vifcp->vifc_threshold;
10010Sstevel@tonic-gate vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
10020Sstevel@tonic-gate vifp->v_ipif = ipif;
10030Sstevel@tonic-gate ipif_refrele(ipif);
10040Sstevel@tonic-gate /* Scaling up here, allows division by 1024 in critical code. */
10050Sstevel@tonic-gate vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
10060Sstevel@tonic-gate vifp->v_timeout_id = 0;
10070Sstevel@tonic-gate /* initialize per vif pkt counters */
10080Sstevel@tonic-gate vifp->v_pkt_in = 0;
10090Sstevel@tonic-gate vifp->v_pkt_out = 0;
10100Sstevel@tonic-gate vifp->v_bytes_in = 0;
10110Sstevel@tonic-gate vifp->v_bytes_out = 0;
10120Sstevel@tonic-gate mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
10130Sstevel@tonic-gate
10140Sstevel@tonic-gate /* Adjust numvifs up, if the vifi is higher than numvifs */
10153448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
10163448Sdh155122 if (ipst->ips_numvifs <= vifcp->vifc_vifi)
10173448Sdh155122 ipst->ips_numvifs = vifcp->vifc_vifi + 1;
10183448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
10193448Sdh155122
10203448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
10215240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10220Sstevel@tonic-gate "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
10230Sstevel@tonic-gate vifcp->vifc_vifi,
10240Sstevel@tonic-gate ntohl(vifcp->vifc_lcl_addr.s_addr),
10250Sstevel@tonic-gate (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
10260Sstevel@tonic-gate ntohl(vifcp->vifc_rmt_addr.s_addr),
10270Sstevel@tonic-gate vifcp->vifc_threshold, vifcp->vifc_rate_limit);
10280Sstevel@tonic-gate }
10290Sstevel@tonic-gate
10300Sstevel@tonic-gate vifp->v_marks = VIF_MARK_GOOD;
10310Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
10320Sstevel@tonic-gate return (0);
10330Sstevel@tonic-gate }
10340Sstevel@tonic-gate
10350Sstevel@tonic-gate
10360Sstevel@tonic-gate /* Delete a vif from the vif table. */
10370Sstevel@tonic-gate static void
del_vifp(struct vif * vifp)10380Sstevel@tonic-gate del_vifp(struct vif *vifp)
10390Sstevel@tonic-gate {
10400Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
10410Sstevel@tonic-gate mblk_t *mp0;
10420Sstevel@tonic-gate vifi_t vifi;
10433448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
10445240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
10450Sstevel@tonic-gate
10460Sstevel@tonic-gate ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
10470Sstevel@tonic-gate ASSERT(t != NULL);
10480Sstevel@tonic-gate
10493448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
10505240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
10510Sstevel@tonic-gate "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
10520Sstevel@tonic-gate }
10530Sstevel@tonic-gate
10540Sstevel@tonic-gate if (vifp->v_timeout_id != 0) {
10550Sstevel@tonic-gate (void) untimeout(vifp->v_timeout_id);
10560Sstevel@tonic-gate vifp->v_timeout_id = 0;
10570Sstevel@tonic-gate }
10580Sstevel@tonic-gate
10590Sstevel@tonic-gate /*
10600Sstevel@tonic-gate * Free packets queued at the interface.
10610Sstevel@tonic-gate * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
10620Sstevel@tonic-gate */
10630Sstevel@tonic-gate mutex_enter(&t->tbf_lock);
10640Sstevel@tonic-gate while (t->tbf_q != NULL) {
10650Sstevel@tonic-gate mp0 = t->tbf_q;
10660Sstevel@tonic-gate t->tbf_q = t->tbf_q->b_next;
10670Sstevel@tonic-gate mp0->b_prev = mp0->b_next = NULL;
10680Sstevel@tonic-gate freemsg(mp0);
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate mutex_exit(&t->tbf_lock);
10710Sstevel@tonic-gate
10720Sstevel@tonic-gate /*
10730Sstevel@tonic-gate * Always clear cache when vifs change.
10740Sstevel@tonic-gate * No need to get last_encap_lock since we are running as a writer.
10750Sstevel@tonic-gate */
10763448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock);
10773448Sdh155122 if (vifp == ipst->ips_last_encap_vif) {
10783448Sdh155122 ipst->ips_last_encap_vif = NULL;
10793448Sdh155122 ipst->ips_last_encap_src = 0;
10800Sstevel@tonic-gate }
10813448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock);
10820Sstevel@tonic-gate
10830Sstevel@tonic-gate mutex_destroy(&t->tbf_lock);
10840Sstevel@tonic-gate
10850Sstevel@tonic-gate bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
10860Sstevel@tonic-gate
10870Sstevel@tonic-gate /* Adjust numvifs down */
10883448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
10893448Sdh155122 for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */
10903448Sdh155122 if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
10910Sstevel@tonic-gate break;
10923448Sdh155122 ipst->ips_numvifs = vifi;
10933448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
10940Sstevel@tonic-gate
10950Sstevel@tonic-gate bzero(vifp, sizeof (*vifp));
10960Sstevel@tonic-gate }
10970Sstevel@tonic-gate
10980Sstevel@tonic-gate static int
del_vif(vifi_t * vifip,ip_stack_t * ipst)109911042SErik.Nordmark@Sun.COM del_vif(vifi_t *vifip, ip_stack_t *ipst)
11000Sstevel@tonic-gate {
11013448Sdh155122 struct vif *vifp = ipst->ips_vifs + *vifip;
11020Sstevel@tonic-gate
11033448Sdh155122 if (*vifip >= ipst->ips_numvifs)
11040Sstevel@tonic-gate return (EINVAL);
11050Sstevel@tonic-gate
11060Sstevel@tonic-gate mutex_enter(&vifp->v_lock);
11070Sstevel@tonic-gate /*
11080Sstevel@tonic-gate * Not initialized
11090Sstevel@tonic-gate * Here we are not looking at the vif that is being initialized
11100Sstevel@tonic-gate * i.e vifp->v_marks == 0 and refcnt > 0.
11110Sstevel@tonic-gate */
11120Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr == 0 ||
11130Sstevel@tonic-gate !(vifp->v_marks & VIF_MARK_GOOD)) {
11140Sstevel@tonic-gate mutex_exit(&vifp->v_lock);
11150Sstevel@tonic-gate return (EADDRNOTAVAIL);
11160Sstevel@tonic-gate }
11170Sstevel@tonic-gate
11180Sstevel@tonic-gate /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
11190Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD;
11200Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED;
11210Sstevel@tonic-gate
11220Sstevel@tonic-gate /* Phyint only */
11230Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
11240Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif;
112511042SErik.Nordmark@Sun.COM ilm_t *ilm = vifp->v_ilm;
112611042SErik.Nordmark@Sun.COM
112711042SErik.Nordmark@Sun.COM vifp->v_ilm = NULL;
112811042SErik.Nordmark@Sun.COM
11290Sstevel@tonic-gate ASSERT(ipif != NULL);
11300Sstevel@tonic-gate /*
11310Sstevel@tonic-gate * should be OK to drop the lock as we
11320Sstevel@tonic-gate * have marked this as CONDEMNED.
11330Sstevel@tonic-gate */
11340Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock);
113511042SErik.Nordmark@Sun.COM if (ilm != NULL) {
113611042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm);
113711042SErik.Nordmark@Sun.COM ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0);
113811042SErik.Nordmark@Sun.COM atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt);
113911042SErik.Nordmark@Sun.COM }
11400Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock);
11410Sstevel@tonic-gate }
11420Sstevel@tonic-gate
114311042SErik.Nordmark@Sun.COM if (vifp->v_flags & VIFF_REGISTER) {
114411042SErik.Nordmark@Sun.COM mutex_enter(&ipst->ips_numvifs_mutex);
114511042SErik.Nordmark@Sun.COM ipst->ips_reg_vif_num = ALL_VIFS;
114611042SErik.Nordmark@Sun.COM mutex_exit(&ipst->ips_numvifs_mutex);
114711042SErik.Nordmark@Sun.COM }
114811042SErik.Nordmark@Sun.COM
11490Sstevel@tonic-gate /*
11500Sstevel@tonic-gate * decreases the refcnt added in add_vif.
11510Sstevel@tonic-gate */
11520Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp);
11530Sstevel@tonic-gate return (0);
11540Sstevel@tonic-gate }
11550Sstevel@tonic-gate
11560Sstevel@tonic-gate /*
11570Sstevel@tonic-gate * Add an mfc entry.
11580Sstevel@tonic-gate */
11590Sstevel@tonic-gate static int
add_mfc(struct mfcctl * mfccp,ip_stack_t * ipst)11603448Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
11610Sstevel@tonic-gate {
11620Sstevel@tonic-gate struct mfc *rt;
11630Sstevel@tonic-gate struct rtdetq *rte;
11640Sstevel@tonic-gate ushort_t nstl;
11650Sstevel@tonic-gate int i;
11660Sstevel@tonic-gate struct mfcb *mfcbp;
11675240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
11680Sstevel@tonic-gate
11690Sstevel@tonic-gate /*
11700Sstevel@tonic-gate * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
11710Sstevel@tonic-gate * did not have a real route for pkt.
11720Sstevel@tonic-gate * We want this pkt without rt installed in the mfctable to prevent
11730Sstevel@tonic-gate * multiiple tries, so go ahead and put it in mfctable, it will
11740Sstevel@tonic-gate * be discarded later in ip_mdq() because the child is NULL.
11750Sstevel@tonic-gate */
11760Sstevel@tonic-gate
11770Sstevel@tonic-gate /* Error checking, out of bounds? */
11780Sstevel@tonic-gate if (mfccp->mfcc_parent > MAXVIFS) {
11790Sstevel@tonic-gate ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
11800Sstevel@tonic-gate (int)mfccp->mfcc_parent));
11810Sstevel@tonic-gate return (EINVAL);
11820Sstevel@tonic-gate }
11830Sstevel@tonic-gate
11840Sstevel@tonic-gate if ((mfccp->mfcc_parent != NO_VIF) &&
11853448Sdh155122 (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
11860Sstevel@tonic-gate ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
11870Sstevel@tonic-gate (int)mfccp->mfcc_parent));
11880Sstevel@tonic-gate return (EINVAL);
11890Sstevel@tonic-gate }
11900Sstevel@tonic-gate
11913448Sdh155122 if (is_mrouter_off(ipst)) {
11920Sstevel@tonic-gate return (EINVAL);
11930Sstevel@tonic-gate }
11940Sstevel@tonic-gate
11953448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
11960Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr)];
11970Sstevel@tonic-gate MFCB_REFHOLD(mfcbp);
11980Sstevel@tonic-gate MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
11990Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr, rt);
12000Sstevel@tonic-gate
12010Sstevel@tonic-gate /* If an entry already exists, just update the fields */
12020Sstevel@tonic-gate if (rt) {
12033448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
12045240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
12050Sstevel@tonic-gate "add_mfc: update o %x grp %x parent %x",
12060Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr),
12070Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr),
12080Sstevel@tonic-gate mfccp->mfcc_parent);
12090Sstevel@tonic-gate }
12100Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
12110Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent;
12120Sstevel@tonic-gate
12133448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
12143448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++)
12150Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
12163448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
12170Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
12180Sstevel@tonic-gate
12190Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
12200Sstevel@tonic-gate return (0);
12210Sstevel@tonic-gate }
12220Sstevel@tonic-gate
12230Sstevel@tonic-gate /*
12240Sstevel@tonic-gate * Find the entry for which the upcall was made and update.
12250Sstevel@tonic-gate */
12260Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
12270Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
12280Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
12290Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
12300Sstevel@tonic-gate (rt->mfc_rte != NULL) &&
12310Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
12320Sstevel@tonic-gate if (nstl++ != 0)
12330Sstevel@tonic-gate cmn_err(CE_WARN,
12340Sstevel@tonic-gate "add_mfc: %s o %x g %x p %x",
12350Sstevel@tonic-gate "multiple kernel entries",
12360Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr),
12370Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr),
12380Sstevel@tonic-gate mfccp->mfcc_parent);
12390Sstevel@tonic-gate
12403448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
12415240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
12423448Sdh155122 SL_TRACE,
12430Sstevel@tonic-gate "add_mfc: o %x g %x p %x",
12440Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr),
12450Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr),
12460Sstevel@tonic-gate mfccp->mfcc_parent);
12470Sstevel@tonic-gate }
12483448Sdh155122 fill_route(rt, mfccp, ipst);
12490Sstevel@tonic-gate
12500Sstevel@tonic-gate /*
12510Sstevel@tonic-gate * Prevent cleanup of cache entry.
12520Sstevel@tonic-gate * Timer starts in ip_mforward.
12530Sstevel@tonic-gate */
12540Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) {
12550Sstevel@tonic-gate timeout_id_t id;
12560Sstevel@tonic-gate id = rt->mfc_timeout_id;
12570Sstevel@tonic-gate /*
12580Sstevel@tonic-gate * setting id to zero will avoid this
12590Sstevel@tonic-gate * entry from being cleaned up in
12600Sstevel@tonic-gate * expire_up_calls().
12610Sstevel@tonic-gate */
12620Sstevel@tonic-gate rt->mfc_timeout_id = 0;
12630Sstevel@tonic-gate /*
12640Sstevel@tonic-gate * dropping the lock is fine as we
12650Sstevel@tonic-gate * have a refhold on the bucket.
12660Sstevel@tonic-gate * so mfc cannot be freed.
12670Sstevel@tonic-gate * The timeout can fire but it will see
12680Sstevel@tonic-gate * that mfc_timeout_id == 0 and not cleanup.
12690Sstevel@tonic-gate */
12700Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
12710Sstevel@tonic-gate (void) untimeout(id);
12720Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
12730Sstevel@tonic-gate }
12740Sstevel@tonic-gate
12750Sstevel@tonic-gate /*
12760Sstevel@tonic-gate * Send all pkts that are queued waiting for the upcall.
12770Sstevel@tonic-gate * ip_mdq param tun set to 0 -
12780Sstevel@tonic-gate * the return value of ip_mdq() isn't used here,
12790Sstevel@tonic-gate * so value we send doesn't matter.
12800Sstevel@tonic-gate */
12810Sstevel@tonic-gate while (rt->mfc_rte != NULL) {
12820Sstevel@tonic-gate rte = rt->mfc_rte;
12830Sstevel@tonic-gate rt->mfc_rte = rte->rte_next;
12840Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
12850Sstevel@tonic-gate (void) ip_mdq(rte->mp, (ipha_t *)
12860Sstevel@tonic-gate rte->mp->b_rptr, rte->ill, 0, rt);
12870Sstevel@tonic-gate freemsg(rte->mp);
12880Sstevel@tonic-gate mi_free((char *)rte);
12890Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
12900Sstevel@tonic-gate }
12910Sstevel@tonic-gate }
12920Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate
12950Sstevel@tonic-gate
12960Sstevel@tonic-gate /*
12970Sstevel@tonic-gate * It is possible that an entry is being inserted without an upcall
12980Sstevel@tonic-gate */
12990Sstevel@tonic-gate if (nstl == 0) {
13000Sstevel@tonic-gate mutex_enter(&(mfcbp->mfcb_lock));
13013448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
13025240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
13030Sstevel@tonic-gate "add_mfc: no upcall o %x g %x p %x",
13040Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr),
13050Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr),
13060Sstevel@tonic-gate mfccp->mfcc_parent);
13070Sstevel@tonic-gate }
13083448Sdh155122 if (is_mrouter_off(ipst)) {
13090Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock);
13100Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
13110Sstevel@tonic-gate return (EINVAL);
13120Sstevel@tonic-gate }
13130Sstevel@tonic-gate
13140Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
13150Sstevel@tonic-gate
13160Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
13170Sstevel@tonic-gate if ((rt->mfc_origin.s_addr ==
13180Sstevel@tonic-gate mfccp->mfcc_origin.s_addr) &&
13190Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr ==
13205240Snordmark mfccp->mfcc_mcastgrp.s_addr) &&
13215240Snordmark (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
13223448Sdh155122 fill_route(rt, mfccp, ipst);
13230Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
13240Sstevel@tonic-gate break;
13250Sstevel@tonic-gate }
13260Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
13270Sstevel@tonic-gate }
13280Sstevel@tonic-gate
13290Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */
13300Sstevel@tonic-gate if (rt == NULL) {
13310Sstevel@tonic-gate rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
13320Sstevel@tonic-gate if (rt == NULL) {
13330Sstevel@tonic-gate ip1dbg(("add_mfc: out of memory\n"));
13340Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock);
13350Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
13360Sstevel@tonic-gate return (ENOBUFS);
13370Sstevel@tonic-gate }
13380Sstevel@tonic-gate
13390Sstevel@tonic-gate /* Insert new entry at head of hash chain */
13400Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
13413448Sdh155122 fill_route(rt, mfccp, ipst);
13420Sstevel@tonic-gate
13430Sstevel@tonic-gate /* Link into table */
13440Sstevel@tonic-gate rt->mfc_next = mfcbp->mfcb_mfc;
13450Sstevel@tonic-gate mfcbp->mfcb_mfc = rt;
13460Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
13470Sstevel@tonic-gate }
13480Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock);
13490Sstevel@tonic-gate }
13500Sstevel@tonic-gate
13510Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
13520Sstevel@tonic-gate return (0);
13530Sstevel@tonic-gate }
13540Sstevel@tonic-gate
13550Sstevel@tonic-gate /*
13560Sstevel@tonic-gate * Fills in mfc structure from mrouted mfcctl.
13570Sstevel@tonic-gate */
13580Sstevel@tonic-gate static void
fill_route(struct mfc * rt,struct mfcctl * mfccp,ip_stack_t * ipst)13593448Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
13600Sstevel@tonic-gate {
13610Sstevel@tonic-gate int i;
13620Sstevel@tonic-gate
13630Sstevel@tonic-gate rt->mfc_origin = mfccp->mfcc_origin;
13640Sstevel@tonic-gate rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
13650Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent;
13663448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
13673448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) {
13680Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
13690Sstevel@tonic-gate }
13703448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
13710Sstevel@tonic-gate /* Initialize pkt counters per src-grp */
13720Sstevel@tonic-gate rt->mfc_pkt_cnt = 0;
13730Sstevel@tonic-gate rt->mfc_byte_cnt = 0;
13740Sstevel@tonic-gate rt->mfc_wrong_if = 0;
13750Sstevel@tonic-gate rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;
13760Sstevel@tonic-gate
13770Sstevel@tonic-gate }
13780Sstevel@tonic-gate
13790Sstevel@tonic-gate static void
free_queue(struct mfc * mfcp)13800Sstevel@tonic-gate free_queue(struct mfc *mfcp)
13810Sstevel@tonic-gate {
13820Sstevel@tonic-gate struct rtdetq *rte0;
13830Sstevel@tonic-gate
13840Sstevel@tonic-gate /*
13850Sstevel@tonic-gate * Drop all queued upcall packets.
13860Sstevel@tonic-gate * Free the mbuf with the pkt.
13870Sstevel@tonic-gate */
13880Sstevel@tonic-gate while ((rte0 = mfcp->mfc_rte) != NULL) {
13890Sstevel@tonic-gate mfcp->mfc_rte = rte0->rte_next;
13900Sstevel@tonic-gate freemsg(rte0->mp);
13910Sstevel@tonic-gate mi_free((char *)rte0);
13920Sstevel@tonic-gate }
13930Sstevel@tonic-gate }
13940Sstevel@tonic-gate /*
13950Sstevel@tonic-gate * go thorugh the hash bucket and free all the entries marked condemned.
13960Sstevel@tonic-gate */
13970Sstevel@tonic-gate void
release_mfc(struct mfcb * mfcbp)13980Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp)
13990Sstevel@tonic-gate {
14000Sstevel@tonic-gate struct mfc *current_mfcp;
14010Sstevel@tonic-gate struct mfc *prev_mfcp;
14020Sstevel@tonic-gate
14030Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14040Sstevel@tonic-gate
14050Sstevel@tonic-gate while (current_mfcp != NULL) {
14060Sstevel@tonic-gate if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
14070Sstevel@tonic-gate if (current_mfcp == mfcbp->mfcb_mfc) {
14080Sstevel@tonic-gate mfcbp->mfcb_mfc = current_mfcp->mfc_next;
14090Sstevel@tonic-gate free_queue(current_mfcp);
14100Sstevel@tonic-gate mi_free(current_mfcp);
14110Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
14120Sstevel@tonic-gate continue;
14130Sstevel@tonic-gate }
14140Sstevel@tonic-gate ASSERT(prev_mfcp != NULL);
14150Sstevel@tonic-gate prev_mfcp->mfc_next = current_mfcp->mfc_next;
14160Sstevel@tonic-gate free_queue(current_mfcp);
14170Sstevel@tonic-gate mi_free(current_mfcp);
14180Sstevel@tonic-gate current_mfcp = NULL;
14190Sstevel@tonic-gate } else {
14200Sstevel@tonic-gate prev_mfcp = current_mfcp;
14210Sstevel@tonic-gate }
14220Sstevel@tonic-gate
14230Sstevel@tonic-gate current_mfcp = prev_mfcp->mfc_next;
14240Sstevel@tonic-gate
14250Sstevel@tonic-gate }
14260Sstevel@tonic-gate mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
14270Sstevel@tonic-gate ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
14280Sstevel@tonic-gate }
14290Sstevel@tonic-gate
14300Sstevel@tonic-gate /*
14310Sstevel@tonic-gate * Delete an mfc entry.
14320Sstevel@tonic-gate */
14330Sstevel@tonic-gate static int
del_mfc(struct mfcctl * mfccp,ip_stack_t * ipst)14343448Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
14350Sstevel@tonic-gate {
14360Sstevel@tonic-gate struct in_addr origin;
14370Sstevel@tonic-gate struct in_addr mcastgrp;
14385240Snordmark struct mfc *rt;
14395240Snordmark uint_t hash;
14405240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
14410Sstevel@tonic-gate
14420Sstevel@tonic-gate origin = mfccp->mfcc_origin;
14430Sstevel@tonic-gate mcastgrp = mfccp->mfcc_mcastgrp;
14440Sstevel@tonic-gate hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
14450Sstevel@tonic-gate
14463448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
14475240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
14480Sstevel@tonic-gate "del_mfc: o %x g %x",
14490Sstevel@tonic-gate ntohl(origin.s_addr),
14500Sstevel@tonic-gate ntohl(mcastgrp.s_addr));
14510Sstevel@tonic-gate }
14520Sstevel@tonic-gate
14533448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
14540Sstevel@tonic-gate
14550Sstevel@tonic-gate /* Find mfc in mfctable, finds only entries without upcalls */
14563448Sdh155122 for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
14570Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
14580Sstevel@tonic-gate if (origin.s_addr == rt->mfc_origin.s_addr &&
14590Sstevel@tonic-gate mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
14600Sstevel@tonic-gate rt->mfc_rte == NULL &&
14610Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
14620Sstevel@tonic-gate break;
14630Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
14640Sstevel@tonic-gate }
14650Sstevel@tonic-gate
14660Sstevel@tonic-gate /*
14670Sstevel@tonic-gate * Return if there was an upcall (mfc_rte != NULL,
14680Sstevel@tonic-gate * or rt not in mfctable.
14690Sstevel@tonic-gate */
14700Sstevel@tonic-gate if (rt == NULL) {
14713448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]);
14720Sstevel@tonic-gate return (EADDRNOTAVAIL);
14730Sstevel@tonic-gate }
14740Sstevel@tonic-gate
14750Sstevel@tonic-gate
14760Sstevel@tonic-gate /*
14770Sstevel@tonic-gate * no need to hold lock as we have a reference.
14780Sstevel@tonic-gate */
14793448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
14800Sstevel@tonic-gate /* error checking */
14810Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) {
14820Sstevel@tonic-gate ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
14830Sstevel@tonic-gate /*
14840Sstevel@tonic-gate * Its ok to drop the lock, the struct cannot be freed
14850Sstevel@tonic-gate * since we have a ref on the hash bucket.
14860Sstevel@tonic-gate */
14870Sstevel@tonic-gate rt->mfc_timeout_id = 0;
14880Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
14890Sstevel@tonic-gate (void) untimeout(rt->mfc_timeout_id);
14900Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate
14930Sstevel@tonic-gate ASSERT(rt->mfc_rte == NULL);
14940Sstevel@tonic-gate
14950Sstevel@tonic-gate
14960Sstevel@tonic-gate /*
14970Sstevel@tonic-gate * Delete the entry from the cache
14980Sstevel@tonic-gate */
14990Sstevel@tonic-gate rt->mfc_marks |= MFCB_MARK_CONDEMNED;
15000Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
15010Sstevel@tonic-gate
15023448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]);
15030Sstevel@tonic-gate
15040Sstevel@tonic-gate return (0);
15050Sstevel@tonic-gate }
15060Sstevel@tonic-gate
15070Sstevel@tonic-gate #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
15080Sstevel@tonic-gate
15090Sstevel@tonic-gate /*
15100Sstevel@tonic-gate * IP multicast forwarding function. This function assumes that the packet
15110Sstevel@tonic-gate * pointed to by ipha has arrived on (or is about to be sent to) the interface
15120Sstevel@tonic-gate * pointed to by "ill", and the packet is to be relayed to other networks
15130Sstevel@tonic-gate * that have members of the packet's destination IP multicast group.
15140Sstevel@tonic-gate *
15150Sstevel@tonic-gate * The packet is returned unscathed to the caller, unless it is
15160Sstevel@tonic-gate * erroneous, in which case a -1 value tells the caller (IP)
15170Sstevel@tonic-gate * to discard it.
15180Sstevel@tonic-gate *
15190Sstevel@tonic-gate * Unlike BSD, SunOS 5.x needs to return to IP info about
15200Sstevel@tonic-gate * whether pkt came in thru a tunnel, so it can be discarded, unless
15210Sstevel@tonic-gate * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
15220Sstevel@tonic-gate * to be delivered.
15230Sstevel@tonic-gate * Return values are 0 - pkt is okay and phyint
15240Sstevel@tonic-gate * -1 - pkt is malformed and to be tossed
15250Sstevel@tonic-gate * 1 - pkt came in on tunnel
15260Sstevel@tonic-gate */
15270Sstevel@tonic-gate int
ip_mforward(mblk_t * mp,ip_recv_attr_t * ira)152811042SErik.Nordmark@Sun.COM ip_mforward(mblk_t *mp, ip_recv_attr_t *ira)
15290Sstevel@tonic-gate {
153011042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr;
153111042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill;
15320Sstevel@tonic-gate struct mfc *rt;
15330Sstevel@tonic-gate ipaddr_t src, dst, tunnel_src = 0;
15340Sstevel@tonic-gate static int srctun = 0;
15350Sstevel@tonic-gate vifi_t vifi;
15360Sstevel@tonic-gate boolean_t pim_reg_packet = B_FALSE;
153711042SErik.Nordmark@Sun.COM struct mfcb *mfcbp;
15383448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
15395240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
154011042SErik.Nordmark@Sun.COM ill_t *rill = ira->ira_rill;
154111042SErik.Nordmark@Sun.COM
154211042SErik.Nordmark@Sun.COM ASSERT(ira->ira_pktlen == msgdsize(mp));
15433448Sdh155122
15443448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
15455240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15460Sstevel@tonic-gate "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
15470Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
15480Sstevel@tonic-gate ill->ill_name);
15490Sstevel@tonic-gate }
15500Sstevel@tonic-gate
15510Sstevel@tonic-gate dst = ipha->ipha_dst;
155211042SErik.Nordmark@Sun.COM if (ira->ira_flags & IRAF_PIM_REGISTER)
15530Sstevel@tonic-gate pim_reg_packet = B_TRUE;
155411042SErik.Nordmark@Sun.COM else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET)
155511042SErik.Nordmark@Sun.COM tunnel_src = ira->ira_mroute_tunnel;
15560Sstevel@tonic-gate
15570Sstevel@tonic-gate /*
15580Sstevel@tonic-gate * Don't forward a packet with time-to-live of zero or one,
15590Sstevel@tonic-gate * or a packet destined to a local-only group.
15600Sstevel@tonic-gate */
15610Sstevel@tonic-gate if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
15625240Snordmark (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
15633448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
15645240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
15650Sstevel@tonic-gate "ip_mforward: not forwarded ttl %d,"
15660Sstevel@tonic-gate " dst 0x%x ill %s",
15670Sstevel@tonic-gate ipha->ipha_ttl, ntohl(dst), ill->ill_name);
15680Sstevel@tonic-gate }
15690Sstevel@tonic-gate if (tunnel_src != 0)
15700Sstevel@tonic-gate return (1);
15710Sstevel@tonic-gate else
15720Sstevel@tonic-gate return (0);
15730Sstevel@tonic-gate }
15740Sstevel@tonic-gate
15750Sstevel@tonic-gate if ((tunnel_src != 0) || pim_reg_packet) {
15760Sstevel@tonic-gate /*
15770Sstevel@tonic-gate * Packet arrived over an encapsulated tunnel or via a PIM
157811042SErik.Nordmark@Sun.COM * register message.
15790Sstevel@tonic-gate */
15803448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
15810Sstevel@tonic-gate if (tunnel_src != 0) {
15825240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
15833448Sdh155122 SL_TRACE,
15840Sstevel@tonic-gate "ip_mforward: ill %s arrived via ENCAP TUN",
15850Sstevel@tonic-gate ill->ill_name);
15860Sstevel@tonic-gate } else if (pim_reg_packet) {
15875240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
15883448Sdh155122 SL_TRACE,
15890Sstevel@tonic-gate "ip_mforward: ill %s arrived via"
15900Sstevel@tonic-gate " REGISTER VIF",
15910Sstevel@tonic-gate ill->ill_name);
15920Sstevel@tonic-gate }
15930Sstevel@tonic-gate }
15940Sstevel@tonic-gate } else if ((ipha->ipha_version_and_hdr_length & 0xf) <
15950Sstevel@tonic-gate (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
15960Sstevel@tonic-gate ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
15970Sstevel@tonic-gate /* Packet arrived via a physical interface. */
15983448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
15995240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16000Sstevel@tonic-gate "ip_mforward: ill %s arrived via PHYINT",
16010Sstevel@tonic-gate ill->ill_name);
16020Sstevel@tonic-gate }
16030Sstevel@tonic-gate
16040Sstevel@tonic-gate } else {
16050Sstevel@tonic-gate /*
16060Sstevel@tonic-gate * Packet arrived through a SRCRT tunnel.
16070Sstevel@tonic-gate * Source-route tunnels are no longer supported.
16080Sstevel@tonic-gate * Error message printed every 1000 times.
16090Sstevel@tonic-gate */
16100Sstevel@tonic-gate if ((srctun++ % 1000) == 0) {
16110Sstevel@tonic-gate cmn_err(CE_WARN,
16120Sstevel@tonic-gate "ip_mforward: received source-routed pkt from %x",
16130Sstevel@tonic-gate ntohl(ipha->ipha_src));
16140Sstevel@tonic-gate }
16150Sstevel@tonic-gate return (-1);
16160Sstevel@tonic-gate }
16170Sstevel@tonic-gate
16183448Sdh155122 ipst->ips_mrtstat->mrts_fwd_in++;
16190Sstevel@tonic-gate src = ipha->ipha_src;
16200Sstevel@tonic-gate
16210Sstevel@tonic-gate /* Find route in cache, return NULL if not there or upcalls q'ed. */
16220Sstevel@tonic-gate
16230Sstevel@tonic-gate /*
16240Sstevel@tonic-gate * Lock the mfctable against changes made by ip_mforward.
16250Sstevel@tonic-gate * Note that only add_mfc and del_mfc can remove entries and
16260Sstevel@tonic-gate * they run with exclusive access to IP. So we do not need to
16270Sstevel@tonic-gate * guard against the rt being deleted, so release lock after reading.
16280Sstevel@tonic-gate */
16290Sstevel@tonic-gate
16303448Sdh155122 if (is_mrouter_off(ipst))
16310Sstevel@tonic-gate return (-1);
16320Sstevel@tonic-gate
16333448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
16340Sstevel@tonic-gate MFCB_REFHOLD(mfcbp);
16350Sstevel@tonic-gate MFCFIND(mfcbp, src, dst, rt);
16360Sstevel@tonic-gate
16370Sstevel@tonic-gate /* Entry exists, so forward if necessary */
16380Sstevel@tonic-gate if (rt != NULL) {
16390Sstevel@tonic-gate int ret = 0;
16403448Sdh155122 ipst->ips_mrtstat->mrts_mfc_hits++;
16410Sstevel@tonic-gate if (pim_reg_packet) {
16423448Sdh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
16430Sstevel@tonic-gate ret = ip_mdq(mp, ipha,
16443448Sdh155122 ipst->ips_vifs[ipst->ips_reg_vif_num].
16453448Sdh155122 v_ipif->ipif_ill,
16463448Sdh155122 0, rt);
16470Sstevel@tonic-gate } else {
16480Sstevel@tonic-gate ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
16490Sstevel@tonic-gate }
16500Sstevel@tonic-gate
16510Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
16520Sstevel@tonic-gate return (ret);
16530Sstevel@tonic-gate
16540Sstevel@tonic-gate /*
16550Sstevel@tonic-gate * Don't forward if we don't have a cache entry. Mrouted will
16560Sstevel@tonic-gate * always provide a cache entry in response to an upcall.
16570Sstevel@tonic-gate */
16580Sstevel@tonic-gate } else {
16590Sstevel@tonic-gate /*
16600Sstevel@tonic-gate * If we don't have a route for packet's origin, make a copy
16610Sstevel@tonic-gate * of the packet and send message to routing daemon.
16620Sstevel@tonic-gate */
16630Sstevel@tonic-gate struct mfc *mfc_rt = NULL;
16640Sstevel@tonic-gate mblk_t *mp0 = NULL;
16650Sstevel@tonic-gate mblk_t *mp_copy = NULL;
16660Sstevel@tonic-gate struct rtdetq *rte = NULL;
16670Sstevel@tonic-gate struct rtdetq *rte_m, *rte1, *prev_rte;
16680Sstevel@tonic-gate uint_t hash;
16690Sstevel@tonic-gate int npkts;
16700Sstevel@tonic-gate boolean_t new_mfc = B_FALSE;
16713448Sdh155122 ipst->ips_mrtstat->mrts_mfc_misses++;
16720Sstevel@tonic-gate /* BSD uses mrts_no_route++ */
16733448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
16745240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
16750Sstevel@tonic-gate "ip_mforward: no rte ill %s src %x g %x misses %d",
16760Sstevel@tonic-gate ill->ill_name, ntohl(src), ntohl(dst),
16773448Sdh155122 (int)ipst->ips_mrtstat->mrts_mfc_misses);
16780Sstevel@tonic-gate }
16790Sstevel@tonic-gate /*
16800Sstevel@tonic-gate * The order of the following code differs from the BSD code.
16810Sstevel@tonic-gate * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
16820Sstevel@tonic-gate * code works, so SunOS 5.x wasn't changed to conform to the
16830Sstevel@tonic-gate * BSD version.
16840Sstevel@tonic-gate */
16850Sstevel@tonic-gate
16860Sstevel@tonic-gate /* Lock mfctable. */
16870Sstevel@tonic-gate hash = MFCHASH(src, dst);
16883448Sdh155122 mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
16890Sstevel@tonic-gate
16900Sstevel@tonic-gate /*
16910Sstevel@tonic-gate * If we are turning off mrouted return an error
16920Sstevel@tonic-gate */
16933448Sdh155122 if (is_mrouter_off(ipst)) {
16940Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock);
16950Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
16960Sstevel@tonic-gate return (-1);
16970Sstevel@tonic-gate }
16980Sstevel@tonic-gate
16990Sstevel@tonic-gate /* Is there an upcall waiting for this packet? */
17003448Sdh155122 for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
17010Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next) {
17020Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex);
17033448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
17045240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
17053448Sdh155122 SL_TRACE,
17060Sstevel@tonic-gate "ip_mforward: MFCTAB hash %d o 0x%x"
17070Sstevel@tonic-gate " g 0x%x\n",
17080Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr),
17090Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr));
17100Sstevel@tonic-gate }
17110Sstevel@tonic-gate /* There is an upcall */
17120Sstevel@tonic-gate if ((src == mfc_rt->mfc_origin.s_addr) &&
17130Sstevel@tonic-gate (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
17140Sstevel@tonic-gate (mfc_rt->mfc_rte != NULL) &&
17150Sstevel@tonic-gate !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
17160Sstevel@tonic-gate break;
17170Sstevel@tonic-gate }
17180Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
17190Sstevel@tonic-gate }
17200Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */
17210Sstevel@tonic-gate if (mfc_rt == NULL) {
17220Sstevel@tonic-gate mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
17230Sstevel@tonic-gate if (mfc_rt == NULL) {
17243448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
17250Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory "
17260Sstevel@tonic-gate "for mfc, mfc_rt\n"));
17270Sstevel@tonic-gate goto error_return;
17280Sstevel@tonic-gate } else
17290Sstevel@tonic-gate new_mfc = B_TRUE;
17300Sstevel@tonic-gate /* Get resources */
17310Sstevel@tonic-gate /* TODO could copy header and dup rest */
17320Sstevel@tonic-gate mp_copy = copymsg(mp);
17330Sstevel@tonic-gate if (mp_copy == NULL) {
17343448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
17350Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for "
17360Sstevel@tonic-gate "mblk, mp_copy\n"));
17370Sstevel@tonic-gate goto error_return;
17380Sstevel@tonic-gate }
17390Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex);
17400Sstevel@tonic-gate }
17410Sstevel@tonic-gate /* Get resources for rte, whether first rte or not first. */
17420Sstevel@tonic-gate /* Add this packet into rtdetq */
17430Sstevel@tonic-gate rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
17440Sstevel@tonic-gate if (rte == NULL) {
17453448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
17460Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
17470Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for"
17480Sstevel@tonic-gate " rtdetq, rte\n"));
17490Sstevel@tonic-gate goto error_return;
17500Sstevel@tonic-gate }
17510Sstevel@tonic-gate
17520Sstevel@tonic-gate mp0 = copymsg(mp);
17530Sstevel@tonic-gate if (mp0 == NULL) {
17543448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
17550Sstevel@tonic-gate ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
17560Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
17570Sstevel@tonic-gate goto error_return;
17580Sstevel@tonic-gate }
17590Sstevel@tonic-gate rte->mp = mp0;
17600Sstevel@tonic-gate if (pim_reg_packet) {
17613448Sdh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
17623448Sdh155122 rte->ill =
17633448Sdh155122 ipst->ips_vifs[ipst->ips_reg_vif_num].
17643448Sdh155122 v_ipif->ipif_ill;
17650Sstevel@tonic-gate } else {
17660Sstevel@tonic-gate rte->ill = ill;
17670Sstevel@tonic-gate }
17680Sstevel@tonic-gate rte->rte_next = NULL;
17690Sstevel@tonic-gate
17700Sstevel@tonic-gate /*
17710Sstevel@tonic-gate * Determine if upcall q (rtdetq) has overflowed.
17720Sstevel@tonic-gate * mfc_rt->mfc_rte is null by mi_zalloc
17730Sstevel@tonic-gate * if it is the first message.
17740Sstevel@tonic-gate */
17750Sstevel@tonic-gate for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
17760Sstevel@tonic-gate rte_m = rte_m->rte_next)
17770Sstevel@tonic-gate npkts++;
17783448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
17795240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
17800Sstevel@tonic-gate "ip_mforward: upcalls %d\n", npkts);
17810Sstevel@tonic-gate }
17820Sstevel@tonic-gate if (npkts > MAX_UPQ) {
17833448Sdh155122 ipst->ips_mrtstat->mrts_upq_ovflw++;
17840Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
17850Sstevel@tonic-gate goto error_return;
17860Sstevel@tonic-gate }
17870Sstevel@tonic-gate
17880Sstevel@tonic-gate if (npkts == 0) { /* first upcall */
17890Sstevel@tonic-gate int i = 0;
17900Sstevel@tonic-gate /*
17910Sstevel@tonic-gate * Now finish installing the new mfc! Now that we have
17920Sstevel@tonic-gate * resources! Insert new entry at head of hash chain.
17930Sstevel@tonic-gate * Use src and dst which are ipaddr_t's.
17940Sstevel@tonic-gate */
17950Sstevel@tonic-gate mfc_rt->mfc_origin.s_addr = src;
17960Sstevel@tonic-gate mfc_rt->mfc_mcastgrp.s_addr = dst;
17970Sstevel@tonic-gate
17983448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
17993448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++)
18000Sstevel@tonic-gate mfc_rt->mfc_ttls[i] = 0;
18013448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
18020Sstevel@tonic-gate mfc_rt->mfc_parent = ALL_VIFS;
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate /* Link into table */
18053448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
18065240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
18073448Sdh155122 SL_TRACE,
18080Sstevel@tonic-gate "ip_mforward: NEW MFCTAB hash %d o 0x%x "
18090Sstevel@tonic-gate "g 0x%x\n", hash,
18100Sstevel@tonic-gate ntohl(mfc_rt->mfc_origin.s_addr),
18110Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr));
18120Sstevel@tonic-gate }
18133448Sdh155122 mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
18143448Sdh155122 ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
18150Sstevel@tonic-gate mfc_rt->mfc_rte = NULL;
18160Sstevel@tonic-gate }
18170Sstevel@tonic-gate
18180Sstevel@tonic-gate /* Link in the upcall */
18190Sstevel@tonic-gate /* First upcall */
18200Sstevel@tonic-gate if (mfc_rt->mfc_rte == NULL)
18210Sstevel@tonic-gate mfc_rt->mfc_rte = rte;
18220Sstevel@tonic-gate else {
18230Sstevel@tonic-gate /* not the first upcall */
18240Sstevel@tonic-gate prev_rte = mfc_rt->mfc_rte;
18250Sstevel@tonic-gate for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
18265240Snordmark prev_rte = rte1, rte1 = rte1->rte_next)
18275240Snordmark ;
18280Sstevel@tonic-gate prev_rte->rte_next = rte;
18290Sstevel@tonic-gate }
18300Sstevel@tonic-gate
18310Sstevel@tonic-gate /*
18320Sstevel@tonic-gate * No upcalls waiting, this is first one, so send a message to
18330Sstevel@tonic-gate * routing daemon to install a route into kernel table.
18340Sstevel@tonic-gate */
18350Sstevel@tonic-gate if (npkts == 0) {
18360Sstevel@tonic-gate struct igmpmsg *im;
18370Sstevel@tonic-gate /* ipha_protocol is 0, for upcall */
18380Sstevel@tonic-gate ASSERT(mp_copy != NULL);
18390Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr;
18400Sstevel@tonic-gate im->im_msgtype = IGMPMSG_NOCACHE;
18410Sstevel@tonic-gate im->im_mbz = 0;
18423448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
18430Sstevel@tonic-gate if (pim_reg_packet) {
18443448Sdh155122 im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
18453448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
18460Sstevel@tonic-gate } else {
18470Sstevel@tonic-gate /*
18480Sstevel@tonic-gate * XXX do we need to hold locks here ?
18490Sstevel@tonic-gate */
18503448Sdh155122 for (vifi = 0;
18513448Sdh155122 vifi < ipst->ips_numvifs;
18523448Sdh155122 vifi++) {
18533448Sdh155122 if (ipst->ips_vifs[vifi].v_ipif == NULL)
18540Sstevel@tonic-gate continue;
18553448Sdh155122 if (ipst->ips_vifs[vifi].
18563448Sdh155122 v_ipif->ipif_ill == ill) {
18570Sstevel@tonic-gate im->im_vif = (uchar_t)vifi;
18580Sstevel@tonic-gate break;
18590Sstevel@tonic-gate }
18600Sstevel@tonic-gate }
18613448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
18623448Sdh155122 ASSERT(vifi < ipst->ips_numvifs);
18630Sstevel@tonic-gate }
18640Sstevel@tonic-gate
18653448Sdh155122 ipst->ips_mrtstat->mrts_upcalls++;
18660Sstevel@tonic-gate /* Timer to discard upcalls if mrouted is too slow */
18670Sstevel@tonic-gate mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
18680Sstevel@tonic-gate mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
18690Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
18703448Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
18715240Snordmark /* Pass to RAWIP */
187211042SErik.Nordmark@Sun.COM ira->ira_ill = ira->ira_rill = NULL;
187311042SErik.Nordmark@Sun.COM (mrouter->conn_recv)(mrouter, mp_copy, NULL, ira);
187411042SErik.Nordmark@Sun.COM ira->ira_ill = ill;
187511042SErik.Nordmark@Sun.COM ira->ira_rill = rill;
18760Sstevel@tonic-gate } else {
18770Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
18783448Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
187911042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
188011042SErik.Nordmark@Sun.COM ip_drop_input("ip_mforward - upcall already waiting",
188111042SErik.Nordmark@Sun.COM mp_copy, ill);
18820Sstevel@tonic-gate freemsg(mp_copy);
18830Sstevel@tonic-gate }
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
18860Sstevel@tonic-gate if (tunnel_src != 0)
18870Sstevel@tonic-gate return (1);
18880Sstevel@tonic-gate else
18890Sstevel@tonic-gate return (0);
18900Sstevel@tonic-gate error_return:
18913448Sdh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
18920Sstevel@tonic-gate MFCB_REFRELE(mfcbp);
18930Sstevel@tonic-gate if (mfc_rt != NULL && (new_mfc == B_TRUE))
18940Sstevel@tonic-gate mi_free((char *)mfc_rt);
18950Sstevel@tonic-gate if (rte != NULL)
18960Sstevel@tonic-gate mi_free((char *)rte);
189711042SErik.Nordmark@Sun.COM if (mp_copy != NULL) {
189811042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
189911042SErik.Nordmark@Sun.COM ip_drop_input("ip_mforward error", mp_copy, ill);
19000Sstevel@tonic-gate freemsg(mp_copy);
190111042SErik.Nordmark@Sun.COM }
19020Sstevel@tonic-gate if (mp0 != NULL)
19030Sstevel@tonic-gate freemsg(mp0);
19040Sstevel@tonic-gate return (-1);
19050Sstevel@tonic-gate }
19060Sstevel@tonic-gate }
19070Sstevel@tonic-gate
19080Sstevel@tonic-gate /*
19090Sstevel@tonic-gate * Clean up the mfctable cache entry if upcall is not serviced.
19100Sstevel@tonic-gate * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19110Sstevel@tonic-gate */
19120Sstevel@tonic-gate static void
expire_upcalls(void * arg)19130Sstevel@tonic-gate expire_upcalls(void *arg)
19140Sstevel@tonic-gate {
19150Sstevel@tonic-gate struct mfc *mfc_rt = arg;
19160Sstevel@tonic-gate uint_t hash;
19170Sstevel@tonic-gate struct mfc *prev_mfc, *mfc0;
19183448Sdh155122 ip_stack_t *ipst;
19195240Snordmark conn_t *mrouter;
19203448Sdh155122
19213448Sdh155122 if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) {
19223448Sdh155122 cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
19233448Sdh155122 return;
19243448Sdh155122 }
19253448Sdh155122 ipst = mfc_rt->mfc_rte->ill->ill_ipst;
19265240Snordmark mrouter = ipst->ips_ip_g_mrouter;
19270Sstevel@tonic-gate
19280Sstevel@tonic-gate hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
19293448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
19305240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19310Sstevel@tonic-gate "expire_upcalls: hash %d s %x g %x",
19320Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr),
19330Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr));
19340Sstevel@tonic-gate }
19353448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
19360Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex);
19370Sstevel@tonic-gate /*
19380Sstevel@tonic-gate * if timeout has been set to zero, than the
19390Sstevel@tonic-gate * entry has been filled, no need to delete it.
19400Sstevel@tonic-gate */
19410Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id == 0)
19420Sstevel@tonic-gate goto done;
19433448Sdh155122 ipst->ips_mrtstat->mrts_cache_cleanups++;
19440Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0;
19450Sstevel@tonic-gate
19460Sstevel@tonic-gate /* Determine entry to be cleaned up in cache table. */
19473448Sdh155122 for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
19480Sstevel@tonic-gate prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
19490Sstevel@tonic-gate if (mfc0 == mfc_rt)
19500Sstevel@tonic-gate break;
19510Sstevel@tonic-gate
19520Sstevel@tonic-gate /* del_mfc takes care of gone mfcs */
19530Sstevel@tonic-gate ASSERT(prev_mfc != NULL);
19540Sstevel@tonic-gate ASSERT(mfc0 != NULL);
19550Sstevel@tonic-gate
19560Sstevel@tonic-gate /*
19570Sstevel@tonic-gate * Delete the entry from the cache
19580Sstevel@tonic-gate */
19593448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
19600Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
19610Sstevel@tonic-gate
19620Sstevel@tonic-gate /*
19630Sstevel@tonic-gate * release_mfc will drop all queued upcall packets.
19640Sstevel@tonic-gate * and will free the mbuf with the pkt, if, timing info.
19650Sstevel@tonic-gate */
19660Sstevel@tonic-gate done:
19670Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex);
19683448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]);
19690Sstevel@tonic-gate }
19700Sstevel@tonic-gate
19710Sstevel@tonic-gate /*
19720Sstevel@tonic-gate * Packet forwarding routine once entry in the cache is made.
19730Sstevel@tonic-gate */
19740Sstevel@tonic-gate static int
ip_mdq(mblk_t * mp,ipha_t * ipha,ill_t * ill,ipaddr_t tunnel_src,struct mfc * rt)19750Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
19760Sstevel@tonic-gate struct mfc *rt)
19770Sstevel@tonic-gate {
19780Sstevel@tonic-gate vifi_t vifi;
19790Sstevel@tonic-gate struct vif *vifp;
19800Sstevel@tonic-gate ipaddr_t dst = ipha->ipha_dst;
19810Sstevel@tonic-gate size_t plen = msgdsize(mp);
19820Sstevel@tonic-gate vifi_t num_of_vifs;
19833448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
19845240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
198511042SErik.Nordmark@Sun.COM ip_recv_attr_t iras;
19863448Sdh155122
19873448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
19885240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
19890Sstevel@tonic-gate "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
19900Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
19910Sstevel@tonic-gate ill->ill_name);
19920Sstevel@tonic-gate }
19930Sstevel@tonic-gate
19940Sstevel@tonic-gate /* Macro to send packet on vif */
19950Sstevel@tonic-gate #define MC_SEND(ipha, mp, vifp, dst) { \
19960Sstevel@tonic-gate if ((vifp)->v_flags & VIFF_TUNNEL) \
19970Sstevel@tonic-gate encap_send((ipha), (mp), (vifp), (dst)); \
19980Sstevel@tonic-gate else if ((vifp)->v_flags & VIFF_REGISTER) \
19990Sstevel@tonic-gate register_send((ipha), (mp), (vifp), (dst)); \
20000Sstevel@tonic-gate else \
20010Sstevel@tonic-gate phyint_send((ipha), (mp), (vifp), (dst)); \
20020Sstevel@tonic-gate }
20030Sstevel@tonic-gate
20040Sstevel@tonic-gate vifi = rt->mfc_parent;
20050Sstevel@tonic-gate
20060Sstevel@tonic-gate /*
20070Sstevel@tonic-gate * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
20080Sstevel@tonic-gate * Mrouted had no route.
20090Sstevel@tonic-gate * We wanted the route installed in the mfctable to prevent multiple
20100Sstevel@tonic-gate * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
20110Sstevel@tonic-gate * NULL so we don't want to check the ill. Still needed as of Mrouted
20120Sstevel@tonic-gate * 3.6.
20130Sstevel@tonic-gate */
20140Sstevel@tonic-gate if (vifi == NO_VIF) {
20150Sstevel@tonic-gate ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
20160Sstevel@tonic-gate ill->ill_name));
20173448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
20185240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20190Sstevel@tonic-gate "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
20200Sstevel@tonic-gate }
20210Sstevel@tonic-gate return (-1); /* drop pkt */
20220Sstevel@tonic-gate }
20230Sstevel@tonic-gate
20243448Sdh155122 if (!lock_good_vif(&ipst->ips_vifs[vifi]))
20250Sstevel@tonic-gate return (-1);
20260Sstevel@tonic-gate /*
20270Sstevel@tonic-gate * The MFC entries are not cleaned up when an ipif goes
20280Sstevel@tonic-gate * away thus this code has to guard against an MFC referencing
20290Sstevel@tonic-gate * an ipif that has been closed. Note: reset_mrt_vif_ipif
20300Sstevel@tonic-gate * sets the v_ipif to NULL when the ipif disappears.
20310Sstevel@tonic-gate */
20323448Sdh155122 ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
20333448Sdh155122
20343448Sdh155122 if (vifi >= ipst->ips_numvifs) {
20350Sstevel@tonic-gate cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
20360Sstevel@tonic-gate "%d ill %s viftable ill %s\n",
20373448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
20383448Sdh155122 ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
20393448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]);
20400Sstevel@tonic-gate return (-1);
20410Sstevel@tonic-gate }
20420Sstevel@tonic-gate /*
20430Sstevel@tonic-gate * Don't forward if it didn't arrive from the parent vif for its
20448485SPeter.Memishian@Sun.COM * origin.
20450Sstevel@tonic-gate */
204611042SErik.Nordmark@Sun.COM if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) ||
20473448Sdh155122 (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
20480Sstevel@tonic-gate /* Came in the wrong interface */
20490Sstevel@tonic-gate ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
20500Sstevel@tonic-gate "numvifs %d ill %s viftable ill %s\n",
20513448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
205211042SErik.Nordmark@Sun.COM ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name));
20533448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
20545240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
20550Sstevel@tonic-gate "ip_mdq: arrived wrong if, vifi %d ill "
20560Sstevel@tonic-gate "%s viftable ill %s\n",
205711042SErik.Nordmark@Sun.COM (int)vifi, ill->ill_name,
205811042SErik.Nordmark@Sun.COM ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
20590Sstevel@tonic-gate }
20603448Sdh155122 ipst->ips_mrtstat->mrts_wrong_if++;
20610Sstevel@tonic-gate rt->mfc_wrong_if++;
20620Sstevel@tonic-gate
20630Sstevel@tonic-gate /*
20640Sstevel@tonic-gate * If we are doing PIM assert processing and we are forwarding
20650Sstevel@tonic-gate * packets on this interface, and it is a broadcast medium
20660Sstevel@tonic-gate * interface (and not a tunnel), send a message to the routing.
20670Sstevel@tonic-gate *
20680Sstevel@tonic-gate * We use the first ipif on the list, since it's all we have.
20690Sstevel@tonic-gate * Chances are the ipif_flags are the same for ipifs on the ill.
20700Sstevel@tonic-gate */
20713448Sdh155122 if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
20720Sstevel@tonic-gate (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
20733448Sdh155122 !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
20740Sstevel@tonic-gate mblk_t *mp_copy;
20750Sstevel@tonic-gate struct igmpmsg *im;
20760Sstevel@tonic-gate
20770Sstevel@tonic-gate /* TODO could copy header and dup rest */
20780Sstevel@tonic-gate mp_copy = copymsg(mp);
20790Sstevel@tonic-gate if (mp_copy == NULL) {
20803448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
20810Sstevel@tonic-gate ip1dbg(("ip_mdq: out of memory "
20820Sstevel@tonic-gate "for mblk, mp_copy\n"));
20833448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]);
20840Sstevel@tonic-gate return (-1);
20850Sstevel@tonic-gate }
20860Sstevel@tonic-gate
20870Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr;
20880Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WRONGVIF;
20890Sstevel@tonic-gate im->im_mbz = 0;
20900Sstevel@tonic-gate im->im_vif = (ushort_t)vifi;
20915240Snordmark /* Pass to RAWIP */
209211042SErik.Nordmark@Sun.COM
209311042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras));
209411042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_IS_IPV4;
209511042SErik.Nordmark@Sun.COM iras.ira_ip_hdr_length =
209611042SErik.Nordmark@Sun.COM IPH_HDR_LENGTH(mp_copy->b_rptr);
209711042SErik.Nordmark@Sun.COM iras.ira_pktlen = msgdsize(mp_copy);
209811042SErik.Nordmark@Sun.COM (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
209911042SErik.Nordmark@Sun.COM ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
21000Sstevel@tonic-gate }
21013448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]);
21020Sstevel@tonic-gate if (tunnel_src != 0)
21030Sstevel@tonic-gate return (1);
21040Sstevel@tonic-gate else
21050Sstevel@tonic-gate return (0);
21060Sstevel@tonic-gate }
21070Sstevel@tonic-gate /*
21080Sstevel@tonic-gate * If I sourced this packet, it counts as output, else it was input.
21090Sstevel@tonic-gate */
21103448Sdh155122 if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
21113448Sdh155122 ipst->ips_vifs[vifi].v_pkt_out++;
21123448Sdh155122 ipst->ips_vifs[vifi].v_bytes_out += plen;
21130Sstevel@tonic-gate } else {
21143448Sdh155122 ipst->ips_vifs[vifi].v_pkt_in++;
21153448Sdh155122 ipst->ips_vifs[vifi].v_bytes_in += plen;
21160Sstevel@tonic-gate }
21170Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
21180Sstevel@tonic-gate rt->mfc_pkt_cnt++;
21190Sstevel@tonic-gate rt->mfc_byte_cnt += plen;
21200Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
21213448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]);
21220Sstevel@tonic-gate /*
21230Sstevel@tonic-gate * For each vif, decide if a copy of the packet should be forwarded.
21240Sstevel@tonic-gate * Forward if:
21250Sstevel@tonic-gate * - the vif threshold ttl is non-zero AND
21260Sstevel@tonic-gate * - the pkt ttl exceeds the vif's threshold
21270Sstevel@tonic-gate * A non-zero mfc_ttl indicates that the vif is part of
21280Sstevel@tonic-gate * the output set for the mfc entry.
21290Sstevel@tonic-gate */
21303448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
21313448Sdh155122 num_of_vifs = ipst->ips_numvifs;
21323448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
21333448Sdh155122 for (vifp = ipst->ips_vifs, vifi = 0;
21343448Sdh155122 vifi < num_of_vifs;
21353448Sdh155122 vifp++, vifi++) {
21360Sstevel@tonic-gate if (!lock_good_vif(vifp))
21370Sstevel@tonic-gate continue;
21380Sstevel@tonic-gate if ((rt->mfc_ttls[vifi] > 0) &&
21390Sstevel@tonic-gate (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
21400Sstevel@tonic-gate /*
21410Sstevel@tonic-gate * lock_good_vif should not have succedded if
21420Sstevel@tonic-gate * v_ipif is null.
21430Sstevel@tonic-gate */
21440Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL);
21450Sstevel@tonic-gate vifp->v_pkt_out++;
21460Sstevel@tonic-gate vifp->v_bytes_out += plen;
21470Sstevel@tonic-gate MC_SEND(ipha, mp, vifp, dst);
21483448Sdh155122 ipst->ips_mrtstat->mrts_fwd_out++;
21490Sstevel@tonic-gate }
21500Sstevel@tonic-gate unlock_good_vif(vifp);
21510Sstevel@tonic-gate }
21520Sstevel@tonic-gate if (tunnel_src != 0)
21530Sstevel@tonic-gate return (1);
21540Sstevel@tonic-gate else
21550Sstevel@tonic-gate return (0);
21560Sstevel@tonic-gate }
21570Sstevel@tonic-gate
21580Sstevel@tonic-gate /*
21590Sstevel@tonic-gate * Send the packet on physical interface.
21600Sstevel@tonic-gate * Caller assumes can continue to use mp on return.
21610Sstevel@tonic-gate */
21620Sstevel@tonic-gate /* ARGSUSED */
21630Sstevel@tonic-gate static void
phyint_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)21640Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
21650Sstevel@tonic-gate {
21660Sstevel@tonic-gate mblk_t *mp_copy;
21673448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
21685240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
21690Sstevel@tonic-gate
21700Sstevel@tonic-gate /* Make a new reference to the packet */
21710Sstevel@tonic-gate mp_copy = copymsg(mp); /* TODO could copy header and dup rest */
21720Sstevel@tonic-gate if (mp_copy == NULL) {
21733448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++;
21740Sstevel@tonic-gate ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
21750Sstevel@tonic-gate return;
21760Sstevel@tonic-gate }
21770Sstevel@tonic-gate if (vifp->v_rate_limit <= 0)
21780Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy);
21790Sstevel@tonic-gate else {
21803448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
21815240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
21820Sstevel@tonic-gate "phyint_send: tbf_contr rate %d "
21830Sstevel@tonic-gate "vifp 0x%p mp 0x%p dst 0x%x",
21840Sstevel@tonic-gate vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
21850Sstevel@tonic-gate }
21860Sstevel@tonic-gate tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
21870Sstevel@tonic-gate }
21880Sstevel@tonic-gate }
21890Sstevel@tonic-gate
21900Sstevel@tonic-gate /*
21910Sstevel@tonic-gate * Send the whole packet for REGISTER encapsulation to PIM daemon
21920Sstevel@tonic-gate * Caller assumes it can continue to use mp on return.
21930Sstevel@tonic-gate */
21940Sstevel@tonic-gate /* ARGSUSED */
21950Sstevel@tonic-gate static void
register_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)21960Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
21970Sstevel@tonic-gate {
21980Sstevel@tonic-gate struct igmpmsg *im;
21990Sstevel@tonic-gate mblk_t *mp_copy;
22000Sstevel@tonic-gate ipha_t *ipha_copy;
220111042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill;
220211042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst;
22035240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
220411042SErik.Nordmark@Sun.COM ip_recv_attr_t iras;
22053448Sdh155122
22063448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
22075240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22080Sstevel@tonic-gate "register_send: src %x, dst %x\n",
22090Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
22100Sstevel@tonic-gate }
22110Sstevel@tonic-gate
22120Sstevel@tonic-gate /*
22130Sstevel@tonic-gate * Copy the old packet & pullup its IP header into the new mblk_t so we
22140Sstevel@tonic-gate * can modify it. Try to fill the new mblk_t since if we don't the
22150Sstevel@tonic-gate * ethernet driver will.
22160Sstevel@tonic-gate */
22170Sstevel@tonic-gate mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
22180Sstevel@tonic-gate if (mp_copy == NULL) {
22193448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory;
22203448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) {
22215240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22220Sstevel@tonic-gate "register_send: allocb failure.");
22230Sstevel@tonic-gate }
22240Sstevel@tonic-gate return;
22250Sstevel@tonic-gate }
22260Sstevel@tonic-gate
22270Sstevel@tonic-gate /*
22280Sstevel@tonic-gate * Bump write pointer to account for igmpmsg being added.
22290Sstevel@tonic-gate */
22300Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);
22310Sstevel@tonic-gate
22320Sstevel@tonic-gate /*
22330Sstevel@tonic-gate * Chain packet to new mblk_t.
22340Sstevel@tonic-gate */
22350Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
22363448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory;
22373448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) {
22385240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22390Sstevel@tonic-gate "register_send: copymsg failure.");
22400Sstevel@tonic-gate }
22410Sstevel@tonic-gate freeb(mp_copy);
22420Sstevel@tonic-gate return;
22430Sstevel@tonic-gate }
22440Sstevel@tonic-gate
22450Sstevel@tonic-gate /*
22465240Snordmark * icmp_input() asserts that IP version field is set to an
22470Sstevel@tonic-gate * appropriate version. Hence, the struct igmpmsg that this really
22480Sstevel@tonic-gate * becomes, needs to have the correct IP version field.
22490Sstevel@tonic-gate */
22500Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr;
22510Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr;
22520Sstevel@tonic-gate
22530Sstevel@tonic-gate /*
22540Sstevel@tonic-gate * The kernel uses the struct igmpmsg header to encode the messages to
22550Sstevel@tonic-gate * the multicast routing daemon. Fill in the fields in the header
22560Sstevel@tonic-gate * starting with the message type which is IGMPMSG_WHOLEPKT
22570Sstevel@tonic-gate */
22580Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr;
22590Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WHOLEPKT;
22600Sstevel@tonic-gate im->im_src.s_addr = ipha->ipha_src;
22610Sstevel@tonic-gate im->im_dst.s_addr = ipha->ipha_dst;
22620Sstevel@tonic-gate
22630Sstevel@tonic-gate /*
22640Sstevel@tonic-gate * Must Be Zero. This is because the struct igmpmsg is really an IP
22650Sstevel@tonic-gate * header with renamed fields and the multicast routing daemon uses
22660Sstevel@tonic-gate * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
22670Sstevel@tonic-gate */
22680Sstevel@tonic-gate im->im_mbz = 0;
22690Sstevel@tonic-gate
22703448Sdh155122 ++ipst->ips_mrtstat->mrts_upcalls;
227111042SErik.Nordmark@Sun.COM if (IPCL_IS_NONSTR(mrouter) ? mrouter->conn_flow_cntrld :
227211042SErik.Nordmark@Sun.COM !canputnext(mrouter->conn_rq)) {
22733448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regsend_drops;
22743448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) {
22755240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
22760Sstevel@tonic-gate "register_send: register upcall failure.");
22770Sstevel@tonic-gate }
227811042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
227911042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill);
22800Sstevel@tonic-gate freemsg(mp_copy);
22810Sstevel@tonic-gate } else {
22825240Snordmark /* Pass to RAWIP */
228311042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras));
228411042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_IS_IPV4;
228511042SErik.Nordmark@Sun.COM iras.ira_ip_hdr_length = sizeof (ipha_t);
228611042SErik.Nordmark@Sun.COM iras.ira_pktlen = msgdsize(mp_copy);
228711042SErik.Nordmark@Sun.COM (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
228811042SErik.Nordmark@Sun.COM ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
22890Sstevel@tonic-gate }
22900Sstevel@tonic-gate }
22910Sstevel@tonic-gate
22920Sstevel@tonic-gate /*
22930Sstevel@tonic-gate * pim_validate_cksum handles verification of the checksum in the
22940Sstevel@tonic-gate * pim header. For PIM Register packets, the checksum is calculated
22950Sstevel@tonic-gate * across the PIM header only. For all other packets, the checksum
22960Sstevel@tonic-gate * is for the PIM header and remainder of the packet.
22970Sstevel@tonic-gate *
22980Sstevel@tonic-gate * returns: B_TRUE, if checksum is okay.
22990Sstevel@tonic-gate * B_FALSE, if checksum is not valid.
23000Sstevel@tonic-gate */
23010Sstevel@tonic-gate static boolean_t
pim_validate_cksum(mblk_t * mp,ipha_t * ip,struct pim * pimp)23020Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
23030Sstevel@tonic-gate {
23040Sstevel@tonic-gate mblk_t *mp_dup;
23050Sstevel@tonic-gate
23060Sstevel@tonic-gate if ((mp_dup = dupmsg(mp)) == NULL)
23070Sstevel@tonic-gate return (B_FALSE);
23080Sstevel@tonic-gate
23090Sstevel@tonic-gate mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
23100Sstevel@tonic-gate if (pimp->pim_type == PIM_REGISTER)
23110Sstevel@tonic-gate mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
23120Sstevel@tonic-gate if (IP_CSUM(mp_dup, 0, 0)) {
23130Sstevel@tonic-gate freemsg(mp_dup);
23140Sstevel@tonic-gate return (B_FALSE);
23150Sstevel@tonic-gate }
23160Sstevel@tonic-gate freemsg(mp_dup);
23170Sstevel@tonic-gate return (B_TRUE);
23180Sstevel@tonic-gate }
23190Sstevel@tonic-gate
23200Sstevel@tonic-gate /*
232111042SErik.Nordmark@Sun.COM * Process PIM protocol packets i.e. IP Protocol 103.
232211042SErik.Nordmark@Sun.COM * Register messages are decapsulated and sent onto multicast forwarding.
232311042SErik.Nordmark@Sun.COM *
232411042SErik.Nordmark@Sun.COM * Return NULL for a bad packet that is discarded here.
232511042SErik.Nordmark@Sun.COM * Return mp if the message is OK and should be handed to "raw" receivers.
232611042SErik.Nordmark@Sun.COM * Callers of pim_input() may need to reinitialize variables that were copied
232711042SErik.Nordmark@Sun.COM * from the mblk as this calls pullupmsg().
23280Sstevel@tonic-gate */
232911042SErik.Nordmark@Sun.COM mblk_t *
pim_input(mblk_t * mp,ip_recv_attr_t * ira)233011042SErik.Nordmark@Sun.COM pim_input(mblk_t *mp, ip_recv_attr_t *ira)
23310Sstevel@tonic-gate {
23320Sstevel@tonic-gate ipha_t *eip, *ip;
23330Sstevel@tonic-gate int iplen, pimlen, iphlen;
23340Sstevel@tonic-gate struct pim *pimp; /* pointer to a pim struct */
23350Sstevel@tonic-gate uint32_t *reghdr;
233611042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill;
23373448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
23385240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
23390Sstevel@tonic-gate
23400Sstevel@tonic-gate /*
23410Sstevel@tonic-gate * Pullup the msg for PIM protocol processing.
23420Sstevel@tonic-gate */
23430Sstevel@tonic-gate if (pullupmsg(mp, -1) == 0) {
23443448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory;
234511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
234611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_nomemory", mp, ill);
23470Sstevel@tonic-gate freemsg(mp);
234811042SErik.Nordmark@Sun.COM return (NULL);
23490Sstevel@tonic-gate }
23500Sstevel@tonic-gate
23510Sstevel@tonic-gate ip = (ipha_t *)mp->b_rptr;
23520Sstevel@tonic-gate iplen = ip->ipha_length;
23530Sstevel@tonic-gate iphlen = IPH_HDR_LENGTH(ip);
23540Sstevel@tonic-gate pimlen = ntohs(iplen) - iphlen;
23550Sstevel@tonic-gate
23560Sstevel@tonic-gate /*
23570Sstevel@tonic-gate * Validate lengths
23580Sstevel@tonic-gate */
23590Sstevel@tonic-gate if (pimlen < PIM_MINLEN) {
23603448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_malformed;
23613448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
23625240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23630Sstevel@tonic-gate "pim_input: length not at least minlen");
23640Sstevel@tonic-gate }
236511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
236611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_malformed", mp, ill);
23670Sstevel@tonic-gate freemsg(mp);
236811042SErik.Nordmark@Sun.COM return (NULL);
23690Sstevel@tonic-gate }
23700Sstevel@tonic-gate
23710Sstevel@tonic-gate /*
23720Sstevel@tonic-gate * Point to the PIM header.
23730Sstevel@tonic-gate */
23740Sstevel@tonic-gate pimp = (struct pim *)((caddr_t)ip + iphlen);
23750Sstevel@tonic-gate
23760Sstevel@tonic-gate /*
23770Sstevel@tonic-gate * Check the version number.
23780Sstevel@tonic-gate */
23790Sstevel@tonic-gate if (pimp->pim_vers != PIM_VERSION) {
23803448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badversion;
23813448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
23825240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23830Sstevel@tonic-gate "pim_input: unknown version of PIM");
23840Sstevel@tonic-gate }
238511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
238611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_badversion", mp, ill);
23870Sstevel@tonic-gate freemsg(mp);
238811042SErik.Nordmark@Sun.COM return (NULL);
23890Sstevel@tonic-gate }
23900Sstevel@tonic-gate
23910Sstevel@tonic-gate /*
23920Sstevel@tonic-gate * Validate the checksum
23930Sstevel@tonic-gate */
23940Sstevel@tonic-gate if (!pim_validate_cksum(mp, ip, pimp)) {
23953448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
23963448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
23975240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
23980Sstevel@tonic-gate "pim_input: invalid checksum");
23990Sstevel@tonic-gate }
240011042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
240111042SErik.Nordmark@Sun.COM ip_drop_input("pim_rcv_badcsum", mp, ill);
24020Sstevel@tonic-gate freemsg(mp);
240311042SErik.Nordmark@Sun.COM return (NULL);
24040Sstevel@tonic-gate }
24050Sstevel@tonic-gate
24060Sstevel@tonic-gate if (pimp->pim_type != PIM_REGISTER)
240711042SErik.Nordmark@Sun.COM return (mp);
24080Sstevel@tonic-gate
24090Sstevel@tonic-gate reghdr = (uint32_t *)(pimp + 1);
24100Sstevel@tonic-gate eip = (ipha_t *)(reghdr + 1);
24110Sstevel@tonic-gate
24120Sstevel@tonic-gate /*
24130Sstevel@tonic-gate * check if the inner packet is destined to mcast group
24140Sstevel@tonic-gate */
24150Sstevel@tonic-gate if (!CLASSD(eip->ipha_dst)) {
24163448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badregisters;
24173448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
24185240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24190Sstevel@tonic-gate "pim_input: Inner pkt not mcast .. !");
24200Sstevel@tonic-gate }
242111042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
242211042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_badregisters", mp, ill);
24230Sstevel@tonic-gate freemsg(mp);
242411042SErik.Nordmark@Sun.COM return (NULL);
24250Sstevel@tonic-gate }
24263448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
24275240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24280Sstevel@tonic-gate "register from %x, to %x, len %d",
24290Sstevel@tonic-gate ntohl(eip->ipha_src),
24300Sstevel@tonic-gate ntohl(eip->ipha_dst),
24310Sstevel@tonic-gate ntohs(eip->ipha_length));
24320Sstevel@tonic-gate }
24330Sstevel@tonic-gate /*
24340Sstevel@tonic-gate * If the null register bit is not set, decapsulate
24350Sstevel@tonic-gate * the packet before forwarding it.
243611042SErik.Nordmark@Sun.COM * Avoid this in no register vif
24370Sstevel@tonic-gate */
243811042SErik.Nordmark@Sun.COM if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) &&
243911042SErik.Nordmark@Sun.COM ipst->ips_reg_vif_num != ALL_VIFS) {
24400Sstevel@tonic-gate mblk_t *mp_copy;
244111042SErik.Nordmark@Sun.COM uint_t saved_pktlen;
24420Sstevel@tonic-gate
24430Sstevel@tonic-gate /* Copy the message */
24440Sstevel@tonic-gate if ((mp_copy = copymsg(mp)) == NULL) {
24453448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory;
244611042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
244711042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_nomemory", mp, ill);
24480Sstevel@tonic-gate freemsg(mp);
244911042SErik.Nordmark@Sun.COM return (NULL);
24500Sstevel@tonic-gate }
24510Sstevel@tonic-gate
24520Sstevel@tonic-gate /*
24530Sstevel@tonic-gate * Decapsulate the packet and give it to
24540Sstevel@tonic-gate * register_mforward.
24550Sstevel@tonic-gate */
245611042SErik.Nordmark@Sun.COM mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr);
245711042SErik.Nordmark@Sun.COM saved_pktlen = ira->ira_pktlen;
245811042SErik.Nordmark@Sun.COM ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr);
245911042SErik.Nordmark@Sun.COM if (register_mforward(mp_copy, ira) != 0) {
246011042SErik.Nordmark@Sun.COM /* register_mforward already called ip_drop_input */
24610Sstevel@tonic-gate freemsg(mp);
246211042SErik.Nordmark@Sun.COM ira->ira_pktlen = saved_pktlen;
246311042SErik.Nordmark@Sun.COM return (NULL);
24640Sstevel@tonic-gate }
246511042SErik.Nordmark@Sun.COM ira->ira_pktlen = saved_pktlen;
24660Sstevel@tonic-gate }
24670Sstevel@tonic-gate
24680Sstevel@tonic-gate /*
24690Sstevel@tonic-gate * Pass all valid PIM packets up to any process(es) listening on a raw
24700Sstevel@tonic-gate * PIM socket. For Solaris it is done right after pim_input() is
24710Sstevel@tonic-gate * called.
24720Sstevel@tonic-gate */
247311042SErik.Nordmark@Sun.COM return (mp);
24740Sstevel@tonic-gate }
24750Sstevel@tonic-gate
24760Sstevel@tonic-gate /*
24770Sstevel@tonic-gate * PIM sparse mode hook. Called by pim_input after decapsulating
24780Sstevel@tonic-gate * the packet. Loop back the packet, as if we have received it.
24790Sstevel@tonic-gate * In pim_input() we have to check if the destination is a multicast address.
24800Sstevel@tonic-gate */
24810Sstevel@tonic-gate static int
register_mforward(mblk_t * mp,ip_recv_attr_t * ira)248211042SErik.Nordmark@Sun.COM register_mforward(mblk_t *mp, ip_recv_attr_t *ira)
24830Sstevel@tonic-gate {
248411042SErik.Nordmark@Sun.COM ire_t *ire;
248511042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr;
248611042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill;
24873448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
24885240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
24893448Sdh155122
24903448Sdh155122 ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);
24913448Sdh155122
24923448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) {
24935240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
24940Sstevel@tonic-gate "register_mforward: src %x, dst %x\n",
24950Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
24960Sstevel@tonic-gate }
24970Sstevel@tonic-gate /*
24980Sstevel@tonic-gate * Need to pass in to ip_mforward() the information that the
249911042SErik.Nordmark@Sun.COM * packet has arrived on the register_vif. We mark it with
250011042SErik.Nordmark@Sun.COM * the IRAF_PIM_REGISTER attribute.
250111042SErik.Nordmark@Sun.COM * pim_input verified that the (inner) destination is multicast,
250211042SErik.Nordmark@Sun.COM * hence we skip the generic code in ip_input.
25030Sstevel@tonic-gate */
250411042SErik.Nordmark@Sun.COM ira->ira_flags |= IRAF_PIM_REGISTER;
25053448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regforwards;
250611042SErik.Nordmark@Sun.COM
250711042SErik.Nordmark@Sun.COM if (!CLASSD(ipha->ipha_dst)) {
250811042SErik.Nordmark@Sun.COM ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES,
250911457SErik.Nordmark@Sun.COM ira->ira_tsl, MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst,
251011457SErik.Nordmark@Sun.COM NULL, NULL, NULL);
251111042SErik.Nordmark@Sun.COM } else {
251211042SErik.Nordmark@Sun.COM ire = ire_multicast(ill);
251311042SErik.Nordmark@Sun.COM }
251411042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
251511042SErik.Nordmark@Sun.COM /* Normally this will return the IRE_MULTICAST */
251611042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
251711042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
251811042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim RTF_REJECT", mp, ill);
251911042SErik.Nordmark@Sun.COM freemsg(mp);
252011042SErik.Nordmark@Sun.COM ire_refrele(ire);
252111042SErik.Nordmark@Sun.COM return (-1);
252211042SErik.Nordmark@Sun.COM }
252311042SErik.Nordmark@Sun.COM ASSERT(ire->ire_type & IRE_MULTICAST);
252411042SErik.Nordmark@Sun.COM (*ire->ire_recvfn)(ire, mp, ipha, ira);
252511042SErik.Nordmark@Sun.COM ire_refrele(ire);
252611042SErik.Nordmark@Sun.COM
25270Sstevel@tonic-gate return (0);
25280Sstevel@tonic-gate }
25290Sstevel@tonic-gate
25300Sstevel@tonic-gate /*
25310Sstevel@tonic-gate * Send an encapsulated packet.
25320Sstevel@tonic-gate * Caller assumes can continue to use mp when routine returns.
25330Sstevel@tonic-gate */
25340Sstevel@tonic-gate /* ARGSUSED */
25350Sstevel@tonic-gate static void
encap_send(ipha_t * ipha,mblk_t * mp,struct vif * vifp,ipaddr_t dst)25360Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
25370Sstevel@tonic-gate {
25380Sstevel@tonic-gate mblk_t *mp_copy;
25390Sstevel@tonic-gate ipha_t *ipha_copy;
25400Sstevel@tonic-gate size_t len;
25413448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
25425240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
25433448Sdh155122
25443448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
25455240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25463448Sdh155122 "encap_send: vif %ld enter",
25473448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs));
25480Sstevel@tonic-gate }
25490Sstevel@tonic-gate len = ntohs(ipha->ipha_length);
25500Sstevel@tonic-gate
25510Sstevel@tonic-gate /*
25520Sstevel@tonic-gate * Copy the old packet & pullup it's IP header into the
25530Sstevel@tonic-gate * new mbuf so we can modify it. Try to fill the new
25540Sstevel@tonic-gate * mbuf since if we don't the ethernet driver will.
25550Sstevel@tonic-gate */
25560Sstevel@tonic-gate mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
25570Sstevel@tonic-gate if (mp_copy == NULL)
25580Sstevel@tonic-gate return;
25590Sstevel@tonic-gate mp_copy->b_rptr += 32;
25600Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
25610Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
25620Sstevel@tonic-gate freeb(mp_copy);
25630Sstevel@tonic-gate return;
25640Sstevel@tonic-gate }
25650Sstevel@tonic-gate
25660Sstevel@tonic-gate /*
25670Sstevel@tonic-gate * Fill in the encapsulating IP header.
25680Sstevel@tonic-gate * Remote tunnel dst in rmt_addr, from add_vif().
25690Sstevel@tonic-gate */
25700Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr;
25710Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr;
25720Sstevel@tonic-gate ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
25730Sstevel@tonic-gate ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
25740Sstevel@tonic-gate ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
25750Sstevel@tonic-gate ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
25760Sstevel@tonic-gate ASSERT(ipha_copy->ipha_ident == 0);
25770Sstevel@tonic-gate
25780Sstevel@tonic-gate /* Turn the encapsulated IP header back into a valid one. */
25790Sstevel@tonic-gate ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
25800Sstevel@tonic-gate ipha->ipha_ttl--;
25810Sstevel@tonic-gate ipha->ipha_hdr_checksum = 0;
25820Sstevel@tonic-gate ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
25830Sstevel@tonic-gate
258411042SErik.Nordmark@Sun.COM ipha_copy->ipha_ttl = ipha->ipha_ttl;
258511042SErik.Nordmark@Sun.COM
25863448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
25875240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
25880Sstevel@tonic-gate "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
25890Sstevel@tonic-gate }
25900Sstevel@tonic-gate if (vifp->v_rate_limit <= 0)
25910Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy);
25920Sstevel@tonic-gate else
25930Sstevel@tonic-gate /* ipha is from the original header */
25940Sstevel@tonic-gate tbf_control(vifp, mp_copy, ipha);
25950Sstevel@tonic-gate }
25960Sstevel@tonic-gate
25970Sstevel@tonic-gate /*
259811042SErik.Nordmark@Sun.COM * De-encapsulate a packet and feed it back through IP input if it
259911042SErik.Nordmark@Sun.COM * matches one of our multicast tunnels.
260011042SErik.Nordmark@Sun.COM *
26010Sstevel@tonic-gate * This routine is called whenever IP gets a packet with prototype
260211042SErik.Nordmark@Sun.COM * IPPROTO_ENCAP and a local destination address and the packet didn't
260311042SErik.Nordmark@Sun.COM * match one of our configured IP-in-IP tunnels.
26040Sstevel@tonic-gate */
26050Sstevel@tonic-gate void
ip_mroute_decap(mblk_t * mp,ip_recv_attr_t * ira)260611042SErik.Nordmark@Sun.COM ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira)
26070Sstevel@tonic-gate {
26080Sstevel@tonic-gate ipha_t *ipha = (ipha_t *)mp->b_rptr;
26090Sstevel@tonic-gate ipha_t *ipha_encap;
26100Sstevel@tonic-gate int hlen = IPH_HDR_LENGTH(ipha);
261111042SErik.Nordmark@Sun.COM int hlen_encap;
26120Sstevel@tonic-gate ipaddr_t src;
26130Sstevel@tonic-gate struct vif *vifp;
261411042SErik.Nordmark@Sun.COM ire_t *ire;
261511042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill;
26163448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
26175240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
26180Sstevel@tonic-gate
261911042SErik.Nordmark@Sun.COM /* Make sure we have all of the inner header */
262011042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen);
262111042SErik.Nordmark@Sun.COM if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) {
262211042SErik.Nordmark@Sun.COM ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira);
262311042SErik.Nordmark@Sun.COM if (ipha == NULL) {
262411042SErik.Nordmark@Sun.COM ipst->ips_mrtstat->mrts_bad_tunnel++;
262511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
262611042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: too short", mp, ill);
262711042SErik.Nordmark@Sun.COM freemsg(mp);
262811042SErik.Nordmark@Sun.COM return;
262911042SErik.Nordmark@Sun.COM }
263011042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen);
263111042SErik.Nordmark@Sun.COM }
263211042SErik.Nordmark@Sun.COM hlen_encap = IPH_HDR_LENGTH(ipha_encap);
263311042SErik.Nordmark@Sun.COM if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) {
263411042SErik.Nordmark@Sun.COM ipha = ip_pullup(mp, hlen + hlen_encap, ira);
263511042SErik.Nordmark@Sun.COM if (ipha == NULL) {
263611042SErik.Nordmark@Sun.COM ipst->ips_mrtstat->mrts_bad_tunnel++;
263711042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
263811042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: too short", mp, ill);
263911042SErik.Nordmark@Sun.COM freemsg(mp);
264011042SErik.Nordmark@Sun.COM return;
264111042SErik.Nordmark@Sun.COM }
264211042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen);
264311042SErik.Nordmark@Sun.COM }
264411042SErik.Nordmark@Sun.COM
26450Sstevel@tonic-gate /*
26460Sstevel@tonic-gate * Dump the packet if it's not to a multicast destination or if
26470Sstevel@tonic-gate * we don't have an encapsulating tunnel with the source.
26480Sstevel@tonic-gate * Note: This code assumes that the remote site IP address
26490Sstevel@tonic-gate * uniquely identifies the tunnel (i.e., that this site has
26500Sstevel@tonic-gate * at most one tunnel with the remote site).
26510Sstevel@tonic-gate */
26520Sstevel@tonic-gate if (!CLASSD(ipha_encap->ipha_dst)) {
26533448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++;
26540Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: bad tunnel\n"));
265511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
265611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_bad_tunnel", mp, ill);
26570Sstevel@tonic-gate freemsg(mp);
26580Sstevel@tonic-gate return;
26590Sstevel@tonic-gate }
26600Sstevel@tonic-gate src = (ipaddr_t)ipha->ipha_src;
26613448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock);
26623448Sdh155122 if (src != ipst->ips_last_encap_src) {
26630Sstevel@tonic-gate struct vif *vife;
26640Sstevel@tonic-gate
26653448Sdh155122 vifp = ipst->ips_vifs;
26663448Sdh155122 vife = vifp + ipst->ips_numvifs;
26673448Sdh155122 ipst->ips_last_encap_src = src;
26683448Sdh155122 ipst->ips_last_encap_vif = 0;
26690Sstevel@tonic-gate for (; vifp < vife; ++vifp) {
26700Sstevel@tonic-gate if (!lock_good_vif(vifp))
26710Sstevel@tonic-gate continue;
26720Sstevel@tonic-gate if (vifp->v_rmt_addr.s_addr == src) {
26730Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL)
26743448Sdh155122 ipst->ips_last_encap_vif = vifp;
26753448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
26765240Snordmark (void) mi_strlog(mrouter->conn_rq,
26770Sstevel@tonic-gate 1, SL_TRACE,
26780Sstevel@tonic-gate "ip_mroute_decap: good tun "
26790Sstevel@tonic-gate "vif %ld with %x",
26803448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs),
26810Sstevel@tonic-gate ntohl(src));
26820Sstevel@tonic-gate }
26830Sstevel@tonic-gate unlock_good_vif(vifp);
26840Sstevel@tonic-gate break;
26850Sstevel@tonic-gate }
26860Sstevel@tonic-gate unlock_good_vif(vifp);
26870Sstevel@tonic-gate }
26880Sstevel@tonic-gate }
26893448Sdh155122 if ((vifp = ipst->ips_last_encap_vif) == 0) {
26903448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock);
26913448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++;
269211042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
269311042SErik.Nordmark@Sun.COM ip_drop_input("mrts_bad_tunnel", mp, ill);
26940Sstevel@tonic-gate freemsg(mp);
26950Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
26963448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
26970Sstevel@tonic-gate return;
26980Sstevel@tonic-gate }
26993448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock);
27000Sstevel@tonic-gate
27010Sstevel@tonic-gate /*
27020Sstevel@tonic-gate * Need to pass in the tunnel source to ip_mforward (so that it can
270311042SErik.Nordmark@Sun.COM * verify that the packet arrived over the correct vif.)
270411042SErik.Nordmark@Sun.COM */
270511042SErik.Nordmark@Sun.COM ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET;
270611042SErik.Nordmark@Sun.COM ira->ira_mroute_tunnel = src;
270711042SErik.Nordmark@Sun.COM mp->b_rptr += hlen;
270811042SErik.Nordmark@Sun.COM ira->ira_pktlen -= hlen;
270911042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length = hlen_encap;
271011042SErik.Nordmark@Sun.COM
271111042SErik.Nordmark@Sun.COM /*
271211042SErik.Nordmark@Sun.COM * We don't redo any of the filtering in ill_input_full_v4 and we
271311042SErik.Nordmark@Sun.COM * have checked that all of ipha_encap and any IP options are
271411042SErik.Nordmark@Sun.COM * pulled up. Hence we call ire_recv_multicast_v4 directly.
271511042SErik.Nordmark@Sun.COM * However, we have to check for RSVP as in ip_input_full_v4
271611042SErik.Nordmark@Sun.COM * and if so we pass it to ire_recv_broadcast_v4 for local delivery
271711042SErik.Nordmark@Sun.COM * to the rsvpd.
27180Sstevel@tonic-gate */
271911042SErik.Nordmark@Sun.COM if (ipha_encap->ipha_protocol == IPPROTO_RSVP &&
272011042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
272111042SErik.Nordmark@Sun.COM ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill,
272211042SErik.Nordmark@Sun.COM ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR,
272311457SErik.Nordmark@Sun.COM IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL);
272411042SErik.Nordmark@Sun.COM } else {
272511042SErik.Nordmark@Sun.COM ire = ire_multicast(ill);
272611042SErik.Nordmark@Sun.COM }
272711042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
272811042SErik.Nordmark@Sun.COM /* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */
272911042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
273011042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
273111042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill);
273211042SErik.Nordmark@Sun.COM freemsg(mp);
273311042SErik.Nordmark@Sun.COM ire_refrele(ire);
273411042SErik.Nordmark@Sun.COM return;
273511042SErik.Nordmark@Sun.COM }
273611042SErik.Nordmark@Sun.COM ire->ire_ib_pkt_count++;
273711042SErik.Nordmark@Sun.COM ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST));
273811042SErik.Nordmark@Sun.COM (*ire->ire_recvfn)(ire, mp, ipha_encap, ira);
273911042SErik.Nordmark@Sun.COM ire_refrele(ire);
27400Sstevel@tonic-gate }
27410Sstevel@tonic-gate
27420Sstevel@tonic-gate /*
27430Sstevel@tonic-gate * Remove all records with v_ipif == ipif. Called when an interface goes away
27440Sstevel@tonic-gate * (stream closed). Called as writer.
27450Sstevel@tonic-gate */
27460Sstevel@tonic-gate void
reset_mrt_vif_ipif(ipif_t * ipif)27470Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif)
27480Sstevel@tonic-gate {
27490Sstevel@tonic-gate vifi_t vifi, tmp_vifi;
27500Sstevel@tonic-gate vifi_t num_of_vifs;
27513448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
27520Sstevel@tonic-gate
27530Sstevel@tonic-gate /* Can't check vifi >= 0 since vifi_t is unsigned! */
27540Sstevel@tonic-gate
27553448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
27563448Sdh155122 num_of_vifs = ipst->ips_numvifs;
27573448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
27580Sstevel@tonic-gate
27590Sstevel@tonic-gate for (vifi = num_of_vifs; vifi != 0; vifi--) {
27600Sstevel@tonic-gate tmp_vifi = vifi - 1;
27613448Sdh155122 if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) {
276211042SErik.Nordmark@Sun.COM (void) del_vif(&tmp_vifi, ipst);
27630Sstevel@tonic-gate }
27640Sstevel@tonic-gate }
27650Sstevel@tonic-gate }
27660Sstevel@tonic-gate
27670Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */
27680Sstevel@tonic-gate void
reset_mrt_ill(ill_t * ill)27690Sstevel@tonic-gate reset_mrt_ill(ill_t *ill)
27700Sstevel@tonic-gate {
277111042SErik.Nordmark@Sun.COM struct mfc *rt;
27720Sstevel@tonic-gate struct rtdetq *rte;
277311042SErik.Nordmark@Sun.COM int i;
27743448Sdh155122 ip_stack_t *ipst = ill->ill_ipst;
27755240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
277611042SErik.Nordmark@Sun.COM timeout_id_t id;
27770Sstevel@tonic-gate
27780Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) {
27793448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]);
27803448Sdh155122 if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
27813448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
27825240Snordmark (void) mi_strlog(mrouter->conn_rq, 1,
27833448Sdh155122 SL_TRACE,
27840Sstevel@tonic-gate "reset_mrt_ill: mfctable [%d]", i);
27850Sstevel@tonic-gate }
27860Sstevel@tonic-gate while (rt != NULL) {
27870Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
27880Sstevel@tonic-gate while ((rte = rt->mfc_rte) != NULL) {
278911042SErik.Nordmark@Sun.COM if (rte->ill == ill &&
279011042SErik.Nordmark@Sun.COM (id = rt->mfc_timeout_id) != 0) {
279111042SErik.Nordmark@Sun.COM /*
279211042SErik.Nordmark@Sun.COM * Its ok to drop the lock, the
279311042SErik.Nordmark@Sun.COM * struct cannot be freed since
279411042SErik.Nordmark@Sun.COM * we have a ref on the hash
279511042SErik.Nordmark@Sun.COM * bucket.
279611042SErik.Nordmark@Sun.COM */
279711042SErik.Nordmark@Sun.COM mutex_exit(&rt->mfc_mutex);
279811042SErik.Nordmark@Sun.COM (void) untimeout(id);
279911042SErik.Nordmark@Sun.COM mutex_enter(&rt->mfc_mutex);
280011042SErik.Nordmark@Sun.COM }
28010Sstevel@tonic-gate if (rte->ill == ill) {
28023448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
28033448Sdh155122 (void) mi_strlog(
28045240Snordmark mrouter->conn_rq,
28053448Sdh155122 1, SL_TRACE,
28063448Sdh155122 "reset_mrt_ill: "
28077240Srh87107 "ill 0x%p", (void *)ill);
28080Sstevel@tonic-gate }
28090Sstevel@tonic-gate rt->mfc_rte = rte->rte_next;
28100Sstevel@tonic-gate freemsg(rte->mp);
28110Sstevel@tonic-gate mi_free((char *)rte);
28120Sstevel@tonic-gate }
28130Sstevel@tonic-gate }
28140Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
28150Sstevel@tonic-gate rt = rt->mfc_next;
28160Sstevel@tonic-gate }
28170Sstevel@tonic-gate }
28183448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]);
28190Sstevel@tonic-gate }
28200Sstevel@tonic-gate }
28210Sstevel@tonic-gate
28220Sstevel@tonic-gate /*
28230Sstevel@tonic-gate * Token bucket filter module.
28240Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap.
28250Sstevel@tonic-gate */
28260Sstevel@tonic-gate static void
tbf_control(struct vif * vifp,mblk_t * mp,ipha_t * ipha)28270Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
28280Sstevel@tonic-gate {
28290Sstevel@tonic-gate size_t p_len = msgdsize(mp);
28300Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
28310Sstevel@tonic-gate timeout_id_t id = 0;
283211042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill;
283311042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst;
28345240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
28350Sstevel@tonic-gate
28360Sstevel@tonic-gate /* Drop if packet is too large */
28370Sstevel@tonic-gate if (p_len > MAX_BKT_SIZE) {
28383448Sdh155122 ipst->ips_mrtstat->mrts_pkt2large++;
283911042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
284011042SErik.Nordmark@Sun.COM ip_drop_output("tbf_control - too large", mp, ill);
28410Sstevel@tonic-gate freemsg(mp);
28420Sstevel@tonic-gate return;
28430Sstevel@tonic-gate }
28443448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
28455240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28460Sstevel@tonic-gate "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
28473448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
28480Sstevel@tonic-gate ntohl(ipha->ipha_dst));
28490Sstevel@tonic-gate }
28500Sstevel@tonic-gate
28510Sstevel@tonic-gate mutex_enter(&t->tbf_lock);
28520Sstevel@tonic-gate
28530Sstevel@tonic-gate tbf_update_tokens(vifp);
28540Sstevel@tonic-gate
28550Sstevel@tonic-gate /*
28560Sstevel@tonic-gate * If there are enough tokens,
28570Sstevel@tonic-gate * and the queue is empty, send this packet out.
28580Sstevel@tonic-gate */
28593448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
28605240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
28610Sstevel@tonic-gate "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d",
28623448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
28630Sstevel@tonic-gate t->tbf_q_len);
28640Sstevel@tonic-gate }
28650Sstevel@tonic-gate /* No packets are queued */
28660Sstevel@tonic-gate if (t->tbf_q_len == 0) {
28670Sstevel@tonic-gate /* queue empty, send packet if enough tokens */
28680Sstevel@tonic-gate if (p_len <= t->tbf_n_tok) {
28690Sstevel@tonic-gate t->tbf_n_tok -= p_len;
28700Sstevel@tonic-gate mutex_exit(&t->tbf_lock);
28710Sstevel@tonic-gate tbf_send_packet(vifp, mp);
28720Sstevel@tonic-gate return;
28730Sstevel@tonic-gate } else {
28740Sstevel@tonic-gate /* Queue packet and timeout till later */
28750Sstevel@tonic-gate tbf_queue(vifp, mp);
28760Sstevel@tonic-gate ASSERT(vifp->v_timeout_id == 0);
28770Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
28780Sstevel@tonic-gate TBF_REPROCESS);
28790Sstevel@tonic-gate }
28800Sstevel@tonic-gate } else if (t->tbf_q_len < t->tbf_max_q_len) {
28810Sstevel@tonic-gate /* Finite queue length, so queue pkts and process queue */
28820Sstevel@tonic-gate tbf_queue(vifp, mp);
28830Sstevel@tonic-gate tbf_process_q(vifp);
28840Sstevel@tonic-gate } else {
28850Sstevel@tonic-gate /* Check that we have UDP header with IP header */
28860Sstevel@tonic-gate size_t hdr_length = IPH_HDR_LENGTH(ipha) +
28875240Snordmark sizeof (struct udphdr);
28880Sstevel@tonic-gate
28890Sstevel@tonic-gate if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
28900Sstevel@tonic-gate if (!pullupmsg(mp, hdr_length)) {
289111042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib,
289211042SErik.Nordmark@Sun.COM ipIfStatsOutDiscards);
289311042SErik.Nordmark@Sun.COM ip_drop_output("tbf_control - pullup", mp, ill);
28940Sstevel@tonic-gate freemsg(mp);
28950Sstevel@tonic-gate ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
28960Sstevel@tonic-gate "vif %ld src 0x%x dst 0x%x\n",
28973448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs),
28980Sstevel@tonic-gate ntohl(ipha->ipha_src),
28990Sstevel@tonic-gate ntohl(ipha->ipha_dst)));
29000Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock);
29010Sstevel@tonic-gate return;
29020Sstevel@tonic-gate } else
29030Sstevel@tonic-gate /* Have to reassign ipha after pullupmsg */
29040Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr;
29050Sstevel@tonic-gate }
29060Sstevel@tonic-gate /*
29070Sstevel@tonic-gate * Queue length too much,
29080Sstevel@tonic-gate * try to selectively dq, or queue and process
29090Sstevel@tonic-gate */
29100Sstevel@tonic-gate if (!tbf_dq_sel(vifp, ipha)) {
29113448Sdh155122 ipst->ips_mrtstat->mrts_q_overflow++;
291211042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
291311042SErik.Nordmark@Sun.COM ip_drop_output("mrts_q_overflow", mp, ill);
29140Sstevel@tonic-gate freemsg(mp);
29150Sstevel@tonic-gate } else {
29160Sstevel@tonic-gate tbf_queue(vifp, mp);
29170Sstevel@tonic-gate tbf_process_q(vifp);
29180Sstevel@tonic-gate }
29190Sstevel@tonic-gate }
29200Sstevel@tonic-gate if (t->tbf_q_len == 0) {
29210Sstevel@tonic-gate id = vifp->v_timeout_id;
29220Sstevel@tonic-gate vifp->v_timeout_id = 0;
29230Sstevel@tonic-gate }
29240Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock);
29250Sstevel@tonic-gate if (id != 0)
29260Sstevel@tonic-gate (void) untimeout(id);
29270Sstevel@tonic-gate }
29280Sstevel@tonic-gate
29290Sstevel@tonic-gate /*
29300Sstevel@tonic-gate * Adds a packet to the tbf queue at the interface.
29310Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap.
29320Sstevel@tonic-gate */
29330Sstevel@tonic-gate static void
tbf_queue(struct vif * vifp,mblk_t * mp)29340Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp)
29350Sstevel@tonic-gate {
29360Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
29373448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29385240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
29393448Sdh155122
29403448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
29415240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29423448Sdh155122 "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
29430Sstevel@tonic-gate }
29440Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock));
29450Sstevel@tonic-gate
29460Sstevel@tonic-gate if (t->tbf_t == NULL) {
29470Sstevel@tonic-gate /* Queue was empty */
29480Sstevel@tonic-gate t->tbf_q = mp;
29490Sstevel@tonic-gate } else {
29500Sstevel@tonic-gate /* Insert at tail */
29510Sstevel@tonic-gate t->tbf_t->b_next = mp;
29520Sstevel@tonic-gate }
29530Sstevel@tonic-gate /* set new tail pointer */
29540Sstevel@tonic-gate t->tbf_t = mp;
29550Sstevel@tonic-gate
29560Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL;
29570Sstevel@tonic-gate
29580Sstevel@tonic-gate t->tbf_q_len++;
29590Sstevel@tonic-gate }
29600Sstevel@tonic-gate
29610Sstevel@tonic-gate /*
29620Sstevel@tonic-gate * Process the queue at the vif interface.
29630Sstevel@tonic-gate * Drops the tbf_lock when sending packets.
29640Sstevel@tonic-gate *
29650Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0.
29660Sstevel@tonic-gate */
29670Sstevel@tonic-gate static void
tbf_process_q(struct vif * vifp)29680Sstevel@tonic-gate tbf_process_q(struct vif *vifp)
29690Sstevel@tonic-gate {
29700Sstevel@tonic-gate mblk_t *mp;
29710Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
29720Sstevel@tonic-gate size_t len;
29733448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
29745240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
29753448Sdh155122
29763448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
29775240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
29780Sstevel@tonic-gate "tbf_process_q 1: vif %ld qlen = %d",
29793448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len);
29800Sstevel@tonic-gate }
29810Sstevel@tonic-gate
29820Sstevel@tonic-gate /*
29830Sstevel@tonic-gate * Loop through the queue at the interface and send
29840Sstevel@tonic-gate * as many packets as possible.
29850Sstevel@tonic-gate */
29860Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock));
29870Sstevel@tonic-gate
29880Sstevel@tonic-gate while (t->tbf_q_len > 0) {
29890Sstevel@tonic-gate mp = t->tbf_q;
29900Sstevel@tonic-gate len = (size_t)msgdsize(mp); /* length of ip pkt */
29910Sstevel@tonic-gate
29920Sstevel@tonic-gate /* Determine if the packet can be sent */
29930Sstevel@tonic-gate if (len <= t->tbf_n_tok) {
29940Sstevel@tonic-gate /*
29950Sstevel@tonic-gate * If so, reduce no. of tokens, dequeue the packet,
29960Sstevel@tonic-gate * send the packet.
29970Sstevel@tonic-gate */
29980Sstevel@tonic-gate t->tbf_n_tok -= len;
29990Sstevel@tonic-gate
30000Sstevel@tonic-gate t->tbf_q = mp->b_next;
30010Sstevel@tonic-gate if (--t->tbf_q_len == 0) {
30020Sstevel@tonic-gate t->tbf_t = NULL;
30030Sstevel@tonic-gate }
30040Sstevel@tonic-gate mp->b_next = NULL;
30050Sstevel@tonic-gate /* Exit mutex before sending packet, then re-enter */
30060Sstevel@tonic-gate mutex_exit(&t->tbf_lock);
30070Sstevel@tonic-gate tbf_send_packet(vifp, mp);
30080Sstevel@tonic-gate mutex_enter(&t->tbf_lock);
30090Sstevel@tonic-gate } else
30100Sstevel@tonic-gate break;
30110Sstevel@tonic-gate }
30120Sstevel@tonic-gate }
30130Sstevel@tonic-gate
30140Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer. */
30150Sstevel@tonic-gate static void
tbf_reprocess_q(void * arg)30160Sstevel@tonic-gate tbf_reprocess_q(void *arg)
30170Sstevel@tonic-gate {
30180Sstevel@tonic-gate struct vif *vifp = arg;
30193448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
30205240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
30210Sstevel@tonic-gate
30220Sstevel@tonic-gate mutex_enter(&vifp->v_tbf->tbf_lock);
30230Sstevel@tonic-gate vifp->v_timeout_id = 0;
30240Sstevel@tonic-gate tbf_update_tokens(vifp);
30250Sstevel@tonic-gate
30260Sstevel@tonic-gate tbf_process_q(vifp);
30270Sstevel@tonic-gate
30280Sstevel@tonic-gate if (vifp->v_tbf->tbf_q_len > 0) {
30290Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
30300Sstevel@tonic-gate TBF_REPROCESS);
30310Sstevel@tonic-gate }
30320Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock);
30330Sstevel@tonic-gate
30343448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
30355240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30360Sstevel@tonic-gate "tbf_reprcess_q: vif %ld timeout id = %p",
30373448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id);
30380Sstevel@tonic-gate }
30390Sstevel@tonic-gate }
30400Sstevel@tonic-gate
30410Sstevel@tonic-gate /*
30420Sstevel@tonic-gate * Function that will selectively discard a member of the tbf queue,
30430Sstevel@tonic-gate * based on the precedence value and the priority.
30440Sstevel@tonic-gate *
30450Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0.
30460Sstevel@tonic-gate */
30470Sstevel@tonic-gate static int
tbf_dq_sel(struct vif * vifp,ipha_t * ipha)30480Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
30490Sstevel@tonic-gate {
30500Sstevel@tonic-gate uint_t p;
30510Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
30520Sstevel@tonic-gate mblk_t **np;
30530Sstevel@tonic-gate mblk_t *last, *mp;
305411042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill;
305511042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst;
30565240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
30573448Sdh155122
30583448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
30595240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
30600Sstevel@tonic-gate "dq_sel: vif %ld dst 0x%x",
30613448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst));
30620Sstevel@tonic-gate }
30630Sstevel@tonic-gate
30640Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock));
30650Sstevel@tonic-gate p = priority(vifp, ipha);
30660Sstevel@tonic-gate
30670Sstevel@tonic-gate np = &t->tbf_q;
30680Sstevel@tonic-gate last = NULL;
30690Sstevel@tonic-gate while ((mp = *np) != NULL) {
30700Sstevel@tonic-gate if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) {
30710Sstevel@tonic-gate *np = mp->b_next;
30720Sstevel@tonic-gate /* If removing the last packet, fix the tail pointer */
30730Sstevel@tonic-gate if (mp == t->tbf_t)
30740Sstevel@tonic-gate t->tbf_t = last;
30750Sstevel@tonic-gate mp->b_prev = mp->b_next = NULL;
307611042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
307711042SErik.Nordmark@Sun.COM ip_drop_output("tbf_dq_send", mp, ill);
30780Sstevel@tonic-gate freemsg(mp);
30790Sstevel@tonic-gate /*
30800Sstevel@tonic-gate * It's impossible for the queue to be empty, but
30810Sstevel@tonic-gate * we check anyway.
30820Sstevel@tonic-gate */
30830Sstevel@tonic-gate if (--t->tbf_q_len == 0) {
30840Sstevel@tonic-gate t->tbf_t = NULL;
30850Sstevel@tonic-gate }
30863448Sdh155122 ipst->ips_mrtstat->mrts_drop_sel++;
30870Sstevel@tonic-gate return (1);
30880Sstevel@tonic-gate }
30890Sstevel@tonic-gate np = &mp->b_next;
30900Sstevel@tonic-gate last = mp;
30910Sstevel@tonic-gate }
30920Sstevel@tonic-gate return (0);
30930Sstevel@tonic-gate }
30940Sstevel@tonic-gate
/*
 * Sends packet, 2 cases - encap tunnel, phyint.
 *
 * vifp	- the outgoing vif; VIFF_TUNNEL in v_flags selects the tunnel case.
 * mp	- the packet to send; b_rptr points at the IP header that is used
 *	  to fill in the packet length and header length of the attributes.
 *
 * In the tunnel case the packet goes to ip_output_simple() with transmit
 * attributes built on the stack.  In the phyint case it is forwarded with
 * ip_forward_xmit_v4() through an NCE obtained for the next hop; if no
 * NCE can be obtained the packet is counted as an output discard and freed.
 */
static void
tbf_send_packet(struct vif *vifp, mblk_t *mp)
{
	ipif_t		*ipif = vifp->v_ipif;
	ill_t		*ill = ipif->ipif_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	ipha_t		*ipha;

	ipha = (ipha_t *)mp->b_rptr;
	/* If encap tunnel options */
	if (vifp->v_flags & VIFF_TUNNEL) {
		ip_xmit_attr_t	ixas;

		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "tbf_send_packet: ENCAP tunnel vif %ld",
			    (ptrdiff_t)(vifp - ipst->ips_vifs));
		}
		/* Build a minimal set of transmit attributes from scratch. */
		bzero(&ixas, sizeof (ixas));
		ixas.ixa_flags =
		    IXAF_IS_IPV4 | IXAF_NO_TTL_CHANGE | IXAF_VERIFY_SOURCE;
		ixas.ixa_ipst = ipst;
		ixas.ixa_ifindex = 0;
		ixas.ixa_cred = kcred;
		ixas.ixa_cpid = NOPID;
		ixas.ixa_tsl = NULL;
		ixas.ixa_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */
		ixas.ixa_pktlen = ntohs(ipha->ipha_length);
		ixas.ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha);

		/*
		 * Feed into ip_output_simple which will set the ident field
		 * and checksum the encapsulating header.
		 * BSD gets the cached route vifp->v_route from ip_output()
		 * to speed up route table lookups. Not necessary in SunOS 5.x.
		 * One could make multicast forwarding faster by putting an
		 * ip_xmit_attr_t in each vif thereby caching the ire/nce.
		 */
		(void) ip_output_simple(mp, &ixas);
		ixa_cleanup(&ixas);
		return;

	/* phyint */
	} else {
		/* Need to loop back to members on the outgoing interface. */
		ipaddr_t	dst;
		ip_recv_attr_t	iras;
		nce_t		*nce;

		/*
		 * Build receive attributes that name the outgoing ill as the
		 * receiving ill; they are passed to ip_forward_xmit_v4 below.
		 */
		bzero(&iras, sizeof (iras));
		iras.ira_flags = IRAF_IS_IPV4;
		iras.ira_ill = iras.ira_rill = ill;
		iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
		iras.ira_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */
		iras.ira_pktlen = ntohs(ipha->ipha_length);
		iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);

		/* Ask for a loopback copy if the ill has members of dst. */
		dst = ipha->ipha_dst;
		if (ill_hasmembers_v4(ill, dst)) {
			iras.ira_flags |= IRAF_LOOPBACK_COPY;
		}
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "tbf_send_pkt: phyint forward vif %ld dst = 0x%x",
			    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst));
		}
		/*
		 * Find an NCE which matches the nexthop.
		 * For a pt-pt interface we use the other end of the pt-pt
		 * link.
		 */
		if (ipif->ipif_flags & IPIF_POINTOPOINT) {
			dst = ipif->ipif_pp_dst_addr;
			nce = arp_nce_init(ill, dst, ill->ill_net_type);
		} else {
			nce = arp_nce_init(ill, dst, IRE_MULTICAST);
		}
		if (nce == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("tbf_send_packet - no nce", mp, ill);
			freemsg(mp);
			return;
		}

		/*
		 * We don't remember the incoming ill. Thus we
		 * pretend the packet arrived on the outbound ill. This means
		 * statistics for input errors will be increased on the wrong
		 * ill but that isn't a big deal.
		 */
		ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mc_mtu,
		    0);
		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));

		/* Release the NCE now that the transmit call has returned. */
		nce_refrele(nce);
	}
}
31940Sstevel@tonic-gate
31950Sstevel@tonic-gate /*
31960Sstevel@tonic-gate * Determine the current time and then the elapsed time (between the last time
31970Sstevel@tonic-gate * and time now). Update the no. of tokens in the bucket.
31980Sstevel@tonic-gate */
31990Sstevel@tonic-gate static void
tbf_update_tokens(struct vif * vifp)32000Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp)
32010Sstevel@tonic-gate {
32020Sstevel@tonic-gate timespec_t tp;
32030Sstevel@tonic-gate hrtime_t tm;
32040Sstevel@tonic-gate struct tbf *t = vifp->v_tbf;
32053448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
32065240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
32070Sstevel@tonic-gate
32080Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock));
32090Sstevel@tonic-gate
32100Sstevel@tonic-gate /* Time in secs and nsecs, rate limit in kbits/sec */
32110Sstevel@tonic-gate gethrestime(&tp);
32120Sstevel@tonic-gate
32130Sstevel@tonic-gate /*LINTED*/
32140Sstevel@tonic-gate TV_DELTA(tp, t->tbf_last_pkt_t, tm);
32150Sstevel@tonic-gate
32160Sstevel@tonic-gate /*
32170Sstevel@tonic-gate * This formula is actually
32180Sstevel@tonic-gate * "time in seconds" * "bytes/second". Scaled for nsec.
32190Sstevel@tonic-gate * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
32200Sstevel@tonic-gate *
32210Sstevel@tonic-gate * The (1000/1024) was introduced in add_vif to optimize
32220Sstevel@tonic-gate * this divide into a shift.
32230Sstevel@tonic-gate */
32240Sstevel@tonic-gate t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8;
32250Sstevel@tonic-gate t->tbf_last_pkt_t = tp;
32260Sstevel@tonic-gate
32270Sstevel@tonic-gate if (t->tbf_n_tok > MAX_BKT_SIZE)
32280Sstevel@tonic-gate t->tbf_n_tok = MAX_BKT_SIZE;
32293448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
32305240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
32310Sstevel@tonic-gate "tbf_update_tok: tm %lld tok %d vif %ld",
32323448Sdh155122 tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs));
32330Sstevel@tonic-gate }
32340Sstevel@tonic-gate }
32350Sstevel@tonic-gate
32360Sstevel@tonic-gate /*
32370Sstevel@tonic-gate * Priority currently is based on port nos.
32380Sstevel@tonic-gate * Different forwarding mechanisms have different ways
32390Sstevel@tonic-gate * of obtaining the port no. Hence, the vif must be
32400Sstevel@tonic-gate * given along with the packet itself.
32410Sstevel@tonic-gate *
32420Sstevel@tonic-gate */
32430Sstevel@tonic-gate static int
priority(struct vif * vifp,ipha_t * ipha)32440Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha)
32450Sstevel@tonic-gate {
32460Sstevel@tonic-gate int prio;
32473448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
32485240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter;
32490Sstevel@tonic-gate
32500Sstevel@tonic-gate /* Temporary hack; may add general packet classifier some day */
32510Sstevel@tonic-gate
32520Sstevel@tonic-gate ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock));
32530Sstevel@tonic-gate
32540Sstevel@tonic-gate /*
32550Sstevel@tonic-gate * The UDP port space is divided up into four priority ranges:
32560Sstevel@tonic-gate * [0, 16384) : unclassified - lowest priority
32570Sstevel@tonic-gate * [16384, 32768) : audio - highest priority
32580Sstevel@tonic-gate * [32768, 49152) : whiteboard - medium priority
32590Sstevel@tonic-gate * [49152, 65536) : video - low priority
32600Sstevel@tonic-gate */
32610Sstevel@tonic-gate
32620Sstevel@tonic-gate if (ipha->ipha_protocol == IPPROTO_UDP) {
32630Sstevel@tonic-gate struct udphdr *udp =
32640Sstevel@tonic-gate (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha));
32650Sstevel@tonic-gate switch (ntohs(udp->uh_dport) & 0xc000) {
32660Sstevel@tonic-gate case 0x4000:
32670Sstevel@tonic-gate prio = 70;
32680Sstevel@tonic-gate break;
32690Sstevel@tonic-gate case 0x8000:
32700Sstevel@tonic-gate prio = 60;
32710Sstevel@tonic-gate break;
32720Sstevel@tonic-gate case 0xc000:
32730Sstevel@tonic-gate prio = 55;
32740Sstevel@tonic-gate break;
32750Sstevel@tonic-gate default:
32760Sstevel@tonic-gate prio = 50;
32770Sstevel@tonic-gate break;
32780Sstevel@tonic-gate }
32793448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) {
32805240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
32810Sstevel@tonic-gate "priority: port %x prio %d\n",
32820Sstevel@tonic-gate ntohs(udp->uh_dport), prio);
32830Sstevel@tonic-gate }
32840Sstevel@tonic-gate } else
32850Sstevel@tonic-gate prio = 50; /* default priority */
32860Sstevel@tonic-gate return (prio);
32870Sstevel@tonic-gate }
32880Sstevel@tonic-gate
32890Sstevel@tonic-gate /*
32900Sstevel@tonic-gate * End of token bucket filter modifications
32910Sstevel@tonic-gate */
32920Sstevel@tonic-gate
32930Sstevel@tonic-gate
32940Sstevel@tonic-gate
32950Sstevel@tonic-gate /*
32960Sstevel@tonic-gate * Produces data for netstat -M.
32970Sstevel@tonic-gate */
32980Sstevel@tonic-gate int
ip_mroute_stats(mblk_t * mp,ip_stack_t * ipst)32993448Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst)
33000Sstevel@tonic-gate {
33013448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
33023448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
33033448Sdh155122 if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat,
33043448Sdh155122 sizeof (struct mrtstat))) {
33050Sstevel@tonic-gate ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
33063448Sdh155122 (size_t)sizeof (struct mrtstat)));
33070Sstevel@tonic-gate return (0);
33080Sstevel@tonic-gate }
33090Sstevel@tonic-gate return (1);
33100Sstevel@tonic-gate }
33110Sstevel@tonic-gate
33120Sstevel@tonic-gate /*
33130Sstevel@tonic-gate * Sends info for SNMP's MIB.
33140Sstevel@tonic-gate */
33150Sstevel@tonic-gate int
ip_mroute_vif(mblk_t * mp,ip_stack_t * ipst)33163448Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst)
33170Sstevel@tonic-gate {
33180Sstevel@tonic-gate struct vifctl vi;
33190Sstevel@tonic-gate vifi_t vifi;
33200Sstevel@tonic-gate
33213448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
33223448Sdh155122 for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) {
33233448Sdh155122 if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0)
33240Sstevel@tonic-gate continue;
33250Sstevel@tonic-gate /*
33260Sstevel@tonic-gate * No locks here, an approximation is fine.
33270Sstevel@tonic-gate */
33280Sstevel@tonic-gate vi.vifc_vifi = vifi;
33293448Sdh155122 vi.vifc_flags = ipst->ips_vifs[vifi].v_flags;
33303448Sdh155122 vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold;
33313448Sdh155122 vi.vifc_rate_limit = ipst->ips_vifs[vifi].v_rate_limit;
33323448Sdh155122 vi.vifc_lcl_addr = ipst->ips_vifs[vifi].v_lcl_addr;
33333448Sdh155122 vi.vifc_rmt_addr = ipst->ips_vifs[vifi].v_rmt_addr;
33343448Sdh155122 vi.vifc_pkt_in = ipst->ips_vifs[vifi].v_pkt_in;
33353448Sdh155122 vi.vifc_pkt_out = ipst->ips_vifs[vifi].v_pkt_out;
33360Sstevel@tonic-gate
33370Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) {
33380Sstevel@tonic-gate ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
33390Sstevel@tonic-gate (size_t)sizeof (vi)));
33409658SSowmini.Varadhan@Sun.COM mutex_exit(&ipst->ips_numvifs_mutex);
33410Sstevel@tonic-gate return (0);
33420Sstevel@tonic-gate }
33430Sstevel@tonic-gate }
33443448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
33450Sstevel@tonic-gate return (1);
33460Sstevel@tonic-gate }
33470Sstevel@tonic-gate
33480Sstevel@tonic-gate /*
33490Sstevel@tonic-gate * Called by ip_snmp_get to send up multicast routing table.
33500Sstevel@tonic-gate */
33510Sstevel@tonic-gate int
ip_mroute_mrt(mblk_t * mp,ip_stack_t * ipst)33523448Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst)
33530Sstevel@tonic-gate {
33540Sstevel@tonic-gate int i, j;
33550Sstevel@tonic-gate struct mfc *rt;
33560Sstevel@tonic-gate struct mfcctl mfcc;
33570Sstevel@tonic-gate
33580Sstevel@tonic-gate /*
33590Sstevel@tonic-gate * Make sure multicast has not been turned off.
33600Sstevel@tonic-gate */
33613448Sdh155122 if (is_mrouter_off(ipst))
33620Sstevel@tonic-gate return (1);
33630Sstevel@tonic-gate
33640Sstevel@tonic-gate /* Loop over all hash buckets and their chains */
33650Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) {
33663448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]);
33673448Sdh155122 for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) {
33680Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex);
33690Sstevel@tonic-gate if (rt->mfc_rte != NULL ||
33700Sstevel@tonic-gate (rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
33710Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
33720Sstevel@tonic-gate continue;
33730Sstevel@tonic-gate }
33740Sstevel@tonic-gate mfcc.mfcc_origin = rt->mfc_origin;
33750Sstevel@tonic-gate mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp;
33760Sstevel@tonic-gate mfcc.mfcc_parent = rt->mfc_parent;
33770Sstevel@tonic-gate mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt;
33783448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex);
33793448Sdh155122 for (j = 0; j < (int)ipst->ips_numvifs; j++)
33800Sstevel@tonic-gate mfcc.mfcc_ttls[j] = rt->mfc_ttls[j];
33813448Sdh155122 for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++)
33820Sstevel@tonic-gate mfcc.mfcc_ttls[j] = 0;
33833448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex);
33840Sstevel@tonic-gate
33850Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex);
33860Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&mfcc,
33870Sstevel@tonic-gate sizeof (mfcc))) {
33883448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]);
33890Sstevel@tonic-gate ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
33900Sstevel@tonic-gate (size_t)sizeof (mfcc)));
33910Sstevel@tonic-gate return (0);
33920Sstevel@tonic-gate }
33930Sstevel@tonic-gate }
33943448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]);
33950Sstevel@tonic-gate }
33960Sstevel@tonic-gate return (1);
33970Sstevel@tonic-gate }
3398