10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51676Sjpk * Common Development and Distribution License (the "License"). 61676Sjpk * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*11457SErik.Nordmark@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 2311042SErik.Nordmark@Sun.COM * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * Procedures for the kernel part of DVMRP, 290Sstevel@tonic-gate * a Distance-Vector Multicast Routing Protocol. 300Sstevel@tonic-gate * (See RFC-1075) 310Sstevel@tonic-gate * Written by David Waitzman, BBN Labs, August 1988. 320Sstevel@tonic-gate * Modified by Steve Deering, Stanford, February 1989. 330Sstevel@tonic-gate * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 340Sstevel@tonic-gate * Modified by Van Jacobson, LBL, January 1993 350Sstevel@tonic-gate * Modified by Ajit Thyagarajan, PARC, August 1993 360Sstevel@tonic-gate * Modified by Bill Fenner, PARC, April 1995 370Sstevel@tonic-gate * 380Sstevel@tonic-gate * MROUTING 3.5 390Sstevel@tonic-gate */ 400Sstevel@tonic-gate 410Sstevel@tonic-gate /* 420Sstevel@tonic-gate * TODO 430Sstevel@tonic-gate * - function pointer field in vif, void *vif_sendit() 440Sstevel@tonic-gate */ 450Sstevel@tonic-gate 460Sstevel@tonic-gate #include <sys/types.h> 470Sstevel@tonic-gate #include <sys/stream.h> 480Sstevel@tonic-gate #include <sys/stropts.h> 490Sstevel@tonic-gate #include <sys/strlog.h> 500Sstevel@tonic-gate #include <sys/systm.h> 510Sstevel@tonic-gate #include <sys/ddi.h> 520Sstevel@tonic-gate #include <sys/cmn_err.h> 530Sstevel@tonic-gate #include <sys/zone.h> 540Sstevel@tonic-gate 550Sstevel@tonic-gate #include <sys/param.h> 560Sstevel@tonic-gate #include <sys/socket.h> 570Sstevel@tonic-gate #include <sys/vtrace.h> 580Sstevel@tonic-gate #include <sys/debug.h> 590Sstevel@tonic-gate #include <net/if.h> 600Sstevel@tonic-gate #include <sys/sockio.h> 610Sstevel@tonic-gate #include <netinet/in.h> 620Sstevel@tonic-gate #include <net/if_dl.h> 630Sstevel@tonic-gate 6411042SErik.Nordmark@Sun.COM #include <inet/ipsec_impl.h> 650Sstevel@tonic-gate #include <inet/common.h> 660Sstevel@tonic-gate #include <inet/mi.h> 670Sstevel@tonic-gate #include <inet/nd.h> 680Sstevel@tonic-gate #include <inet/mib2.h> 690Sstevel@tonic-gate #include <netinet/ip6.h> 700Sstevel@tonic-gate #include <inet/ip.h> 710Sstevel@tonic-gate #include <inet/snmpcom.h> 720Sstevel@tonic-gate 730Sstevel@tonic-gate #include <netinet/igmp.h> 740Sstevel@tonic-gate #include <netinet/igmp_var.h> 750Sstevel@tonic-gate #include <netinet/udp.h> 760Sstevel@tonic-gate #include <netinet/ip_mroute.h> 770Sstevel@tonic-gate #include <inet/ip_multi.h> 780Sstevel@tonic-gate #include <inet/ip_ire.h> 
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
#include <inet/ipclassifier.h>

#include <netinet/pim.h>


/*
 * MT Design:
 *
 * There are three main data structures viftable, mfctable and tbftable that
 * need to be protected against MT races.
 *
 * viftable is a fixed length array of vif structs. There is no lock to protect
 * the whole array, instead each struct is protected by its own individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines the
 * current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use. When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. see
 * lock_good_vif() and unlock_good_vif().
 *
 * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
 * of the vif struct.
 *
 * tbftable is also a fixed length array of tbf structs and is only accessed
 * via v_tbf. It is protected by its own lock tbf_lock.
 *
 * Lock Ordering is
 * v_lock --> tbf_lock
 * v_lock --> ill_lock
 *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
 * protect the struct elements.
 *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
 *
 * Locking Hierarchy:
 * The bucket lock should be acquired before the mfc struct lock.
 * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
 * operations on the bucket struct.
 *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock
 * causing the stats to be approximate, not exact.
 */

/* Sentinel vif index: one past the last valid slot, i.e. "no vif". */
#define	NO_VIF	MAXVIFS		/* from mrouted, no route for src */

/*
 * Timeouts:
 *	Upcall timeouts - BSD uses boolean_t mfc->expire and
 *	nexpire[MFCTBLSIZE], the number of times expire has been called.
 *	SunOS 5.x uses mfc->timeout for each mfc.
 *	Some Unixes are limited in the number of simultaneous timeouts
 *	that can be run, SunOS 5.x does not have this restriction.
 */

/*
 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
 */
#define	EXPIRE_TIMEOUT	(hz/4)	/* 4x / second	*/
#define	UPCALL_EXPIRE	6	/* number of timeouts	*/

/*
 * Hash function for a source, group entry
 * Folds both 32-bit addresses with shifts and XOR and hands the result
 * to MFCHASHMOD, which is defined outside this file section.
 */
#define	MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	((g) >> 20) ^ ((g) >> 10) ^ (g))

#define	TBF_REPROCESS	(hz / 100)	/* 100x /second	*/

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff

/* Function declarations */
static int	add_mfc(struct mfcctl *, ip_stack_t *);
static int	add_vif(struct vifctl *, conn_t *, ip_stack_t *);
static int	del_mfc(struct mfcctl *, ip_stack_t *);
static int	del_vif(vifi_t *, ip_stack_t *);
static void	del_vifp(struct vif *);
static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void	expire_upcalls(void *);
static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
static void	free_queue(struct mfc *);
static int	get_assert(uchar_t *, ip_stack_t *);
static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
static int	get_version(uchar_t *);
static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
		    ipaddr_t, struct mfc *);
static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	register_mforward(mblk_t *, ip_recv_attr_t *);
static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	set_assert(int *, ip_stack_t *);

/*
 * Token Bucket Filter functions
 */
static int	priority(struct vif *, ipha_t *);
static void	tbf_control(struct vif *, mblk_t *, ipha_t *);
static int	tbf_dq_sel(struct vif *, ipha_t *);
static void	tbf_process_q(struct vif *);
static void	tbf_queue(struct vif *, mblk_t *);
static void	tbf_reprocess_q(void *);
static void	tbf_send_packet(struct vif *, mblk_t *);
static void	tbf_update_tokens(struct vif *);
static void	release_mfc(struct mfcb *);

static boolean_t is_mrouter_off(ip_stack_t *);
/*
 * Encapsulation packets
 */

#define	ENCAP_TTL	64

/*
 * prototype IP hdr for encapsulated packets
 * Only the leading fields are given; the members that follow the checksum
 * are left to static zero-initialization.
 */
static ipha_t multicast_encap_iphdr = {
	IP_SIMPLE_HDR_VERSION,
	0,				/* tos */
	sizeof (ipha_t),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, IPPROTO_ENCAP,
	0,				/* checksum */
};

/*
 * Rate limit for assert notification messages, in nsec.
 * The value (3 seconds) does not fit in a 32-bit int, so anything it is
 * compared against has to be a 64-bit quantity.
 */
#define	ASSERT_MSG_TIME		3000000000


/*
 * VIF_REFHOLD: add one reference to a vif.  Takes and drops v_lock itself,
 * so the caller must not already hold it.
 */
#define	VIF_REFHOLD(vifp) {			\
	mutex_enter(&(vifp)->v_lock);		\
	(vifp)->v_refcnt++;			\
	mutex_exit(&(vifp)->v_lock);		\
}

/*
 * VIF_REFRELE_LOCKED: drop one reference; the caller already holds v_lock.
 * If this was the last reference and the vif is marked VIF_MARK_CONDEMNED
 * the vif is handed to del_vifp() with v_lock still held (del_vifp() is
 * presumably responsible for releasing it -- confirm against del_vifp()).
 * In every other case the macro releases v_lock before it finishes.
 */
#define	VIF_REFRELE_LOCKED(vifp) {				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * VIF_REFRELE: same as VIF_REFRELE_LOCKED, but acquires v_lock first, so
 * the caller must not hold it.
 */
#define	VIF_REFRELE(vifp) {					\
	mutex_enter(&(vifp)->v_lock);				\
	(vifp)->v_refcnt--;					\
	if ((vifp)->v_refcnt == 0 &&				\
		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
			del_vifp(vifp);				\
	} else {						\
		mutex_exit(&(vifp)->v_lock);			\
	}							\
}

/*
 * MFCB_REFHOLD: register a walker on a hash bucket.  The ASSERT after the
 * increment catches the reference count wrapping around to zero.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	(mfcb)->mfcb_refcnt++;				\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}

/*
 * MFCB_REFRELE: unregister a walker.  When the last walker leaves a bucket
 * marked MFCB_MARK_CONDEMNED, release_mfc() is called with mfcb_lock held;
 * the lock is dropped by this macro afterwards.
 */
#define	MFCB_REFRELE(mfcb) {					\
	mutex_enter(&(mfcb)->mfcb_lock);			\
	ASSERT((mfcb)->mfcb_refcnt != 0);			\
	if (--(mfcb)->mfcb_refcnt == 0 &&			\
		((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
			release_mfc(mfcb);			\
	}							\
	mutex_exit(&(mfcb)->mfcb_lock);				\
}

/*
 * MFCFIND:
 * Find a route for a given origin IP address and multicast group address.
 * Skip entries with pending upcalls.
 * Type of service parameter to be added in the future!
2790Sstevel@tonic-gate */ 2800Sstevel@tonic-gate #define MFCFIND(mfcbp, o, g, rt) { \ 2810Sstevel@tonic-gate struct mfc *_mb_rt = NULL; \ 2820Sstevel@tonic-gate rt = NULL; \ 2830Sstevel@tonic-gate _mb_rt = mfcbp->mfcb_mfc; \ 2840Sstevel@tonic-gate while (_mb_rt) { \ 2850Sstevel@tonic-gate if ((_mb_rt->mfc_origin.s_addr == o) && \ 2860Sstevel@tonic-gate (_mb_rt->mfc_mcastgrp.s_addr == g) && \ 2870Sstevel@tonic-gate (_mb_rt->mfc_rte == NULL) && \ 2880Sstevel@tonic-gate (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \ 2890Sstevel@tonic-gate rt = _mb_rt; \ 2900Sstevel@tonic-gate break; \ 2910Sstevel@tonic-gate } \ 2920Sstevel@tonic-gate _mb_rt = _mb_rt->mfc_next; \ 2930Sstevel@tonic-gate } \ 2940Sstevel@tonic-gate } 2950Sstevel@tonic-gate 2960Sstevel@tonic-gate /* 2970Sstevel@tonic-gate * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime() 2980Sstevel@tonic-gate * are inefficient. We use gethrestime() which returns a timespec_t with 2990Sstevel@tonic-gate * sec and nsec, the resolution is machine dependent. 3000Sstevel@tonic-gate * The following 2 macros have been changed to use nsec instead of usec. 3010Sstevel@tonic-gate */ 3020Sstevel@tonic-gate /* 3030Sstevel@tonic-gate * Macros to compute elapsed time efficiently. 3040Sstevel@tonic-gate * Borrowed from Van Jacobson's scheduling code. 3050Sstevel@tonic-gate * Delta should be a hrtime_t. 
3060Sstevel@tonic-gate */ 3070Sstevel@tonic-gate #define TV_DELTA(a, b, delta) { \ 3080Sstevel@tonic-gate int xxs; \ 3090Sstevel@tonic-gate \ 3100Sstevel@tonic-gate delta = (a).tv_nsec - (b).tv_nsec; \ 3110Sstevel@tonic-gate if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 3120Sstevel@tonic-gate switch (xxs) { \ 3130Sstevel@tonic-gate case 2: \ 3140Sstevel@tonic-gate delta += 1000000000; \ 3150Sstevel@tonic-gate /*FALLTHROUGH*/ \ 3160Sstevel@tonic-gate case 1: \ 3170Sstevel@tonic-gate delta += 1000000000; \ 3180Sstevel@tonic-gate break; \ 3190Sstevel@tonic-gate default: \ 3200Sstevel@tonic-gate delta += (1000000000 * xxs); \ 3210Sstevel@tonic-gate } \ 3220Sstevel@tonic-gate } \ 3230Sstevel@tonic-gate } 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate #define TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \ 3260Sstevel@tonic-gate (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate /* 3290Sstevel@tonic-gate * Handle MRT setsockopt commands to modify the multicast routing tables. 
3300Sstevel@tonic-gate */ 3310Sstevel@tonic-gate int 33211042SErik.Nordmark@Sun.COM ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data, 33311042SErik.Nordmark@Sun.COM int datalen) 3340Sstevel@tonic-gate { 3355240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 3363448Sdh155122 3373448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 3385240Snordmark if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) { 3393448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3400Sstevel@tonic-gate return (EACCES); 3410Sstevel@tonic-gate } 3423448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3430Sstevel@tonic-gate 3440Sstevel@tonic-gate if (checkonly) { 3450Sstevel@tonic-gate /* 3460Sstevel@tonic-gate * do not do operation, just pretend to - new T_CHECK 3470Sstevel@tonic-gate * Note: Even routines further on can probably fail but 3480Sstevel@tonic-gate * this T_CHECK stuff is only to please XTI so it not 3490Sstevel@tonic-gate * necessary to be perfect. 3500Sstevel@tonic-gate */ 3510Sstevel@tonic-gate switch (cmd) { 3520Sstevel@tonic-gate case MRT_INIT: 3530Sstevel@tonic-gate case MRT_DONE: 3540Sstevel@tonic-gate case MRT_ADD_VIF: 3550Sstevel@tonic-gate case MRT_DEL_VIF: 3560Sstevel@tonic-gate case MRT_ADD_MFC: 3570Sstevel@tonic-gate case MRT_DEL_MFC: 3580Sstevel@tonic-gate case MRT_ASSERT: 3595240Snordmark return (0); 3600Sstevel@tonic-gate default: 3615240Snordmark return (EOPNOTSUPP); 3620Sstevel@tonic-gate } 3630Sstevel@tonic-gate } 3640Sstevel@tonic-gate 3650Sstevel@tonic-gate /* 3660Sstevel@tonic-gate * make sure no command is issued after multicast routing has been 3670Sstevel@tonic-gate * turned off. 
3680Sstevel@tonic-gate */ 3690Sstevel@tonic-gate if (cmd != MRT_INIT && cmd != MRT_DONE) { 3703448Sdh155122 if (is_mrouter_off(ipst)) 3710Sstevel@tonic-gate return (EINVAL); 3720Sstevel@tonic-gate } 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate switch (cmd) { 3755240Snordmark case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst)); 37611042SErik.Nordmark@Sun.COM case MRT_DONE: return (ip_mrouter_done(ipst)); 37711042SErik.Nordmark@Sun.COM case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, ipst)); 37811042SErik.Nordmark@Sun.COM case MRT_DEL_VIF: return (del_vif((vifi_t *)data, ipst)); 3793448Sdh155122 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); 3803448Sdh155122 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); 3813448Sdh155122 case MRT_ASSERT: return (set_assert((int *)data, ipst)); 3820Sstevel@tonic-gate default: return (EOPNOTSUPP); 3830Sstevel@tonic-gate } 3840Sstevel@tonic-gate } 3850Sstevel@tonic-gate 3860Sstevel@tonic-gate /* 3870Sstevel@tonic-gate * Handle MRT getsockopt commands 3880Sstevel@tonic-gate */ 3890Sstevel@tonic-gate int 39011042SErik.Nordmark@Sun.COM ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data) 3910Sstevel@tonic-gate { 3925240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 3935240Snordmark 3945240Snordmark if (connp != ipst->ips_ip_g_mrouter) 3950Sstevel@tonic-gate return (EACCES); 3960Sstevel@tonic-gate 3970Sstevel@tonic-gate switch (cmd) { 3980Sstevel@tonic-gate case MRT_VERSION: return (get_version((uchar_t *)data)); 3993448Sdh155122 case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst)); 4000Sstevel@tonic-gate default: return (EOPNOTSUPP); 4010Sstevel@tonic-gate } 4020Sstevel@tonic-gate } 4030Sstevel@tonic-gate 4040Sstevel@tonic-gate /* 4050Sstevel@tonic-gate * Handle ioctl commands to obtain information from the cache. 4060Sstevel@tonic-gate * Called with shared access to IP. These are read_only ioctls. 
4070Sstevel@tonic-gate */ 4080Sstevel@tonic-gate /* ARGSUSED */ 4090Sstevel@tonic-gate int 4100Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, 4110Sstevel@tonic-gate ip_ioctl_cmd_t *ipip, void *if_req) 4120Sstevel@tonic-gate { 4130Sstevel@tonic-gate mblk_t *mp1; 4140Sstevel@tonic-gate struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4155240Snordmark conn_t *connp = Q_TO_CONN(q); 4165240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4170Sstevel@tonic-gate 4180Sstevel@tonic-gate /* Existence verified in ip_wput_nondata */ 4190Sstevel@tonic-gate mp1 = mp->b_cont->b_cont; 4200Sstevel@tonic-gate 4210Sstevel@tonic-gate switch (iocp->ioc_cmd) { 4220Sstevel@tonic-gate case (SIOCGETVIFCNT): 4233448Sdh155122 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst)); 4240Sstevel@tonic-gate case (SIOCGETSGCNT): 4253448Sdh155122 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst)); 4260Sstevel@tonic-gate case (SIOCGETLSGCNT): 4273448Sdh155122 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst)); 4280Sstevel@tonic-gate default: 4290Sstevel@tonic-gate return (EINVAL); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate } 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate /* 4340Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 
4350Sstevel@tonic-gate */ 4360Sstevel@tonic-gate static int 4373448Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst) 4380Sstevel@tonic-gate { 4390Sstevel@tonic-gate struct mfc *rt; 4400Sstevel@tonic-gate struct mfcb *mfcbp; 4410Sstevel@tonic-gate 4423448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 4430Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 4440Sstevel@tonic-gate MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 4450Sstevel@tonic-gate 4460Sstevel@tonic-gate if (rt != NULL) { 4470Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 4480Sstevel@tonic-gate req->pktcnt = rt->mfc_pkt_cnt; 4490Sstevel@tonic-gate req->bytecnt = rt->mfc_byte_cnt; 4500Sstevel@tonic-gate req->wrong_if = rt->mfc_wrong_if; 4510Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 4520Sstevel@tonic-gate } else 4530Sstevel@tonic-gate req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 4560Sstevel@tonic-gate return (0); 4570Sstevel@tonic-gate } 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate /* 4600Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 4610Sstevel@tonic-gate * Uses larger counters and IPv6 addresses. 4620Sstevel@tonic-gate */ 4630Sstevel@tonic-gate /* ARGSUSED XXX until implemented */ 4640Sstevel@tonic-gate static int 4653448Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst) 4660Sstevel@tonic-gate { 4670Sstevel@tonic-gate /* XXX TODO SIOCGETLSGCNT */ 4680Sstevel@tonic-gate return (ENXIO); 4690Sstevel@tonic-gate } 4700Sstevel@tonic-gate 4710Sstevel@tonic-gate /* 4720Sstevel@tonic-gate * Returns the input and output packet and byte counts on the vif provided. 
4730Sstevel@tonic-gate */ 4740Sstevel@tonic-gate static int 4753448Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst) 4760Sstevel@tonic-gate { 4770Sstevel@tonic-gate vifi_t vifi = req->vifi; 4780Sstevel@tonic-gate 4793448Sdh155122 if (vifi >= ipst->ips_numvifs) 4800Sstevel@tonic-gate return (EINVAL); 4810Sstevel@tonic-gate 4820Sstevel@tonic-gate /* 4830Sstevel@tonic-gate * No locks here, an approximation is fine. 4840Sstevel@tonic-gate */ 4853448Sdh155122 req->icount = ipst->ips_vifs[vifi].v_pkt_in; 4863448Sdh155122 req->ocount = ipst->ips_vifs[vifi].v_pkt_out; 4873448Sdh155122 req->ibytes = ipst->ips_vifs[vifi].v_bytes_in; 4883448Sdh155122 req->obytes = ipst->ips_vifs[vifi].v_bytes_out; 4890Sstevel@tonic-gate 4900Sstevel@tonic-gate return (0); 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate 4930Sstevel@tonic-gate static int 4940Sstevel@tonic-gate get_version(uchar_t *data) 4950Sstevel@tonic-gate { 4960Sstevel@tonic-gate int *v = (int *)data; 4970Sstevel@tonic-gate 4980Sstevel@tonic-gate *v = 0x0305; /* XXX !!!! */ 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate return (0); 5010Sstevel@tonic-gate } 5020Sstevel@tonic-gate 5030Sstevel@tonic-gate /* 5040Sstevel@tonic-gate * Set PIM assert processing global. 5050Sstevel@tonic-gate */ 5060Sstevel@tonic-gate static int 5073448Sdh155122 set_assert(int *i, ip_stack_t *ipst) 5080Sstevel@tonic-gate { 5090Sstevel@tonic-gate if ((*i != 1) && (*i != 0)) 5100Sstevel@tonic-gate return (EINVAL); 5110Sstevel@tonic-gate 5123448Sdh155122 ipst->ips_pim_assert = *i; 5130Sstevel@tonic-gate 5140Sstevel@tonic-gate return (0); 5150Sstevel@tonic-gate } 5160Sstevel@tonic-gate 5170Sstevel@tonic-gate /* 5180Sstevel@tonic-gate * Get PIM assert processing global. 
5190Sstevel@tonic-gate */ 5200Sstevel@tonic-gate static int 5213448Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst) 5220Sstevel@tonic-gate { 5230Sstevel@tonic-gate int *i = (int *)data; 5240Sstevel@tonic-gate 5253448Sdh155122 *i = ipst->ips_pim_assert; 5260Sstevel@tonic-gate 5270Sstevel@tonic-gate return (0); 5280Sstevel@tonic-gate } 5290Sstevel@tonic-gate 5300Sstevel@tonic-gate /* 5310Sstevel@tonic-gate * Enable multicast routing. 5320Sstevel@tonic-gate */ 5330Sstevel@tonic-gate static int 5345240Snordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst) 5350Sstevel@tonic-gate { 5360Sstevel@tonic-gate int *v; 5370Sstevel@tonic-gate 5380Sstevel@tonic-gate if (data == NULL || (datalen != sizeof (int))) 5390Sstevel@tonic-gate return (ENOPROTOOPT); 5400Sstevel@tonic-gate 5410Sstevel@tonic-gate v = (int *)data; 5420Sstevel@tonic-gate if (*v != 1) 5430Sstevel@tonic-gate return (ENOPROTOOPT); 5440Sstevel@tonic-gate 5453448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 5463448Sdh155122 if (ipst->ips_ip_g_mrouter != NULL) { 5473448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5480Sstevel@tonic-gate return (EADDRINUSE); 5490Sstevel@tonic-gate } 5500Sstevel@tonic-gate 5515240Snordmark /* 5525240Snordmark * MRT_INIT should only be allowed for RAW sockets, but we double 5535240Snordmark * check. 
5545240Snordmark */ 5555240Snordmark if (!IPCL_IS_RAWIP(connp)) { 5565240Snordmark mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5575240Snordmark return (EINVAL); 5585240Snordmark } 5595240Snordmark 5605240Snordmark ipst->ips_ip_g_mrouter = connp; 5610Sstevel@tonic-gate connp->conn_multi_router = 1; 5620Sstevel@tonic-gate /* In order for tunnels to work we have to turn ip_g_forward on */ 5633448Sdh155122 if (!WE_ARE_FORWARDING(ipst)) { 5643448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 5655240Snordmark (void) mi_strlog(connp->conn_rq, 1, SL_TRACE, 5660Sstevel@tonic-gate "ip_mrouter_init: turning on forwarding"); 5670Sstevel@tonic-gate } 5683448Sdh155122 ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward; 5693448Sdh155122 ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS; 5700Sstevel@tonic-gate } 5710Sstevel@tonic-gate 5723448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5730Sstevel@tonic-gate return (0); 5740Sstevel@tonic-gate } 5750Sstevel@tonic-gate 5763448Sdh155122 void 5773448Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst) 5783448Sdh155122 { 5793448Sdh155122 mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL); 5803448Sdh155122 5813448Sdh155122 ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1), 5823448Sdh155122 KM_SLEEP); 5833448Sdh155122 ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP); 5843448Sdh155122 /* 5853448Sdh155122 * mfctable: 5863448Sdh155122 * Includes all mfcs, including waiting upcalls. 5873448Sdh155122 * Multiple mfcs per bucket. 5883448Sdh155122 */ 5893448Sdh155122 ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ, 5903448Sdh155122 KM_SLEEP); 5913448Sdh155122 /* 5923448Sdh155122 * Define the token bucket filter structures. 5933448Sdh155122 * tbftable -> each vif has one of these for storing info. 
5943448Sdh155122 */ 5953448Sdh155122 ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP); 5963448Sdh155122 5973448Sdh155122 mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL); 5983448Sdh155122 5993448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 6003448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 6013448Sdh155122 } 6023448Sdh155122 6030Sstevel@tonic-gate /* 6040Sstevel@tonic-gate * Disable multicast routing. 6050Sstevel@tonic-gate * Didn't use global timeout_val (BSD version), instead check the mfctable. 6060Sstevel@tonic-gate */ 6070Sstevel@tonic-gate int 60811042SErik.Nordmark@Sun.COM ip_mrouter_done(ip_stack_t *ipst) 6090Sstevel@tonic-gate { 6105240Snordmark conn_t *mrouter; 6110Sstevel@tonic-gate vifi_t vifi; 6120Sstevel@tonic-gate struct mfc *mfc_rt; 6130Sstevel@tonic-gate int i; 6140Sstevel@tonic-gate 6153448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 6163448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) { 6173448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 6180Sstevel@tonic-gate return (EINVAL); 6190Sstevel@tonic-gate } 6200Sstevel@tonic-gate 6215240Snordmark mrouter = ipst->ips_ip_g_mrouter; 6223448Sdh155122 6233448Sdh155122 if (ipst->ips_saved_ip_g_forward != -1) { 6243448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 6255240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 6260Sstevel@tonic-gate "ip_mrouter_done: turning off forwarding"); 6270Sstevel@tonic-gate } 6283448Sdh155122 ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward; 6293448Sdh155122 ipst->ips_saved_ip_g_forward = -1; 6300Sstevel@tonic-gate } 6310Sstevel@tonic-gate 6320Sstevel@tonic-gate /* 6330Sstevel@tonic-gate * Always clear cache when vifs change. 6343448Sdh155122 * No need to get ipst->ips_last_encap_lock since we are running as 6353448Sdh155122 * a writer. 
6360Sstevel@tonic-gate */ 6373448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 6383448Sdh155122 ipst->ips_last_encap_src = 0; 6393448Sdh155122 ipst->ips_last_encap_vif = NULL; 6403448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 6415240Snordmark mrouter->conn_multi_router = 0; 6420Sstevel@tonic-gate 6433448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 6440Sstevel@tonic-gate 6450Sstevel@tonic-gate /* 6460Sstevel@tonic-gate * For each phyint in use, 6470Sstevel@tonic-gate * disable promiscuous reception of all IP multicasts. 6480Sstevel@tonic-gate */ 6490Sstevel@tonic-gate for (vifi = 0; vifi < MAXVIFS; vifi++) { 6503448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifi; 6510Sstevel@tonic-gate 6520Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 6530Sstevel@tonic-gate /* 6540Sstevel@tonic-gate * if the vif is active mark it condemned. 6550Sstevel@tonic-gate */ 6560Sstevel@tonic-gate if (vifp->v_marks & VIF_MARK_GOOD) { 6570Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 6580Sstevel@tonic-gate ipif_refhold(vifp->v_ipif); 6590Sstevel@tonic-gate /* Phyint only */ 6600Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 6610Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 66211042SErik.Nordmark@Sun.COM ilm_t *ilm = vifp->v_ilm; 66311042SErik.Nordmark@Sun.COM 66411042SErik.Nordmark@Sun.COM vifp->v_ilm = NULL; 66511042SErik.Nordmark@Sun.COM vifp->v_marks &= ~VIF_MARK_GOOD; 66611042SErik.Nordmark@Sun.COM vifp->v_marks |= VIF_MARK_CONDEMNED; 66711042SErik.Nordmark@Sun.COM 66811042SErik.Nordmark@Sun.COM mutex_exit(&(vifp)->v_lock); 66911042SErik.Nordmark@Sun.COM if (ilm != NULL) { 67011042SErik.Nordmark@Sun.COM ill_t *ill = ipif->ipif_ill; 67111042SErik.Nordmark@Sun.COM 67211042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm); 67311042SErik.Nordmark@Sun.COM ASSERT(ill->ill_mrouter_cnt > 0); 67411042SErik.Nordmark@Sun.COM atomic_dec_32(&ill->ill_mrouter_cnt); 6750Sstevel@tonic-gate } 6760Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 
6770Sstevel@tonic-gate } 67810495SErik.Nordmark@Sun.COM ipif_refrele(vifp->v_ipif); 6790Sstevel@tonic-gate /* 6800Sstevel@tonic-gate * decreases the refcnt added in add_vif. 6810Sstevel@tonic-gate * and release v_lock. 6820Sstevel@tonic-gate */ 6830Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 6840Sstevel@tonic-gate } else { 6850Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 6860Sstevel@tonic-gate continue; 6870Sstevel@tonic-gate } 6880Sstevel@tonic-gate } 6890Sstevel@tonic-gate 6903448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 6913448Sdh155122 ipst->ips_numvifs = 0; 6923448Sdh155122 ipst->ips_pim_assert = 0; 6933448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 6943448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate /* 6970Sstevel@tonic-gate * Free upcall msgs. 6980Sstevel@tonic-gate * Go through mfctable and stop any outstanding upcall 6990Sstevel@tonic-gate * timeouts remaining on mfcs. 7000Sstevel@tonic-gate */ 7010Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 7023448Sdh155122 mutex_enter(&ipst->ips_mfcs[i].mfcb_lock); 7033448Sdh155122 ipst->ips_mfcs[i].mfcb_refcnt++; 7043448Sdh155122 ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 7053448Sdh155122 mutex_exit(&ipst->ips_mfcs[i].mfcb_lock); 7063448Sdh155122 mfc_rt = ipst->ips_mfcs[i].mfcb_mfc; 7070Sstevel@tonic-gate while (mfc_rt) { 7080Sstevel@tonic-gate /* Free upcalls */ 7090Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7100Sstevel@tonic-gate if (mfc_rt->mfc_rte != NULL) { 7110Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id != 0) { 7120Sstevel@tonic-gate /* 7130Sstevel@tonic-gate * OK to drop the lock as we have 7140Sstevel@tonic-gate * a refcnt on the bucket. timeout 7150Sstevel@tonic-gate * can fire but it will see that 7160Sstevel@tonic-gate * mfc_timeout_id == 0 and not do 7170Sstevel@tonic-gate * anything. see expire_upcalls(). 
7180Sstevel@tonic-gate */ 7190Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7200Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7210Sstevel@tonic-gate (void) untimeout( 7220Sstevel@tonic-gate mfc_rt->mfc_timeout_id); 7230Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7240Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7250Sstevel@tonic-gate 7260Sstevel@tonic-gate /* 7270Sstevel@tonic-gate * all queued upcall packets 7280Sstevel@tonic-gate * and mblk will be freed in 7290Sstevel@tonic-gate * release_mfc(). 7300Sstevel@tonic-gate */ 7310Sstevel@tonic-gate } 7320Sstevel@tonic-gate } 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 7350Sstevel@tonic-gate 7360Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7370Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next; 7380Sstevel@tonic-gate } 7393448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 7400Sstevel@tonic-gate } 7410Sstevel@tonic-gate 7423448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 7433448Sdh155122 ipst->ips_ip_g_mrouter = NULL; 7443448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7450Sstevel@tonic-gate return (0); 7460Sstevel@tonic-gate } 7470Sstevel@tonic-gate 7483448Sdh155122 void 7493448Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst) 7503448Sdh155122 { 7513448Sdh155122 struct mfcb *mfcbp; 7523448Sdh155122 struct mfc *rt; 7533448Sdh155122 int i; 7543448Sdh155122 7553448Sdh155122 for (i = 0; i < MFCTBLSIZ; i++) { 7563448Sdh155122 mfcbp = &ipst->ips_mfcs[i]; 7573448Sdh155122 7583448Sdh155122 while ((rt = mfcbp->mfcb_mfc) != NULL) { 7593448Sdh155122 (void) printf("ip_mrouter_stack_destroy: free for %d\n", 7603448Sdh155122 i); 7613448Sdh155122 7623448Sdh155122 mfcbp->mfcb_mfc = rt->mfc_next; 7633448Sdh155122 free_queue(rt); 7643448Sdh155122 mi_free(rt); 7653448Sdh155122 } 7663448Sdh155122 } 7673448Sdh155122 kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1)); 7683448Sdh155122 ipst->ips_vifs = NULL; 7693448Sdh155122 kmem_free(ipst->ips_mrtstat, 
sizeof (struct mrtstat)); 7703448Sdh155122 ipst->ips_mrtstat = NULL; 7713448Sdh155122 kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ); 7723448Sdh155122 ipst->ips_mfcs = NULL; 7733448Sdh155122 kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS); 7743448Sdh155122 ipst->ips_tbfs = NULL; 7753448Sdh155122 7763448Sdh155122 mutex_destroy(&ipst->ips_last_encap_lock); 7773448Sdh155122 mutex_destroy(&ipst->ips_ip_g_mrouter_mutex); 7783448Sdh155122 } 7793448Sdh155122 7800Sstevel@tonic-gate static boolean_t 7813448Sdh155122 is_mrouter_off(ip_stack_t *ipst) 7820Sstevel@tonic-gate { 7835240Snordmark conn_t *mrouter; 7840Sstevel@tonic-gate 7853448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 7863448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) { 7873448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7880Sstevel@tonic-gate return (B_TRUE); 7890Sstevel@tonic-gate } 7900Sstevel@tonic-gate 7915240Snordmark mrouter = ipst->ips_ip_g_mrouter; 7925240Snordmark if (mrouter->conn_multi_router == 0) { 7933448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7940Sstevel@tonic-gate return (B_TRUE); 7950Sstevel@tonic-gate } 7963448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7970Sstevel@tonic-gate return (B_FALSE); 7980Sstevel@tonic-gate } 7990Sstevel@tonic-gate 8000Sstevel@tonic-gate static void 8010Sstevel@tonic-gate unlock_good_vif(struct vif *vifp) 8020Sstevel@tonic-gate { 8030Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8040Sstevel@tonic-gate ipif_refrele(vifp->v_ipif); 8050Sstevel@tonic-gate VIF_REFRELE(vifp); 8060Sstevel@tonic-gate } 8070Sstevel@tonic-gate 8080Sstevel@tonic-gate static boolean_t 8090Sstevel@tonic-gate lock_good_vif(struct vif *vifp) 8100Sstevel@tonic-gate { 8110Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8120Sstevel@tonic-gate if (!(vifp->v_marks & VIF_MARK_GOOD)) { 8130Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8140Sstevel@tonic-gate return (B_FALSE); 8150Sstevel@tonic-gate } 8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8180Sstevel@tonic-gate mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 8190Sstevel@tonic-gate if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 8200Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8210Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8220Sstevel@tonic-gate return (B_FALSE); 8230Sstevel@tonic-gate } 8240Sstevel@tonic-gate ipif_refhold_locked(vifp->v_ipif); 8250Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8260Sstevel@tonic-gate vifp->v_refcnt++; 8270Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8280Sstevel@tonic-gate return (B_TRUE); 8290Sstevel@tonic-gate } 8300Sstevel@tonic-gate 8310Sstevel@tonic-gate /* 8320Sstevel@tonic-gate * Add a vif to the vif table. 8330Sstevel@tonic-gate */ 8340Sstevel@tonic-gate static int 83511042SErik.Nordmark@Sun.COM add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst) 8360Sstevel@tonic-gate { 8373448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; 8380Sstevel@tonic-gate ipif_t *ipif; 83911042SErik.Nordmark@Sun.COM int error = 0; 8403448Sdh155122 struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; 8415240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 84211042SErik.Nordmark@Sun.COM ilm_t *ilm; 84311042SErik.Nordmark@Sun.COM ill_t *ill; 8440Sstevel@tonic-gate 8450Sstevel@tonic-gate ASSERT(connp != NULL); 8460Sstevel@tonic-gate 8470Sstevel@tonic-gate if (vifcp->vifc_vifi >= MAXVIFS) 8480Sstevel@tonic-gate return (EINVAL); 8490Sstevel@tonic-gate 8503448Sdh155122 if (is_mrouter_off(ipst)) 8510Sstevel@tonic-gate return (EINVAL); 8520Sstevel@tonic-gate 8530Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8540Sstevel@tonic-gate /* 8550Sstevel@tonic-gate * Viftable entry should be 0. 8560Sstevel@tonic-gate * if v_marks == 0 but v_refcnt != 0 means struct is being 8570Sstevel@tonic-gate * initialized. 
8580Sstevel@tonic-gate * 8590Sstevel@tonic-gate * Also note that it is very unlikely that we will get a MRT_ADD_VIF 8600Sstevel@tonic-gate * request while the delete is in progress, mrouted only sends add 8610Sstevel@tonic-gate * requests when a new interface is added and the new interface cannot 8620Sstevel@tonic-gate * have the same vifi as an existing interface. We make sure that 8630Sstevel@tonic-gate * ill_delete will block till the vif is deleted by adding a refcnt 8640Sstevel@tonic-gate * to ipif in del_vif(). 8650Sstevel@tonic-gate */ 8660Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr != 0 || 8670Sstevel@tonic-gate vifp->v_marks != 0 || 8680Sstevel@tonic-gate vifp->v_refcnt != 0) { 8690Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8700Sstevel@tonic-gate return (EADDRINUSE); 8710Sstevel@tonic-gate } 8720Sstevel@tonic-gate 8730Sstevel@tonic-gate /* Incoming vif should not be 0 */ 8740Sstevel@tonic-gate if (vifcp->vifc_lcl_addr.s_addr == 0) { 8750Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8760Sstevel@tonic-gate return (EINVAL); 8770Sstevel@tonic-gate } 8780Sstevel@tonic-gate 8790Sstevel@tonic-gate vifp->v_refcnt++; 8800Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8810Sstevel@tonic-gate /* Find the interface with the local address */ 8820Sstevel@tonic-gate ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 88311042SErik.Nordmark@Sun.COM IPCL_ZONEID(connp), ipst); 8840Sstevel@tonic-gate if (ipif == NULL) { 8850Sstevel@tonic-gate VIF_REFRELE(vifp); 8860Sstevel@tonic-gate return (EADDRNOTAVAIL); 8870Sstevel@tonic-gate } 8880Sstevel@tonic-gate 8893448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 8905240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 8910Sstevel@tonic-gate "add_vif: src 0x%x enter", 8920Sstevel@tonic-gate vifcp->vifc_lcl_addr.s_addr); 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8960Sstevel@tonic-gate /* 8970Sstevel@tonic-gate * Always clear cache when vifs change. 
8980Sstevel@tonic-gate * Needed to ensure that src isn't left over from before vif was added. 8990Sstevel@tonic-gate * No need to get last_encap_lock, since we are running as a writer. 9000Sstevel@tonic-gate */ 9010Sstevel@tonic-gate 9023448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 9033448Sdh155122 ipst->ips_last_encap_src = 0; 9043448Sdh155122 ipst->ips_last_encap_vif = NULL; 9053448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 9060Sstevel@tonic-gate 9070Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_TUNNEL) { 9080Sstevel@tonic-gate if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 9090Sstevel@tonic-gate cmn_err(CE_WARN, 9100Sstevel@tonic-gate "add_vif: source route tunnels not supported\n"); 9110Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9120Sstevel@tonic-gate ipif_refrele(ipif); 9130Sstevel@tonic-gate return (EOPNOTSUPP); 9140Sstevel@tonic-gate } 9150Sstevel@tonic-gate vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate } else { 9180Sstevel@tonic-gate /* Phyint or Register vif */ 9190Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9200Sstevel@tonic-gate /* 9210Sstevel@tonic-gate * Note: Since all IPPROTO_IP level options (including 9220Sstevel@tonic-gate * MRT_ADD_VIF) are done exclusively via 9230Sstevel@tonic-gate * ip_optmgmt_writer(), a lock is not necessary to 9240Sstevel@tonic-gate * protect reg_vif_num. 
9250Sstevel@tonic-gate */ 9263448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 9273448Sdh155122 if (ipst->ips_reg_vif_num == ALL_VIFS) { 9283448Sdh155122 ipst->ips_reg_vif_num = vifcp->vifc_vifi; 9293448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9300Sstevel@tonic-gate } else { 9313448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9320Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9330Sstevel@tonic-gate ipif_refrele(ipif); 9340Sstevel@tonic-gate return (EADDRINUSE); 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate } 9370Sstevel@tonic-gate 9380Sstevel@tonic-gate /* Make sure the interface supports multicast */ 9390Sstevel@tonic-gate if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 9400Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9410Sstevel@tonic-gate ipif_refrele(ipif); 9420Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9433448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 9443448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 9453448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9460Sstevel@tonic-gate } 9470Sstevel@tonic-gate return (EOPNOTSUPP); 9480Sstevel@tonic-gate } 9490Sstevel@tonic-gate /* Enable promiscuous reception of all IP mcasts from the if */ 9500Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 95111042SErik.Nordmark@Sun.COM 95211042SErik.Nordmark@Sun.COM ill = ipif->ipif_ill; 95311042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ill)) 95411042SErik.Nordmark@Sun.COM ill = ipmp_ill_hold_ipmp_ill(ill); 95511042SErik.Nordmark@Sun.COM 95611042SErik.Nordmark@Sun.COM if (ill == NULL) { 95711042SErik.Nordmark@Sun.COM ilm = NULL; 95811042SErik.Nordmark@Sun.COM } else { 95911042SErik.Nordmark@Sun.COM ilm = ip_addmulti(&ipv6_all_zeros, ill, 96011042SErik.Nordmark@Sun.COM ipif->ipif_zoneid, &error); 96111042SErik.Nordmark@Sun.COM if (ilm != NULL) 96211042SErik.Nordmark@Sun.COM atomic_inc_32(&ill->ill_mrouter_cnt); 96311042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ipif->ipif_ill)) { 96411042SErik.Nordmark@Sun.COM ill_refrele(ill); 
96511042SErik.Nordmark@Sun.COM ill = ipif->ipif_ill; 96611042SErik.Nordmark@Sun.COM } 96711042SErik.Nordmark@Sun.COM } 96811042SErik.Nordmark@Sun.COM 9690Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 9700Sstevel@tonic-gate /* 9710Sstevel@tonic-gate * since we released the lock lets make sure that 9720Sstevel@tonic-gate * ip_mrouter_done() has not been called. 9730Sstevel@tonic-gate */ 97411042SErik.Nordmark@Sun.COM if (ilm == NULL || is_mrouter_off(ipst)) { 97511042SErik.Nordmark@Sun.COM if (ilm != NULL) { 97611042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm); 97711042SErik.Nordmark@Sun.COM ASSERT(ill->ill_mrouter_cnt > 0); 97811042SErik.Nordmark@Sun.COM atomic_dec_32(&ill->ill_mrouter_cnt); 97911042SErik.Nordmark@Sun.COM } 9800Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9813448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 9823448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 9833448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9840Sstevel@tonic-gate } 9850Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9860Sstevel@tonic-gate ipif_refrele(ipif); 9870Sstevel@tonic-gate return (error?error:EINVAL); 9880Sstevel@tonic-gate } 98911042SErik.Nordmark@Sun.COM vifp->v_ilm = ilm; 9900Sstevel@tonic-gate } 9910Sstevel@tonic-gate /* Define parameters for the tbf structure */ 9920Sstevel@tonic-gate vifp->v_tbf = v_tbf; 9930Sstevel@tonic-gate gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 9940Sstevel@tonic-gate vifp->v_tbf->tbf_n_tok = 0; 9950Sstevel@tonic-gate vifp->v_tbf->tbf_q_len = 0; 9960Sstevel@tonic-gate vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 9970Sstevel@tonic-gate vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 9980Sstevel@tonic-gate 9990Sstevel@tonic-gate vifp->v_flags = vifcp->vifc_flags; 10000Sstevel@tonic-gate vifp->v_threshold = vifcp->vifc_threshold; 10010Sstevel@tonic-gate vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 10020Sstevel@tonic-gate vifp->v_ipif = ipif; 10030Sstevel@tonic-gate ipif_refrele(ipif); 10040Sstevel@tonic-gate /* Scaling up here, allows 
division by 1024 in critical code. */ 10050Sstevel@tonic-gate vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 10060Sstevel@tonic-gate vifp->v_timeout_id = 0; 10070Sstevel@tonic-gate /* initialize per vif pkt counters */ 10080Sstevel@tonic-gate vifp->v_pkt_in = 0; 10090Sstevel@tonic-gate vifp->v_pkt_out = 0; 10100Sstevel@tonic-gate vifp->v_bytes_in = 0; 10110Sstevel@tonic-gate vifp->v_bytes_out = 0; 10120Sstevel@tonic-gate mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 10130Sstevel@tonic-gate 10140Sstevel@tonic-gate /* Adjust numvifs up, if the vifi is higher than numvifs */ 10153448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 10163448Sdh155122 if (ipst->ips_numvifs <= vifcp->vifc_vifi) 10173448Sdh155122 ipst->ips_numvifs = vifcp->vifc_vifi + 1; 10183448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10193448Sdh155122 10203448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 10215240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10220Sstevel@tonic-gate "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 10230Sstevel@tonic-gate vifcp->vifc_vifi, 10240Sstevel@tonic-gate ntohl(vifcp->vifc_lcl_addr.s_addr), 10250Sstevel@tonic-gate (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 10260Sstevel@tonic-gate ntohl(vifcp->vifc_rmt_addr.s_addr), 10270Sstevel@tonic-gate vifcp->vifc_threshold, vifcp->vifc_rate_limit); 10280Sstevel@tonic-gate } 10290Sstevel@tonic-gate 10300Sstevel@tonic-gate vifp->v_marks = VIF_MARK_GOOD; 10310Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 10320Sstevel@tonic-gate return (0); 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate 10350Sstevel@tonic-gate 10360Sstevel@tonic-gate /* Delete a vif from the vif table. 
*/ 10370Sstevel@tonic-gate static void 10380Sstevel@tonic-gate del_vifp(struct vif *vifp) 10390Sstevel@tonic-gate { 10400Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 10410Sstevel@tonic-gate mblk_t *mp0; 10420Sstevel@tonic-gate vifi_t vifi; 10433448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 10445240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 10450Sstevel@tonic-gate 10460Sstevel@tonic-gate ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 10470Sstevel@tonic-gate ASSERT(t != NULL); 10480Sstevel@tonic-gate 10493448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 10505240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10510Sstevel@tonic-gate "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 10520Sstevel@tonic-gate } 10530Sstevel@tonic-gate 10540Sstevel@tonic-gate if (vifp->v_timeout_id != 0) { 10550Sstevel@tonic-gate (void) untimeout(vifp->v_timeout_id); 10560Sstevel@tonic-gate vifp->v_timeout_id = 0; 10570Sstevel@tonic-gate } 10580Sstevel@tonic-gate 10590Sstevel@tonic-gate /* 10600Sstevel@tonic-gate * Free packets queued at the interface. 10610Sstevel@tonic-gate * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 10620Sstevel@tonic-gate */ 10630Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 10640Sstevel@tonic-gate while (t->tbf_q != NULL) { 10650Sstevel@tonic-gate mp0 = t->tbf_q; 10660Sstevel@tonic-gate t->tbf_q = t->tbf_q->b_next; 10670Sstevel@tonic-gate mp0->b_prev = mp0->b_next = NULL; 10680Sstevel@tonic-gate freemsg(mp0); 10690Sstevel@tonic-gate } 10700Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 10710Sstevel@tonic-gate 10720Sstevel@tonic-gate /* 10730Sstevel@tonic-gate * Always clear cache when vifs change. 10740Sstevel@tonic-gate * No need to get last_encap_lock since we are running as a writer. 
10750Sstevel@tonic-gate */ 10763448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 10773448Sdh155122 if (vifp == ipst->ips_last_encap_vif) { 10783448Sdh155122 ipst->ips_last_encap_vif = NULL; 10793448Sdh155122 ipst->ips_last_encap_src = 0; 10800Sstevel@tonic-gate } 10813448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 10820Sstevel@tonic-gate 10830Sstevel@tonic-gate mutex_destroy(&t->tbf_lock); 10840Sstevel@tonic-gate 10850Sstevel@tonic-gate bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 10860Sstevel@tonic-gate 10870Sstevel@tonic-gate /* Adjust numvifs down */ 10883448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 10893448Sdh155122 for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 10903448Sdh155122 if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0) 10910Sstevel@tonic-gate break; 10923448Sdh155122 ipst->ips_numvifs = vifi; 10933448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10940Sstevel@tonic-gate 10950Sstevel@tonic-gate bzero(vifp, sizeof (*vifp)); 10960Sstevel@tonic-gate } 10970Sstevel@tonic-gate 10980Sstevel@tonic-gate static int 109911042SErik.Nordmark@Sun.COM del_vif(vifi_t *vifip, ip_stack_t *ipst) 11000Sstevel@tonic-gate { 11013448Sdh155122 struct vif *vifp = ipst->ips_vifs + *vifip; 11020Sstevel@tonic-gate 11033448Sdh155122 if (*vifip >= ipst->ips_numvifs) 11040Sstevel@tonic-gate return (EINVAL); 11050Sstevel@tonic-gate 11060Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 11070Sstevel@tonic-gate /* 11080Sstevel@tonic-gate * Not initialized 11090Sstevel@tonic-gate * Here we are not looking at the vif that is being initialized 11100Sstevel@tonic-gate * i.e vifp->v_marks == 0 and refcnt > 0. 
11110Sstevel@tonic-gate */ 11120Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr == 0 || 11130Sstevel@tonic-gate !(vifp->v_marks & VIF_MARK_GOOD)) { 11140Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 11150Sstevel@tonic-gate return (EADDRNOTAVAIL); 11160Sstevel@tonic-gate } 11170Sstevel@tonic-gate 11180Sstevel@tonic-gate /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */ 11190Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 11200Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 11210Sstevel@tonic-gate 11220Sstevel@tonic-gate /* Phyint only */ 11230Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 11240Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 112511042SErik.Nordmark@Sun.COM ilm_t *ilm = vifp->v_ilm; 112611042SErik.Nordmark@Sun.COM 112711042SErik.Nordmark@Sun.COM vifp->v_ilm = NULL; 112811042SErik.Nordmark@Sun.COM 11290Sstevel@tonic-gate ASSERT(ipif != NULL); 11300Sstevel@tonic-gate /* 11310Sstevel@tonic-gate * should be OK to drop the lock as we 11320Sstevel@tonic-gate * have marked this as CONDEMNED. 11330Sstevel@tonic-gate */ 11340Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); 113511042SErik.Nordmark@Sun.COM if (ilm != NULL) { 113611042SErik.Nordmark@Sun.COM (void) ip_delmulti(ilm); 113711042SErik.Nordmark@Sun.COM ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0); 113811042SErik.Nordmark@Sun.COM atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt); 113911042SErik.Nordmark@Sun.COM } 11400Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock); 11410Sstevel@tonic-gate } 11420Sstevel@tonic-gate 114311042SErik.Nordmark@Sun.COM if (vifp->v_flags & VIFF_REGISTER) { 114411042SErik.Nordmark@Sun.COM mutex_enter(&ipst->ips_numvifs_mutex); 114511042SErik.Nordmark@Sun.COM ipst->ips_reg_vif_num = ALL_VIFS; 114611042SErik.Nordmark@Sun.COM mutex_exit(&ipst->ips_numvifs_mutex); 114711042SErik.Nordmark@Sun.COM } 114811042SErik.Nordmark@Sun.COM 11490Sstevel@tonic-gate /* 11500Sstevel@tonic-gate * decreases the refcnt added in add_vif. 
11510Sstevel@tonic-gate */ 11520Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 11530Sstevel@tonic-gate return (0); 11540Sstevel@tonic-gate } 11550Sstevel@tonic-gate 11560Sstevel@tonic-gate /* 11570Sstevel@tonic-gate * Add an mfc entry. 11580Sstevel@tonic-gate */ 11590Sstevel@tonic-gate static int 11603448Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 11610Sstevel@tonic-gate { 11620Sstevel@tonic-gate struct mfc *rt; 11630Sstevel@tonic-gate struct rtdetq *rte; 11640Sstevel@tonic-gate ushort_t nstl; 11650Sstevel@tonic-gate int i; 11660Sstevel@tonic-gate struct mfcb *mfcbp; 11675240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 11680Sstevel@tonic-gate 11690Sstevel@tonic-gate /* 11700Sstevel@tonic-gate * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted 11710Sstevel@tonic-gate * did not have a real route for pkt. 11720Sstevel@tonic-gate * We want this pkt without rt installed in the mfctable to prevent 11730Sstevel@tonic-gate * multiiple tries, so go ahead and put it in mfctable, it will 11740Sstevel@tonic-gate * be discarded later in ip_mdq() because the child is NULL. 11750Sstevel@tonic-gate */ 11760Sstevel@tonic-gate 11770Sstevel@tonic-gate /* Error checking, out of bounds? 
*/ 11780Sstevel@tonic-gate if (mfccp->mfcc_parent > MAXVIFS) { 11790Sstevel@tonic-gate ip0dbg(("ADD_MFC: mfcc_parent out of range %d", 11800Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 11810Sstevel@tonic-gate return (EINVAL); 11820Sstevel@tonic-gate } 11830Sstevel@tonic-gate 11840Sstevel@tonic-gate if ((mfccp->mfcc_parent != NO_VIF) && 11853448Sdh155122 (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) { 11860Sstevel@tonic-gate ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n", 11870Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 11880Sstevel@tonic-gate return (EINVAL); 11890Sstevel@tonic-gate } 11900Sstevel@tonic-gate 11913448Sdh155122 if (is_mrouter_off(ipst)) { 11920Sstevel@tonic-gate return (EINVAL); 11930Sstevel@tonic-gate } 11940Sstevel@tonic-gate 11953448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr, 11960Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr)]; 11970Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 11980Sstevel@tonic-gate MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr, 11990Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr, rt); 12000Sstevel@tonic-gate 12010Sstevel@tonic-gate /* If an entry already exists, just update the fields */ 12020Sstevel@tonic-gate if (rt) { 12033448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 12045240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 12050Sstevel@tonic-gate "add_mfc: update o %x grp %x parent %x", 12060Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12070Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12080Sstevel@tonic-gate mfccp->mfcc_parent); 12090Sstevel@tonic-gate } 12100Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12110Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 12120Sstevel@tonic-gate 12133448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 12143448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 12150Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 12163448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 12170Sstevel@tonic-gate 
mutex_exit(&rt->mfc_mutex); 12180Sstevel@tonic-gate 12190Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 12200Sstevel@tonic-gate return (0); 12210Sstevel@tonic-gate } 12220Sstevel@tonic-gate 12230Sstevel@tonic-gate /* 12240Sstevel@tonic-gate * Find the entry for which the upcall was made and update. 12250Sstevel@tonic-gate */ 12260Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) { 12270Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12280Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 12290Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 12300Sstevel@tonic-gate (rt->mfc_rte != NULL) && 12310Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 12320Sstevel@tonic-gate if (nstl++ != 0) 12330Sstevel@tonic-gate cmn_err(CE_WARN, 12340Sstevel@tonic-gate "add_mfc: %s o %x g %x p %x", 12350Sstevel@tonic-gate "multiple kernel entries", 12360Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12370Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12380Sstevel@tonic-gate mfccp->mfcc_parent); 12390Sstevel@tonic-gate 12403448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 12415240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 12423448Sdh155122 SL_TRACE, 12430Sstevel@tonic-gate "add_mfc: o %x g %x p %x", 12440Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12450Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12460Sstevel@tonic-gate mfccp->mfcc_parent); 12470Sstevel@tonic-gate } 12483448Sdh155122 fill_route(rt, mfccp, ipst); 12490Sstevel@tonic-gate 12500Sstevel@tonic-gate /* 12510Sstevel@tonic-gate * Prevent cleanup of cache entry. 12520Sstevel@tonic-gate * Timer starts in ip_mforward. 
12530Sstevel@tonic-gate */ 12540Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 12550Sstevel@tonic-gate timeout_id_t id; 12560Sstevel@tonic-gate id = rt->mfc_timeout_id; 12570Sstevel@tonic-gate /* 12580Sstevel@tonic-gate * setting id to zero will avoid this 12590Sstevel@tonic-gate * entry from being cleaned up in 12600Sstevel@tonic-gate * expire_up_calls(). 12610Sstevel@tonic-gate */ 12620Sstevel@tonic-gate rt->mfc_timeout_id = 0; 12630Sstevel@tonic-gate /* 12640Sstevel@tonic-gate * dropping the lock is fine as we 12650Sstevel@tonic-gate * have a refhold on the bucket. 12660Sstevel@tonic-gate * so mfc cannot be freed. 12670Sstevel@tonic-gate * The timeout can fire but it will see 12680Sstevel@tonic-gate * that mfc_timeout_id == 0 and not cleanup. 12690Sstevel@tonic-gate */ 12700Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12710Sstevel@tonic-gate (void) untimeout(id); 12720Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12730Sstevel@tonic-gate } 12740Sstevel@tonic-gate 12750Sstevel@tonic-gate /* 12760Sstevel@tonic-gate * Send all pkts that are queued waiting for the upcall. 12770Sstevel@tonic-gate * ip_mdq param tun set to 0 - 12780Sstevel@tonic-gate * the return value of ip_mdq() isn't used here, 12790Sstevel@tonic-gate * so value we send doesn't matter. 
12800Sstevel@tonic-gate */ 12810Sstevel@tonic-gate while (rt->mfc_rte != NULL) { 12820Sstevel@tonic-gate rte = rt->mfc_rte; 12830Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 12840Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12850Sstevel@tonic-gate (void) ip_mdq(rte->mp, (ipha_t *) 12860Sstevel@tonic-gate rte->mp->b_rptr, rte->ill, 0, rt); 12870Sstevel@tonic-gate freemsg(rte->mp); 12880Sstevel@tonic-gate mi_free((char *)rte); 12890Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12900Sstevel@tonic-gate } 12910Sstevel@tonic-gate } 12920Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12930Sstevel@tonic-gate } 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate 12960Sstevel@tonic-gate /* 12970Sstevel@tonic-gate * It is possible that an entry is being inserted without an upcall 12980Sstevel@tonic-gate */ 12990Sstevel@tonic-gate if (nstl == 0) { 13000Sstevel@tonic-gate mutex_enter(&(mfcbp->mfcb_lock)); 13013448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 13025240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 13030Sstevel@tonic-gate "add_mfc: no upcall o %x g %x p %x", 13040Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 13050Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 13060Sstevel@tonic-gate mfccp->mfcc_parent); 13070Sstevel@tonic-gate } 13083448Sdh155122 if (is_mrouter_off(ipst)) { 13090Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13100Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13110Sstevel@tonic-gate return (EINVAL); 13120Sstevel@tonic-gate } 13130Sstevel@tonic-gate 13140Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) { 13150Sstevel@tonic-gate 13160Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13170Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == 13180Sstevel@tonic-gate mfccp->mfcc_origin.s_addr) && 13190Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == 13205240Snordmark mfccp->mfcc_mcastgrp.s_addr) && 13215240Snordmark (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { 13223448Sdh155122 fill_route(rt, mfccp, ipst); 
13230Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13240Sstevel@tonic-gate break; 13250Sstevel@tonic-gate } 13260Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13270Sstevel@tonic-gate } 13280Sstevel@tonic-gate 13290Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */ 13300Sstevel@tonic-gate if (rt == NULL) { 13310Sstevel@tonic-gate rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 13320Sstevel@tonic-gate if (rt == NULL) { 13330Sstevel@tonic-gate ip1dbg(("add_mfc: out of memory\n")); 13340Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13350Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13360Sstevel@tonic-gate return (ENOBUFS); 13370Sstevel@tonic-gate } 13380Sstevel@tonic-gate 13390Sstevel@tonic-gate /* Insert new entry at head of hash chain */ 13400Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13413448Sdh155122 fill_route(rt, mfccp, ipst); 13420Sstevel@tonic-gate 13430Sstevel@tonic-gate /* Link into table */ 13440Sstevel@tonic-gate rt->mfc_next = mfcbp->mfcb_mfc; 13450Sstevel@tonic-gate mfcbp->mfcb_mfc = rt; 13460Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13470Sstevel@tonic-gate } 13480Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13490Sstevel@tonic-gate } 13500Sstevel@tonic-gate 13510Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13520Sstevel@tonic-gate return (0); 13530Sstevel@tonic-gate } 13540Sstevel@tonic-gate 13550Sstevel@tonic-gate /* 13560Sstevel@tonic-gate * Fills in mfc structure from mrouted mfcctl. 
13570Sstevel@tonic-gate */ 13580Sstevel@tonic-gate static void 13593448Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst) 13600Sstevel@tonic-gate { 13610Sstevel@tonic-gate int i; 13620Sstevel@tonic-gate 13630Sstevel@tonic-gate rt->mfc_origin = mfccp->mfcc_origin; 13640Sstevel@tonic-gate rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 13650Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 13663448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 13673448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) { 13680Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 13690Sstevel@tonic-gate } 13703448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 13710Sstevel@tonic-gate /* Initialize pkt counters per src-grp */ 13720Sstevel@tonic-gate rt->mfc_pkt_cnt = 0; 13730Sstevel@tonic-gate rt->mfc_byte_cnt = 0; 13740Sstevel@tonic-gate rt->mfc_wrong_if = 0; 13750Sstevel@tonic-gate rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0; 13760Sstevel@tonic-gate 13770Sstevel@tonic-gate } 13780Sstevel@tonic-gate 13790Sstevel@tonic-gate static void 13800Sstevel@tonic-gate free_queue(struct mfc *mfcp) 13810Sstevel@tonic-gate { 13820Sstevel@tonic-gate struct rtdetq *rte0; 13830Sstevel@tonic-gate 13840Sstevel@tonic-gate /* 13850Sstevel@tonic-gate * Drop all queued upcall packets. 13860Sstevel@tonic-gate * Free the mbuf with the pkt. 13870Sstevel@tonic-gate */ 13880Sstevel@tonic-gate while ((rte0 = mfcp->mfc_rte) != NULL) { 13890Sstevel@tonic-gate mfcp->mfc_rte = rte0->rte_next; 13900Sstevel@tonic-gate freemsg(rte0->mp); 13910Sstevel@tonic-gate mi_free((char *)rte0); 13920Sstevel@tonic-gate } 13930Sstevel@tonic-gate } 13940Sstevel@tonic-gate /* 13950Sstevel@tonic-gate * go thorugh the hash bucket and free all the entries marked condemned. 
13960Sstevel@tonic-gate */ 13970Sstevel@tonic-gate void 13980Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp) 13990Sstevel@tonic-gate { 14000Sstevel@tonic-gate struct mfc *current_mfcp; 14010Sstevel@tonic-gate struct mfc *prev_mfcp; 14020Sstevel@tonic-gate 14030Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14040Sstevel@tonic-gate 14050Sstevel@tonic-gate while (current_mfcp != NULL) { 14060Sstevel@tonic-gate if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) { 14070Sstevel@tonic-gate if (current_mfcp == mfcbp->mfcb_mfc) { 14080Sstevel@tonic-gate mfcbp->mfcb_mfc = current_mfcp->mfc_next; 14090Sstevel@tonic-gate free_queue(current_mfcp); 14100Sstevel@tonic-gate mi_free(current_mfcp); 14110Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14120Sstevel@tonic-gate continue; 14130Sstevel@tonic-gate } 14140Sstevel@tonic-gate ASSERT(prev_mfcp != NULL); 14150Sstevel@tonic-gate prev_mfcp->mfc_next = current_mfcp->mfc_next; 14160Sstevel@tonic-gate free_queue(current_mfcp); 14170Sstevel@tonic-gate mi_free(current_mfcp); 14180Sstevel@tonic-gate current_mfcp = NULL; 14190Sstevel@tonic-gate } else { 14200Sstevel@tonic-gate prev_mfcp = current_mfcp; 14210Sstevel@tonic-gate } 14220Sstevel@tonic-gate 14230Sstevel@tonic-gate current_mfcp = prev_mfcp->mfc_next; 14240Sstevel@tonic-gate 14250Sstevel@tonic-gate } 14260Sstevel@tonic-gate mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED; 14270Sstevel@tonic-gate ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0); 14280Sstevel@tonic-gate } 14290Sstevel@tonic-gate 14300Sstevel@tonic-gate /* 14310Sstevel@tonic-gate * Delete an mfc entry. 
14320Sstevel@tonic-gate */ 14330Sstevel@tonic-gate static int 14343448Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 14350Sstevel@tonic-gate { 14360Sstevel@tonic-gate struct in_addr origin; 14370Sstevel@tonic-gate struct in_addr mcastgrp; 14385240Snordmark struct mfc *rt; 14395240Snordmark uint_t hash; 14405240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 14410Sstevel@tonic-gate 14420Sstevel@tonic-gate origin = mfccp->mfcc_origin; 14430Sstevel@tonic-gate mcastgrp = mfccp->mfcc_mcastgrp; 14440Sstevel@tonic-gate hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 14450Sstevel@tonic-gate 14463448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 14475240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 14480Sstevel@tonic-gate "del_mfc: o %x g %x", 14490Sstevel@tonic-gate ntohl(origin.s_addr), 14500Sstevel@tonic-gate ntohl(mcastgrp.s_addr)); 14510Sstevel@tonic-gate } 14520Sstevel@tonic-gate 14533448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 14540Sstevel@tonic-gate 14550Sstevel@tonic-gate /* Find mfc in mfctable, finds only entries without upcalls */ 14563448Sdh155122 for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 14570Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 14580Sstevel@tonic-gate if (origin.s_addr == rt->mfc_origin.s_addr && 14590Sstevel@tonic-gate mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 14600Sstevel@tonic-gate rt->mfc_rte == NULL && 14610Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) 14620Sstevel@tonic-gate break; 14630Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14640Sstevel@tonic-gate } 14650Sstevel@tonic-gate 14660Sstevel@tonic-gate /* 14670Sstevel@tonic-gate * Return if there was an upcall (mfc_rte != NULL, 14680Sstevel@tonic-gate * or rt not in mfctable. 
14690Sstevel@tonic-gate */ 14700Sstevel@tonic-gate if (rt == NULL) { 14713448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 14720Sstevel@tonic-gate return (EADDRNOTAVAIL); 14730Sstevel@tonic-gate } 14740Sstevel@tonic-gate 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate /* 14770Sstevel@tonic-gate * no need to hold lock as we have a reference. 14780Sstevel@tonic-gate */ 14793448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 14800Sstevel@tonic-gate /* error checking */ 14810Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 14820Sstevel@tonic-gate ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 14830Sstevel@tonic-gate /* 14840Sstevel@tonic-gate * Its ok to drop the lock, the struct cannot be freed 14850Sstevel@tonic-gate * since we have a ref on the hash bucket. 14860Sstevel@tonic-gate */ 14870Sstevel@tonic-gate rt->mfc_timeout_id = 0; 14880Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14890Sstevel@tonic-gate (void) untimeout(rt->mfc_timeout_id); 14900Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 14910Sstevel@tonic-gate } 14920Sstevel@tonic-gate 14930Sstevel@tonic-gate ASSERT(rt->mfc_rte == NULL); 14940Sstevel@tonic-gate 14950Sstevel@tonic-gate 14960Sstevel@tonic-gate /* 14970Sstevel@tonic-gate * Delete the entry from the cache 14980Sstevel@tonic-gate */ 14990Sstevel@tonic-gate rt->mfc_marks |= MFCB_MARK_CONDEMNED; 15000Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 15010Sstevel@tonic-gate 15023448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 15030Sstevel@tonic-gate 15040Sstevel@tonic-gate return (0); 15050Sstevel@tonic-gate } 15060Sstevel@tonic-gate 15070Sstevel@tonic-gate #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 15080Sstevel@tonic-gate 15090Sstevel@tonic-gate /* 15100Sstevel@tonic-gate * IP multicast forwarding function. 
 * This function assumes that the packet
 * pointed to by ipha has arrived on (or is about to be sent to) the interface
 * pointed to by "ill", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is
 * erroneous, in which case a -1 value tells the caller (IP)
 * to discard it.
 *
 * Unlike BSD, SunOS 5.x needs to return to IP info about
 * whether pkt came in thru a tunnel, so it can be discarded, unless
 * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
 * to be delivered.
 * Return values are	 0 - pkt is okay and phyint
 *			-1 - pkt is malformed and to be tossed
 *			 1 - pkt came in on tunnel
 *
 * -1 is also returned when the mrouter is being turned off, when a
 * resource allocation for an upcall fails, and when the upcall queue for
 * the (src, dst) pair already holds more than MAX_UPQ packets.  When a
 * cache entry exists the return value is whatever ip_mdq() returns.
 */
int
ip_mforward(mblk_t *mp, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
	ill_t		*ill = ira->ira_ill;
	struct mfc	*rt;
	ipaddr_t	src, dst, tunnel_src = 0;
	/*
	 * NOTE(review): function-static counter shared by all callers and
	 * updated below without any visible locking; it only rate-limits a
	 * warning, so an occasional lost update looks harmless -- confirm.
	 */
	static int	srctun = 0;
	vifi_t		vifi;
	boolean_t	pim_reg_packet = B_FALSE;
	struct mfcb	*mfcbp;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	/* Saved so ira can be restored after the upcall to the mrouter. */
	ill_t		*rill = ira->ira_rill;

	ASSERT(ira->ira_pktlen == msgdsize(mp));

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	dst = ipha->ipha_dst;
	/* Classify the arrival path from the receive attribute flags. */
	if (ira->ira_flags & IRAF_PIM_REGISTER)
		pim_reg_packet = B_TRUE;
	else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET)
		tunnel_src = ira->ira_mroute_tunnel;

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: not forwarded ttl %d,"
			    " dst 0x%x ill %s",
			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
		}
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}

	if ((tunnel_src != 0) || pim_reg_packet) {
		/*
		 * Packet arrived over an encapsulated tunnel or via a PIM
		 * register message.
		 */
		if (ipst->ips_ip_mrtdebug > 1) {
			if (tunnel_src != 0) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: ill %s arrived via ENCAP TUN",
				    ill->ill_name);
			} else if (pim_reg_packet) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: ill %s arrived via"
				    " REGISTER VIF",
				    ill->ill_name);
			}
		}
	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
		/*
		 * Packet arrived via a physical interface: either the header
		 * is too short to carry the TUNNEL_LEN bytes of option, or
		 * the byte at offset 1 past the basic header is not
		 * IPOPT_LSRR.
		 */
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: ill %s arrived via PHYINT",
			    ill->ill_name);
		}

	} else {
		/*
		 * Packet arrived through a SRCRT tunnel.
		 * Source-route tunnels are no longer supported.
		 * Error message printed every 1000 times.
		 */
		if ((srctun++ % 1000) == 0) {
			cmn_err(CE_WARN,
			    "ip_mforward: received source-routed pkt from %x",
			    ntohl(ipha->ipha_src));
		}
		return (-1);
	}

	ipst->ips_mrtstat->mrts_fwd_in++;
	src = ipha->ipha_src;

	/* Find route in cache, return NULL if not there or upcalls q'ed. */

	/*
	 * Lock the mfctable against changes made by ip_mforward.
	 * Note that only add_mfc and del_mfc can remove entries and
	 * they run with exclusive access to IP. So we do not need to
	 * guard against the rt being deleted, so release lock after reading.
	 */

	if (is_mrouter_off(ipst))
		return (-1);

	/* The bucket reference is held until every return path below. */
	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, src, dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		int ret = 0;
		ipst->ips_mrtstat->mrts_mfc_hits++;
		if (pim_reg_packet) {
			/*
			 * PIM register packets are forwarded as if they
			 * arrived on the register vif's ill, with no tunnel
			 * source.
			 */
			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
			ret = ip_mdq(mp, ipha,
			    ipst->ips_vifs[ipst->ips_reg_vif_num].
			    v_ipif->ipif_ill,
			    0, rt);
		} else {
			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
		}

		MFCB_REFRELE(mfcbp);
		return (ret);

		/*
		 * Don't forward if we don't have a cache entry.  Mrouted will
		 * always provide a cache entry in response to an upcall.
		 */
	} else {
		/*
		 * If we don't have a route for packet's origin, make a copy
		 * of the packet and send message to routing daemon.
		 */
		struct mfc	*mfc_rt	 = NULL;
		mblk_t		*mp0	 = NULL;
		mblk_t		*mp_copy = NULL;
		struct rtdetq	*rte	 = NULL;
		struct rtdetq	*rte_m, *rte1, *prev_rte;
		uint_t		hash;
		int		npkts;
		boolean_t	new_mfc = B_FALSE;
		ipst->ips_mrtstat->mrts_mfc_misses++;
		/* BSD uses mrts_no_route++ */
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: no rte ill %s src %x g %x misses %d",
			    ill->ill_name, ntohl(src), ntohl(dst),
			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
		}
		/*
		 * The order of the following code differs from the BSD code.
		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
		 * code works, so SunOS 5.x wasn't changed to conform to the
		 * BSD version.
		 */

		/*
		 * Lock mfctable.  This is the same bucket as mfcbp (same
		 * hash of src, dst); mfcb_lock stays held until the upcall
		 * is queued or error_return drops it.
		 */
		hash = MFCHASH(src, dst);
		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));

		/*
		 * If we are turning off mrouted return an error
		 */
		if (is_mrouter_off(ipst)) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (-1);
		}

		/*
		 * Is there an upcall waiting for this packet?
		 * On a match the loop exits with mfc_rt->mfc_mutex held.
		 */
		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
		    mfc_rt = mfc_rt->mfc_next) {
			mutex_enter(&mfc_rt->mfc_mutex);
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: MFCTAB hash %d o 0x%x"
				    " g 0x%x\n",
				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			/* There is an upcall */
			if ((src == mfc_rt->mfc_origin.s_addr) &&
			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
			    (mfc_rt->mfc_rte != NULL) &&
			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
				break;
			}
			mutex_exit(&mfc_rt->mfc_mutex);
		}
		/* No upcall, so make a new entry into mfctable */
		if (mfc_rt == NULL) {
			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (mfc_rt == NULL) {
				ipst->ips_mrtstat->mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory "
				    "for mfc, mfc_rt\n"));
				goto error_return;
			} else
				new_mfc = B_TRUE;
			/* Get resources */
			/* TODO could copy header and dup rest */
			/* mp_copy becomes the upcall message to the daemon. */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				ipst->ips_mrtstat->mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory for "
				    "mblk, mp_copy\n"));
				goto error_return;
			}
			/* Match the locked state of the "found" case above. */
			mutex_enter(&mfc_rt->mfc_mutex);
		}
		/* Get resources for rte, whether first rte or not first. */
		/* Add this packet into rtdetq */
		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
		if (rte == NULL) {
			ipst->ips_mrtstat->mrts_fwd_drop++;
			mutex_exit(&mfc_rt->mfc_mutex);
			ip1dbg(("ip_mforward: out of memory for"
			    " rtdetq, rte\n"));
			goto error_return;
		}

		/* mp0 is the copy that waits on the queue for a route. */
		mp0 = copymsg(mp);
		if (mp0 == NULL) {
			ipst->ips_mrtstat->mrts_fwd_drop++;
			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}
		rte->mp		= mp0;
		if (pim_reg_packet) {
			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
			rte->ill =
			    ipst->ips_vifs[ipst->ips_reg_vif_num].
			    v_ipif->ipif_ill;
		} else {
			rte->ill = ill;
		}
		rte->rte_next	= NULL;

		/*
		 * Determine if upcall q (rtdetq) has overflowed.
		 * mfc_rt->mfc_rte is null by mi_zalloc
		 * if it is the first message.
		 */
		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
		    rte_m = rte_m->rte_next)
			npkts++;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: upcalls %d\n", npkts);
		}
		if (npkts > MAX_UPQ) {
			ipst->ips_mrtstat->mrts_upq_ovflw++;
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}

		if (npkts == 0) {	/* first upcall */
			int i = 0;
			/*
			 * Now finish installing the new mfc! Now that we have
			 * resources!  Insert new entry at head of hash chain.
			 * Use src and dst which are ipaddr_t's.
			 */
			mfc_rt->mfc_origin.s_addr = src;
			mfc_rt->mfc_mcastgrp.s_addr = dst;

			mutex_enter(&ipst->ips_numvifs_mutex);
			for (i = 0; i < (int)ipst->ips_numvifs; i++)
				mfc_rt->mfc_ttls[i] = 0;
			mutex_exit(&ipst->ips_numvifs_mutex);
			mfc_rt->mfc_parent = ALL_VIFS;

			/* Link into table */
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
				    "g 0x%x\n", hash,
				    ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
			mfc_rt->mfc_rte = NULL;
		}

		/* Link in the upcall */
		/* First upcall */
		if (mfc_rt->mfc_rte == NULL)
			mfc_rt->mfc_rte = rte;
		else {
			/* not the first upcall: append at the tail */
			prev_rte = mfc_rt->mfc_rte;
			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
			    prev_rte = rte1, rte1 = rte1->rte_next)
				;
			prev_rte->rte_next = rte;
		}

		/*
		 * No upcalls waiting, this is first one, so send a message to
		 * routing daemon to install a route into kernel table.
		 */
		if (npkts == 0) {
			struct igmpmsg	*im;
			/* ipha_protocol is 0, for upcall */
			ASSERT(mp_copy != NULL);
			/* The igmpmsg header overlays the start of the copy. */
			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype	= IGMPMSG_NOCACHE;
			im->im_mbz = 0;
			mutex_enter(&ipst->ips_numvifs_mutex);
			if (pim_reg_packet) {
				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
				mutex_exit(&ipst->ips_numvifs_mutex);
			} else {
				/*
				 * XXX do we need to hold locks here ?
				 */
				/* Report the first vif whose ill is ours. */
				for (vifi = 0;
				    vifi < ipst->ips_numvifs;
				    vifi++) {
					if (ipst->ips_vifs[vifi].v_ipif == NULL)
						continue;
					if (ipst->ips_vifs[vifi].
					    v_ipif->ipif_ill == ill) {
						im->im_vif = (uchar_t)vifi;
						break;
					}
				}
				mutex_exit(&ipst->ips_numvifs_mutex);
				ASSERT(vifi < ipst->ips_numvifs);
			}

			ipst->ips_mrtstat->mrts_upcalls++;
			/* Timer to discard upcalls if mrouted is too slow */
			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
			/*
			 * Pass to RAWIP.  The ill fields of ira are cleared
			 * for the duration of the call and restored after.
			 */
			ira->ira_ill = ira->ira_rill = NULL;
			(mrouter->conn_recv)(mrouter, mp_copy, NULL, ira);
			ira->ira_ill = ill;
			ira->ira_rill = rill;
		} else {
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
			/*
			 * NOTE(review): mp_copy is only assigned when a new
			 * mfc was allocated, which implies npkts == 0, so it
			 * appears to always be NULL on this path -- confirm
			 * that ip_drop_input() and freemsg() accept NULL.
			 */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ip_mforward - upcall already waiting",
			    mp_copy, ill);
			freemsg(mp_copy);
		}

		MFCB_REFRELE(mfcbp);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
		/*
		 * Every jump here is made with mfcb_lock still held and
		 * with mfc_rt->mfc_mutex either never taken or already
		 * dropped.  mfc_rt is freed only if it was allocated by this
		 * call; all jumps happen before a new entry is linked into
		 * the table.
		 */
	error_return:
		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
		MFCB_REFRELE(mfcbp);
		if (mfc_rt != NULL && (new_mfc == B_TRUE))
			mi_free((char *)mfc_rt);
		if (rte != NULL)
			mi_free((char *)rte);
		if (mp_copy != NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ip_mforward error", mp_copy, ill);
			freemsg(mp_copy);
		}
		if (mp0 != NULL)
			freemsg(mp0);
		return (-1);
	}
}

/*
 * Clean up the mfctable cache entry if upcall is not serviced.
 * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19110Sstevel@tonic-gate */ 19120Sstevel@tonic-gate static void 19130Sstevel@tonic-gate expire_upcalls(void *arg) 19140Sstevel@tonic-gate { 19150Sstevel@tonic-gate struct mfc *mfc_rt = arg; 19160Sstevel@tonic-gate uint_t hash; 19170Sstevel@tonic-gate struct mfc *prev_mfc, *mfc0; 19183448Sdh155122 ip_stack_t *ipst; 19195240Snordmark conn_t *mrouter; 19203448Sdh155122 19213448Sdh155122 if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { 19223448Sdh155122 cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); 19233448Sdh155122 return; 19243448Sdh155122 } 19253448Sdh155122 ipst = mfc_rt->mfc_rte->ill->ill_ipst; 19265240Snordmark mrouter = ipst->ips_ip_g_mrouter; 19270Sstevel@tonic-gate 19280Sstevel@tonic-gate hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); 19293448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 19305240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 19310Sstevel@tonic-gate "expire_upcalls: hash %d s %x g %x", 19320Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr), 19330Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 19340Sstevel@tonic-gate } 19353448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 19360Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 19370Sstevel@tonic-gate /* 19380Sstevel@tonic-gate * if timeout has been set to zero, than the 19390Sstevel@tonic-gate * entry has been filled, no need to delete it. 19400Sstevel@tonic-gate */ 19410Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id == 0) 19420Sstevel@tonic-gate goto done; 19433448Sdh155122 ipst->ips_mrtstat->mrts_cache_cleanups++; 19440Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 19450Sstevel@tonic-gate 19460Sstevel@tonic-gate /* Determine entry to be cleaned up in cache table. 
*/ 19473448Sdh155122 for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0; 19480Sstevel@tonic-gate prev_mfc = mfc0, mfc0 = mfc0->mfc_next) 19490Sstevel@tonic-gate if (mfc0 == mfc_rt) 19500Sstevel@tonic-gate break; 19510Sstevel@tonic-gate 19520Sstevel@tonic-gate /* del_mfc takes care of gone mfcs */ 19530Sstevel@tonic-gate ASSERT(prev_mfc != NULL); 19540Sstevel@tonic-gate ASSERT(mfc0 != NULL); 19550Sstevel@tonic-gate 19560Sstevel@tonic-gate /* 19570Sstevel@tonic-gate * Delete the entry from the cache 19580Sstevel@tonic-gate */ 19593448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 19600Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 19610Sstevel@tonic-gate 19620Sstevel@tonic-gate /* 19630Sstevel@tonic-gate * release_mfc will drop all queued upcall packets. 19640Sstevel@tonic-gate * and will free the mbuf with the pkt, if, timing info. 19650Sstevel@tonic-gate */ 19660Sstevel@tonic-gate done: 19670Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 19683448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 19690Sstevel@tonic-gate } 19700Sstevel@tonic-gate 19710Sstevel@tonic-gate /* 19720Sstevel@tonic-gate * Packet forwarding routine once entry in the cache is made. 
19730Sstevel@tonic-gate */ 19740Sstevel@tonic-gate static int 19750Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, 19760Sstevel@tonic-gate struct mfc *rt) 19770Sstevel@tonic-gate { 19780Sstevel@tonic-gate vifi_t vifi; 19790Sstevel@tonic-gate struct vif *vifp; 19800Sstevel@tonic-gate ipaddr_t dst = ipha->ipha_dst; 19810Sstevel@tonic-gate size_t plen = msgdsize(mp); 19820Sstevel@tonic-gate vifi_t num_of_vifs; 19833448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 19845240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 198511042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 19863448Sdh155122 19873448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 19885240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 19890Sstevel@tonic-gate "ip_mdq: SEND src %x, ipha_dst %x, ill %s", 19900Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 19910Sstevel@tonic-gate ill->ill_name); 19920Sstevel@tonic-gate } 19930Sstevel@tonic-gate 19940Sstevel@tonic-gate /* Macro to send packet on vif */ 19950Sstevel@tonic-gate #define MC_SEND(ipha, mp, vifp, dst) { \ 19960Sstevel@tonic-gate if ((vifp)->v_flags & VIFF_TUNNEL) \ 19970Sstevel@tonic-gate encap_send((ipha), (mp), (vifp), (dst)); \ 19980Sstevel@tonic-gate else if ((vifp)->v_flags & VIFF_REGISTER) \ 19990Sstevel@tonic-gate register_send((ipha), (mp), (vifp), (dst)); \ 20000Sstevel@tonic-gate else \ 20010Sstevel@tonic-gate phyint_send((ipha), (mp), (vifp), (dst)); \ 20020Sstevel@tonic-gate } 20030Sstevel@tonic-gate 20040Sstevel@tonic-gate vifi = rt->mfc_parent; 20050Sstevel@tonic-gate 20060Sstevel@tonic-gate /* 20070Sstevel@tonic-gate * The value of vifi is MAXVIFS if the pkt had no parent, i.e., 20080Sstevel@tonic-gate * Mrouted had no route. 20090Sstevel@tonic-gate * We wanted the route installed in the mfctable to prevent multiple 20100Sstevel@tonic-gate * tries, so it passed add_mfc(), but is discarded here. The v_ipif is 20110Sstevel@tonic-gate * NULL so we don't want to check the ill. 
Still needed as of Mrouted 20120Sstevel@tonic-gate * 3.6. 20130Sstevel@tonic-gate */ 20140Sstevel@tonic-gate if (vifi == NO_VIF) { 20150Sstevel@tonic-gate ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", 20160Sstevel@tonic-gate ill->ill_name)); 20173448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 20185240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20190Sstevel@tonic-gate "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); 20200Sstevel@tonic-gate } 20210Sstevel@tonic-gate return (-1); /* drop pkt */ 20220Sstevel@tonic-gate } 20230Sstevel@tonic-gate 20243448Sdh155122 if (!lock_good_vif(&ipst->ips_vifs[vifi])) 20250Sstevel@tonic-gate return (-1); 20260Sstevel@tonic-gate /* 20270Sstevel@tonic-gate * The MFC entries are not cleaned up when an ipif goes 20280Sstevel@tonic-gate * away thus this code has to guard against an MFC referencing 20290Sstevel@tonic-gate * an ipif that has been closed. Note: reset_mrt_vif_ipif 20300Sstevel@tonic-gate * sets the v_ipif to NULL when the ipif disappears. 20310Sstevel@tonic-gate */ 20323448Sdh155122 ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL); 20333448Sdh155122 20343448Sdh155122 if (vifi >= ipst->ips_numvifs) { 20350Sstevel@tonic-gate cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs " 20360Sstevel@tonic-gate "%d ill %s viftable ill %s\n", 20373448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 20383448Sdh155122 ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 20393448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 20400Sstevel@tonic-gate return (-1); 20410Sstevel@tonic-gate } 20420Sstevel@tonic-gate /* 20430Sstevel@tonic-gate * Don't forward if it didn't arrive from the parent vif for its 20448485SPeter.Memishian@Sun.COM * origin. 
20450Sstevel@tonic-gate */ 204611042SErik.Nordmark@Sun.COM if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) || 20473448Sdh155122 (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) { 20480Sstevel@tonic-gate /* Came in the wrong interface */ 20490Sstevel@tonic-gate ip1dbg(("ip_mdq: arrived wrong if, vifi %d " 20500Sstevel@tonic-gate "numvifs %d ill %s viftable ill %s\n", 20513448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 205211042SErik.Nordmark@Sun.COM ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name)); 20533448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 20545240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20550Sstevel@tonic-gate "ip_mdq: arrived wrong if, vifi %d ill " 20560Sstevel@tonic-gate "%s viftable ill %s\n", 205711042SErik.Nordmark@Sun.COM (int)vifi, ill->ill_name, 205811042SErik.Nordmark@Sun.COM ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 20590Sstevel@tonic-gate } 20603448Sdh155122 ipst->ips_mrtstat->mrts_wrong_if++; 20610Sstevel@tonic-gate rt->mfc_wrong_if++; 20620Sstevel@tonic-gate 20630Sstevel@tonic-gate /* 20640Sstevel@tonic-gate * If we are doing PIM assert processing and we are forwarding 20650Sstevel@tonic-gate * packets on this interface, and it is a broadcast medium 20660Sstevel@tonic-gate * interface (and not a tunnel), send a message to the routing. 20670Sstevel@tonic-gate * 20680Sstevel@tonic-gate * We use the first ipif on the list, since it's all we have. 20690Sstevel@tonic-gate * Chances are the ipif_flags are the same for ipifs on the ill. 
20700Sstevel@tonic-gate */ 20713448Sdh155122 if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 && 20720Sstevel@tonic-gate (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) && 20733448Sdh155122 !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) { 20740Sstevel@tonic-gate mblk_t *mp_copy; 20750Sstevel@tonic-gate struct igmpmsg *im; 20760Sstevel@tonic-gate 20770Sstevel@tonic-gate /* TODO could copy header and dup rest */ 20780Sstevel@tonic-gate mp_copy = copymsg(mp); 20790Sstevel@tonic-gate if (mp_copy == NULL) { 20803448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 20810Sstevel@tonic-gate ip1dbg(("ip_mdq: out of memory " 20820Sstevel@tonic-gate "for mblk, mp_copy\n")); 20833448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 20840Sstevel@tonic-gate return (-1); 20850Sstevel@tonic-gate } 20860Sstevel@tonic-gate 20870Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 20880Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WRONGVIF; 20890Sstevel@tonic-gate im->im_mbz = 0; 20900Sstevel@tonic-gate im->im_vif = (ushort_t)vifi; 20915240Snordmark /* Pass to RAWIP */ 209211042SErik.Nordmark@Sun.COM 209311042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras)); 209411042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_IS_IPV4; 209511042SErik.Nordmark@Sun.COM iras.ira_ip_hdr_length = 209611042SErik.Nordmark@Sun.COM IPH_HDR_LENGTH(mp_copy->b_rptr); 209711042SErik.Nordmark@Sun.COM iras.ira_pktlen = msgdsize(mp_copy); 209811042SErik.Nordmark@Sun.COM (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras); 209911042SErik.Nordmark@Sun.COM ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 21000Sstevel@tonic-gate } 21013448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21020Sstevel@tonic-gate if (tunnel_src != 0) 21030Sstevel@tonic-gate return (1); 21040Sstevel@tonic-gate else 21050Sstevel@tonic-gate return (0); 21060Sstevel@tonic-gate } 21070Sstevel@tonic-gate /* 21080Sstevel@tonic-gate * If I sourced this packet, it counts as output, else it was input. 
21090Sstevel@tonic-gate */ 21103448Sdh155122 if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) { 21113448Sdh155122 ipst->ips_vifs[vifi].v_pkt_out++; 21123448Sdh155122 ipst->ips_vifs[vifi].v_bytes_out += plen; 21130Sstevel@tonic-gate } else { 21143448Sdh155122 ipst->ips_vifs[vifi].v_pkt_in++; 21153448Sdh155122 ipst->ips_vifs[vifi].v_bytes_in += plen; 21160Sstevel@tonic-gate } 21170Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 21180Sstevel@tonic-gate rt->mfc_pkt_cnt++; 21190Sstevel@tonic-gate rt->mfc_byte_cnt += plen; 21200Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 21213448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21220Sstevel@tonic-gate /* 21230Sstevel@tonic-gate * For each vif, decide if a copy of the packet should be forwarded. 21240Sstevel@tonic-gate * Forward if: 21250Sstevel@tonic-gate * - the vif threshold ttl is non-zero AND 21260Sstevel@tonic-gate * - the pkt ttl exceeds the vif's threshold 21270Sstevel@tonic-gate * A non-zero mfc_ttl indicates that the vif is part of 21280Sstevel@tonic-gate * the output set for the mfc entry. 21290Sstevel@tonic-gate */ 21303448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 21313448Sdh155122 num_of_vifs = ipst->ips_numvifs; 21323448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 21333448Sdh155122 for (vifp = ipst->ips_vifs, vifi = 0; 21343448Sdh155122 vifi < num_of_vifs; 21353448Sdh155122 vifp++, vifi++) { 21360Sstevel@tonic-gate if (!lock_good_vif(vifp)) 21370Sstevel@tonic-gate continue; 21380Sstevel@tonic-gate if ((rt->mfc_ttls[vifi] > 0) && 21390Sstevel@tonic-gate (ipha->ipha_ttl > rt->mfc_ttls[vifi])) { 21400Sstevel@tonic-gate /* 21410Sstevel@tonic-gate * lock_good_vif should not have succedded if 21420Sstevel@tonic-gate * v_ipif is null. 
21430Sstevel@tonic-gate */ 21440Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 21450Sstevel@tonic-gate vifp->v_pkt_out++; 21460Sstevel@tonic-gate vifp->v_bytes_out += plen; 21470Sstevel@tonic-gate MC_SEND(ipha, mp, vifp, dst); 21483448Sdh155122 ipst->ips_mrtstat->mrts_fwd_out++; 21490Sstevel@tonic-gate } 21500Sstevel@tonic-gate unlock_good_vif(vifp); 21510Sstevel@tonic-gate } 21520Sstevel@tonic-gate if (tunnel_src != 0) 21530Sstevel@tonic-gate return (1); 21540Sstevel@tonic-gate else 21550Sstevel@tonic-gate return (0); 21560Sstevel@tonic-gate } 21570Sstevel@tonic-gate 21580Sstevel@tonic-gate /* 21590Sstevel@tonic-gate * Send the packet on physical interface. 21600Sstevel@tonic-gate * Caller assumes can continue to use mp on return. 21610Sstevel@tonic-gate */ 21620Sstevel@tonic-gate /* ARGSUSED */ 21630Sstevel@tonic-gate static void 21640Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 21650Sstevel@tonic-gate { 21660Sstevel@tonic-gate mblk_t *mp_copy; 21673448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 21685240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 21690Sstevel@tonic-gate 21700Sstevel@tonic-gate /* Make a new reference to the packet */ 21710Sstevel@tonic-gate mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ 21720Sstevel@tonic-gate if (mp_copy == NULL) { 21733448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 21740Sstevel@tonic-gate ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n")); 21750Sstevel@tonic-gate return; 21760Sstevel@tonic-gate } 21770Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 21780Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 21790Sstevel@tonic-gate else { 21803448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 21815240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 21820Sstevel@tonic-gate "phyint_send: tbf_contr rate %d " 21830Sstevel@tonic-gate "vifp 0x%p mp 0x%p dst 0x%x", 21840Sstevel@tonic-gate vifp->v_rate_limit, (void *)vifp, (void *)mp, 
dst); 21850Sstevel@tonic-gate } 21860Sstevel@tonic-gate tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr); 21870Sstevel@tonic-gate } 21880Sstevel@tonic-gate } 21890Sstevel@tonic-gate 21900Sstevel@tonic-gate /* 21910Sstevel@tonic-gate * Send the whole packet for REGISTER encapsulation to PIM daemon 21920Sstevel@tonic-gate * Caller assumes it can continue to use mp on return. 21930Sstevel@tonic-gate */ 21940Sstevel@tonic-gate /* ARGSUSED */ 21950Sstevel@tonic-gate static void 21960Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 21970Sstevel@tonic-gate { 21980Sstevel@tonic-gate struct igmpmsg *im; 21990Sstevel@tonic-gate mblk_t *mp_copy; 22000Sstevel@tonic-gate ipha_t *ipha_copy; 220111042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill; 220211042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 22035240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 220411042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 22053448Sdh155122 22063448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 22075240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22080Sstevel@tonic-gate "register_send: src %x, dst %x\n", 22090Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 22100Sstevel@tonic-gate } 22110Sstevel@tonic-gate 22120Sstevel@tonic-gate /* 22130Sstevel@tonic-gate * Copy the old packet & pullup its IP header into the new mblk_t so we 22140Sstevel@tonic-gate * can modify it. Try to fill the new mblk_t since if we don't the 22150Sstevel@tonic-gate * ethernet driver will. 
22160Sstevel@tonic-gate */ 22170Sstevel@tonic-gate mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED); 22180Sstevel@tonic-gate if (mp_copy == NULL) { 22193448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 22203448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 22215240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22220Sstevel@tonic-gate "register_send: allocb failure."); 22230Sstevel@tonic-gate } 22240Sstevel@tonic-gate return; 22250Sstevel@tonic-gate } 22260Sstevel@tonic-gate 22270Sstevel@tonic-gate /* 22280Sstevel@tonic-gate * Bump write pointer to account for igmpmsg being added. 22290Sstevel@tonic-gate */ 22300Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg); 22310Sstevel@tonic-gate 22320Sstevel@tonic-gate /* 22330Sstevel@tonic-gate * Chain packet to new mblk_t. 22340Sstevel@tonic-gate */ 22350Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 22363448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 22373448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 22385240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22390Sstevel@tonic-gate "register_send: copymsg failure."); 22400Sstevel@tonic-gate } 22410Sstevel@tonic-gate freeb(mp_copy); 22420Sstevel@tonic-gate return; 22430Sstevel@tonic-gate } 22440Sstevel@tonic-gate 22450Sstevel@tonic-gate /* 22465240Snordmark * icmp_input() asserts that IP version field is set to an 22470Sstevel@tonic-gate * appropriate version. Hence, the struct igmpmsg that this really 22480Sstevel@tonic-gate * becomes, needs to have the correct IP version field. 22490Sstevel@tonic-gate */ 22500Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 22510Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 22520Sstevel@tonic-gate 22530Sstevel@tonic-gate /* 22540Sstevel@tonic-gate * The kernel uses the struct igmpmsg header to encode the messages to 22550Sstevel@tonic-gate * the multicast routing daemon. 
Fill in the fields in the header 22560Sstevel@tonic-gate * starting with the message type which is IGMPMSG_WHOLEPKT 22570Sstevel@tonic-gate */ 22580Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 22590Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WHOLEPKT; 22600Sstevel@tonic-gate im->im_src.s_addr = ipha->ipha_src; 22610Sstevel@tonic-gate im->im_dst.s_addr = ipha->ipha_dst; 22620Sstevel@tonic-gate 22630Sstevel@tonic-gate /* 22640Sstevel@tonic-gate * Must Be Zero. This is because the struct igmpmsg is really an IP 22650Sstevel@tonic-gate * header with renamed fields and the multicast routing daemon uses 22660Sstevel@tonic-gate * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages. 22670Sstevel@tonic-gate */ 22680Sstevel@tonic-gate im->im_mbz = 0; 22690Sstevel@tonic-gate 22703448Sdh155122 ++ipst->ips_mrtstat->mrts_upcalls; 227111042SErik.Nordmark@Sun.COM if (IPCL_IS_NONSTR(mrouter) ? mrouter->conn_flow_cntrld : 227211042SErik.Nordmark@Sun.COM !canputnext(mrouter->conn_rq)) { 22733448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regsend_drops; 22743448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 22755240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22760Sstevel@tonic-gate "register_send: register upcall failure."); 22770Sstevel@tonic-gate } 227811042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 227911042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill); 22800Sstevel@tonic-gate freemsg(mp_copy); 22810Sstevel@tonic-gate } else { 22825240Snordmark /* Pass to RAWIP */ 228311042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras)); 228411042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_IS_IPV4; 228511042SErik.Nordmark@Sun.COM iras.ira_ip_hdr_length = sizeof (ipha_t); 228611042SErik.Nordmark@Sun.COM iras.ira_pktlen = msgdsize(mp_copy); 228711042SErik.Nordmark@Sun.COM (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras); 228811042SErik.Nordmark@Sun.COM ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 
22890Sstevel@tonic-gate } 22900Sstevel@tonic-gate } 22910Sstevel@tonic-gate 22920Sstevel@tonic-gate /* 22930Sstevel@tonic-gate * pim_validate_cksum handles verification of the checksum in the 22940Sstevel@tonic-gate * pim header. For PIM Register packets, the checksum is calculated 22950Sstevel@tonic-gate * across the PIM header only. For all other packets, the checksum 22960Sstevel@tonic-gate * is for the PIM header and remainder of the packet. 22970Sstevel@tonic-gate * 22980Sstevel@tonic-gate * returns: B_TRUE, if checksum is okay. 22990Sstevel@tonic-gate * B_FALSE, if checksum is not valid. 23000Sstevel@tonic-gate */ 23010Sstevel@tonic-gate static boolean_t 23020Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) 23030Sstevel@tonic-gate { 23040Sstevel@tonic-gate mblk_t *mp_dup; 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate if ((mp_dup = dupmsg(mp)) == NULL) 23070Sstevel@tonic-gate return (B_FALSE); 23080Sstevel@tonic-gate 23090Sstevel@tonic-gate mp_dup->b_rptr += IPH_HDR_LENGTH(ip); 23100Sstevel@tonic-gate if (pimp->pim_type == PIM_REGISTER) 23110Sstevel@tonic-gate mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN; 23120Sstevel@tonic-gate if (IP_CSUM(mp_dup, 0, 0)) { 23130Sstevel@tonic-gate freemsg(mp_dup); 23140Sstevel@tonic-gate return (B_FALSE); 23150Sstevel@tonic-gate } 23160Sstevel@tonic-gate freemsg(mp_dup); 23170Sstevel@tonic-gate return (B_TRUE); 23180Sstevel@tonic-gate } 23190Sstevel@tonic-gate 23200Sstevel@tonic-gate /* 232111042SErik.Nordmark@Sun.COM * Process PIM protocol packets i.e. IP Protocol 103. 232211042SErik.Nordmark@Sun.COM * Register messages are decapsulated and sent onto multicast forwarding. 232311042SErik.Nordmark@Sun.COM * 232411042SErik.Nordmark@Sun.COM * Return NULL for a bad packet that is discarded here. 232511042SErik.Nordmark@Sun.COM * Return mp if the message is OK and should be handed to "raw" receivers. 
232611042SErik.Nordmark@Sun.COM * Callers of pim_input() may need to reinitialize variables that were copied 232711042SErik.Nordmark@Sun.COM * from the mblk as this calls pullupmsg(). 23280Sstevel@tonic-gate */ 232911042SErik.Nordmark@Sun.COM mblk_t * 233011042SErik.Nordmark@Sun.COM pim_input(mblk_t *mp, ip_recv_attr_t *ira) 23310Sstevel@tonic-gate { 23320Sstevel@tonic-gate ipha_t *eip, *ip; 23330Sstevel@tonic-gate int iplen, pimlen, iphlen; 23340Sstevel@tonic-gate struct pim *pimp; /* pointer to a pim struct */ 23350Sstevel@tonic-gate uint32_t *reghdr; 233611042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill; 23373448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 23385240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 23390Sstevel@tonic-gate 23400Sstevel@tonic-gate /* 23410Sstevel@tonic-gate * Pullup the msg for PIM protocol processing. 23420Sstevel@tonic-gate */ 23430Sstevel@tonic-gate if (pullupmsg(mp, -1) == 0) { 23443448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 234511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 234611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_nomemory", mp, ill); 23470Sstevel@tonic-gate freemsg(mp); 234811042SErik.Nordmark@Sun.COM return (NULL); 23490Sstevel@tonic-gate } 23500Sstevel@tonic-gate 23510Sstevel@tonic-gate ip = (ipha_t *)mp->b_rptr; 23520Sstevel@tonic-gate iplen = ip->ipha_length; 23530Sstevel@tonic-gate iphlen = IPH_HDR_LENGTH(ip); 23540Sstevel@tonic-gate pimlen = ntohs(iplen) - iphlen; 23550Sstevel@tonic-gate 23560Sstevel@tonic-gate /* 23570Sstevel@tonic-gate * Validate lengths 23580Sstevel@tonic-gate */ 23590Sstevel@tonic-gate if (pimlen < PIM_MINLEN) { 23603448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_malformed; 23613448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 23625240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23630Sstevel@tonic-gate "pim_input: length not at least minlen"); 23640Sstevel@tonic-gate } 236511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, 
ipIfStatsInDiscards); 236611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_malformed", mp, ill); 23670Sstevel@tonic-gate freemsg(mp); 236811042SErik.Nordmark@Sun.COM return (NULL); 23690Sstevel@tonic-gate } 23700Sstevel@tonic-gate 23710Sstevel@tonic-gate /* 23720Sstevel@tonic-gate * Point to the PIM header. 23730Sstevel@tonic-gate */ 23740Sstevel@tonic-gate pimp = (struct pim *)((caddr_t)ip + iphlen); 23750Sstevel@tonic-gate 23760Sstevel@tonic-gate /* 23770Sstevel@tonic-gate * Check the version number. 23780Sstevel@tonic-gate */ 23790Sstevel@tonic-gate if (pimp->pim_vers != PIM_VERSION) { 23803448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badversion; 23813448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 23825240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23830Sstevel@tonic-gate "pim_input: unknown version of PIM"); 23840Sstevel@tonic-gate } 238511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 238611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_badversion", mp, ill); 23870Sstevel@tonic-gate freemsg(mp); 238811042SErik.Nordmark@Sun.COM return (NULL); 23890Sstevel@tonic-gate } 23900Sstevel@tonic-gate 23910Sstevel@tonic-gate /* 23920Sstevel@tonic-gate * Validate the checksum 23930Sstevel@tonic-gate */ 23940Sstevel@tonic-gate if (!pim_validate_cksum(mp, ip, pimp)) { 23953448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; 23963448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 23975240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23980Sstevel@tonic-gate "pim_input: invalid checksum"); 23990Sstevel@tonic-gate } 240011042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 240111042SErik.Nordmark@Sun.COM ip_drop_input("pim_rcv_badcsum", mp, ill); 24020Sstevel@tonic-gate freemsg(mp); 240311042SErik.Nordmark@Sun.COM return (NULL); 24040Sstevel@tonic-gate } 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate if (pimp->pim_type != PIM_REGISTER) 240711042SErik.Nordmark@Sun.COM return (mp); 
24080Sstevel@tonic-gate 24090Sstevel@tonic-gate reghdr = (uint32_t *)(pimp + 1); 24100Sstevel@tonic-gate eip = (ipha_t *)(reghdr + 1); 24110Sstevel@tonic-gate 24120Sstevel@tonic-gate /* 24130Sstevel@tonic-gate * check if the inner packet is destined to mcast group 24140Sstevel@tonic-gate */ 24150Sstevel@tonic-gate if (!CLASSD(eip->ipha_dst)) { 24163448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badregisters; 24173448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24185240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24190Sstevel@tonic-gate "pim_input: Inner pkt not mcast .. !"); 24200Sstevel@tonic-gate } 242111042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 242211042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_badregisters", mp, ill); 24230Sstevel@tonic-gate freemsg(mp); 242411042SErik.Nordmark@Sun.COM return (NULL); 24250Sstevel@tonic-gate } 24263448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24275240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24280Sstevel@tonic-gate "register from %x, to %x, len %d", 24290Sstevel@tonic-gate ntohl(eip->ipha_src), 24300Sstevel@tonic-gate ntohl(eip->ipha_dst), 24310Sstevel@tonic-gate ntohs(eip->ipha_length)); 24320Sstevel@tonic-gate } 24330Sstevel@tonic-gate /* 24340Sstevel@tonic-gate * If the null register bit is not set, decapsulate 24350Sstevel@tonic-gate * the packet before forwarding it. 
243611042SErik.Nordmark@Sun.COM * Avoid this in no register vif 24370Sstevel@tonic-gate */ 243811042SErik.Nordmark@Sun.COM if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) && 243911042SErik.Nordmark@Sun.COM ipst->ips_reg_vif_num != ALL_VIFS) { 24400Sstevel@tonic-gate mblk_t *mp_copy; 244111042SErik.Nordmark@Sun.COM uint_t saved_pktlen; 24420Sstevel@tonic-gate 24430Sstevel@tonic-gate /* Copy the message */ 24440Sstevel@tonic-gate if ((mp_copy = copymsg(mp)) == NULL) { 24453448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 244611042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 244711042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim_nomemory", mp, ill); 24480Sstevel@tonic-gate freemsg(mp); 244911042SErik.Nordmark@Sun.COM return (NULL); 24500Sstevel@tonic-gate } 24510Sstevel@tonic-gate 24520Sstevel@tonic-gate /* 24530Sstevel@tonic-gate * Decapsulate the packet and give it to 24540Sstevel@tonic-gate * register_mforward. 24550Sstevel@tonic-gate */ 245611042SErik.Nordmark@Sun.COM mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr); 245711042SErik.Nordmark@Sun.COM saved_pktlen = ira->ira_pktlen; 245811042SErik.Nordmark@Sun.COM ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr); 245911042SErik.Nordmark@Sun.COM if (register_mforward(mp_copy, ira) != 0) { 246011042SErik.Nordmark@Sun.COM /* register_mforward already called ip_drop_input */ 24610Sstevel@tonic-gate freemsg(mp); 246211042SErik.Nordmark@Sun.COM ira->ira_pktlen = saved_pktlen; 246311042SErik.Nordmark@Sun.COM return (NULL); 24640Sstevel@tonic-gate } 246511042SErik.Nordmark@Sun.COM ira->ira_pktlen = saved_pktlen; 24660Sstevel@tonic-gate } 24670Sstevel@tonic-gate 24680Sstevel@tonic-gate /* 24690Sstevel@tonic-gate * Pass all valid PIM packets up to any process(es) listening on a raw 24700Sstevel@tonic-gate * PIM socket. For Solaris it is done right after pim_input() is 24710Sstevel@tonic-gate * called. 
24720Sstevel@tonic-gate */ 247311042SErik.Nordmark@Sun.COM return (mp); 24740Sstevel@tonic-gate } 24750Sstevel@tonic-gate 24760Sstevel@tonic-gate /* 24770Sstevel@tonic-gate * PIM sparse mode hook. Called by pim_input after decapsulating 24780Sstevel@tonic-gate * the packet. Loop back the packet, as if we have received it. 24790Sstevel@tonic-gate * In pim_input() we have to check if the destination is a multicast address. 24800Sstevel@tonic-gate */ 24810Sstevel@tonic-gate static int 248211042SErik.Nordmark@Sun.COM register_mforward(mblk_t *mp, ip_recv_attr_t *ira) 24830Sstevel@tonic-gate { 248411042SErik.Nordmark@Sun.COM ire_t *ire; 248511042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr; 248611042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill; 24873448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 24885240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 24893448Sdh155122 24903448Sdh155122 ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); 24913448Sdh155122 24923448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 24935240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24940Sstevel@tonic-gate "register_mforward: src %x, dst %x\n", 24950Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 24960Sstevel@tonic-gate } 24970Sstevel@tonic-gate /* 24980Sstevel@tonic-gate * Need to pass in to ip_mforward() the information that the 249911042SErik.Nordmark@Sun.COM * packet has arrived on the register_vif. We mark it with 250011042SErik.Nordmark@Sun.COM * the IRAF_PIM_REGISTER attribute. 250111042SErik.Nordmark@Sun.COM * pim_input verified that the (inner) destination is multicast, 250211042SErik.Nordmark@Sun.COM * hence we skip the generic code in ip_input. 
25030Sstevel@tonic-gate */ 250411042SErik.Nordmark@Sun.COM ira->ira_flags |= IRAF_PIM_REGISTER; 25053448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regforwards; 250611042SErik.Nordmark@Sun.COM 250711042SErik.Nordmark@Sun.COM if (!CLASSD(ipha->ipha_dst)) { 250811042SErik.Nordmark@Sun.COM ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES, 2509*11457SErik.Nordmark@Sun.COM ira->ira_tsl, MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst, 2510*11457SErik.Nordmark@Sun.COM NULL, NULL, NULL); 251111042SErik.Nordmark@Sun.COM } else { 251211042SErik.Nordmark@Sun.COM ire = ire_multicast(ill); 251311042SErik.Nordmark@Sun.COM } 251411042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); 251511042SErik.Nordmark@Sun.COM /* Normally this will return the IRE_MULTICAST */ 251611042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 251711042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 251811042SErik.Nordmark@Sun.COM ip_drop_input("mrts_pim RTF_REJECT", mp, ill); 251911042SErik.Nordmark@Sun.COM freemsg(mp); 252011042SErik.Nordmark@Sun.COM ire_refrele(ire); 252111042SErik.Nordmark@Sun.COM return (-1); 252211042SErik.Nordmark@Sun.COM } 252311042SErik.Nordmark@Sun.COM ASSERT(ire->ire_type & IRE_MULTICAST); 252411042SErik.Nordmark@Sun.COM (*ire->ire_recvfn)(ire, mp, ipha, ira); 252511042SErik.Nordmark@Sun.COM ire_refrele(ire); 252611042SErik.Nordmark@Sun.COM 25270Sstevel@tonic-gate return (0); 25280Sstevel@tonic-gate } 25290Sstevel@tonic-gate 25300Sstevel@tonic-gate /* 25310Sstevel@tonic-gate * Send an encapsulated packet. 25320Sstevel@tonic-gate * Caller assumes can continue to use mp when routine returns. 
25330Sstevel@tonic-gate */ 25340Sstevel@tonic-gate /* ARGSUSED */ 25350Sstevel@tonic-gate static void 25360Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 25370Sstevel@tonic-gate { 25380Sstevel@tonic-gate mblk_t *mp_copy; 25390Sstevel@tonic-gate ipha_t *ipha_copy; 25400Sstevel@tonic-gate size_t len; 25413448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 25425240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 25433448Sdh155122 25443448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 25455240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25463448Sdh155122 "encap_send: vif %ld enter", 25473448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 25480Sstevel@tonic-gate } 25490Sstevel@tonic-gate len = ntohs(ipha->ipha_length); 25500Sstevel@tonic-gate 25510Sstevel@tonic-gate /* 25520Sstevel@tonic-gate * Copy the old packet & pullup it's IP header into the 25530Sstevel@tonic-gate * new mbuf so we can modify it. Try to fill the new 25540Sstevel@tonic-gate * mbuf since if we don't the ethernet driver will. 25550Sstevel@tonic-gate */ 25560Sstevel@tonic-gate mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED); 25570Sstevel@tonic-gate if (mp_copy == NULL) 25580Sstevel@tonic-gate return; 25590Sstevel@tonic-gate mp_copy->b_rptr += 32; 25600Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr); 25610Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 25620Sstevel@tonic-gate freeb(mp_copy); 25630Sstevel@tonic-gate return; 25640Sstevel@tonic-gate } 25650Sstevel@tonic-gate 25660Sstevel@tonic-gate /* 25670Sstevel@tonic-gate * Fill in the encapsulating IP header. 25680Sstevel@tonic-gate * Remote tunnel dst in rmt_addr, from add_vif(). 
25690Sstevel@tonic-gate */ 25700Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 25710Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 25720Sstevel@tonic-gate ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET); 25730Sstevel@tonic-gate ipha_copy->ipha_length = htons(len + sizeof (ipha_t)); 25740Sstevel@tonic-gate ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr; 25750Sstevel@tonic-gate ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr; 25760Sstevel@tonic-gate ASSERT(ipha_copy->ipha_ident == 0); 25770Sstevel@tonic-gate 25780Sstevel@tonic-gate /* Turn the encapsulated IP header back into a valid one. */ 25790Sstevel@tonic-gate ipha = (ipha_t *)mp_copy->b_cont->b_rptr; 25800Sstevel@tonic-gate ipha->ipha_ttl--; 25810Sstevel@tonic-gate ipha->ipha_hdr_checksum = 0; 25820Sstevel@tonic-gate ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 25830Sstevel@tonic-gate 258411042SErik.Nordmark@Sun.COM ipha_copy->ipha_ttl = ipha->ipha_ttl; 258511042SErik.Nordmark@Sun.COM 25863448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 25875240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25880Sstevel@tonic-gate "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); 25890Sstevel@tonic-gate } 25900Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 25910Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 25920Sstevel@tonic-gate else 25930Sstevel@tonic-gate /* ipha is from the original header */ 25940Sstevel@tonic-gate tbf_control(vifp, mp_copy, ipha); 25950Sstevel@tonic-gate } 25960Sstevel@tonic-gate 25970Sstevel@tonic-gate /* 259811042SErik.Nordmark@Sun.COM * De-encapsulate a packet and feed it back through IP input if it 259911042SErik.Nordmark@Sun.COM * matches one of our multicast tunnels. 260011042SErik.Nordmark@Sun.COM * 26010Sstevel@tonic-gate * This routine is called whenever IP gets a packet with prototype 260211042SErik.Nordmark@Sun.COM * IPPROTO_ENCAP and a local destination address and the packet didn't 260311042SErik.Nordmark@Sun.COM * match one of our configured IP-in-IP tunnels. 
26040Sstevel@tonic-gate */ 26050Sstevel@tonic-gate void 260611042SErik.Nordmark@Sun.COM ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira) 26070Sstevel@tonic-gate { 26080Sstevel@tonic-gate ipha_t *ipha = (ipha_t *)mp->b_rptr; 26090Sstevel@tonic-gate ipha_t *ipha_encap; 26100Sstevel@tonic-gate int hlen = IPH_HDR_LENGTH(ipha); 261111042SErik.Nordmark@Sun.COM int hlen_encap; 26120Sstevel@tonic-gate ipaddr_t src; 26130Sstevel@tonic-gate struct vif *vifp; 261411042SErik.Nordmark@Sun.COM ire_t *ire; 261511042SErik.Nordmark@Sun.COM ill_t *ill = ira->ira_ill; 26163448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 26175240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 26180Sstevel@tonic-gate 261911042SErik.Nordmark@Sun.COM /* Make sure we have all of the inner header */ 262011042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen); 262111042SErik.Nordmark@Sun.COM if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) { 262211042SErik.Nordmark@Sun.COM ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira); 262311042SErik.Nordmark@Sun.COM if (ipha == NULL) { 262411042SErik.Nordmark@Sun.COM ipst->ips_mrtstat->mrts_bad_tunnel++; 262511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 262611042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: too short", mp, ill); 262711042SErik.Nordmark@Sun.COM freemsg(mp); 262811042SErik.Nordmark@Sun.COM return; 262911042SErik.Nordmark@Sun.COM } 263011042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen); 263111042SErik.Nordmark@Sun.COM } 263211042SErik.Nordmark@Sun.COM hlen_encap = IPH_HDR_LENGTH(ipha_encap); 263311042SErik.Nordmark@Sun.COM if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) { 263411042SErik.Nordmark@Sun.COM ipha = ip_pullup(mp, hlen + hlen_encap, ira); 263511042SErik.Nordmark@Sun.COM if (ipha == NULL) { 263611042SErik.Nordmark@Sun.COM ipst->ips_mrtstat->mrts_bad_tunnel++; 263711042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 
263811042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: too short", mp, ill); 263911042SErik.Nordmark@Sun.COM freemsg(mp); 264011042SErik.Nordmark@Sun.COM return; 264111042SErik.Nordmark@Sun.COM } 264211042SErik.Nordmark@Sun.COM ipha_encap = (ipha_t *)((char *)ipha + hlen); 264311042SErik.Nordmark@Sun.COM } 264411042SErik.Nordmark@Sun.COM 26450Sstevel@tonic-gate /* 26460Sstevel@tonic-gate * Dump the packet if it's not to a multicast destination or if 26470Sstevel@tonic-gate * we don't have an encapsulating tunnel with the source. 26480Sstevel@tonic-gate * Note: This code assumes that the remote site IP address 26490Sstevel@tonic-gate * uniquely identifies the tunnel (i.e., that this site has 26500Sstevel@tonic-gate * at most one tunnel with the remote site). 26510Sstevel@tonic-gate */ 26520Sstevel@tonic-gate if (!CLASSD(ipha_encap->ipha_dst)) { 26533448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 26540Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: bad tunnel\n")); 265511042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 265611042SErik.Nordmark@Sun.COM ip_drop_input("mrts_bad_tunnel", mp, ill); 26570Sstevel@tonic-gate freemsg(mp); 26580Sstevel@tonic-gate return; 26590Sstevel@tonic-gate } 26600Sstevel@tonic-gate src = (ipaddr_t)ipha->ipha_src; 26613448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 26623448Sdh155122 if (src != ipst->ips_last_encap_src) { 26630Sstevel@tonic-gate struct vif *vife; 26640Sstevel@tonic-gate 26653448Sdh155122 vifp = ipst->ips_vifs; 26663448Sdh155122 vife = vifp + ipst->ips_numvifs; 26673448Sdh155122 ipst->ips_last_encap_src = src; 26683448Sdh155122 ipst->ips_last_encap_vif = 0; 26690Sstevel@tonic-gate for (; vifp < vife; ++vifp) { 26700Sstevel@tonic-gate if (!lock_good_vif(vifp)) 26710Sstevel@tonic-gate continue; 26720Sstevel@tonic-gate if (vifp->v_rmt_addr.s_addr == src) { 26730Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) 26743448Sdh155122 ipst->ips_last_encap_vif = vifp; 26753448Sdh155122 if 
(ipst->ips_ip_mrtdebug > 1) { 26765240Snordmark (void) mi_strlog(mrouter->conn_rq, 26770Sstevel@tonic-gate 1, SL_TRACE, 26780Sstevel@tonic-gate "ip_mroute_decap: good tun " 26790Sstevel@tonic-gate "vif %ld with %x", 26803448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 26810Sstevel@tonic-gate ntohl(src)); 26820Sstevel@tonic-gate } 26830Sstevel@tonic-gate unlock_good_vif(vifp); 26840Sstevel@tonic-gate break; 26850Sstevel@tonic-gate } 26860Sstevel@tonic-gate unlock_good_vif(vifp); 26870Sstevel@tonic-gate } 26880Sstevel@tonic-gate } 26893448Sdh155122 if ((vifp = ipst->ips_last_encap_vif) == 0) { 26903448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 26913448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 269211042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 269311042SErik.Nordmark@Sun.COM ip_drop_input("mrts_bad_tunnel", mp, ill); 26940Sstevel@tonic-gate freemsg(mp); 26950Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 26963448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src))); 26970Sstevel@tonic-gate return; 26980Sstevel@tonic-gate } 26993448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 27000Sstevel@tonic-gate 27010Sstevel@tonic-gate /* 27020Sstevel@tonic-gate * Need to pass in the tunnel source to ip_mforward (so that it can 270311042SErik.Nordmark@Sun.COM * verify that the packet arrived over the correct vif.) 
270411042SErik.Nordmark@Sun.COM */ 270511042SErik.Nordmark@Sun.COM ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET; 270611042SErik.Nordmark@Sun.COM ira->ira_mroute_tunnel = src; 270711042SErik.Nordmark@Sun.COM mp->b_rptr += hlen; 270811042SErik.Nordmark@Sun.COM ira->ira_pktlen -= hlen; 270911042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length = hlen_encap; 271011042SErik.Nordmark@Sun.COM 271111042SErik.Nordmark@Sun.COM /* 271211042SErik.Nordmark@Sun.COM * We don't redo any of the filtering in ill_input_full_v4 and we 271311042SErik.Nordmark@Sun.COM * have checked that all of ipha_encap and any IP options are 271411042SErik.Nordmark@Sun.COM * pulled up. Hence we call ire_recv_multicast_v4 directly. 271511042SErik.Nordmark@Sun.COM * However, we have to check for RSVP as in ip_input_full_v4 271611042SErik.Nordmark@Sun.COM * and if so we pass it to ire_recv_broadcast_v4 for local delivery 271711042SErik.Nordmark@Sun.COM * to the rsvpd. 27180Sstevel@tonic-gate */ 271911042SErik.Nordmark@Sun.COM if (ipha_encap->ipha_protocol == IPPROTO_RSVP && 272011042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) { 272111042SErik.Nordmark@Sun.COM ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill, 272211042SErik.Nordmark@Sun.COM ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR, 2723*11457SErik.Nordmark@Sun.COM IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL); 272411042SErik.Nordmark@Sun.COM } else { 272511042SErik.Nordmark@Sun.COM ire = ire_multicast(ill); 272611042SErik.Nordmark@Sun.COM } 272711042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); 272811042SErik.Nordmark@Sun.COM /* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */ 272911042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 273011042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 273111042SErik.Nordmark@Sun.COM ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill); 273211042SErik.Nordmark@Sun.COM freemsg(mp); 
273311042SErik.Nordmark@Sun.COM ire_refrele(ire); 273411042SErik.Nordmark@Sun.COM return; 273511042SErik.Nordmark@Sun.COM } 273611042SErik.Nordmark@Sun.COM ire->ire_ib_pkt_count++; 273711042SErik.Nordmark@Sun.COM ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST)); 273811042SErik.Nordmark@Sun.COM (*ire->ire_recvfn)(ire, mp, ipha_encap, ira); 273911042SErik.Nordmark@Sun.COM ire_refrele(ire); 27400Sstevel@tonic-gate } 27410Sstevel@tonic-gate 27420Sstevel@tonic-gate /* 27430Sstevel@tonic-gate * Remove all records with v_ipif == ipif. Called when an interface goes away 27440Sstevel@tonic-gate * (stream closed). Called as writer. 27450Sstevel@tonic-gate */ 27460Sstevel@tonic-gate void 27470Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif) 27480Sstevel@tonic-gate { 27490Sstevel@tonic-gate vifi_t vifi, tmp_vifi; 27500Sstevel@tonic-gate vifi_t num_of_vifs; 27513448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 27520Sstevel@tonic-gate 27530Sstevel@tonic-gate /* Can't check vifi >= 0 since vifi_t is unsigned! */ 27540Sstevel@tonic-gate 27553448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 27563448Sdh155122 num_of_vifs = ipst->ips_numvifs; 27573448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 27580Sstevel@tonic-gate 27590Sstevel@tonic-gate for (vifi = num_of_vifs; vifi != 0; vifi--) { 27600Sstevel@tonic-gate tmp_vifi = vifi - 1; 27613448Sdh155122 if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) { 276211042SErik.Nordmark@Sun.COM (void) del_vif(&tmp_vifi, ipst); 27630Sstevel@tonic-gate } 27640Sstevel@tonic-gate } 27650Sstevel@tonic-gate } 27660Sstevel@tonic-gate 27670Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away. Called by ill_delete. 
*/ 27680Sstevel@tonic-gate void 27690Sstevel@tonic-gate reset_mrt_ill(ill_t *ill) 27700Sstevel@tonic-gate { 277111042SErik.Nordmark@Sun.COM struct mfc *rt; 27720Sstevel@tonic-gate struct rtdetq *rte; 277311042SErik.Nordmark@Sun.COM int i; 27743448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 27755240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 277611042SErik.Nordmark@Sun.COM timeout_id_t id; 27770Sstevel@tonic-gate 27780Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 27793448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 27803448Sdh155122 if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { 27813448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 27825240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 27833448Sdh155122 SL_TRACE, 27840Sstevel@tonic-gate "reset_mrt_ill: mfctable [%d]", i); 27850Sstevel@tonic-gate } 27860Sstevel@tonic-gate while (rt != NULL) { 27870Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 27880Sstevel@tonic-gate while ((rte = rt->mfc_rte) != NULL) { 278911042SErik.Nordmark@Sun.COM if (rte->ill == ill && 279011042SErik.Nordmark@Sun.COM (id = rt->mfc_timeout_id) != 0) { 279111042SErik.Nordmark@Sun.COM /* 279211042SErik.Nordmark@Sun.COM * Its ok to drop the lock, the 279311042SErik.Nordmark@Sun.COM * struct cannot be freed since 279411042SErik.Nordmark@Sun.COM * we have a ref on the hash 279511042SErik.Nordmark@Sun.COM * bucket. 
279611042SErik.Nordmark@Sun.COM */ 279711042SErik.Nordmark@Sun.COM mutex_exit(&rt->mfc_mutex); 279811042SErik.Nordmark@Sun.COM (void) untimeout(id); 279911042SErik.Nordmark@Sun.COM mutex_enter(&rt->mfc_mutex); 280011042SErik.Nordmark@Sun.COM } 28010Sstevel@tonic-gate if (rte->ill == ill) { 28023448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28033448Sdh155122 (void) mi_strlog( 28045240Snordmark mrouter->conn_rq, 28053448Sdh155122 1, SL_TRACE, 28063448Sdh155122 "reset_mrt_ill: " 28077240Srh87107 "ill 0x%p", (void *)ill); 28080Sstevel@tonic-gate } 28090Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 28100Sstevel@tonic-gate freemsg(rte->mp); 28110Sstevel@tonic-gate mi_free((char *)rte); 28120Sstevel@tonic-gate } 28130Sstevel@tonic-gate } 28140Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 28150Sstevel@tonic-gate rt = rt->mfc_next; 28160Sstevel@tonic-gate } 28170Sstevel@tonic-gate } 28183448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 28190Sstevel@tonic-gate } 28200Sstevel@tonic-gate } 28210Sstevel@tonic-gate 28220Sstevel@tonic-gate /* 28230Sstevel@tonic-gate * Token bucket filter module. 28240Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 
28250Sstevel@tonic-gate */ 28260Sstevel@tonic-gate static void 28270Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) 28280Sstevel@tonic-gate { 28290Sstevel@tonic-gate size_t p_len = msgdsize(mp); 28300Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 28310Sstevel@tonic-gate timeout_id_t id = 0; 283211042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill; 283311042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 28345240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 28350Sstevel@tonic-gate 28360Sstevel@tonic-gate /* Drop if packet is too large */ 28370Sstevel@tonic-gate if (p_len > MAX_BKT_SIZE) { 28383448Sdh155122 ipst->ips_mrtstat->mrts_pkt2large++; 283911042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 284011042SErik.Nordmark@Sun.COM ip_drop_output("tbf_control - too large", mp, ill); 28410Sstevel@tonic-gate freemsg(mp); 28420Sstevel@tonic-gate return; 28430Sstevel@tonic-gate } 28443448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28455240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28460Sstevel@tonic-gate "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", 28473448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, 28480Sstevel@tonic-gate ntohl(ipha->ipha_dst)); 28490Sstevel@tonic-gate } 28500Sstevel@tonic-gate 28510Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 28520Sstevel@tonic-gate 28530Sstevel@tonic-gate tbf_update_tokens(vifp); 28540Sstevel@tonic-gate 28550Sstevel@tonic-gate /* 28560Sstevel@tonic-gate * If there are enough tokens, 28570Sstevel@tonic-gate * and the queue is empty, send this packet out. 
28580Sstevel@tonic-gate */ 28593448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28605240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28610Sstevel@tonic-gate "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", 28623448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, 28630Sstevel@tonic-gate t->tbf_q_len); 28640Sstevel@tonic-gate } 28650Sstevel@tonic-gate /* No packets are queued */ 28660Sstevel@tonic-gate if (t->tbf_q_len == 0) { 28670Sstevel@tonic-gate /* queue empty, send packet if enough tokens */ 28680Sstevel@tonic-gate if (p_len <= t->tbf_n_tok) { 28690Sstevel@tonic-gate t->tbf_n_tok -= p_len; 28700Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 28710Sstevel@tonic-gate tbf_send_packet(vifp, mp); 28720Sstevel@tonic-gate return; 28730Sstevel@tonic-gate } else { 28740Sstevel@tonic-gate /* Queue packet and timeout till later */ 28750Sstevel@tonic-gate tbf_queue(vifp, mp); 28760Sstevel@tonic-gate ASSERT(vifp->v_timeout_id == 0); 28770Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 28780Sstevel@tonic-gate TBF_REPROCESS); 28790Sstevel@tonic-gate } 28800Sstevel@tonic-gate } else if (t->tbf_q_len < t->tbf_max_q_len) { 28810Sstevel@tonic-gate /* Finite queue length, so queue pkts and process queue */ 28820Sstevel@tonic-gate tbf_queue(vifp, mp); 28830Sstevel@tonic-gate tbf_process_q(vifp); 28840Sstevel@tonic-gate } else { 28850Sstevel@tonic-gate /* Check that we have UDP header with IP header */ 28860Sstevel@tonic-gate size_t hdr_length = IPH_HDR_LENGTH(ipha) + 28875240Snordmark sizeof (struct udphdr); 28880Sstevel@tonic-gate 28890Sstevel@tonic-gate if ((mp->b_wptr - mp->b_rptr) < hdr_length) { 28900Sstevel@tonic-gate if (!pullupmsg(mp, hdr_length)) { 289111042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, 289211042SErik.Nordmark@Sun.COM ipIfStatsOutDiscards); 289311042SErik.Nordmark@Sun.COM ip_drop_output("tbf_control - pullup", mp, ill); 28940Sstevel@tonic-gate freemsg(mp); 28950Sstevel@tonic-gate ip1dbg(("tbf_ctl: 
couldn't pullup udp hdr, " 28960Sstevel@tonic-gate "vif %ld src 0x%x dst 0x%x\n", 28973448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 28980Sstevel@tonic-gate ntohl(ipha->ipha_src), 28990Sstevel@tonic-gate ntohl(ipha->ipha_dst))); 29000Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 29010Sstevel@tonic-gate return; 29020Sstevel@tonic-gate } else 29030Sstevel@tonic-gate /* Have to reassign ipha after pullupmsg */ 29040Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 29050Sstevel@tonic-gate } 29060Sstevel@tonic-gate /* 29070Sstevel@tonic-gate * Queue length too much, 29080Sstevel@tonic-gate * try to selectively dq, or queue and process 29090Sstevel@tonic-gate */ 29100Sstevel@tonic-gate if (!tbf_dq_sel(vifp, ipha)) { 29113448Sdh155122 ipst->ips_mrtstat->mrts_q_overflow++; 291211042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 291311042SErik.Nordmark@Sun.COM ip_drop_output("mrts_q_overflow", mp, ill); 29140Sstevel@tonic-gate freemsg(mp); 29150Sstevel@tonic-gate } else { 29160Sstevel@tonic-gate tbf_queue(vifp, mp); 29170Sstevel@tonic-gate tbf_process_q(vifp); 29180Sstevel@tonic-gate } 29190Sstevel@tonic-gate } 29200Sstevel@tonic-gate if (t->tbf_q_len == 0) { 29210Sstevel@tonic-gate id = vifp->v_timeout_id; 29220Sstevel@tonic-gate vifp->v_timeout_id = 0; 29230Sstevel@tonic-gate } 29240Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 29250Sstevel@tonic-gate if (id != 0) 29260Sstevel@tonic-gate (void) untimeout(id); 29270Sstevel@tonic-gate } 29280Sstevel@tonic-gate 29290Sstevel@tonic-gate /* 29300Sstevel@tonic-gate * Adds a packet to the tbf queue at the interface. 29310Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 
29320Sstevel@tonic-gate */ 29330Sstevel@tonic-gate static void 29340Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp) 29350Sstevel@tonic-gate { 29360Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 29373448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 29385240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29393448Sdh155122 29403448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 29415240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 29423448Sdh155122 "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); 29430Sstevel@tonic-gate } 29440Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29450Sstevel@tonic-gate 29460Sstevel@tonic-gate if (t->tbf_t == NULL) { 29470Sstevel@tonic-gate /* Queue was empty */ 29480Sstevel@tonic-gate t->tbf_q = mp; 29490Sstevel@tonic-gate } else { 29500Sstevel@tonic-gate /* Insert at tail */ 29510Sstevel@tonic-gate t->tbf_t->b_next = mp; 29520Sstevel@tonic-gate } 29530Sstevel@tonic-gate /* set new tail pointer */ 29540Sstevel@tonic-gate t->tbf_t = mp; 29550Sstevel@tonic-gate 29560Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL; 29570Sstevel@tonic-gate 29580Sstevel@tonic-gate t->tbf_q_len++; 29590Sstevel@tonic-gate } 29600Sstevel@tonic-gate 29610Sstevel@tonic-gate /* 29620Sstevel@tonic-gate * Process the queue at the vif interface. 29630Sstevel@tonic-gate * Drops the tbf_lock when sending packets. 29640Sstevel@tonic-gate * 29650Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 
29660Sstevel@tonic-gate */ 29670Sstevel@tonic-gate static void 29680Sstevel@tonic-gate tbf_process_q(struct vif *vifp) 29690Sstevel@tonic-gate { 29700Sstevel@tonic-gate mblk_t *mp; 29710Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 29720Sstevel@tonic-gate size_t len; 29733448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 29745240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29753448Sdh155122 29763448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 29775240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 29780Sstevel@tonic-gate "tbf_process_q 1: vif %ld qlen = %d", 29793448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len); 29800Sstevel@tonic-gate } 29810Sstevel@tonic-gate 29820Sstevel@tonic-gate /* 29830Sstevel@tonic-gate * Loop through the queue at the interface and send 29840Sstevel@tonic-gate * as many packets as possible. 29850Sstevel@tonic-gate */ 29860Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29870Sstevel@tonic-gate 29880Sstevel@tonic-gate while (t->tbf_q_len > 0) { 29890Sstevel@tonic-gate mp = t->tbf_q; 29900Sstevel@tonic-gate len = (size_t)msgdsize(mp); /* length of ip pkt */ 29910Sstevel@tonic-gate 29920Sstevel@tonic-gate /* Determine if the packet can be sent */ 29930Sstevel@tonic-gate if (len <= t->tbf_n_tok) { 29940Sstevel@tonic-gate /* 29950Sstevel@tonic-gate * If so, reduce no. of tokens, dequeue the packet, 29960Sstevel@tonic-gate * send the packet. 
29970Sstevel@tonic-gate */ 29980Sstevel@tonic-gate t->tbf_n_tok -= len; 29990Sstevel@tonic-gate 30000Sstevel@tonic-gate t->tbf_q = mp->b_next; 30010Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 30020Sstevel@tonic-gate t->tbf_t = NULL; 30030Sstevel@tonic-gate } 30040Sstevel@tonic-gate mp->b_next = NULL; 30050Sstevel@tonic-gate /* Exit mutex before sending packet, then re-enter */ 30060Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 30070Sstevel@tonic-gate tbf_send_packet(vifp, mp); 30080Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 30090Sstevel@tonic-gate } else 30100Sstevel@tonic-gate break; 30110Sstevel@tonic-gate } 30120Sstevel@tonic-gate } 30130Sstevel@tonic-gate 30140Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer. */ 30150Sstevel@tonic-gate static void 30160Sstevel@tonic-gate tbf_reprocess_q(void *arg) 30170Sstevel@tonic-gate { 30180Sstevel@tonic-gate struct vif *vifp = arg; 30193448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 30205240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 30210Sstevel@tonic-gate 30220Sstevel@tonic-gate mutex_enter(&vifp->v_tbf->tbf_lock); 30230Sstevel@tonic-gate vifp->v_timeout_id = 0; 30240Sstevel@tonic-gate tbf_update_tokens(vifp); 30250Sstevel@tonic-gate 30260Sstevel@tonic-gate tbf_process_q(vifp); 30270Sstevel@tonic-gate 30280Sstevel@tonic-gate if (vifp->v_tbf->tbf_q_len > 0) { 30290Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 30300Sstevel@tonic-gate TBF_REPROCESS); 30310Sstevel@tonic-gate } 30320Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 30330Sstevel@tonic-gate 30343448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 30355240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 30360Sstevel@tonic-gate "tbf_reprcess_q: vif %ld timeout id = %p", 30373448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id); 30380Sstevel@tonic-gate } 30390Sstevel@tonic-gate } 30400Sstevel@tonic-gate 30410Sstevel@tonic-gate /* 
30420Sstevel@tonic-gate * Function that will selectively discard a member of the tbf queue, 30430Sstevel@tonic-gate * based on the precedence value and the priority. 30440Sstevel@tonic-gate * 30450Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 30460Sstevel@tonic-gate */ 30470Sstevel@tonic-gate static int 30480Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha) 30490Sstevel@tonic-gate { 30500Sstevel@tonic-gate uint_t p; 30510Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 30520Sstevel@tonic-gate mblk_t **np; 30530Sstevel@tonic-gate mblk_t *last, *mp; 305411042SErik.Nordmark@Sun.COM ill_t *ill = vifp->v_ipif->ipif_ill; 305511042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 30565240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 30573448Sdh155122 30583448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 30595240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 30600Sstevel@tonic-gate "dq_sel: vif %ld dst 0x%x", 30613448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst)); 30620Sstevel@tonic-gate } 30630Sstevel@tonic-gate 30640Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 30650Sstevel@tonic-gate p = priority(vifp, ipha); 30660Sstevel@tonic-gate 30670Sstevel@tonic-gate np = &t->tbf_q; 30680Sstevel@tonic-gate last = NULL; 30690Sstevel@tonic-gate while ((mp = *np) != NULL) { 30700Sstevel@tonic-gate if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) { 30710Sstevel@tonic-gate *np = mp->b_next; 30720Sstevel@tonic-gate /* If removing the last packet, fix the tail pointer */ 30730Sstevel@tonic-gate if (mp == t->tbf_t) 30740Sstevel@tonic-gate t->tbf_t = last; 30750Sstevel@tonic-gate mp->b_prev = mp->b_next = NULL; 307611042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 307711042SErik.Nordmark@Sun.COM ip_drop_output("tbf_dq_send", mp, ill); 30780Sstevel@tonic-gate freemsg(mp); 30790Sstevel@tonic-gate /* 30800Sstevel@tonic-gate * It's impossible for the queue to be empty, 
but 30810Sstevel@tonic-gate * we check anyway. 30820Sstevel@tonic-gate */ 30830Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 30840Sstevel@tonic-gate t->tbf_t = NULL; 30850Sstevel@tonic-gate } 30863448Sdh155122 ipst->ips_mrtstat->mrts_drop_sel++; 30870Sstevel@tonic-gate return (1); 30880Sstevel@tonic-gate } 30890Sstevel@tonic-gate np = &mp->b_next; 30900Sstevel@tonic-gate last = mp; 30910Sstevel@tonic-gate } 30920Sstevel@tonic-gate return (0); 30930Sstevel@tonic-gate } 30940Sstevel@tonic-gate 30950Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint. */ 30960Sstevel@tonic-gate static void 30970Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp) 30980Sstevel@tonic-gate { 309911042SErik.Nordmark@Sun.COM ipif_t *ipif = vifp->v_ipif; 310011042SErik.Nordmark@Sun.COM ill_t *ill = ipif->ipif_ill; 310111042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 31025240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 310311042SErik.Nordmark@Sun.COM ipha_t *ipha; 310411042SErik.Nordmark@Sun.COM 310511042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 31060Sstevel@tonic-gate /* If encap tunnel options */ 31070Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) { 310811042SErik.Nordmark@Sun.COM ip_xmit_attr_t ixas; 310911042SErik.Nordmark@Sun.COM 31103448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 31115240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 311211042SErik.Nordmark@Sun.COM "tbf_send_packet: ENCAP tunnel vif %ld", 31133448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 31140Sstevel@tonic-gate } 311511042SErik.Nordmark@Sun.COM bzero(&ixas, sizeof (ixas)); 311611042SErik.Nordmark@Sun.COM ixas.ixa_flags = IXAF_IS_IPV4 | IXAF_NO_TTL_CHANGE; 311711042SErik.Nordmark@Sun.COM ixas.ixa_ipst = ipst; 311811042SErik.Nordmark@Sun.COM ixas.ixa_ifindex = 0; 311911042SErik.Nordmark@Sun.COM ixas.ixa_cred = kcred; 312011042SErik.Nordmark@Sun.COM ixas.ixa_cpid = NOPID; 312111042SErik.Nordmark@Sun.COM ixas.ixa_tsl = NULL; 
312211042SErik.Nordmark@Sun.COM ixas.ixa_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */ 312311042SErik.Nordmark@Sun.COM ixas.ixa_pktlen = ntohs(ipha->ipha_length); 312411042SErik.Nordmark@Sun.COM ixas.ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha); 31250Sstevel@tonic-gate 31260Sstevel@tonic-gate /* 312711042SErik.Nordmark@Sun.COM * Feed into ip_output_simple which will set the ident field 312811042SErik.Nordmark@Sun.COM * and checksum the encapsulating header. 31290Sstevel@tonic-gate * BSD gets the cached route vifp->v_route from ip_output() 31300Sstevel@tonic-gate * to speed up route table lookups. Not necessary in SunOS 5.x. 313111042SErik.Nordmark@Sun.COM * One could make multicast forwarding faster by putting an 313211042SErik.Nordmark@Sun.COM * ip_xmit_attr_t in each vif thereby caching the ire/nce. 31330Sstevel@tonic-gate */ 313411042SErik.Nordmark@Sun.COM (void) ip_output_simple(mp, &ixas); 313511042SErik.Nordmark@Sun.COM ixa_cleanup(&ixas); 31360Sstevel@tonic-gate return; 31370Sstevel@tonic-gate 31380Sstevel@tonic-gate /* phyint */ 31390Sstevel@tonic-gate } else { 31400Sstevel@tonic-gate /* Need to loop back to members on the outgoing interface. 
*/ 314111042SErik.Nordmark@Sun.COM ipaddr_t dst; 314211042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 314311042SErik.Nordmark@Sun.COM nce_t *nce; 314411042SErik.Nordmark@Sun.COM 314511042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras)); 314611042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_IS_IPV4; 314711042SErik.Nordmark@Sun.COM iras.ira_ill = iras.ira_rill = ill; 314811042SErik.Nordmark@Sun.COM iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 314911042SErik.Nordmark@Sun.COM iras.ira_zoneid = GLOBAL_ZONEID; /* Multicast router in GZ */ 315011042SErik.Nordmark@Sun.COM iras.ira_pktlen = ntohs(ipha->ipha_length); 315111042SErik.Nordmark@Sun.COM iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha); 315211042SErik.Nordmark@Sun.COM 315311042SErik.Nordmark@Sun.COM dst = ipha->ipha_dst; 315411042SErik.Nordmark@Sun.COM if (ill_hasmembers_v4(ill, dst)) { 315511042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_LOOPBACK_COPY; 31560Sstevel@tonic-gate } 31573448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 31585240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 31590Sstevel@tonic-gate "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", 31603448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst)); 31610Sstevel@tonic-gate } 316211042SErik.Nordmark@Sun.COM /* 316311042SErik.Nordmark@Sun.COM * Find an NCE which matches the nexthop. 316411042SErik.Nordmark@Sun.COM * For a pt-pt interface we use the other end of the pt-pt 316511042SErik.Nordmark@Sun.COM * link. 
316611042SErik.Nordmark@Sun.COM */ 316711042SErik.Nordmark@Sun.COM if (ipif->ipif_flags & IPIF_POINTOPOINT) { 316811042SErik.Nordmark@Sun.COM dst = ipif->ipif_pp_dst_addr; 316911042SErik.Nordmark@Sun.COM nce = arp_nce_init(ill, dst, ill->ill_net_type); 317011042SErik.Nordmark@Sun.COM } else { 317111042SErik.Nordmark@Sun.COM nce = arp_nce_init(ill, dst, IRE_MULTICAST); 317211042SErik.Nordmark@Sun.COM } 317311042SErik.Nordmark@Sun.COM if (nce == NULL) { 317411042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 317511042SErik.Nordmark@Sun.COM ip_drop_output("tbf_send_packet - no nce", mp, ill); 317611042SErik.Nordmark@Sun.COM freemsg(mp); 317711042SErik.Nordmark@Sun.COM return; 317811042SErik.Nordmark@Sun.COM } 317911042SErik.Nordmark@Sun.COM 318011042SErik.Nordmark@Sun.COM /* 318111042SErik.Nordmark@Sun.COM * We don't remeber the incoming ill. Thus we 318211042SErik.Nordmark@Sun.COM * pretend the packet arrived on the outbound ill. This means 318311042SErik.Nordmark@Sun.COM * statistics for input errors will be increased on the wrong 318411042SErik.Nordmark@Sun.COM * ill but that isn't a big deal. 318511042SErik.Nordmark@Sun.COM */ 318611042SErik.Nordmark@Sun.COM ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mtu, 0); 318711042SErik.Nordmark@Sun.COM ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 318811042SErik.Nordmark@Sun.COM 318911042SErik.Nordmark@Sun.COM nce_refrele(nce); 31900Sstevel@tonic-gate } 31910Sstevel@tonic-gate } 31920Sstevel@tonic-gate 31930Sstevel@tonic-gate /* 31940Sstevel@tonic-gate * Determine the current time and then the elapsed time (between the last time 31950Sstevel@tonic-gate * and time now). Update the no. of tokens in the bucket. 
31960Sstevel@tonic-gate */ 31970Sstevel@tonic-gate static void 31980Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp) 31990Sstevel@tonic-gate { 32000Sstevel@tonic-gate timespec_t tp; 32010Sstevel@tonic-gate hrtime_t tm; 32020Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 32033448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 32045240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 32050Sstevel@tonic-gate 32060Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 32070Sstevel@tonic-gate 32080Sstevel@tonic-gate /* Time in secs and nsecs, rate limit in kbits/sec */ 32090Sstevel@tonic-gate gethrestime(&tp); 32100Sstevel@tonic-gate 32110Sstevel@tonic-gate /*LINTED*/ 32120Sstevel@tonic-gate TV_DELTA(tp, t->tbf_last_pkt_t, tm); 32130Sstevel@tonic-gate 32140Sstevel@tonic-gate /* 32150Sstevel@tonic-gate * This formula is actually 32160Sstevel@tonic-gate * "time in seconds" * "bytes/second". Scaled for nsec. 32170Sstevel@tonic-gate * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8) 32180Sstevel@tonic-gate * 32190Sstevel@tonic-gate * The (1000/1024) was introduced in add_vif to optimize 32200Sstevel@tonic-gate * this divide into a shift. 32210Sstevel@tonic-gate */ 32220Sstevel@tonic-gate t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8; 32230Sstevel@tonic-gate t->tbf_last_pkt_t = tp; 32240Sstevel@tonic-gate 32250Sstevel@tonic-gate if (t->tbf_n_tok > MAX_BKT_SIZE) 32260Sstevel@tonic-gate t->tbf_n_tok = MAX_BKT_SIZE; 32273448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 32285240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 32290Sstevel@tonic-gate "tbf_update_tok: tm %lld tok %d vif %ld", 32303448Sdh155122 tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs)); 32310Sstevel@tonic-gate } 32320Sstevel@tonic-gate } 32330Sstevel@tonic-gate 32340Sstevel@tonic-gate /* 32350Sstevel@tonic-gate * Priority currently is based on port nos. 
32360Sstevel@tonic-gate * Different forwarding mechanisms have different ways 32370Sstevel@tonic-gate * of obtaining the port no. Hence, the vif must be 32380Sstevel@tonic-gate * given along with the packet itself. 32390Sstevel@tonic-gate * 32400Sstevel@tonic-gate */ 32410Sstevel@tonic-gate static int 32420Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha) 32430Sstevel@tonic-gate { 32440Sstevel@tonic-gate int prio; 32453448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 32465240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 32470Sstevel@tonic-gate 32480Sstevel@tonic-gate /* Temporary hack; may add general packet classifier some day */ 32490Sstevel@tonic-gate 32500Sstevel@tonic-gate ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock)); 32510Sstevel@tonic-gate 32520Sstevel@tonic-gate /* 32530Sstevel@tonic-gate * The UDP port space is divided up into four priority ranges: 32540Sstevel@tonic-gate * [0, 16384) : unclassified - lowest priority 32550Sstevel@tonic-gate * [16384, 32768) : audio - highest priority 32560Sstevel@tonic-gate * [32768, 49152) : whiteboard - medium priority 32570Sstevel@tonic-gate * [49152, 65536) : video - low priority 32580Sstevel@tonic-gate */ 32590Sstevel@tonic-gate 32600Sstevel@tonic-gate if (ipha->ipha_protocol == IPPROTO_UDP) { 32610Sstevel@tonic-gate struct udphdr *udp = 32620Sstevel@tonic-gate (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha)); 32630Sstevel@tonic-gate switch (ntohs(udp->uh_dport) & 0xc000) { 32640Sstevel@tonic-gate case 0x4000: 32650Sstevel@tonic-gate prio = 70; 32660Sstevel@tonic-gate break; 32670Sstevel@tonic-gate case 0x8000: 32680Sstevel@tonic-gate prio = 60; 32690Sstevel@tonic-gate break; 32700Sstevel@tonic-gate case 0xc000: 32710Sstevel@tonic-gate prio = 55; 32720Sstevel@tonic-gate break; 32730Sstevel@tonic-gate default: 32740Sstevel@tonic-gate prio = 50; 32750Sstevel@tonic-gate break; 32760Sstevel@tonic-gate } 32773448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 32785240Snordmark (void) 
mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 32790Sstevel@tonic-gate "priority: port %x prio %d\n", 32800Sstevel@tonic-gate ntohs(udp->uh_dport), prio); 32810Sstevel@tonic-gate } 32820Sstevel@tonic-gate } else 32830Sstevel@tonic-gate prio = 50; /* default priority */ 32840Sstevel@tonic-gate return (prio); 32850Sstevel@tonic-gate } 32860Sstevel@tonic-gate 32870Sstevel@tonic-gate /* 32880Sstevel@tonic-gate * End of token bucket filter modifications 32890Sstevel@tonic-gate */ 32900Sstevel@tonic-gate 32910Sstevel@tonic-gate 32920Sstevel@tonic-gate 32930Sstevel@tonic-gate /* 32940Sstevel@tonic-gate * Produces data for netstat -M. 32950Sstevel@tonic-gate */ 32960Sstevel@tonic-gate int 32973448Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst) 32980Sstevel@tonic-gate { 32993448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 33003448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 33013448Sdh155122 if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat, 33023448Sdh155122 sizeof (struct mrtstat))) { 33030Sstevel@tonic-gate ip0dbg(("ip_mroute_stats: failed %ld bytes\n", 33043448Sdh155122 (size_t)sizeof (struct mrtstat))); 33050Sstevel@tonic-gate return (0); 33060Sstevel@tonic-gate } 33070Sstevel@tonic-gate return (1); 33080Sstevel@tonic-gate } 33090Sstevel@tonic-gate 33100Sstevel@tonic-gate /* 33110Sstevel@tonic-gate * Sends info for SNMP's MIB. 33120Sstevel@tonic-gate */ 33130Sstevel@tonic-gate int 33143448Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst) 33150Sstevel@tonic-gate { 33160Sstevel@tonic-gate struct vifctl vi; 33170Sstevel@tonic-gate vifi_t vifi; 33180Sstevel@tonic-gate 33193448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 33203448Sdh155122 for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) { 33213448Sdh155122 if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0) 33220Sstevel@tonic-gate continue; 33230Sstevel@tonic-gate /* 33240Sstevel@tonic-gate * No locks here, an approximation is fine. 
33250Sstevel@tonic-gate */ 33260Sstevel@tonic-gate vi.vifc_vifi = vifi; 33273448Sdh155122 vi.vifc_flags = ipst->ips_vifs[vifi].v_flags; 33283448Sdh155122 vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold; 33293448Sdh155122 vi.vifc_rate_limit = ipst->ips_vifs[vifi].v_rate_limit; 33303448Sdh155122 vi.vifc_lcl_addr = ipst->ips_vifs[vifi].v_lcl_addr; 33313448Sdh155122 vi.vifc_rmt_addr = ipst->ips_vifs[vifi].v_rmt_addr; 33323448Sdh155122 vi.vifc_pkt_in = ipst->ips_vifs[vifi].v_pkt_in; 33333448Sdh155122 vi.vifc_pkt_out = ipst->ips_vifs[vifi].v_pkt_out; 33340Sstevel@tonic-gate 33350Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { 33360Sstevel@tonic-gate ip0dbg(("ip_mroute_vif: failed %ld bytes\n", 33370Sstevel@tonic-gate (size_t)sizeof (vi))); 33389658SSowmini.Varadhan@Sun.COM mutex_exit(&ipst->ips_numvifs_mutex); 33390Sstevel@tonic-gate return (0); 33400Sstevel@tonic-gate } 33410Sstevel@tonic-gate } 33423448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 33430Sstevel@tonic-gate return (1); 33440Sstevel@tonic-gate } 33450Sstevel@tonic-gate 33460Sstevel@tonic-gate /* 33470Sstevel@tonic-gate * Called by ip_snmp_get to send up multicast routing table. 33480Sstevel@tonic-gate */ 33490Sstevel@tonic-gate int 33503448Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst) 33510Sstevel@tonic-gate { 33520Sstevel@tonic-gate int i, j; 33530Sstevel@tonic-gate struct mfc *rt; 33540Sstevel@tonic-gate struct mfcctl mfcc; 33550Sstevel@tonic-gate 33560Sstevel@tonic-gate /* 33570Sstevel@tonic-gate * Make sure multicast has not been turned off. 
33580Sstevel@tonic-gate */ 33593448Sdh155122 if (is_mrouter_off(ipst)) 33600Sstevel@tonic-gate return (1); 33610Sstevel@tonic-gate 33620Sstevel@tonic-gate /* Loop over all hash buckets and their chains */ 33630Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 33643448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 33653448Sdh155122 for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) { 33660Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 33670Sstevel@tonic-gate if (rt->mfc_rte != NULL || 33680Sstevel@tonic-gate (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 33690Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 33700Sstevel@tonic-gate continue; 33710Sstevel@tonic-gate } 33720Sstevel@tonic-gate mfcc.mfcc_origin = rt->mfc_origin; 33730Sstevel@tonic-gate mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; 33740Sstevel@tonic-gate mfcc.mfcc_parent = rt->mfc_parent; 33750Sstevel@tonic-gate mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; 33763448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 33773448Sdh155122 for (j = 0; j < (int)ipst->ips_numvifs; j++) 33780Sstevel@tonic-gate mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; 33793448Sdh155122 for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++) 33800Sstevel@tonic-gate mfcc.mfcc_ttls[j] = 0; 33813448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 33820Sstevel@tonic-gate 33830Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 33840Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&mfcc, 33850Sstevel@tonic-gate sizeof (mfcc))) { 33863448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 33870Sstevel@tonic-gate ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", 33880Sstevel@tonic-gate (size_t)sizeof (mfcc))); 33890Sstevel@tonic-gate return (0); 33900Sstevel@tonic-gate } 33910Sstevel@tonic-gate } 33923448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 33930Sstevel@tonic-gate } 33940Sstevel@tonic-gate return (1); 33950Sstevel@tonic-gate } 3396