17098Smeem /* 2*8485SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 37098Smeem * Use is subject to license terms. 47098Smeem */ 50Sstevel@tonic-gate /* 60Sstevel@tonic-gate * CDDL HEADER START 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * The contents of this file are subject to the terms of the 91676Sjpk * Common Development and Distribution License (the "License"). 101676Sjpk * You may not use this file except in compliance with the License. 110Sstevel@tonic-gate * 120Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 130Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 140Sstevel@tonic-gate * See the License for the specific language governing permissions 150Sstevel@tonic-gate * and limitations under the License. 160Sstevel@tonic-gate * 170Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 180Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 190Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 200Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 210Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 220Sstevel@tonic-gate * 230Sstevel@tonic-gate * CDDL HEADER END 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* 267240Srh87107 * Copyright 2008 Sun Microsystems, Inc. 270Sstevel@tonic-gate * All rights reserved. Use is subject to license terms. 280Sstevel@tonic-gate */ 290Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */ 300Sstevel@tonic-gate 310Sstevel@tonic-gate /* 320Sstevel@tonic-gate * Procedures for the kernel part of DVMRP, 330Sstevel@tonic-gate * a Distance-Vector Multicast Routing Protocol. 340Sstevel@tonic-gate * (See RFC-1075) 350Sstevel@tonic-gate * Written by David Waitzman, BBN Labs, August 1988. 360Sstevel@tonic-gate * Modified by Steve Deering, Stanford, February 1989. 
370Sstevel@tonic-gate * Modified by Mark J. Steiglitz, Stanford, May, 1991 380Sstevel@tonic-gate * Modified by Van Jacobson, LBL, January 1993 390Sstevel@tonic-gate * Modified by Ajit Thyagarajan, PARC, August 1993 400Sstevel@tonic-gate * Modified by Bill Fenner, PARC, April 1995 410Sstevel@tonic-gate * 420Sstevel@tonic-gate * MROUTING 3.5 430Sstevel@tonic-gate */ 440Sstevel@tonic-gate 450Sstevel@tonic-gate /* 460Sstevel@tonic-gate * TODO 470Sstevel@tonic-gate * - function pointer field in vif, void *vif_sendit() 480Sstevel@tonic-gate */ 490Sstevel@tonic-gate 500Sstevel@tonic-gate #include <sys/types.h> 510Sstevel@tonic-gate #include <sys/stream.h> 520Sstevel@tonic-gate #include <sys/stropts.h> 530Sstevel@tonic-gate #include <sys/strlog.h> 540Sstevel@tonic-gate #include <sys/systm.h> 550Sstevel@tonic-gate #include <sys/ddi.h> 560Sstevel@tonic-gate #include <sys/cmn_err.h> 570Sstevel@tonic-gate #include <sys/zone.h> 580Sstevel@tonic-gate 590Sstevel@tonic-gate #include <sys/param.h> 600Sstevel@tonic-gate #include <sys/socket.h> 610Sstevel@tonic-gate #include <sys/vtrace.h> 620Sstevel@tonic-gate #include <sys/debug.h> 630Sstevel@tonic-gate #include <net/if.h> 640Sstevel@tonic-gate #include <sys/sockio.h> 650Sstevel@tonic-gate #include <netinet/in.h> 660Sstevel@tonic-gate #include <net/if_dl.h> 670Sstevel@tonic-gate 680Sstevel@tonic-gate #include <inet/common.h> 690Sstevel@tonic-gate #include <inet/mi.h> 700Sstevel@tonic-gate #include <inet/nd.h> 710Sstevel@tonic-gate #include <inet/mib2.h> 720Sstevel@tonic-gate #include <netinet/ip6.h> 730Sstevel@tonic-gate #include <inet/ip.h> 740Sstevel@tonic-gate #include <inet/snmpcom.h> 750Sstevel@tonic-gate 760Sstevel@tonic-gate #include <netinet/igmp.h> 770Sstevel@tonic-gate #include <netinet/igmp_var.h> 780Sstevel@tonic-gate #include <netinet/udp.h> 790Sstevel@tonic-gate #include <netinet/ip_mroute.h> 800Sstevel@tonic-gate #include <inet/ip_multi.h> 810Sstevel@tonic-gate #include <inet/ip_ire.h> 820Sstevel@tonic-gate #include 
<inet/ip_if.h> 830Sstevel@tonic-gate #include <inet/ipclassifier.h> 840Sstevel@tonic-gate 850Sstevel@tonic-gate #include <netinet/pim.h> 860Sstevel@tonic-gate 870Sstevel@tonic-gate 880Sstevel@tonic-gate /* 890Sstevel@tonic-gate * MT Design: 900Sstevel@tonic-gate * 910Sstevel@tonic-gate * There are three main data structures viftable, mfctable and tbftable that 920Sstevel@tonic-gate * need to be protected against MT races. 930Sstevel@tonic-gate * 940Sstevel@tonic-gate * viftable is a fixed length array of vif structs. There is no lock to protect 950Sstevel@tonic-gate * the whole array, instead each struct is protected by its own individual lock. 960Sstevel@tonic-gate * The value of v_marks in conjunction with the value of v_refcnt determines the 970Sstevel@tonic-gate * current state of a vif structure. One special state that needs mention 980Sstevel@tonic-gate * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates 990Sstevel@tonic-gate * that vif is being initialized. 1000Sstevel@tonic-gate * Each structure is freed when the refcnt goes down to zero. If a delete comes 1010Sstevel@tonic-gate * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED 1020Sstevel@tonic-gate * which prevents the struct from further use. When the refcnt goes to zero 1030Sstevel@tonic-gate * the struct is freed and is marked VIF_MARK_NOTINUSE. 1040Sstevel@tonic-gate * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill 1050Sstevel@tonic-gate * from going away a refhold is put on the ipif before using it. see 1060Sstevel@tonic-gate * lock_good_vif() and unlock_good_vif(). 1070Sstevel@tonic-gate * 1080Sstevel@tonic-gate * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts 1090Sstevel@tonic-gate * of the vif struct. 1100Sstevel@tonic-gate * 1110Sstevel@tonic-gate * tbftable is also a fixed length array of tbf structs and is only accessed 1120Sstevel@tonic-gate * via v_tbf.
It is protected by its own lock tbf_lock. 1130Sstevel@tonic-gate * 1140Sstevel@tonic-gate * Lock Ordering is 1150Sstevel@tonic-gate * v_lock --> tbf_lock 1160Sstevel@tonic-gate * v_lock --> ill_lock 1170Sstevel@tonic-gate * 1180Sstevel@tonic-gate * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb). 1190Sstevel@tonic-gate * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker, 1200Sstevel@tonic-gate * it also maintains a state. These fields are protected by a lock (mfcb_lock). 1210Sstevel@tonic-gate * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to 1220Sstevel@tonic-gate * protect the struct elements. 1230Sstevel@tonic-gate * 1240Sstevel@tonic-gate * mfc structs are dynamically allocated and are singly linked 1250Sstevel@tonic-gate * at the head of the chain. When an mfc structure is to be deleted 1260Sstevel@tonic-gate * it is marked condemned and so is the state in the bucket struct. 1270Sstevel@tonic-gate * When the last walker of the hash bucket exits all the mfc structs 1280Sstevel@tonic-gate * marked condemned are freed. 1290Sstevel@tonic-gate * 1300Sstevel@tonic-gate * Locking Hierarchy: 1310Sstevel@tonic-gate * The bucket lock should be acquired before the mfc struct lock. 1320Sstevel@tonic-gate * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking 1330Sstevel@tonic-gate * operations on the bucket struct. 1340Sstevel@tonic-gate * 1350Sstevel@tonic-gate * last_encap_lock and numvifs_mutex should be acquired after 1360Sstevel@tonic-gate * acquiring vif or mfc locks. These locks protect some global variables. 1370Sstevel@tonic-gate * 1380Sstevel@tonic-gate * The statistics are not currently protected by a lock 1390Sstevel@tonic-gate * causing the stats to be approximate, not exact.
1400Sstevel@tonic-gate */ 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate #define NO_VIF MAXVIFS /* from mrouted, no route for src */ 1430Sstevel@tonic-gate 1440Sstevel@tonic-gate /* 1450Sstevel@tonic-gate * Timeouts: 1460Sstevel@tonic-gate * Upcall timeouts - BSD uses boolean_t mfc->expire and 1470Sstevel@tonic-gate * nexpire[MFCTBLSIZE], the number of times expire has been called. 1480Sstevel@tonic-gate * SunOS 5.x uses mfc->timeout for each mfc. 1490Sstevel@tonic-gate * Some Unixes are limited in the number of simultaneous timeouts 1500Sstevel@tonic-gate * that can be run, SunOS 5.x does not have this restriction. 1510Sstevel@tonic-gate */ 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate /* 1540Sstevel@tonic-gate * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and 1550Sstevel@tonic-gate * UPCALL_EXPIRE is the nmber of timeouts before a particular upcall 1560Sstevel@tonic-gate * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate #define EXPIRE_TIMEOUT (hz/4) /* 4x / second */ 1590Sstevel@tonic-gate #define UPCALL_EXPIRE 6 /* number of timeouts */ 1600Sstevel@tonic-gate 1610Sstevel@tonic-gate /* 1620Sstevel@tonic-gate * Hash function for a source, group entry 1630Sstevel@tonic-gate */ 1640Sstevel@tonic-gate #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 1650Sstevel@tonic-gate ((g) >> 20) ^ ((g) >> 10) ^ (g)) 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate #define TBF_REPROCESS (hz / 100) /* 100x /second */ 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate /* Identify PIM packet that came on a Register interface */ 1700Sstevel@tonic-gate #define PIM_REGISTER_MARKER 0xffffffff 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate /* Function declarations */ 1733448Sdh155122 static int add_mfc(struct mfcctl *, ip_stack_t *); 1745240Snordmark static int add_vif(struct vifctl *, conn_t *, mblk_t *, ip_stack_t *); 1753448Sdh155122 static int del_mfc(struct mfcctl *, ip_stack_t *); 
1765240Snordmark static int del_vif(vifi_t *, conn_t *, mblk_t *, ip_stack_t *); 1770Sstevel@tonic-gate static void del_vifp(struct vif *); 1780Sstevel@tonic-gate static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 1790Sstevel@tonic-gate static void expire_upcalls(void *); 1803448Sdh155122 static void fill_route(struct mfc *, struct mfcctl *, ip_stack_t *); 1813448Sdh155122 static void free_queue(struct mfc *); 1823448Sdh155122 static int get_assert(uchar_t *, ip_stack_t *); 1833448Sdh155122 static int get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *); 1843448Sdh155122 static int get_sg_cnt(struct sioc_sg_req *, ip_stack_t *); 1850Sstevel@tonic-gate static int get_version(uchar_t *); 1863448Sdh155122 static int get_vif_cnt(struct sioc_vif_req *, ip_stack_t *); 1870Sstevel@tonic-gate static int ip_mdq(mblk_t *, ipha_t *, ill_t *, 1880Sstevel@tonic-gate ipaddr_t, struct mfc *); 1895240Snordmark static int ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *); 1900Sstevel@tonic-gate static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 1913448Sdh155122 static int register_mforward(queue_t *, mblk_t *, ill_t *); 1920Sstevel@tonic-gate static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); 1933448Sdh155122 static int set_assert(int *, ip_stack_t *); 1940Sstevel@tonic-gate 1950Sstevel@tonic-gate /* 1960Sstevel@tonic-gate * Token Bucket Filter functions 1970Sstevel@tonic-gate */ 1980Sstevel@tonic-gate static int priority(struct vif *, ipha_t *); 1990Sstevel@tonic-gate static void tbf_control(struct vif *, mblk_t *, ipha_t *); 2000Sstevel@tonic-gate static int tbf_dq_sel(struct vif *, ipha_t *); 2010Sstevel@tonic-gate static void tbf_process_q(struct vif *); 2020Sstevel@tonic-gate static void tbf_queue(struct vif *, mblk_t *); 2030Sstevel@tonic-gate static void tbf_reprocess_q(void *); 2040Sstevel@tonic-gate static void tbf_send_packet(struct vif *, mblk_t *); 2050Sstevel@tonic-gate static void tbf_update_tokens(struct vif 
*); 2060Sstevel@tonic-gate static void release_mfc(struct mfcb *); 2070Sstevel@tonic-gate 2083448Sdh155122 static boolean_t is_mrouter_off(ip_stack_t *); 2090Sstevel@tonic-gate /* 2100Sstevel@tonic-gate * Encapsulation packets 2110Sstevel@tonic-gate */ 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate #define ENCAP_TTL 64 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate /* prototype IP hdr for encapsulated packets */ 2160Sstevel@tonic-gate static ipha_t multicast_encap_iphdr = { 2170Sstevel@tonic-gate IP_SIMPLE_HDR_VERSION, 2180Sstevel@tonic-gate 0, /* tos */ 2190Sstevel@tonic-gate sizeof (ipha_t), /* total length */ 2200Sstevel@tonic-gate 0, /* id */ 2210Sstevel@tonic-gate 0, /* frag offset */ 2220Sstevel@tonic-gate ENCAP_TTL, IPPROTO_ENCAP, 2230Sstevel@tonic-gate 0, /* checksum */ 2240Sstevel@tonic-gate }; 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate /* 2270Sstevel@tonic-gate * Rate limit for assert notification messages, in nsec. 2280Sstevel@tonic-gate */ 2290Sstevel@tonic-gate #define ASSERT_MSG_TIME 3000000000 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate 2320Sstevel@tonic-gate #define VIF_REFHOLD(vifp) { \ 2330Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock); \ 2340Sstevel@tonic-gate (vifp)->v_refcnt++; \ 2350Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); \ 2360Sstevel@tonic-gate } 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate #define VIF_REFRELE_LOCKED(vifp) { \ 2390Sstevel@tonic-gate (vifp)->v_refcnt--; \ 2400Sstevel@tonic-gate if ((vifp)->v_refcnt == 0 && \ 2410Sstevel@tonic-gate ((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \ 2420Sstevel@tonic-gate del_vifp(vifp); \ 2430Sstevel@tonic-gate } else { \ 2440Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); \ 2450Sstevel@tonic-gate } \ 2460Sstevel@tonic-gate } 2470Sstevel@tonic-gate 2480Sstevel@tonic-gate #define VIF_REFRELE(vifp) { \ 2490Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock); \ 2500Sstevel@tonic-gate (vifp)->v_refcnt--; \ 2510Sstevel@tonic-gate if ((vifp)->v_refcnt == 0 && \ 2520Sstevel@tonic-gate 
((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \ 2530Sstevel@tonic-gate del_vifp(vifp); \ 2540Sstevel@tonic-gate } else { \ 2550Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); \ 2560Sstevel@tonic-gate } \ 2570Sstevel@tonic-gate } 2580Sstevel@tonic-gate 2590Sstevel@tonic-gate #define MFCB_REFHOLD(mfcb) { \ 2600Sstevel@tonic-gate mutex_enter(&(mfcb)->mfcb_lock); \ 2610Sstevel@tonic-gate (mfcb)->mfcb_refcnt++; \ 2620Sstevel@tonic-gate ASSERT((mfcb)->mfcb_refcnt != 0); \ 2630Sstevel@tonic-gate mutex_exit(&(mfcb)->mfcb_lock); \ 2640Sstevel@tonic-gate } 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate #define MFCB_REFRELE(mfcb) { \ 2670Sstevel@tonic-gate mutex_enter(&(mfcb)->mfcb_lock); \ 2680Sstevel@tonic-gate ASSERT((mfcb)->mfcb_refcnt != 0); \ 2690Sstevel@tonic-gate if (--(mfcb)->mfcb_refcnt == 0 && \ 2700Sstevel@tonic-gate ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) { \ 2710Sstevel@tonic-gate release_mfc(mfcb); \ 2720Sstevel@tonic-gate } \ 2730Sstevel@tonic-gate mutex_exit(&(mfcb)->mfcb_lock); \ 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate 2760Sstevel@tonic-gate /* 2770Sstevel@tonic-gate * MFCFIND: 2780Sstevel@tonic-gate * Find a route for a given origin IP address and multicast group address. 2790Sstevel@tonic-gate * Skip entries with pending upcalls. 2800Sstevel@tonic-gate * Type of service parameter to be added in the future! 
2810Sstevel@tonic-gate */ 2820Sstevel@tonic-gate #define MFCFIND(mfcbp, o, g, rt) { \ 2830Sstevel@tonic-gate struct mfc *_mb_rt = NULL; \ 2840Sstevel@tonic-gate rt = NULL; \ 2850Sstevel@tonic-gate _mb_rt = mfcbp->mfcb_mfc; \ 2860Sstevel@tonic-gate while (_mb_rt) { \ 2870Sstevel@tonic-gate if ((_mb_rt->mfc_origin.s_addr == o) && \ 2880Sstevel@tonic-gate (_mb_rt->mfc_mcastgrp.s_addr == g) && \ 2890Sstevel@tonic-gate (_mb_rt->mfc_rte == NULL) && \ 2900Sstevel@tonic-gate (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \ 2910Sstevel@tonic-gate rt = _mb_rt; \ 2920Sstevel@tonic-gate break; \ 2930Sstevel@tonic-gate } \ 2940Sstevel@tonic-gate _mb_rt = _mb_rt->mfc_next; \ 2950Sstevel@tonic-gate } \ 2960Sstevel@tonic-gate } 2970Sstevel@tonic-gate 2980Sstevel@tonic-gate /* 2990Sstevel@tonic-gate * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime() 3000Sstevel@tonic-gate * are inefficient. We use gethrestime() which returns a timespec_t with 3010Sstevel@tonic-gate * sec and nsec, the resolution is machine dependent. 3020Sstevel@tonic-gate * The following 2 macros have been changed to use nsec instead of usec. 3030Sstevel@tonic-gate */ 3040Sstevel@tonic-gate /* 3050Sstevel@tonic-gate * Macros to compute elapsed time efficiently. 3060Sstevel@tonic-gate * Borrowed from Van Jacobson's scheduling code. 3070Sstevel@tonic-gate * Delta should be a hrtime_t. 
3080Sstevel@tonic-gate */ 3090Sstevel@tonic-gate #define TV_DELTA(a, b, delta) { \ 3100Sstevel@tonic-gate int xxs; \ 3110Sstevel@tonic-gate \ 3120Sstevel@tonic-gate delta = (a).tv_nsec - (b).tv_nsec; \ 3130Sstevel@tonic-gate if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 3140Sstevel@tonic-gate switch (xxs) { \ 3150Sstevel@tonic-gate case 2: \ 3160Sstevel@tonic-gate delta += 1000000000; \ 3170Sstevel@tonic-gate /*FALLTHROUGH*/ \ 3180Sstevel@tonic-gate case 1: \ 3190Sstevel@tonic-gate delta += 1000000000; \ 3200Sstevel@tonic-gate break; \ 3210Sstevel@tonic-gate default: \ 3220Sstevel@tonic-gate delta += (1000000000 * xxs); \ 3230Sstevel@tonic-gate } \ 3240Sstevel@tonic-gate } \ 3250Sstevel@tonic-gate } 3260Sstevel@tonic-gate 3270Sstevel@tonic-gate #define TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \ 3280Sstevel@tonic-gate (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 3290Sstevel@tonic-gate 3300Sstevel@tonic-gate /* 3310Sstevel@tonic-gate * Handle MRT setsockopt commands to modify the multicast routing tables. 
3320Sstevel@tonic-gate */ 3330Sstevel@tonic-gate int 3340Sstevel@tonic-gate ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, 3350Sstevel@tonic-gate int datalen, mblk_t *first_mp) 3360Sstevel@tonic-gate { 3375240Snordmark conn_t *connp = Q_TO_CONN(q); 3385240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 3393448Sdh155122 3403448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 3415240Snordmark if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) { 3423448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3430Sstevel@tonic-gate return (EACCES); 3440Sstevel@tonic-gate } 3453448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate if (checkonly) { 3480Sstevel@tonic-gate /* 3490Sstevel@tonic-gate * do not do operation, just pretend to - new T_CHECK 3500Sstevel@tonic-gate * Note: Even routines further on can probably fail but 3510Sstevel@tonic-gate * this T_CHECK stuff is only to please XTI so it not 3520Sstevel@tonic-gate * necessary to be perfect. 3530Sstevel@tonic-gate */ 3540Sstevel@tonic-gate switch (cmd) { 3550Sstevel@tonic-gate case MRT_INIT: 3560Sstevel@tonic-gate case MRT_DONE: 3570Sstevel@tonic-gate case MRT_ADD_VIF: 3580Sstevel@tonic-gate case MRT_DEL_VIF: 3590Sstevel@tonic-gate case MRT_ADD_MFC: 3600Sstevel@tonic-gate case MRT_DEL_MFC: 3610Sstevel@tonic-gate case MRT_ASSERT: 3625240Snordmark return (0); 3630Sstevel@tonic-gate default: 3645240Snordmark return (EOPNOTSUPP); 3650Sstevel@tonic-gate } 3660Sstevel@tonic-gate } 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate /* 3690Sstevel@tonic-gate * make sure no command is issued after multicast routing has been 3700Sstevel@tonic-gate * turned off. 
3710Sstevel@tonic-gate */ 3720Sstevel@tonic-gate if (cmd != MRT_INIT && cmd != MRT_DONE) { 3733448Sdh155122 if (is_mrouter_off(ipst)) 3740Sstevel@tonic-gate return (EINVAL); 3750Sstevel@tonic-gate } 3760Sstevel@tonic-gate 3770Sstevel@tonic-gate switch (cmd) { 3785240Snordmark case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst)); 3793448Sdh155122 case MRT_DONE: return (ip_mrouter_done(first_mp, ipst)); 3805240Snordmark case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, 3815240Snordmark first_mp, ipst)); 3825240Snordmark case MRT_DEL_VIF: return (del_vif((vifi_t *)data, connp, first_mp, 3835240Snordmark ipst)); 3843448Sdh155122 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); 3853448Sdh155122 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); 3863448Sdh155122 case MRT_ASSERT: return (set_assert((int *)data, ipst)); 3870Sstevel@tonic-gate default: return (EOPNOTSUPP); 3880Sstevel@tonic-gate } 3890Sstevel@tonic-gate } 3900Sstevel@tonic-gate 3910Sstevel@tonic-gate /* 3920Sstevel@tonic-gate * Handle MRT getsockopt commands 3930Sstevel@tonic-gate */ 3940Sstevel@tonic-gate int 3950Sstevel@tonic-gate ip_mrouter_get(int cmd, queue_t *q, uchar_t *data) 3960Sstevel@tonic-gate { 3975240Snordmark conn_t *connp = Q_TO_CONN(q); 3985240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 3995240Snordmark 4005240Snordmark if (connp != ipst->ips_ip_g_mrouter) 4010Sstevel@tonic-gate return (EACCES); 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate switch (cmd) { 4040Sstevel@tonic-gate case MRT_VERSION: return (get_version((uchar_t *)data)); 4053448Sdh155122 case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst)); 4060Sstevel@tonic-gate default: return (EOPNOTSUPP); 4070Sstevel@tonic-gate } 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate /* 4110Sstevel@tonic-gate * Handle ioctl commands to obtain information from the cache. 4120Sstevel@tonic-gate * Called with shared access to IP. 
These are read_only ioctls. 4130Sstevel@tonic-gate */ 4140Sstevel@tonic-gate /* ARGSUSED */ 4150Sstevel@tonic-gate int 4160Sstevel@tonic-gate mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, 4170Sstevel@tonic-gate ip_ioctl_cmd_t *ipip, void *if_req) 4180Sstevel@tonic-gate { 4190Sstevel@tonic-gate mblk_t *mp1; 4200Sstevel@tonic-gate struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4215240Snordmark conn_t *connp = Q_TO_CONN(q); 4225240Snordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate /* Existence verified in ip_wput_nondata */ 4250Sstevel@tonic-gate mp1 = mp->b_cont->b_cont; 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate switch (iocp->ioc_cmd) { 4280Sstevel@tonic-gate case (SIOCGETVIFCNT): 4293448Sdh155122 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst)); 4300Sstevel@tonic-gate case (SIOCGETSGCNT): 4313448Sdh155122 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst)); 4320Sstevel@tonic-gate case (SIOCGETLSGCNT): 4333448Sdh155122 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst)); 4340Sstevel@tonic-gate default: 4350Sstevel@tonic-gate return (EINVAL); 4360Sstevel@tonic-gate } 4370Sstevel@tonic-gate } 4380Sstevel@tonic-gate 4390Sstevel@tonic-gate /* 4400Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 
4410Sstevel@tonic-gate */ 4420Sstevel@tonic-gate static int 4433448Sdh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst) 4440Sstevel@tonic-gate { 4450Sstevel@tonic-gate struct mfc *rt; 4460Sstevel@tonic-gate struct mfcb *mfcbp; 4470Sstevel@tonic-gate 4483448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 4490Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 4500Sstevel@tonic-gate MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate if (rt != NULL) { 4530Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 4540Sstevel@tonic-gate req->pktcnt = rt->mfc_pkt_cnt; 4550Sstevel@tonic-gate req->bytecnt = rt->mfc_byte_cnt; 4560Sstevel@tonic-gate req->wrong_if = rt->mfc_wrong_if; 4570Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 4580Sstevel@tonic-gate } else 4590Sstevel@tonic-gate req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 4600Sstevel@tonic-gate 4610Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 4620Sstevel@tonic-gate return (0); 4630Sstevel@tonic-gate } 4640Sstevel@tonic-gate 4650Sstevel@tonic-gate /* 4660Sstevel@tonic-gate * Returns the packet, byte, rpf-failure count for the source, group provided. 4670Sstevel@tonic-gate * Uses larger counters and IPv6 addresses. 4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate /* ARGSUSED XXX until implemented */ 4700Sstevel@tonic-gate static int 4713448Sdh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst) 4720Sstevel@tonic-gate { 4730Sstevel@tonic-gate /* XXX TODO SIOCGETLSGCNT */ 4740Sstevel@tonic-gate return (ENXIO); 4750Sstevel@tonic-gate } 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate /* 4780Sstevel@tonic-gate * Returns the input and output packet and byte counts on the vif provided. 
4790Sstevel@tonic-gate */ 4800Sstevel@tonic-gate static int 4813448Sdh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst) 4820Sstevel@tonic-gate { 4830Sstevel@tonic-gate vifi_t vifi = req->vifi; 4840Sstevel@tonic-gate 4853448Sdh155122 if (vifi >= ipst->ips_numvifs) 4860Sstevel@tonic-gate return (EINVAL); 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate /* 4890Sstevel@tonic-gate * No locks here, an approximation is fine. 4900Sstevel@tonic-gate */ 4913448Sdh155122 req->icount = ipst->ips_vifs[vifi].v_pkt_in; 4923448Sdh155122 req->ocount = ipst->ips_vifs[vifi].v_pkt_out; 4933448Sdh155122 req->ibytes = ipst->ips_vifs[vifi].v_bytes_in; 4943448Sdh155122 req->obytes = ipst->ips_vifs[vifi].v_bytes_out; 4950Sstevel@tonic-gate 4960Sstevel@tonic-gate return (0); 4970Sstevel@tonic-gate } 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate static int 5000Sstevel@tonic-gate get_version(uchar_t *data) 5010Sstevel@tonic-gate { 5020Sstevel@tonic-gate int *v = (int *)data; 5030Sstevel@tonic-gate 5040Sstevel@tonic-gate *v = 0x0305; /* XXX !!!! */ 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate return (0); 5070Sstevel@tonic-gate } 5080Sstevel@tonic-gate 5090Sstevel@tonic-gate /* 5100Sstevel@tonic-gate * Set PIM assert processing global. 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate static int 5133448Sdh155122 set_assert(int *i, ip_stack_t *ipst) 5140Sstevel@tonic-gate { 5150Sstevel@tonic-gate if ((*i != 1) && (*i != 0)) 5160Sstevel@tonic-gate return (EINVAL); 5170Sstevel@tonic-gate 5183448Sdh155122 ipst->ips_pim_assert = *i; 5190Sstevel@tonic-gate 5200Sstevel@tonic-gate return (0); 5210Sstevel@tonic-gate } 5220Sstevel@tonic-gate 5230Sstevel@tonic-gate /* 5240Sstevel@tonic-gate * Get PIM assert processing global. 
5250Sstevel@tonic-gate */ 5260Sstevel@tonic-gate static int 5273448Sdh155122 get_assert(uchar_t *data, ip_stack_t *ipst) 5280Sstevel@tonic-gate { 5290Sstevel@tonic-gate int *i = (int *)data; 5300Sstevel@tonic-gate 5313448Sdh155122 *i = ipst->ips_pim_assert; 5320Sstevel@tonic-gate 5330Sstevel@tonic-gate return (0); 5340Sstevel@tonic-gate } 5350Sstevel@tonic-gate 5360Sstevel@tonic-gate /* 5370Sstevel@tonic-gate * Enable multicast routing. 5380Sstevel@tonic-gate */ 5390Sstevel@tonic-gate static int 5405240Snordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst) 5410Sstevel@tonic-gate { 5420Sstevel@tonic-gate int *v; 5430Sstevel@tonic-gate 5440Sstevel@tonic-gate if (data == NULL || (datalen != sizeof (int))) 5450Sstevel@tonic-gate return (ENOPROTOOPT); 5460Sstevel@tonic-gate 5470Sstevel@tonic-gate v = (int *)data; 5480Sstevel@tonic-gate if (*v != 1) 5490Sstevel@tonic-gate return (ENOPROTOOPT); 5500Sstevel@tonic-gate 5513448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 5523448Sdh155122 if (ipst->ips_ip_g_mrouter != NULL) { 5533448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5540Sstevel@tonic-gate return (EADDRINUSE); 5550Sstevel@tonic-gate } 5560Sstevel@tonic-gate 5575240Snordmark /* 5585240Snordmark * MRT_INIT should only be allowed for RAW sockets, but we double 5595240Snordmark * check. 
5605240Snordmark */ 5615240Snordmark if (!IPCL_IS_RAWIP(connp)) { 5625240Snordmark mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5635240Snordmark return (EINVAL); 5645240Snordmark } 5655240Snordmark 5665240Snordmark ipst->ips_ip_g_mrouter = connp; 5670Sstevel@tonic-gate connp->conn_multi_router = 1; 5680Sstevel@tonic-gate /* In order for tunnels to work we have to turn ip_g_forward on */ 5693448Sdh155122 if (!WE_ARE_FORWARDING(ipst)) { 5703448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 5715240Snordmark (void) mi_strlog(connp->conn_rq, 1, SL_TRACE, 5720Sstevel@tonic-gate "ip_mrouter_init: turning on forwarding"); 5730Sstevel@tonic-gate } 5743448Sdh155122 ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward; 5753448Sdh155122 ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS; 5760Sstevel@tonic-gate } 5770Sstevel@tonic-gate 5783448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 5790Sstevel@tonic-gate return (0); 5800Sstevel@tonic-gate } 5810Sstevel@tonic-gate 5823448Sdh155122 void 5833448Sdh155122 ip_mrouter_stack_init(ip_stack_t *ipst) 5843448Sdh155122 { 5853448Sdh155122 mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL); 5863448Sdh155122 5873448Sdh155122 ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1), 5883448Sdh155122 KM_SLEEP); 5893448Sdh155122 ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP); 5903448Sdh155122 /* 5913448Sdh155122 * mfctable: 5923448Sdh155122 * Includes all mfcs, including waiting upcalls. 5933448Sdh155122 * Multiple mfcs per bucket. 5943448Sdh155122 */ 5953448Sdh155122 ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ, 5963448Sdh155122 KM_SLEEP); 5973448Sdh155122 /* 5983448Sdh155122 * Define the token bucket filter structures. 5993448Sdh155122 * tbftable -> each vif has one of these for storing info. 
6003448Sdh155122 */ 6013448Sdh155122 ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP); 6023448Sdh155122 6033448Sdh155122 mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL); 6043448Sdh155122 6053448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 6063448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 6073448Sdh155122 } 6083448Sdh155122 6090Sstevel@tonic-gate /* 6100Sstevel@tonic-gate * Disable multicast routing. 6110Sstevel@tonic-gate * Didn't use global timeout_val (BSD version), instead check the mfctable. 6120Sstevel@tonic-gate */ 6130Sstevel@tonic-gate int 6143448Sdh155122 ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) 6150Sstevel@tonic-gate { 6165240Snordmark conn_t *mrouter; 6170Sstevel@tonic-gate vifi_t vifi; 6180Sstevel@tonic-gate struct mfc *mfc_rt; 6190Sstevel@tonic-gate int i; 6200Sstevel@tonic-gate 6213448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 6223448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) { 6233448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 6240Sstevel@tonic-gate return (EINVAL); 6250Sstevel@tonic-gate } 6260Sstevel@tonic-gate 6275240Snordmark mrouter = ipst->ips_ip_g_mrouter; 6283448Sdh155122 6293448Sdh155122 if (ipst->ips_saved_ip_g_forward != -1) { 6303448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 6315240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 6320Sstevel@tonic-gate "ip_mrouter_done: turning off forwarding"); 6330Sstevel@tonic-gate } 6343448Sdh155122 ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward; 6353448Sdh155122 ipst->ips_saved_ip_g_forward = -1; 6360Sstevel@tonic-gate } 6370Sstevel@tonic-gate 6380Sstevel@tonic-gate /* 6390Sstevel@tonic-gate * Always clear cache when vifs change. 6403448Sdh155122 * No need to get ipst->ips_last_encap_lock since we are running as 6413448Sdh155122 * a writer. 
6420Sstevel@tonic-gate */ 6433448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 6443448Sdh155122 ipst->ips_last_encap_src = 0; 6453448Sdh155122 ipst->ips_last_encap_vif = NULL; 6463448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 6475240Snordmark mrouter->conn_multi_router = 0; 6480Sstevel@tonic-gate 6493448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate /* 6520Sstevel@tonic-gate * For each phyint in use, 6530Sstevel@tonic-gate * disable promiscuous reception of all IP multicasts. 6540Sstevel@tonic-gate */ 6550Sstevel@tonic-gate for (vifi = 0; vifi < MAXVIFS; vifi++) { 6563448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifi; 6570Sstevel@tonic-gate 6580Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 6590Sstevel@tonic-gate /* 6600Sstevel@tonic-gate * if the vif is active mark it condemned. 6610Sstevel@tonic-gate */ 6620Sstevel@tonic-gate if (vifp->v_marks & VIF_MARK_GOOD) { 6630Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 6640Sstevel@tonic-gate ipif_refhold(vifp->v_ipif); 6650Sstevel@tonic-gate /* Phyint only */ 6660Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 6670Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 6680Sstevel@tonic-gate ipsq_t *ipsq; 6690Sstevel@tonic-gate boolean_t suc; 6700Sstevel@tonic-gate ill_t *ill; 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate ill = ipif->ipif_ill; 6730Sstevel@tonic-gate suc = B_FALSE; 6740Sstevel@tonic-gate if (mp == NULL) { 6750Sstevel@tonic-gate /* 6760Sstevel@tonic-gate * being called from ip_close, 6770Sstevel@tonic-gate * lets do it synchronously. 6780Sstevel@tonic-gate * Clear VIF_MARK_GOOD and 6790Sstevel@tonic-gate * set VIF_MARK_CONDEMNED. 
6800Sstevel@tonic-gate */ 6810Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 6820Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 6830Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); 6848275SEric Cheng suc = ipsq_enter(ill, B_FALSE, NEW_OP); 6850Sstevel@tonic-gate ipsq = ill->ill_phyint->phyint_ipsq; 6860Sstevel@tonic-gate } else { 6870Sstevel@tonic-gate ipsq = ipsq_try_enter(ipif, NULL, 6885240Snordmark mrouter->conn_wq, mp, 6890Sstevel@tonic-gate ip_restart_optmgmt, NEW_OP, B_TRUE); 6900Sstevel@tonic-gate if (ipsq == NULL) { 6910Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); 6925240Snordmark ipif_refrele(ipif); 6930Sstevel@tonic-gate return (EINPROGRESS); 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate /* 6960Sstevel@tonic-gate * Clear VIF_MARK_GOOD and 6970Sstevel@tonic-gate * set VIF_MARK_CONDEMNED. 6980Sstevel@tonic-gate */ 6990Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 7000Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 7015240Snordmark mutex_exit(&(vifp)->v_lock); 7020Sstevel@tonic-gate suc = B_TRUE; 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate 7050Sstevel@tonic-gate if (suc) { 7060Sstevel@tonic-gate (void) ip_delmulti(INADDR_ANY, ipif, 7070Sstevel@tonic-gate B_TRUE, B_TRUE); 7087098Smeem ipsq_exit(ipsq); 7090Sstevel@tonic-gate } 7100Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 7110Sstevel@tonic-gate } 7120Sstevel@tonic-gate /* 7130Sstevel@tonic-gate * decreases the refcnt added in add_vif. 7140Sstevel@tonic-gate * and release v_lock. 
7150Sstevel@tonic-gate */ 7160Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 7170Sstevel@tonic-gate } else { 7180Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 7190Sstevel@tonic-gate continue; 7200Sstevel@tonic-gate } 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate 7233448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 7243448Sdh155122 ipst->ips_numvifs = 0; 7253448Sdh155122 ipst->ips_pim_assert = 0; 7263448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 7273448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 7280Sstevel@tonic-gate 7290Sstevel@tonic-gate /* 7300Sstevel@tonic-gate * Free upcall msgs. 7310Sstevel@tonic-gate * Go through mfctable and stop any outstanding upcall 7320Sstevel@tonic-gate * timeouts remaining on mfcs. 7330Sstevel@tonic-gate */ 7340Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 7353448Sdh155122 mutex_enter(&ipst->ips_mfcs[i].mfcb_lock); 7363448Sdh155122 ipst->ips_mfcs[i].mfcb_refcnt++; 7373448Sdh155122 ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 7383448Sdh155122 mutex_exit(&ipst->ips_mfcs[i].mfcb_lock); 7393448Sdh155122 mfc_rt = ipst->ips_mfcs[i].mfcb_mfc; 7400Sstevel@tonic-gate while (mfc_rt) { 7410Sstevel@tonic-gate /* Free upcalls */ 7420Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7430Sstevel@tonic-gate if (mfc_rt->mfc_rte != NULL) { 7440Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id != 0) { 7450Sstevel@tonic-gate /* 7460Sstevel@tonic-gate * OK to drop the lock as we have 7470Sstevel@tonic-gate * a refcnt on the bucket. timeout 7480Sstevel@tonic-gate * can fire but it will see that 7490Sstevel@tonic-gate * mfc_timeout_id == 0 and not do 7500Sstevel@tonic-gate * anything. see expire_upcalls(). 
7510Sstevel@tonic-gate */ 7520Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7530Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7540Sstevel@tonic-gate (void) untimeout( 7550Sstevel@tonic-gate mfc_rt->mfc_timeout_id); 7560Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 7570Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate /* 7600Sstevel@tonic-gate * all queued upcall packets 7610Sstevel@tonic-gate * and mblk will be freed in 7620Sstevel@tonic-gate * release_mfc(). 7630Sstevel@tonic-gate */ 7640Sstevel@tonic-gate } 7650Sstevel@tonic-gate } 7660Sstevel@tonic-gate 7670Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 7700Sstevel@tonic-gate mfc_rt = mfc_rt->mfc_next; 7710Sstevel@tonic-gate } 7723448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 7730Sstevel@tonic-gate } 7740Sstevel@tonic-gate 7753448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 7763448Sdh155122 ipst->ips_ip_g_mrouter = NULL; 7773448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 7780Sstevel@tonic-gate return (0); 7790Sstevel@tonic-gate } 7800Sstevel@tonic-gate 7813448Sdh155122 void 7823448Sdh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst) 7833448Sdh155122 { 7843448Sdh155122 struct mfcb *mfcbp; 7853448Sdh155122 struct mfc *rt; 7863448Sdh155122 int i; 7873448Sdh155122 7883448Sdh155122 for (i = 0; i < MFCTBLSIZ; i++) { 7893448Sdh155122 mfcbp = &ipst->ips_mfcs[i]; 7903448Sdh155122 7913448Sdh155122 while ((rt = mfcbp->mfcb_mfc) != NULL) { 7923448Sdh155122 (void) printf("ip_mrouter_stack_destroy: free for %d\n", 7933448Sdh155122 i); 7943448Sdh155122 7953448Sdh155122 mfcbp->mfcb_mfc = rt->mfc_next; 7963448Sdh155122 free_queue(rt); 7973448Sdh155122 mi_free(rt); 7983448Sdh155122 } 7993448Sdh155122 } 8003448Sdh155122 kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1)); 8013448Sdh155122 ipst->ips_vifs = NULL; 8023448Sdh155122 kmem_free(ipst->ips_mrtstat, 
sizeof (struct mrtstat)); 8033448Sdh155122 ipst->ips_mrtstat = NULL; 8043448Sdh155122 kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ); 8053448Sdh155122 ipst->ips_mfcs = NULL; 8063448Sdh155122 kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS); 8073448Sdh155122 ipst->ips_tbfs = NULL; 8083448Sdh155122 8093448Sdh155122 mutex_destroy(&ipst->ips_last_encap_lock); 8103448Sdh155122 mutex_destroy(&ipst->ips_ip_g_mrouter_mutex); 8113448Sdh155122 } 8123448Sdh155122 8130Sstevel@tonic-gate static boolean_t 8143448Sdh155122 is_mrouter_off(ip_stack_t *ipst) 8150Sstevel@tonic-gate { 8165240Snordmark conn_t *mrouter; 8170Sstevel@tonic-gate 8183448Sdh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 8193448Sdh155122 if (ipst->ips_ip_g_mrouter == NULL) { 8203448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 8210Sstevel@tonic-gate return (B_TRUE); 8220Sstevel@tonic-gate } 8230Sstevel@tonic-gate 8245240Snordmark mrouter = ipst->ips_ip_g_mrouter; 8255240Snordmark if (mrouter->conn_multi_router == 0) { 8263448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 8270Sstevel@tonic-gate return (B_TRUE); 8280Sstevel@tonic-gate } 8293448Sdh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 8300Sstevel@tonic-gate return (B_FALSE); 8310Sstevel@tonic-gate } 8320Sstevel@tonic-gate 8330Sstevel@tonic-gate static void 8340Sstevel@tonic-gate unlock_good_vif(struct vif *vifp) 8350Sstevel@tonic-gate { 8360Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8370Sstevel@tonic-gate ipif_refrele(vifp->v_ipif); 8380Sstevel@tonic-gate VIF_REFRELE(vifp); 8390Sstevel@tonic-gate } 8400Sstevel@tonic-gate 8410Sstevel@tonic-gate static boolean_t 8420Sstevel@tonic-gate lock_good_vif(struct vif *vifp) 8430Sstevel@tonic-gate { 8440Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8450Sstevel@tonic-gate if (!(vifp->v_marks & VIF_MARK_GOOD)) { 8460Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8470Sstevel@tonic-gate return (B_FALSE); 8480Sstevel@tonic-gate } 8490Sstevel@tonic-gate 
8500Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 8510Sstevel@tonic-gate mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 8520Sstevel@tonic-gate if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 8530Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8540Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8550Sstevel@tonic-gate return (B_FALSE); 8560Sstevel@tonic-gate } 8570Sstevel@tonic-gate ipif_refhold_locked(vifp->v_ipif); 8580Sstevel@tonic-gate mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 8590Sstevel@tonic-gate vifp->v_refcnt++; 8600Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 8610Sstevel@tonic-gate return (B_TRUE); 8620Sstevel@tonic-gate } 8630Sstevel@tonic-gate 8640Sstevel@tonic-gate /* 8650Sstevel@tonic-gate * Add a vif to the vif table. 8660Sstevel@tonic-gate */ 8670Sstevel@tonic-gate static int 8685240Snordmark add_vif(struct vifctl *vifcp, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst) 8690Sstevel@tonic-gate { 8703448Sdh155122 struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; 8710Sstevel@tonic-gate ipif_t *ipif; 8720Sstevel@tonic-gate int error; 8733448Sdh155122 struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; 8740Sstevel@tonic-gate ipsq_t *ipsq; 8755240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 8760Sstevel@tonic-gate 8770Sstevel@tonic-gate ASSERT(connp != NULL); 8780Sstevel@tonic-gate 8790Sstevel@tonic-gate if (vifcp->vifc_vifi >= MAXVIFS) 8800Sstevel@tonic-gate return (EINVAL); 8810Sstevel@tonic-gate 8823448Sdh155122 if (is_mrouter_off(ipst)) 8830Sstevel@tonic-gate return (EINVAL); 8840Sstevel@tonic-gate 8850Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 8860Sstevel@tonic-gate /* 8870Sstevel@tonic-gate * Viftable entry should be 0. 8880Sstevel@tonic-gate * if v_marks == 0 but v_refcnt != 0 means struct is being 8890Sstevel@tonic-gate * initialized. 
8900Sstevel@tonic-gate * 8910Sstevel@tonic-gate * Also note that it is very unlikely that we will get a MRT_ADD_VIF 8920Sstevel@tonic-gate * request while the delete is in progress, mrouted only sends add 8930Sstevel@tonic-gate * requests when a new interface is added and the new interface cannot 8940Sstevel@tonic-gate * have the same vifi as an existing interface. We make sure that 8950Sstevel@tonic-gate * ill_delete will block till the vif is deleted by adding a refcnt 8960Sstevel@tonic-gate * to ipif in del_vif(). 8970Sstevel@tonic-gate */ 8980Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr != 0 || 8990Sstevel@tonic-gate vifp->v_marks != 0 || 9000Sstevel@tonic-gate vifp->v_refcnt != 0) { 9010Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 9020Sstevel@tonic-gate return (EADDRINUSE); 9030Sstevel@tonic-gate } 9040Sstevel@tonic-gate 9050Sstevel@tonic-gate /* Incoming vif should not be 0 */ 9060Sstevel@tonic-gate if (vifcp->vifc_lcl_addr.s_addr == 0) { 9070Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 9080Sstevel@tonic-gate return (EINVAL); 9090Sstevel@tonic-gate } 9100Sstevel@tonic-gate 9110Sstevel@tonic-gate vifp->v_refcnt++; 9120Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 9130Sstevel@tonic-gate /* Find the interface with the local address */ 9140Sstevel@tonic-gate ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 9150Sstevel@tonic-gate connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp, 9163448Sdh155122 ip_restart_optmgmt, &error, ipst); 9170Sstevel@tonic-gate if (ipif == NULL) { 9180Sstevel@tonic-gate VIF_REFRELE(vifp); 9190Sstevel@tonic-gate if (error == EINPROGRESS) 9200Sstevel@tonic-gate return (error); 9210Sstevel@tonic-gate return (EADDRNOTAVAIL); 9220Sstevel@tonic-gate } 9230Sstevel@tonic-gate 9240Sstevel@tonic-gate /* 9250Sstevel@tonic-gate * We have to be exclusive as we have to call ip_addmulti() 9260Sstevel@tonic-gate * This is the best position to try to be exclusive in case 9270Sstevel@tonic-gate * we have to wait. 
9280Sstevel@tonic-gate */ 9290Sstevel@tonic-gate ipsq = ipsq_try_enter(ipif, NULL, CONNP_TO_WQ(connp), first_mp, 9300Sstevel@tonic-gate ip_restart_optmgmt, NEW_OP, B_TRUE); 9310Sstevel@tonic-gate if ((ipsq) == NULL) { 9320Sstevel@tonic-gate VIF_REFRELE(vifp); 9330Sstevel@tonic-gate ipif_refrele(ipif); 9340Sstevel@tonic-gate return (EINPROGRESS); 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate 9373448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 9385240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 9390Sstevel@tonic-gate "add_vif: src 0x%x enter", 9400Sstevel@tonic-gate vifcp->vifc_lcl_addr.s_addr); 9410Sstevel@tonic-gate } 9420Sstevel@tonic-gate 9430Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 9440Sstevel@tonic-gate /* 9450Sstevel@tonic-gate * Always clear cache when vifs change. 9460Sstevel@tonic-gate * Needed to ensure that src isn't left over from before vif was added. 9470Sstevel@tonic-gate * No need to get last_encap_lock, since we are running as a writer. 9480Sstevel@tonic-gate */ 9490Sstevel@tonic-gate 9503448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 9513448Sdh155122 ipst->ips_last_encap_src = 0; 9523448Sdh155122 ipst->ips_last_encap_vif = NULL; 9533448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 9540Sstevel@tonic-gate 9550Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_TUNNEL) { 9560Sstevel@tonic-gate if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 9570Sstevel@tonic-gate cmn_err(CE_WARN, 9580Sstevel@tonic-gate "add_vif: source route tunnels not supported\n"); 9590Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9600Sstevel@tonic-gate ipif_refrele(ipif); 9617098Smeem ipsq_exit(ipsq); 9620Sstevel@tonic-gate return (EOPNOTSUPP); 9630Sstevel@tonic-gate } 9640Sstevel@tonic-gate vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate } else { 9670Sstevel@tonic-gate /* Phyint or Register vif */ 9680Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9690Sstevel@tonic-gate /* 9700Sstevel@tonic-gate * Note: 
Since all IPPROTO_IP level options (including 9710Sstevel@tonic-gate * MRT_ADD_VIF) are done exclusively via 9720Sstevel@tonic-gate * ip_optmgmt_writer(), a lock is not necessary to 9730Sstevel@tonic-gate * protect reg_vif_num. 9740Sstevel@tonic-gate */ 9753448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 9763448Sdh155122 if (ipst->ips_reg_vif_num == ALL_VIFS) { 9773448Sdh155122 ipst->ips_reg_vif_num = vifcp->vifc_vifi; 9783448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9790Sstevel@tonic-gate } else { 9803448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9810Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9820Sstevel@tonic-gate ipif_refrele(ipif); 9837098Smeem ipsq_exit(ipsq); 9840Sstevel@tonic-gate return (EADDRINUSE); 9850Sstevel@tonic-gate } 9860Sstevel@tonic-gate } 9870Sstevel@tonic-gate 9880Sstevel@tonic-gate /* Make sure the interface supports multicast */ 9890Sstevel@tonic-gate if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 9900Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 9910Sstevel@tonic-gate ipif_refrele(ipif); 9920Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 9933448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 9943448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 9953448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 9960Sstevel@tonic-gate } 9977098Smeem ipsq_exit(ipsq); 9980Sstevel@tonic-gate return (EOPNOTSUPP); 9990Sstevel@tonic-gate } 10000Sstevel@tonic-gate /* Enable promiscuous reception of all IP mcasts from the if */ 10010Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 10020Sstevel@tonic-gate error = ip_addmulti(INADDR_ANY, ipif, ILGSTAT_NONE, 10030Sstevel@tonic-gate MODE_IS_EXCLUDE, NULL); 10040Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 10050Sstevel@tonic-gate /* 10060Sstevel@tonic-gate * since we released the lock lets make sure that 10070Sstevel@tonic-gate * ip_mrouter_done() has not been called. 
10080Sstevel@tonic-gate */ 10093448Sdh155122 if (error != 0 || is_mrouter_off(ipst)) { 10100Sstevel@tonic-gate if (error == 0) 10110Sstevel@tonic-gate (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, 10120Sstevel@tonic-gate B_TRUE); 10130Sstevel@tonic-gate if (vifcp->vifc_flags & VIFF_REGISTER) { 10143448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 10153448Sdh155122 ipst->ips_reg_vif_num = ALL_VIFS; 10163448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10170Sstevel@tonic-gate } 10180Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 10190Sstevel@tonic-gate ipif_refrele(ipif); 10207098Smeem ipsq_exit(ipsq); 10210Sstevel@tonic-gate return (error?error:EINVAL); 10220Sstevel@tonic-gate } 10230Sstevel@tonic-gate } 10240Sstevel@tonic-gate /* Define parameters for the tbf structure */ 10250Sstevel@tonic-gate vifp->v_tbf = v_tbf; 10260Sstevel@tonic-gate gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 10270Sstevel@tonic-gate vifp->v_tbf->tbf_n_tok = 0; 10280Sstevel@tonic-gate vifp->v_tbf->tbf_q_len = 0; 10290Sstevel@tonic-gate vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 10300Sstevel@tonic-gate vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 10310Sstevel@tonic-gate 10320Sstevel@tonic-gate vifp->v_flags = vifcp->vifc_flags; 10330Sstevel@tonic-gate vifp->v_threshold = vifcp->vifc_threshold; 10340Sstevel@tonic-gate vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 10350Sstevel@tonic-gate vifp->v_ipif = ipif; 10360Sstevel@tonic-gate ipif_refrele(ipif); 10370Sstevel@tonic-gate /* Scaling up here, allows division by 1024 in critical code. 
*/ 10380Sstevel@tonic-gate vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 10390Sstevel@tonic-gate vifp->v_timeout_id = 0; 10400Sstevel@tonic-gate /* initialize per vif pkt counters */ 10410Sstevel@tonic-gate vifp->v_pkt_in = 0; 10420Sstevel@tonic-gate vifp->v_pkt_out = 0; 10430Sstevel@tonic-gate vifp->v_bytes_in = 0; 10440Sstevel@tonic-gate vifp->v_bytes_out = 0; 10450Sstevel@tonic-gate mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 10460Sstevel@tonic-gate 10470Sstevel@tonic-gate /* Adjust numvifs up, if the vifi is higher than numvifs */ 10483448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 10493448Sdh155122 if (ipst->ips_numvifs <= vifcp->vifc_vifi) 10503448Sdh155122 ipst->ips_numvifs = vifcp->vifc_vifi + 1; 10513448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 10523448Sdh155122 10533448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 10545240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10550Sstevel@tonic-gate "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 10560Sstevel@tonic-gate vifcp->vifc_vifi, 10570Sstevel@tonic-gate ntohl(vifcp->vifc_lcl_addr.s_addr), 10580Sstevel@tonic-gate (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 10590Sstevel@tonic-gate ntohl(vifcp->vifc_rmt_addr.s_addr), 10600Sstevel@tonic-gate vifcp->vifc_threshold, vifcp->vifc_rate_limit); 10610Sstevel@tonic-gate } 10620Sstevel@tonic-gate 10630Sstevel@tonic-gate vifp->v_marks = VIF_MARK_GOOD; 10640Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 10657098Smeem ipsq_exit(ipsq); 10660Sstevel@tonic-gate return (0); 10670Sstevel@tonic-gate } 10680Sstevel@tonic-gate 10690Sstevel@tonic-gate 10700Sstevel@tonic-gate /* Delete a vif from the vif table. 
*/ 10710Sstevel@tonic-gate static void 10720Sstevel@tonic-gate del_vifp(struct vif *vifp) 10730Sstevel@tonic-gate { 10740Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 10750Sstevel@tonic-gate mblk_t *mp0; 10760Sstevel@tonic-gate vifi_t vifi; 10773448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 10785240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 10790Sstevel@tonic-gate 10800Sstevel@tonic-gate ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 10810Sstevel@tonic-gate ASSERT(t != NULL); 10820Sstevel@tonic-gate 10830Sstevel@tonic-gate /* 10840Sstevel@tonic-gate * release the ref we put in vif_del. 10850Sstevel@tonic-gate */ 10860Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 10870Sstevel@tonic-gate ipif_refrele(vifp->v_ipif); 10880Sstevel@tonic-gate 10893448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 10905240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 10910Sstevel@tonic-gate "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 10920Sstevel@tonic-gate } 10930Sstevel@tonic-gate 10940Sstevel@tonic-gate if (vifp->v_timeout_id != 0) { 10950Sstevel@tonic-gate (void) untimeout(vifp->v_timeout_id); 10960Sstevel@tonic-gate vifp->v_timeout_id = 0; 10970Sstevel@tonic-gate } 10980Sstevel@tonic-gate 10990Sstevel@tonic-gate /* 11000Sstevel@tonic-gate * Free packets queued at the interface. 11010Sstevel@tonic-gate * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 11020Sstevel@tonic-gate */ 11030Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 11040Sstevel@tonic-gate while (t->tbf_q != NULL) { 11050Sstevel@tonic-gate mp0 = t->tbf_q; 11060Sstevel@tonic-gate t->tbf_q = t->tbf_q->b_next; 11070Sstevel@tonic-gate mp0->b_prev = mp0->b_next = NULL; 11080Sstevel@tonic-gate freemsg(mp0); 11090Sstevel@tonic-gate } 11100Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 11110Sstevel@tonic-gate 11120Sstevel@tonic-gate /* 11130Sstevel@tonic-gate * Always clear cache when vifs change. 
11140Sstevel@tonic-gate * No need to get last_encap_lock since we are running as a writer. 11150Sstevel@tonic-gate */ 11163448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 11173448Sdh155122 if (vifp == ipst->ips_last_encap_vif) { 11183448Sdh155122 ipst->ips_last_encap_vif = NULL; 11193448Sdh155122 ipst->ips_last_encap_src = 0; 11200Sstevel@tonic-gate } 11213448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 11220Sstevel@tonic-gate 11230Sstevel@tonic-gate mutex_destroy(&t->tbf_lock); 11240Sstevel@tonic-gate 11250Sstevel@tonic-gate bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 11260Sstevel@tonic-gate 11270Sstevel@tonic-gate /* Adjust numvifs down */ 11283448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 11293448Sdh155122 for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 11303448Sdh155122 if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0) 11310Sstevel@tonic-gate break; 11323448Sdh155122 ipst->ips_numvifs = vifi; 11333448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 11340Sstevel@tonic-gate 11350Sstevel@tonic-gate bzero(vifp, sizeof (*vifp)); 11360Sstevel@tonic-gate } 11370Sstevel@tonic-gate 11380Sstevel@tonic-gate static int 11395240Snordmark del_vif(vifi_t *vifip, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst) 11400Sstevel@tonic-gate { 11413448Sdh155122 struct vif *vifp = ipst->ips_vifs + *vifip; 11420Sstevel@tonic-gate ipsq_t *ipsq; 11430Sstevel@tonic-gate 11443448Sdh155122 if (*vifip >= ipst->ips_numvifs) 11450Sstevel@tonic-gate return (EINVAL); 11460Sstevel@tonic-gate 11470Sstevel@tonic-gate 11480Sstevel@tonic-gate mutex_enter(&vifp->v_lock); 11490Sstevel@tonic-gate /* 11500Sstevel@tonic-gate * Not initialized 11510Sstevel@tonic-gate * Here we are not looking at the vif that is being initialized 11520Sstevel@tonic-gate * i.e vifp->v_marks == 0 and refcnt > 0. 
11530Sstevel@tonic-gate */ 11540Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr == 0 || 11550Sstevel@tonic-gate !(vifp->v_marks & VIF_MARK_GOOD)) { 11560Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 11570Sstevel@tonic-gate return (EADDRNOTAVAIL); 11580Sstevel@tonic-gate } 11590Sstevel@tonic-gate 11600Sstevel@tonic-gate /* 11610Sstevel@tonic-gate * This is an optimization, if first_mp == NULL 11620Sstevel@tonic-gate * than we are being called from reset_mrt_vif_ipif() 11630Sstevel@tonic-gate * so we already have exclusive access to the ipsq. 11640Sstevel@tonic-gate * the ASSERT below is a check for this condition. 11650Sstevel@tonic-gate */ 11660Sstevel@tonic-gate if (first_mp != NULL && 11670Sstevel@tonic-gate !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 11680Sstevel@tonic-gate ASSERT(connp != NULL); 11690Sstevel@tonic-gate /* 11700Sstevel@tonic-gate * We have to be exclusive as we have to call ip_delmulti() 11710Sstevel@tonic-gate * This is the best position to try to be exclusive in case 11720Sstevel@tonic-gate * we have to wait. 11730Sstevel@tonic-gate */ 11740Sstevel@tonic-gate ipsq = ipsq_try_enter(vifp->v_ipif, NULL, CONNP_TO_WQ(connp), 11750Sstevel@tonic-gate first_mp, ip_restart_optmgmt, NEW_OP, B_TRUE); 11760Sstevel@tonic-gate if ((ipsq) == NULL) { 11770Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 11780Sstevel@tonic-gate return (EINPROGRESS); 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate /* recheck after being exclusive */ 11810Sstevel@tonic-gate if (vifp->v_lcl_addr.s_addr == 0 || 11820Sstevel@tonic-gate !vifp->v_marks & VIF_MARK_GOOD) { 11830Sstevel@tonic-gate /* 11840Sstevel@tonic-gate * someone beat us. 
11850Sstevel@tonic-gate */ 11860Sstevel@tonic-gate mutex_exit(&vifp->v_lock); 11877098Smeem ipsq_exit(ipsq); 11880Sstevel@tonic-gate return (EADDRNOTAVAIL); 11890Sstevel@tonic-gate } 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11920Sstevel@tonic-gate 11930Sstevel@tonic-gate ASSERT(IAM_WRITER_IPIF(vifp->v_ipif)); 11940Sstevel@tonic-gate 11950Sstevel@tonic-gate 11960Sstevel@tonic-gate /* 11970Sstevel@tonic-gate * add a refhold so that ipif does not go away while 11980Sstevel@tonic-gate * there are still users, this will be released in del_vifp 11990Sstevel@tonic-gate * when we free the vif. 12000Sstevel@tonic-gate */ 12010Sstevel@tonic-gate ipif_refhold(vifp->v_ipif); 12020Sstevel@tonic-gate 12030Sstevel@tonic-gate /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */ 12040Sstevel@tonic-gate vifp->v_marks &= ~VIF_MARK_GOOD; 12050Sstevel@tonic-gate vifp->v_marks |= VIF_MARK_CONDEMNED; 12060Sstevel@tonic-gate 12070Sstevel@tonic-gate /* Phyint only */ 12080Sstevel@tonic-gate if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 12090Sstevel@tonic-gate ipif_t *ipif = vifp->v_ipif; 12100Sstevel@tonic-gate ASSERT(ipif != NULL); 12110Sstevel@tonic-gate /* 12120Sstevel@tonic-gate * should be OK to drop the lock as we 12130Sstevel@tonic-gate * have marked this as CONDEMNED. 12140Sstevel@tonic-gate */ 12150Sstevel@tonic-gate mutex_exit(&(vifp)->v_lock); 12160Sstevel@tonic-gate (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE); 12170Sstevel@tonic-gate if (first_mp != NULL) 12187098Smeem ipsq_exit(ipsq); 12190Sstevel@tonic-gate mutex_enter(&(vifp)->v_lock); 12200Sstevel@tonic-gate } 12210Sstevel@tonic-gate 12220Sstevel@tonic-gate /* 12230Sstevel@tonic-gate * decreases the refcnt added in add_vif. 12240Sstevel@tonic-gate */ 12250Sstevel@tonic-gate VIF_REFRELE_LOCKED(vifp); 12260Sstevel@tonic-gate return (0); 12270Sstevel@tonic-gate } 12280Sstevel@tonic-gate 12290Sstevel@tonic-gate /* 12300Sstevel@tonic-gate * Add an mfc entry. 
12310Sstevel@tonic-gate */ 12320Sstevel@tonic-gate static int 12333448Sdh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 12340Sstevel@tonic-gate { 12350Sstevel@tonic-gate struct mfc *rt; 12360Sstevel@tonic-gate struct rtdetq *rte; 12370Sstevel@tonic-gate ushort_t nstl; 12380Sstevel@tonic-gate int i; 12390Sstevel@tonic-gate struct mfcb *mfcbp; 12405240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 12410Sstevel@tonic-gate 12420Sstevel@tonic-gate /* 12430Sstevel@tonic-gate * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted 12440Sstevel@tonic-gate * did not have a real route for pkt. 12450Sstevel@tonic-gate * We want this pkt without rt installed in the mfctable to prevent 12460Sstevel@tonic-gate * multiiple tries, so go ahead and put it in mfctable, it will 12470Sstevel@tonic-gate * be discarded later in ip_mdq() because the child is NULL. 12480Sstevel@tonic-gate */ 12490Sstevel@tonic-gate 12500Sstevel@tonic-gate /* Error checking, out of bounds? */ 12510Sstevel@tonic-gate if (mfccp->mfcc_parent > MAXVIFS) { 12520Sstevel@tonic-gate ip0dbg(("ADD_MFC: mfcc_parent out of range %d", 12530Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 12540Sstevel@tonic-gate return (EINVAL); 12550Sstevel@tonic-gate } 12560Sstevel@tonic-gate 12570Sstevel@tonic-gate if ((mfccp->mfcc_parent != NO_VIF) && 12583448Sdh155122 (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) { 12590Sstevel@tonic-gate ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n", 12600Sstevel@tonic-gate (int)mfccp->mfcc_parent)); 12610Sstevel@tonic-gate return (EINVAL); 12620Sstevel@tonic-gate } 12630Sstevel@tonic-gate 12643448Sdh155122 if (is_mrouter_off(ipst)) { 12650Sstevel@tonic-gate return (EINVAL); 12660Sstevel@tonic-gate } 12670Sstevel@tonic-gate 12683448Sdh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr, 12690Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr)]; 12700Sstevel@tonic-gate MFCB_REFHOLD(mfcbp); 12710Sstevel@tonic-gate MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr, 
12720Sstevel@tonic-gate mfccp->mfcc_mcastgrp.s_addr, rt); 12730Sstevel@tonic-gate 12740Sstevel@tonic-gate /* If an entry already exists, just update the fields */ 12750Sstevel@tonic-gate if (rt) { 12763448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 12775240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 12780Sstevel@tonic-gate "add_mfc: update o %x grp %x parent %x", 12790Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 12800Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 12810Sstevel@tonic-gate mfccp->mfcc_parent); 12820Sstevel@tonic-gate } 12830Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 12840Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 12850Sstevel@tonic-gate 12863448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 12873448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 12880Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 12893448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 12900Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 12910Sstevel@tonic-gate 12920Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 12930Sstevel@tonic-gate return (0); 12940Sstevel@tonic-gate } 12950Sstevel@tonic-gate 12960Sstevel@tonic-gate /* 12970Sstevel@tonic-gate * Find the entry for which the upcall was made and update. 
12980Sstevel@tonic-gate */ 12990Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) { 13000Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13010Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 13020Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 13030Sstevel@tonic-gate (rt->mfc_rte != NULL) && 13040Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 13050Sstevel@tonic-gate if (nstl++ != 0) 13060Sstevel@tonic-gate cmn_err(CE_WARN, 13070Sstevel@tonic-gate "add_mfc: %s o %x g %x p %x", 13080Sstevel@tonic-gate "multiple kernel entries", 13090Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 13100Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 13110Sstevel@tonic-gate mfccp->mfcc_parent); 13120Sstevel@tonic-gate 13133448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 13145240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 13153448Sdh155122 SL_TRACE, 13160Sstevel@tonic-gate "add_mfc: o %x g %x p %x", 13170Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 13180Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 13190Sstevel@tonic-gate mfccp->mfcc_parent); 13200Sstevel@tonic-gate } 13213448Sdh155122 fill_route(rt, mfccp, ipst); 13220Sstevel@tonic-gate 13230Sstevel@tonic-gate /* 13240Sstevel@tonic-gate * Prevent cleanup of cache entry. 13250Sstevel@tonic-gate * Timer starts in ip_mforward. 13260Sstevel@tonic-gate */ 13270Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 13280Sstevel@tonic-gate timeout_id_t id; 13290Sstevel@tonic-gate id = rt->mfc_timeout_id; 13300Sstevel@tonic-gate /* 13310Sstevel@tonic-gate * setting id to zero will avoid this 13320Sstevel@tonic-gate * entry from being cleaned up in 13330Sstevel@tonic-gate * expire_up_calls(). 13340Sstevel@tonic-gate */ 13350Sstevel@tonic-gate rt->mfc_timeout_id = 0; 13360Sstevel@tonic-gate /* 13370Sstevel@tonic-gate * dropping the lock is fine as we 13380Sstevel@tonic-gate * have a refhold on the bucket. 
13390Sstevel@tonic-gate * so mfc cannot be freed. 13400Sstevel@tonic-gate * The timeout can fire but it will see 13410Sstevel@tonic-gate * that mfc_timeout_id == 0 and not cleanup. 13420Sstevel@tonic-gate */ 13430Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13440Sstevel@tonic-gate (void) untimeout(id); 13450Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13460Sstevel@tonic-gate } 13470Sstevel@tonic-gate 13480Sstevel@tonic-gate /* 13490Sstevel@tonic-gate * Send all pkts that are queued waiting for the upcall. 13500Sstevel@tonic-gate * ip_mdq param tun set to 0 - 13510Sstevel@tonic-gate * the return value of ip_mdq() isn't used here, 13520Sstevel@tonic-gate * so value we send doesn't matter. 13530Sstevel@tonic-gate */ 13540Sstevel@tonic-gate while (rt->mfc_rte != NULL) { 13550Sstevel@tonic-gate rte = rt->mfc_rte; 13560Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 13570Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13580Sstevel@tonic-gate (void) ip_mdq(rte->mp, (ipha_t *) 13590Sstevel@tonic-gate rte->mp->b_rptr, rte->ill, 0, rt); 13600Sstevel@tonic-gate freemsg(rte->mp); 13610Sstevel@tonic-gate mi_free((char *)rte); 13620Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13630Sstevel@tonic-gate } 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13660Sstevel@tonic-gate } 13670Sstevel@tonic-gate 13680Sstevel@tonic-gate 13690Sstevel@tonic-gate /* 13700Sstevel@tonic-gate * It is possible that an entry is being inserted without an upcall 13710Sstevel@tonic-gate */ 13720Sstevel@tonic-gate if (nstl == 0) { 13730Sstevel@tonic-gate mutex_enter(&(mfcbp->mfcb_lock)); 13743448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 13755240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 13760Sstevel@tonic-gate "add_mfc: no upcall o %x g %x p %x", 13770Sstevel@tonic-gate ntohl(mfccp->mfcc_origin.s_addr), 13780Sstevel@tonic-gate ntohl(mfccp->mfcc_mcastgrp.s_addr), 13790Sstevel@tonic-gate mfccp->mfcc_parent); 13800Sstevel@tonic-gate } 13813448Sdh155122 if 
(is_mrouter_off(ipst)) { 13820Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 13830Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 13840Sstevel@tonic-gate return (EINVAL); 13850Sstevel@tonic-gate } 13860Sstevel@tonic-gate 13870Sstevel@tonic-gate for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) { 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 13900Sstevel@tonic-gate if ((rt->mfc_origin.s_addr == 13910Sstevel@tonic-gate mfccp->mfcc_origin.s_addr) && 13920Sstevel@tonic-gate (rt->mfc_mcastgrp.s_addr == 13935240Snordmark mfccp->mfcc_mcastgrp.s_addr) && 13945240Snordmark (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { 13953448Sdh155122 fill_route(rt, mfccp, ipst); 13960Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 13970Sstevel@tonic-gate break; 13980Sstevel@tonic-gate } 13990Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14000Sstevel@tonic-gate } 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate /* No upcall, so make a new entry into mfctable */ 14030Sstevel@tonic-gate if (rt == NULL) { 14040Sstevel@tonic-gate rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 14050Sstevel@tonic-gate if (rt == NULL) { 14060Sstevel@tonic-gate ip1dbg(("add_mfc: out of memory\n")); 14070Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 14080Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 14090Sstevel@tonic-gate return (ENOBUFS); 14100Sstevel@tonic-gate } 14110Sstevel@tonic-gate 14120Sstevel@tonic-gate /* Insert new entry at head of hash chain */ 14130Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 14143448Sdh155122 fill_route(rt, mfccp, ipst); 14150Sstevel@tonic-gate 14160Sstevel@tonic-gate /* Link into table */ 14170Sstevel@tonic-gate rt->mfc_next = mfcbp->mfcb_mfc; 14180Sstevel@tonic-gate mfcbp->mfcb_mfc = rt; 14190Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 14200Sstevel@tonic-gate } 14210Sstevel@tonic-gate mutex_exit(&mfcbp->mfcb_lock); 14220Sstevel@tonic-gate } 14230Sstevel@tonic-gate 14240Sstevel@tonic-gate MFCB_REFRELE(mfcbp); 14250Sstevel@tonic-gate return (0); 
14260Sstevel@tonic-gate } 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate /* 14290Sstevel@tonic-gate * Fills in mfc structure from mrouted mfcctl. 14300Sstevel@tonic-gate */ 14310Sstevel@tonic-gate static void 14323448Sdh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst) 14330Sstevel@tonic-gate { 14340Sstevel@tonic-gate int i; 14350Sstevel@tonic-gate 14360Sstevel@tonic-gate rt->mfc_origin = mfccp->mfcc_origin; 14370Sstevel@tonic-gate rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 14380Sstevel@tonic-gate rt->mfc_parent = mfccp->mfcc_parent; 14393448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 14403448Sdh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) { 14410Sstevel@tonic-gate rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 14420Sstevel@tonic-gate } 14433448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 14440Sstevel@tonic-gate /* Initialize pkt counters per src-grp */ 14450Sstevel@tonic-gate rt->mfc_pkt_cnt = 0; 14460Sstevel@tonic-gate rt->mfc_byte_cnt = 0; 14470Sstevel@tonic-gate rt->mfc_wrong_if = 0; 14480Sstevel@tonic-gate rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0; 14490Sstevel@tonic-gate 14500Sstevel@tonic-gate } 14510Sstevel@tonic-gate 14520Sstevel@tonic-gate static void 14530Sstevel@tonic-gate free_queue(struct mfc *mfcp) 14540Sstevel@tonic-gate { 14550Sstevel@tonic-gate struct rtdetq *rte0; 14560Sstevel@tonic-gate 14570Sstevel@tonic-gate /* 14580Sstevel@tonic-gate * Drop all queued upcall packets. 14590Sstevel@tonic-gate * Free the mbuf with the pkt. 14600Sstevel@tonic-gate */ 14610Sstevel@tonic-gate while ((rte0 = mfcp->mfc_rte) != NULL) { 14620Sstevel@tonic-gate mfcp->mfc_rte = rte0->rte_next; 14630Sstevel@tonic-gate freemsg(rte0->mp); 14640Sstevel@tonic-gate mi_free((char *)rte0); 14650Sstevel@tonic-gate } 14660Sstevel@tonic-gate } 14670Sstevel@tonic-gate /* 14680Sstevel@tonic-gate * go thorugh the hash bucket and free all the entries marked condemned. 
14690Sstevel@tonic-gate */ 14700Sstevel@tonic-gate void 14710Sstevel@tonic-gate release_mfc(struct mfcb *mfcbp) 14720Sstevel@tonic-gate { 14730Sstevel@tonic-gate struct mfc *current_mfcp; 14740Sstevel@tonic-gate struct mfc *prev_mfcp; 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14770Sstevel@tonic-gate 14780Sstevel@tonic-gate while (current_mfcp != NULL) { 14790Sstevel@tonic-gate if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) { 14800Sstevel@tonic-gate if (current_mfcp == mfcbp->mfcb_mfc) { 14810Sstevel@tonic-gate mfcbp->mfcb_mfc = current_mfcp->mfc_next; 14820Sstevel@tonic-gate free_queue(current_mfcp); 14830Sstevel@tonic-gate mi_free(current_mfcp); 14840Sstevel@tonic-gate prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 14850Sstevel@tonic-gate continue; 14860Sstevel@tonic-gate } 14870Sstevel@tonic-gate ASSERT(prev_mfcp != NULL); 14880Sstevel@tonic-gate prev_mfcp->mfc_next = current_mfcp->mfc_next; 14890Sstevel@tonic-gate free_queue(current_mfcp); 14900Sstevel@tonic-gate mi_free(current_mfcp); 14910Sstevel@tonic-gate current_mfcp = NULL; 14920Sstevel@tonic-gate } else { 14930Sstevel@tonic-gate prev_mfcp = current_mfcp; 14940Sstevel@tonic-gate } 14950Sstevel@tonic-gate 14960Sstevel@tonic-gate current_mfcp = prev_mfcp->mfc_next; 14970Sstevel@tonic-gate 14980Sstevel@tonic-gate } 14990Sstevel@tonic-gate mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED; 15000Sstevel@tonic-gate ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0); 15010Sstevel@tonic-gate } 15020Sstevel@tonic-gate 15030Sstevel@tonic-gate /* 15040Sstevel@tonic-gate * Delete an mfc entry. 
15050Sstevel@tonic-gate */ 15060Sstevel@tonic-gate static int 15073448Sdh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 15080Sstevel@tonic-gate { 15090Sstevel@tonic-gate struct in_addr origin; 15100Sstevel@tonic-gate struct in_addr mcastgrp; 15115240Snordmark struct mfc *rt; 15125240Snordmark uint_t hash; 15135240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 15140Sstevel@tonic-gate 15150Sstevel@tonic-gate origin = mfccp->mfcc_origin; 15160Sstevel@tonic-gate mcastgrp = mfccp->mfcc_mcastgrp; 15170Sstevel@tonic-gate hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 15180Sstevel@tonic-gate 15193448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 15205240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 15210Sstevel@tonic-gate "del_mfc: o %x g %x", 15220Sstevel@tonic-gate ntohl(origin.s_addr), 15230Sstevel@tonic-gate ntohl(mcastgrp.s_addr)); 15240Sstevel@tonic-gate } 15250Sstevel@tonic-gate 15263448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 15270Sstevel@tonic-gate 15280Sstevel@tonic-gate /* Find mfc in mfctable, finds only entries without upcalls */ 15293448Sdh155122 for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 15300Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 15310Sstevel@tonic-gate if (origin.s_addr == rt->mfc_origin.s_addr && 15320Sstevel@tonic-gate mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 15330Sstevel@tonic-gate rt->mfc_rte == NULL && 15340Sstevel@tonic-gate !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) 15350Sstevel@tonic-gate break; 15360Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 15370Sstevel@tonic-gate } 15380Sstevel@tonic-gate 15390Sstevel@tonic-gate /* 15400Sstevel@tonic-gate * Return if there was an upcall (mfc_rte != NULL, 15410Sstevel@tonic-gate * or rt not in mfctable. 
15420Sstevel@tonic-gate */ 15430Sstevel@tonic-gate if (rt == NULL) { 15443448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 15450Sstevel@tonic-gate return (EADDRNOTAVAIL); 15460Sstevel@tonic-gate } 15470Sstevel@tonic-gate 15480Sstevel@tonic-gate 15490Sstevel@tonic-gate /* 15500Sstevel@tonic-gate * no need to hold lock as we have a reference. 15510Sstevel@tonic-gate */ 15523448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 15530Sstevel@tonic-gate /* error checking */ 15540Sstevel@tonic-gate if (rt->mfc_timeout_id != 0) { 15550Sstevel@tonic-gate ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 15560Sstevel@tonic-gate /* 15570Sstevel@tonic-gate * Its ok to drop the lock, the struct cannot be freed 15580Sstevel@tonic-gate * since we have a ref on the hash bucket. 15590Sstevel@tonic-gate */ 15600Sstevel@tonic-gate rt->mfc_timeout_id = 0; 15610Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 15620Sstevel@tonic-gate (void) untimeout(rt->mfc_timeout_id); 15630Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate ASSERT(rt->mfc_rte == NULL); 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate 15690Sstevel@tonic-gate /* 15700Sstevel@tonic-gate * Delete the entry from the cache 15710Sstevel@tonic-gate */ 15720Sstevel@tonic-gate rt->mfc_marks |= MFCB_MARK_CONDEMNED; 15730Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 15740Sstevel@tonic-gate 15753448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 15760Sstevel@tonic-gate 15770Sstevel@tonic-gate return (0); 15780Sstevel@tonic-gate } 15790Sstevel@tonic-gate 15800Sstevel@tonic-gate #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 15810Sstevel@tonic-gate 15820Sstevel@tonic-gate /* 15830Sstevel@tonic-gate * IP multicast forwarding function. 
This function assumes that the packet
 * pointed to by ipha has arrived on (or is about to be sent to) the interface
 * pointed to by "ill", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is
 * erroneous, in which case a -1 value tells the caller (IP)
 * to discard it.
 *
 * Unlike BSD, SunOS 5.x needs to return to IP info about
 * whether pkt came in thru a tunnel, so it can be discarded, unless
 * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
 * to be delivered.
 * Return values are 0 - pkt is okay and phyint
 *		    -1 - pkt is malformed and to be tossed
 *		    1  - pkt came in on tunnel
 *
 * -1 is also returned when a source-routed packet is seen, when
 * is_mrouter_off() reports true, and when the packet cannot be queued
 * for an upcall (allocation failure, or more than MAX_UPQ packets are
 * already queued).  On a cache hit the value returned is whatever
 * ip_mdq() returns.
 *
 * mp->b_prev is used as an input: it holds either PIM_REGISTER_MARKER or
 * the tunnel source address (0 when neither applies).
 */
int
ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
{
	struct mfc	*rt;
	ipaddr_t	src, dst, tunnel_src = 0;
	/* Counts source-routed packets seen; only used to rate-limit a warning */
	static int	srctun = 0;
	vifi_t		vifi;
	boolean_t	pim_reg_packet = B_FALSE;
	struct mfcb	*mfcbp;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
		    ill->ill_name);
	}

	dst = ipha->ipha_dst;
	/* Decode b_prev: PIM register marker, else a tunnel source address */
	if ((uint32_t)(uintptr_t)mp->b_prev == PIM_REGISTER_MARKER)
		pim_reg_packet = B_TRUE;
	else
		tunnel_src = (ipaddr_t)(uintptr_t)mp->b_prev;

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: not forwarded ttl %d,"
			    " dst 0x%x ill %s",
			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
		}
		mp->b_prev = NULL;
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
	}

	if ((tunnel_src != 0) || pim_reg_packet) {
		/*
		 * Packet arrived over an encapsulated tunnel or via a PIM
		 * register message. Both ip_mroute_decap() and pim_input()
		 * encode information in mp->b_prev.
		 */
		mp->b_prev = NULL;
		if (ipst->ips_ip_mrtdebug > 1) {
			if (tunnel_src != 0) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: ill %s arrived via ENCAP TUN",
				    ill->ill_name);
			} else if (pim_reg_packet) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: ill %s arrived via"
				    " REGISTER VIF",
				    ill->ill_name);
			}
		}
	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
		/*
		 * Packet arrived via a physical interface: either the
		 * header length (in 32-bit words) is too small to hold a
		 * TUNNEL_LEN-byte option, or the second byte following the
		 * fixed header is not IPOPT_LSRR.
		 */
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: ill %s arrived via PHYINT",
			    ill->ill_name);
		}

	} else {
		/*
		 * Packet arrived through a SRCRT tunnel.
		 * Source-route tunnels are no longer supported.
		 * Error message printed every 1000 times.
		 */
		if ((srctun++ % 1000) == 0) {
			cmn_err(CE_WARN,
			    "ip_mforward: received source-routed pkt from %x",
			    ntohl(ipha->ipha_src));
		}
		return (-1);
	}

	ipst->ips_mrtstat->mrts_fwd_in++;
	src = ipha->ipha_src;

	/* Find route in cache, return NULL if not there or upcalls q'ed. */

	/*
	 * Lock the mfctable against changes made by ip_mforward.
	 * Note that only add_mfc and del_mfc can remove entries and
	 * they run with exclusive access to IP. So we do not need to
	 * guard against the rt being deleted, so release lock after reading.
	 *
	 * NOTE(review): what the code below actually takes is a reference
	 * hold on the bucket (MFCB_REFHOLD), not mfcb_lock; the bucket lock
	 * is only acquired later, on the cache-miss path.
	 */

	if (is_mrouter_off(ipst))
		return (-1);

	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
	MFCB_REFHOLD(mfcbp);
	MFCFIND(mfcbp, src, dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		int ret = 0;
		ipst->ips_mrtstat->mrts_mfc_hits++;
		if (pim_reg_packet) {
			/* Register packets are forwarded as if from the register vif's ill */
			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
			ret = ip_mdq(mp, ipha,
			    ipst->ips_vifs[ipst->ips_reg_vif_num].
			    v_ipif->ipif_ill,
			    0, rt);
		} else {
			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
		}

		MFCB_REFRELE(mfcbp);
		return (ret);

		/*
		 * Don't forward if we don't have a cache entry.  Mrouted will
		 * always provide a cache entry in response to an upcall.
		 */
	} else {
		/*
		 * If we don't have a route for packet's origin, make a copy
		 * of the packet and send message to routing daemon.
		 *
		 * Everything that error_return may have to free is
		 * initialised to NULL here so the cleanup code can test it.
		 */
		struct mfc	*mfc_rt	 = NULL;
		mblk_t		*mp0	 = NULL;
		mblk_t		*mp_copy = NULL;
		struct rtdetq	*rte	 = NULL;
		struct rtdetq	*rte_m, *rte1, *prev_rte;
		uint_t		hash;
		int		npkts;
		boolean_t	new_mfc = B_FALSE;
		ipst->ips_mrtstat->mrts_mfc_misses++;
		/* BSD uses mrts_no_route++ */
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: no rte ill %s src %x g %x misses %d",
			    ill->ill_name, ntohl(src), ntohl(dst),
			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
		}
		/*
		 * The order of the following code differs from the BSD code.
		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
		 * code works, so SunOS 5.x wasn't changed to conform to the
		 * BSD version.
		 */

		/*
		 * Lock mfctable.  The same (src, dst) hash selected mfcbp
		 * above, so ips_mfcs[hash] and *mfcbp are the same bucket.
		 */
		hash = MFCHASH(src, dst);
		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));

		/*
		 * If we are turning off mrouted return an error
		 */
		if (is_mrouter_off(ipst)) {
			mutex_exit(&mfcbp->mfcb_lock);
			MFCB_REFRELE(mfcbp);
			return (-1);
		}

		/*
		 * Is there an upcall waiting for this packet?
		 * On a match the loop exits with mfc_rt->mfc_mutex held.
		 */
		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
		    mfc_rt = mfc_rt->mfc_next) {
			mutex_enter(&mfc_rt->mfc_mutex);
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: MFCTAB hash %d o 0x%x"
				    " g 0x%x\n",
				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			/* There is an upcall */
			if ((src == mfc_rt->mfc_origin.s_addr) &&
			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
			    (mfc_rt->mfc_rte != NULL) &&
			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
				break;
			}
			mutex_exit(&mfc_rt->mfc_mutex);
		}
		/* No upcall, so make a new entry into mfctable */
		if (mfc_rt == NULL) {
			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
			if (mfc_rt == NULL) {
				ipst->ips_mrtstat->mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory "
				    "for mfc, mfc_rt\n"));
				goto error_return;
			} else
				new_mfc = B_TRUE;
			/* Get resources */
			/* TODO could copy header and dup rest */
			/* mp_copy becomes the upcall message sent to the daemon */
			mp_copy = copymsg(mp);
			if (mp_copy == NULL) {
				ipst->ips_mrtstat->mrts_fwd_drop++;
				ip1dbg(("ip_mforward: out of memory for "
				    "mblk, mp_copy\n"));
				goto error_return;
			}
			/*
			 * From here on mfc_rt->mfc_mutex is held on both the
			 * "found" and the "new entry" paths; every later
			 * goto error_return drops it first.
			 */
			mutex_enter(&mfc_rt->mfc_mutex);
		}
		/* Get resources for rte, whether first rte or not first. */
		/* Add this packet into rtdetq */
		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
		if (rte == NULL) {
			ipst->ips_mrtstat->mrts_fwd_drop++;
			mutex_exit(&mfc_rt->mfc_mutex);
			ip1dbg(("ip_mforward: out of memory for"
			    " rtdetq, rte\n"));
			goto error_return;
		}

		/* mp0 is the copy that stays queued on the mfc entry */
		mp0 = copymsg(mp);
		if (mp0 == NULL) {
			ipst->ips_mrtstat->mrts_fwd_drop++;
			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}
		rte->mp		= mp0;
		if (pim_reg_packet) {
			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
			rte->ill =
			    ipst->ips_vifs[ipst->ips_reg_vif_num].
			    v_ipif->ipif_ill;
		} else {
			rte->ill = ill;
		}
		rte->rte_next	= NULL;

		/*
		 * Determine if upcall q (rtdetq) has overflowed.
		 * mfc_rt->mfc_rte is null by mi_zalloc
		 * if it is the first message.
		 */
		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
		    rte_m = rte_m->rte_next)
			npkts++;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mforward: upcalls %d\n", npkts);
		}
		if (npkts > MAX_UPQ) {
			ipst->ips_mrtstat->mrts_upq_ovflw++;
			mutex_exit(&mfc_rt->mfc_mutex);
			goto error_return;
		}

		/*
		 * npkts == 0 only for an entry allocated above: an entry
		 * found by the search loop has mfc_rte != NULL and so at
		 * least one queued packet.
		 */
		if (npkts == 0) {	/* first upcall */
			int i = 0;
			/*
			 * Now finish installing the new mfc! Now that we have
			 * resources!  Insert new entry at head of hash chain.
			 * Use src and dst which are ipaddr_t's.
			 */
			mfc_rt->mfc_origin.s_addr = src;
			mfc_rt->mfc_mcastgrp.s_addr = dst;

			mutex_enter(&ipst->ips_numvifs_mutex);
			for (i = 0; i < (int)ipst->ips_numvifs; i++)
				mfc_rt->mfc_ttls[i] = 0;
			mutex_exit(&ipst->ips_numvifs_mutex);
			mfc_rt->mfc_parent = ALL_VIFS;

			/* Link into table */
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
				    "g 0x%x\n", hash,
				    ntohl(mfc_rt->mfc_origin.s_addr),
				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
			}
			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
			mfc_rt->mfc_rte = NULL;
		}

		/* Link in the upcall */
		/* First upcall */
		if (mfc_rt->mfc_rte == NULL)
			mfc_rt->mfc_rte = rte;
		else {
			/* not the first upcall: walk to the tail and append */
			prev_rte = mfc_rt->mfc_rte;
			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
			    prev_rte = rte1, rte1 = rte1->rte_next)
				;
			prev_rte->rte_next = rte;
		}

		/*
		 * No upcalls waiting, this is first one, so send a message to
		 * routing daemon to install a route into kernel table.
		 */
		if (npkts == 0) {
			struct igmpmsg	*im;
			/* ipha_protocol is 0, for upcall */
			ASSERT(mp_copy != NULL);
			/* The start of the copied packet is overlaid with an igmpmsg */
			im = (struct igmpmsg *)mp_copy->b_rptr;
			im->im_msgtype	= IGMPMSG_NOCACHE;
			im->im_mbz = 0;
			mutex_enter(&ipst->ips_numvifs_mutex);
			if (pim_reg_packet) {
				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
				mutex_exit(&ipst->ips_numvifs_mutex);
			} else {
				/*
				 * XXX do we need to hold locks here ?
				 *
				 * Report the first vif whose ipif sits on
				 * the arrival ill.
				 */
				for (vifi = 0;
				    vifi < ipst->ips_numvifs;
				    vifi++) {
					if (ipst->ips_vifs[vifi].v_ipif == NULL)
						continue;
					if (ipst->ips_vifs[vifi].
					    v_ipif->ipif_ill == ill) {
						im->im_vif = (uchar_t)vifi;
						break;
					}
				}
				mutex_exit(&ipst->ips_numvifs_mutex);
				/*
				 * NOTE(review): if no vif matches, im_vif is
				 * never written here and the only check is
				 * this ASSERT - confirm that is acceptable
				 * when assertions are compiled out.
				 */
				ASSERT(vifi < ipst->ips_numvifs);
			}

			ipst->ips_mrtstat->mrts_upcalls++;
			/* Timer to discard upcalls if mrouted is too slow */
			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
			/* Both locks are dropped before calling up to the daemon */
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
			/* Pass to RAWIP */
			(mrouter->conn_recv)(mrouter, mp_copy, NULL);
		} else {
			mutex_exit(&mfc_rt->mfc_mutex);
			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
			/*
			 * mp_copy is only allocated on the new-entry path,
			 * so it is still NULL here; presumably freemsg()
			 * tolerates NULL - confirm against freemsg(9F).
			 */
			freemsg(mp_copy);
		}

		MFCB_REFRELE(mfcbp);
		if (tunnel_src != 0)
			return (1);
		else
			return (0);
		/*
		 * Common failure exit.  Callers have already dropped
		 * mfc_rt->mfc_mutex if they held it; the bucket lock and
		 * the bucket reference are released here, followed by
		 * whatever was allocated.  mfc_rt is freed only when it was
		 * allocated by this call (new_mfc), never when it was found
		 * on the chain.
		 */
	error_return:
		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
		MFCB_REFRELE(mfcbp);
		if (mfc_rt != NULL && (new_mfc == B_TRUE))
			mi_free((char *)mfc_rt);
		if (rte != NULL)
			mi_free((char *)rte);
		if (mp_copy != NULL)
			freemsg(mp_copy);
		if (mp0 != NULL)
			freemsg(mp0);
		return (-1);
	}
}

/*
 * Clean up the mfctable cache entry if upcall is not serviced.
 * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
19730Sstevel@tonic-gate */ 19740Sstevel@tonic-gate static void 19750Sstevel@tonic-gate expire_upcalls(void *arg) 19760Sstevel@tonic-gate { 19770Sstevel@tonic-gate struct mfc *mfc_rt = arg; 19780Sstevel@tonic-gate uint_t hash; 19790Sstevel@tonic-gate struct mfc *prev_mfc, *mfc0; 19803448Sdh155122 ip_stack_t *ipst; 19815240Snordmark conn_t *mrouter; 19823448Sdh155122 19833448Sdh155122 if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { 19843448Sdh155122 cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); 19853448Sdh155122 return; 19863448Sdh155122 } 19873448Sdh155122 ipst = mfc_rt->mfc_rte->ill->ill_ipst; 19885240Snordmark mrouter = ipst->ips_ip_g_mrouter; 19890Sstevel@tonic-gate 19900Sstevel@tonic-gate hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); 19913448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 19925240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 19930Sstevel@tonic-gate "expire_upcalls: hash %d s %x g %x", 19940Sstevel@tonic-gate hash, ntohl(mfc_rt->mfc_origin.s_addr), 19950Sstevel@tonic-gate ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 19960Sstevel@tonic-gate } 19973448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 19980Sstevel@tonic-gate mutex_enter(&mfc_rt->mfc_mutex); 19990Sstevel@tonic-gate /* 20000Sstevel@tonic-gate * if timeout has been set to zero, than the 20010Sstevel@tonic-gate * entry has been filled, no need to delete it. 20020Sstevel@tonic-gate */ 20030Sstevel@tonic-gate if (mfc_rt->mfc_timeout_id == 0) 20040Sstevel@tonic-gate goto done; 20053448Sdh155122 ipst->ips_mrtstat->mrts_cache_cleanups++; 20060Sstevel@tonic-gate mfc_rt->mfc_timeout_id = 0; 20070Sstevel@tonic-gate 20080Sstevel@tonic-gate /* Determine entry to be cleaned up in cache table. 
*/ 20093448Sdh155122 for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0; 20100Sstevel@tonic-gate prev_mfc = mfc0, mfc0 = mfc0->mfc_next) 20110Sstevel@tonic-gate if (mfc0 == mfc_rt) 20120Sstevel@tonic-gate break; 20130Sstevel@tonic-gate 20140Sstevel@tonic-gate /* del_mfc takes care of gone mfcs */ 20150Sstevel@tonic-gate ASSERT(prev_mfc != NULL); 20160Sstevel@tonic-gate ASSERT(mfc0 != NULL); 20170Sstevel@tonic-gate 20180Sstevel@tonic-gate /* 20190Sstevel@tonic-gate * Delete the entry from the cache 20200Sstevel@tonic-gate */ 20213448Sdh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 20220Sstevel@tonic-gate mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 20230Sstevel@tonic-gate 20240Sstevel@tonic-gate /* 20250Sstevel@tonic-gate * release_mfc will drop all queued upcall packets. 20260Sstevel@tonic-gate * and will free the mbuf with the pkt, if, timing info. 20270Sstevel@tonic-gate */ 20280Sstevel@tonic-gate done: 20290Sstevel@tonic-gate mutex_exit(&mfc_rt->mfc_mutex); 20303448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 20310Sstevel@tonic-gate } 20320Sstevel@tonic-gate 20330Sstevel@tonic-gate /* 20340Sstevel@tonic-gate * Packet forwarding routine once entry in the cache is made. 
20350Sstevel@tonic-gate */ 20360Sstevel@tonic-gate static int 20370Sstevel@tonic-gate ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, 20380Sstevel@tonic-gate struct mfc *rt) 20390Sstevel@tonic-gate { 2040*8485SPeter.Memishian@Sun.COM ill_t *vill; 20410Sstevel@tonic-gate vifi_t vifi; 20420Sstevel@tonic-gate struct vif *vifp; 20430Sstevel@tonic-gate ipaddr_t dst = ipha->ipha_dst; 20440Sstevel@tonic-gate size_t plen = msgdsize(mp); 20450Sstevel@tonic-gate vifi_t num_of_vifs; 20463448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 20475240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 20483448Sdh155122 20493448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 20505240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20510Sstevel@tonic-gate "ip_mdq: SEND src %x, ipha_dst %x, ill %s", 20520Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 20530Sstevel@tonic-gate ill->ill_name); 20540Sstevel@tonic-gate } 20550Sstevel@tonic-gate 20560Sstevel@tonic-gate /* Macro to send packet on vif */ 20570Sstevel@tonic-gate #define MC_SEND(ipha, mp, vifp, dst) { \ 20580Sstevel@tonic-gate if ((vifp)->v_flags & VIFF_TUNNEL) \ 20590Sstevel@tonic-gate encap_send((ipha), (mp), (vifp), (dst)); \ 20600Sstevel@tonic-gate else if ((vifp)->v_flags & VIFF_REGISTER) \ 20610Sstevel@tonic-gate register_send((ipha), (mp), (vifp), (dst)); \ 20620Sstevel@tonic-gate else \ 20630Sstevel@tonic-gate phyint_send((ipha), (mp), (vifp), (dst)); \ 20640Sstevel@tonic-gate } 20650Sstevel@tonic-gate 20660Sstevel@tonic-gate vifi = rt->mfc_parent; 20670Sstevel@tonic-gate 20680Sstevel@tonic-gate /* 20690Sstevel@tonic-gate * The value of vifi is MAXVIFS if the pkt had no parent, i.e., 20700Sstevel@tonic-gate * Mrouted had no route. 20710Sstevel@tonic-gate * We wanted the route installed in the mfctable to prevent multiple 20720Sstevel@tonic-gate * tries, so it passed add_mfc(), but is discarded here. The v_ipif is 20730Sstevel@tonic-gate * NULL so we don't want to check the ill. 
Still needed as of Mrouted 20740Sstevel@tonic-gate * 3.6. 20750Sstevel@tonic-gate */ 20760Sstevel@tonic-gate if (vifi == NO_VIF) { 20770Sstevel@tonic-gate ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", 20780Sstevel@tonic-gate ill->ill_name)); 20793448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 20805240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 20810Sstevel@tonic-gate "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); 20820Sstevel@tonic-gate } 20830Sstevel@tonic-gate return (-1); /* drop pkt */ 20840Sstevel@tonic-gate } 20850Sstevel@tonic-gate 20863448Sdh155122 if (!lock_good_vif(&ipst->ips_vifs[vifi])) 20870Sstevel@tonic-gate return (-1); 20880Sstevel@tonic-gate /* 20890Sstevel@tonic-gate * The MFC entries are not cleaned up when an ipif goes 20900Sstevel@tonic-gate * away thus this code has to guard against an MFC referencing 20910Sstevel@tonic-gate * an ipif that has been closed. Note: reset_mrt_vif_ipif 20920Sstevel@tonic-gate * sets the v_ipif to NULL when the ipif disappears. 20930Sstevel@tonic-gate */ 20943448Sdh155122 ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL); 20953448Sdh155122 20963448Sdh155122 if (vifi >= ipst->ips_numvifs) { 20970Sstevel@tonic-gate cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs " 20980Sstevel@tonic-gate "%d ill %s viftable ill %s\n", 20993448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 21003448Sdh155122 ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 21013448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21020Sstevel@tonic-gate return (-1); 21030Sstevel@tonic-gate } 21040Sstevel@tonic-gate /* 21050Sstevel@tonic-gate * Don't forward if it didn't arrive from the parent vif for its 2106*8485SPeter.Memishian@Sun.COM * origin. 
21070Sstevel@tonic-gate */ 2108*8485SPeter.Memishian@Sun.COM vill = ipst->ips_vifs[vifi].v_ipif->ipif_ill; 2109*8485SPeter.Memishian@Sun.COM if ((vill != ill && !IS_IN_SAME_ILLGRP(vill, ill)) || 21103448Sdh155122 (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) { 21110Sstevel@tonic-gate /* Came in the wrong interface */ 21120Sstevel@tonic-gate ip1dbg(("ip_mdq: arrived wrong if, vifi %d " 21130Sstevel@tonic-gate "numvifs %d ill %s viftable ill %s\n", 21143448Sdh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 2115*8485SPeter.Memishian@Sun.COM vill->ill_name)); 21163448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 21175240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 21180Sstevel@tonic-gate "ip_mdq: arrived wrong if, vifi %d ill " 21190Sstevel@tonic-gate "%s viftable ill %s\n", 2120*8485SPeter.Memishian@Sun.COM (int)vifi, ill->ill_name, vill->ill_name); 21210Sstevel@tonic-gate } 21223448Sdh155122 ipst->ips_mrtstat->mrts_wrong_if++; 21230Sstevel@tonic-gate rt->mfc_wrong_if++; 21240Sstevel@tonic-gate 21250Sstevel@tonic-gate /* 21260Sstevel@tonic-gate * If we are doing PIM assert processing and we are forwarding 21270Sstevel@tonic-gate * packets on this interface, and it is a broadcast medium 21280Sstevel@tonic-gate * interface (and not a tunnel), send a message to the routing. 21290Sstevel@tonic-gate * 21300Sstevel@tonic-gate * We use the first ipif on the list, since it's all we have. 21310Sstevel@tonic-gate * Chances are the ipif_flags are the same for ipifs on the ill. 
21320Sstevel@tonic-gate */ 21333448Sdh155122 if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 && 21340Sstevel@tonic-gate (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) && 21353448Sdh155122 !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) { 21360Sstevel@tonic-gate mblk_t *mp_copy; 21370Sstevel@tonic-gate struct igmpmsg *im; 21380Sstevel@tonic-gate 21390Sstevel@tonic-gate /* TODO could copy header and dup rest */ 21400Sstevel@tonic-gate mp_copy = copymsg(mp); 21410Sstevel@tonic-gate if (mp_copy == NULL) { 21423448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 21430Sstevel@tonic-gate ip1dbg(("ip_mdq: out of memory " 21440Sstevel@tonic-gate "for mblk, mp_copy\n")); 21453448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21460Sstevel@tonic-gate return (-1); 21470Sstevel@tonic-gate } 21480Sstevel@tonic-gate 21490Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 21500Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WRONGVIF; 21510Sstevel@tonic-gate im->im_mbz = 0; 21520Sstevel@tonic-gate im->im_vif = (ushort_t)vifi; 21535240Snordmark /* Pass to RAWIP */ 21545240Snordmark (mrouter->conn_recv)(mrouter, mp_copy, NULL); 21550Sstevel@tonic-gate } 21563448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21570Sstevel@tonic-gate if (tunnel_src != 0) 21580Sstevel@tonic-gate return (1); 21590Sstevel@tonic-gate else 21600Sstevel@tonic-gate return (0); 21610Sstevel@tonic-gate } 21620Sstevel@tonic-gate /* 21630Sstevel@tonic-gate * If I sourced this packet, it counts as output, else it was input. 
21640Sstevel@tonic-gate */ 21653448Sdh155122 if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) { 21663448Sdh155122 ipst->ips_vifs[vifi].v_pkt_out++; 21673448Sdh155122 ipst->ips_vifs[vifi].v_bytes_out += plen; 21680Sstevel@tonic-gate } else { 21693448Sdh155122 ipst->ips_vifs[vifi].v_pkt_in++; 21703448Sdh155122 ipst->ips_vifs[vifi].v_bytes_in += plen; 21710Sstevel@tonic-gate } 21720Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 21730Sstevel@tonic-gate rt->mfc_pkt_cnt++; 21740Sstevel@tonic-gate rt->mfc_byte_cnt += plen; 21750Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 21763448Sdh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 21770Sstevel@tonic-gate /* 21780Sstevel@tonic-gate * For each vif, decide if a copy of the packet should be forwarded. 21790Sstevel@tonic-gate * Forward if: 21800Sstevel@tonic-gate * - the vif threshold ttl is non-zero AND 21810Sstevel@tonic-gate * - the pkt ttl exceeds the vif's threshold 21820Sstevel@tonic-gate * A non-zero mfc_ttl indicates that the vif is part of 21830Sstevel@tonic-gate * the output set for the mfc entry. 21840Sstevel@tonic-gate */ 21853448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 21863448Sdh155122 num_of_vifs = ipst->ips_numvifs; 21873448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 21883448Sdh155122 for (vifp = ipst->ips_vifs, vifi = 0; 21893448Sdh155122 vifi < num_of_vifs; 21903448Sdh155122 vifp++, vifi++) { 21910Sstevel@tonic-gate if (!lock_good_vif(vifp)) 21920Sstevel@tonic-gate continue; 21930Sstevel@tonic-gate if ((rt->mfc_ttls[vifi] > 0) && 21940Sstevel@tonic-gate (ipha->ipha_ttl > rt->mfc_ttls[vifi])) { 21950Sstevel@tonic-gate /* 21960Sstevel@tonic-gate * lock_good_vif should not have succedded if 21970Sstevel@tonic-gate * v_ipif is null. 
21980Sstevel@tonic-gate */ 21990Sstevel@tonic-gate ASSERT(vifp->v_ipif != NULL); 22000Sstevel@tonic-gate vifp->v_pkt_out++; 22010Sstevel@tonic-gate vifp->v_bytes_out += plen; 22020Sstevel@tonic-gate MC_SEND(ipha, mp, vifp, dst); 22033448Sdh155122 ipst->ips_mrtstat->mrts_fwd_out++; 22040Sstevel@tonic-gate } 22050Sstevel@tonic-gate unlock_good_vif(vifp); 22060Sstevel@tonic-gate } 22070Sstevel@tonic-gate if (tunnel_src != 0) 22080Sstevel@tonic-gate return (1); 22090Sstevel@tonic-gate else 22100Sstevel@tonic-gate return (0); 22110Sstevel@tonic-gate } 22120Sstevel@tonic-gate 22130Sstevel@tonic-gate /* 22140Sstevel@tonic-gate * Send the packet on physical interface. 22150Sstevel@tonic-gate * Caller assumes can continue to use mp on return. 22160Sstevel@tonic-gate */ 22170Sstevel@tonic-gate /* ARGSUSED */ 22180Sstevel@tonic-gate static void 22190Sstevel@tonic-gate phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 22200Sstevel@tonic-gate { 22210Sstevel@tonic-gate mblk_t *mp_copy; 22223448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 22235240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 22240Sstevel@tonic-gate 22250Sstevel@tonic-gate /* Make a new reference to the packet */ 22260Sstevel@tonic-gate mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ 22270Sstevel@tonic-gate if (mp_copy == NULL) { 22283448Sdh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 22290Sstevel@tonic-gate ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n")); 22300Sstevel@tonic-gate return; 22310Sstevel@tonic-gate } 22320Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 22330Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 22340Sstevel@tonic-gate else { 22353448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 22365240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22370Sstevel@tonic-gate "phyint_send: tbf_contr rate %d " 22380Sstevel@tonic-gate "vifp 0x%p mp 0x%p dst 0x%x", 22390Sstevel@tonic-gate vifp->v_rate_limit, (void *)vifp, (void *)mp, 
dst); 22400Sstevel@tonic-gate } 22410Sstevel@tonic-gate tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr); 22420Sstevel@tonic-gate } 22430Sstevel@tonic-gate } 22440Sstevel@tonic-gate 22450Sstevel@tonic-gate /* 22460Sstevel@tonic-gate * Send the whole packet for REGISTER encapsulation to PIM daemon 22470Sstevel@tonic-gate * Caller assumes it can continue to use mp on return. 22480Sstevel@tonic-gate */ 22490Sstevel@tonic-gate /* ARGSUSED */ 22500Sstevel@tonic-gate static void 22510Sstevel@tonic-gate register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 22520Sstevel@tonic-gate { 22530Sstevel@tonic-gate struct igmpmsg *im; 22540Sstevel@tonic-gate mblk_t *mp_copy; 22550Sstevel@tonic-gate ipha_t *ipha_copy; 22563448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 22575240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 22583448Sdh155122 22593448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 22605240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22610Sstevel@tonic-gate "register_send: src %x, dst %x\n", 22620Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 22630Sstevel@tonic-gate } 22640Sstevel@tonic-gate 22650Sstevel@tonic-gate /* 22660Sstevel@tonic-gate * Copy the old packet & pullup its IP header into the new mblk_t so we 22670Sstevel@tonic-gate * can modify it. Try to fill the new mblk_t since if we don't the 22680Sstevel@tonic-gate * ethernet driver will. 
22690Sstevel@tonic-gate */ 22700Sstevel@tonic-gate mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED); 22710Sstevel@tonic-gate if (mp_copy == NULL) { 22723448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 22733448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 22745240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22750Sstevel@tonic-gate "register_send: allocb failure."); 22760Sstevel@tonic-gate } 22770Sstevel@tonic-gate return; 22780Sstevel@tonic-gate } 22790Sstevel@tonic-gate 22800Sstevel@tonic-gate /* 22810Sstevel@tonic-gate * Bump write pointer to account for igmpmsg being added. 22820Sstevel@tonic-gate */ 22830Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg); 22840Sstevel@tonic-gate 22850Sstevel@tonic-gate /* 22860Sstevel@tonic-gate * Chain packet to new mblk_t. 22870Sstevel@tonic-gate */ 22880Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 22893448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 22903448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 22915240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 22920Sstevel@tonic-gate "register_send: copymsg failure."); 22930Sstevel@tonic-gate } 22940Sstevel@tonic-gate freeb(mp_copy); 22950Sstevel@tonic-gate return; 22960Sstevel@tonic-gate } 22970Sstevel@tonic-gate 22980Sstevel@tonic-gate /* 22995240Snordmark * icmp_input() asserts that IP version field is set to an 23000Sstevel@tonic-gate * appropriate version. Hence, the struct igmpmsg that this really 23010Sstevel@tonic-gate * becomes, needs to have the correct IP version field. 23020Sstevel@tonic-gate */ 23030Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 23040Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate /* 23070Sstevel@tonic-gate * The kernel uses the struct igmpmsg header to encode the messages to 23080Sstevel@tonic-gate * the multicast routing daemon. 
Fill in the fields in the header 23090Sstevel@tonic-gate * starting with the message type which is IGMPMSG_WHOLEPKT 23100Sstevel@tonic-gate */ 23110Sstevel@tonic-gate im = (struct igmpmsg *)mp_copy->b_rptr; 23120Sstevel@tonic-gate im->im_msgtype = IGMPMSG_WHOLEPKT; 23130Sstevel@tonic-gate im->im_src.s_addr = ipha->ipha_src; 23140Sstevel@tonic-gate im->im_dst.s_addr = ipha->ipha_dst; 23150Sstevel@tonic-gate 23160Sstevel@tonic-gate /* 23170Sstevel@tonic-gate * Must Be Zero. This is because the struct igmpmsg is really an IP 23180Sstevel@tonic-gate * header with renamed fields and the multicast routing daemon uses 23190Sstevel@tonic-gate * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages. 23200Sstevel@tonic-gate */ 23210Sstevel@tonic-gate im->im_mbz = 0; 23220Sstevel@tonic-gate 23233448Sdh155122 ++ipst->ips_mrtstat->mrts_upcalls; 23245240Snordmark if (!canputnext(mrouter->conn_rq)) { 23253448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regsend_drops; 23263448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 23275240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 23280Sstevel@tonic-gate "register_send: register upcall failure."); 23290Sstevel@tonic-gate } 23300Sstevel@tonic-gate freemsg(mp_copy); 23310Sstevel@tonic-gate } else { 23325240Snordmark /* Pass to RAWIP */ 23335240Snordmark (mrouter->conn_recv)(mrouter, mp_copy, NULL); 23340Sstevel@tonic-gate } 23350Sstevel@tonic-gate } 23360Sstevel@tonic-gate 23370Sstevel@tonic-gate /* 23380Sstevel@tonic-gate * pim_validate_cksum handles verification of the checksum in the 23390Sstevel@tonic-gate * pim header. For PIM Register packets, the checksum is calculated 23400Sstevel@tonic-gate * across the PIM header only. For all other packets, the checksum 23410Sstevel@tonic-gate * is for the PIM header and remainder of the packet. 23420Sstevel@tonic-gate * 23430Sstevel@tonic-gate * returns: B_TRUE, if checksum is okay. 23440Sstevel@tonic-gate * B_FALSE, if checksum is not valid. 
23450Sstevel@tonic-gate */ 23460Sstevel@tonic-gate static boolean_t 23470Sstevel@tonic-gate pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) 23480Sstevel@tonic-gate { 23490Sstevel@tonic-gate mblk_t *mp_dup; 23500Sstevel@tonic-gate 23510Sstevel@tonic-gate if ((mp_dup = dupmsg(mp)) == NULL) 23520Sstevel@tonic-gate return (B_FALSE); 23530Sstevel@tonic-gate 23540Sstevel@tonic-gate mp_dup->b_rptr += IPH_HDR_LENGTH(ip); 23550Sstevel@tonic-gate if (pimp->pim_type == PIM_REGISTER) 23560Sstevel@tonic-gate mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN; 23570Sstevel@tonic-gate if (IP_CSUM(mp_dup, 0, 0)) { 23580Sstevel@tonic-gate freemsg(mp_dup); 23590Sstevel@tonic-gate return (B_FALSE); 23600Sstevel@tonic-gate } 23610Sstevel@tonic-gate freemsg(mp_dup); 23620Sstevel@tonic-gate return (B_TRUE); 23630Sstevel@tonic-gate } 23640Sstevel@tonic-gate 23650Sstevel@tonic-gate /* 23660Sstevel@tonic-gate * int 23673448Sdh155122 * pim_input(queue_t *, mblk_t *, ill_t *ill) - Process PIM protocol packets. 23680Sstevel@tonic-gate * IP Protocol 103. Register messages are decapsulated and sent 23690Sstevel@tonic-gate * onto multicast forwarding. 23700Sstevel@tonic-gate */ 23710Sstevel@tonic-gate int 23723448Sdh155122 pim_input(queue_t *q, mblk_t *mp, ill_t *ill) 23730Sstevel@tonic-gate { 23740Sstevel@tonic-gate ipha_t *eip, *ip; 23750Sstevel@tonic-gate int iplen, pimlen, iphlen; 23760Sstevel@tonic-gate struct pim *pimp; /* pointer to a pim struct */ 23770Sstevel@tonic-gate uint32_t *reghdr; 23783448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 23795240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 23800Sstevel@tonic-gate 23810Sstevel@tonic-gate /* 23820Sstevel@tonic-gate * Pullup the msg for PIM protocol processing. 
23830Sstevel@tonic-gate */ 23840Sstevel@tonic-gate if (pullupmsg(mp, -1) == 0) { 23853448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 23860Sstevel@tonic-gate freemsg(mp); 23870Sstevel@tonic-gate return (-1); 23880Sstevel@tonic-gate } 23890Sstevel@tonic-gate 23900Sstevel@tonic-gate ip = (ipha_t *)mp->b_rptr; 23910Sstevel@tonic-gate iplen = ip->ipha_length; 23920Sstevel@tonic-gate iphlen = IPH_HDR_LENGTH(ip); 23930Sstevel@tonic-gate pimlen = ntohs(iplen) - iphlen; 23940Sstevel@tonic-gate 23950Sstevel@tonic-gate /* 23960Sstevel@tonic-gate * Validate lengths 23970Sstevel@tonic-gate */ 23980Sstevel@tonic-gate if (pimlen < PIM_MINLEN) { 23993448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_malformed; 24003448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24015240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24020Sstevel@tonic-gate "pim_input: length not at least minlen"); 24030Sstevel@tonic-gate } 24040Sstevel@tonic-gate freemsg(mp); 24050Sstevel@tonic-gate return (-1); 24060Sstevel@tonic-gate } 24070Sstevel@tonic-gate 24080Sstevel@tonic-gate /* 24090Sstevel@tonic-gate * Point to the PIM header. 24100Sstevel@tonic-gate */ 24110Sstevel@tonic-gate pimp = (struct pim *)((caddr_t)ip + iphlen); 24120Sstevel@tonic-gate 24130Sstevel@tonic-gate /* 24140Sstevel@tonic-gate * Check the version number. 
24150Sstevel@tonic-gate */ 24160Sstevel@tonic-gate if (pimp->pim_vers != PIM_VERSION) { 24173448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badversion; 24183448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24195240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24200Sstevel@tonic-gate "pim_input: unknown version of PIM"); 24210Sstevel@tonic-gate } 24220Sstevel@tonic-gate freemsg(mp); 24230Sstevel@tonic-gate return (-1); 24240Sstevel@tonic-gate } 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate /* 24270Sstevel@tonic-gate * Validate the checksum 24280Sstevel@tonic-gate */ 24290Sstevel@tonic-gate if (!pim_validate_cksum(mp, ip, pimp)) { 24303448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; 24313448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24325240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24330Sstevel@tonic-gate "pim_input: invalid checksum"); 24340Sstevel@tonic-gate } 24350Sstevel@tonic-gate freemsg(mp); 24360Sstevel@tonic-gate return (-1); 24370Sstevel@tonic-gate } 24380Sstevel@tonic-gate 24390Sstevel@tonic-gate if (pimp->pim_type != PIM_REGISTER) 24400Sstevel@tonic-gate return (0); 24410Sstevel@tonic-gate 24420Sstevel@tonic-gate reghdr = (uint32_t *)(pimp + 1); 24430Sstevel@tonic-gate eip = (ipha_t *)(reghdr + 1); 24440Sstevel@tonic-gate 24450Sstevel@tonic-gate /* 24460Sstevel@tonic-gate * check if the inner packet is destined to mcast group 24470Sstevel@tonic-gate */ 24480Sstevel@tonic-gate if (!CLASSD(eip->ipha_dst)) { 24493448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_badregisters; 24503448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24515240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24520Sstevel@tonic-gate "pim_input: Inner pkt not mcast .. 
!"); 24530Sstevel@tonic-gate } 24540Sstevel@tonic-gate freemsg(mp); 24550Sstevel@tonic-gate return (-1); 24560Sstevel@tonic-gate } 24573448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 24585240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 24590Sstevel@tonic-gate "register from %x, to %x, len %d", 24600Sstevel@tonic-gate ntohl(eip->ipha_src), 24610Sstevel@tonic-gate ntohl(eip->ipha_dst), 24620Sstevel@tonic-gate ntohs(eip->ipha_length)); 24630Sstevel@tonic-gate } 24640Sstevel@tonic-gate /* 24650Sstevel@tonic-gate * If the null register bit is not set, decapsulate 24660Sstevel@tonic-gate * the packet before forwarding it. 24670Sstevel@tonic-gate */ 24680Sstevel@tonic-gate if (!(ntohl(*reghdr) & PIM_NULL_REGISTER)) { 24690Sstevel@tonic-gate mblk_t *mp_copy; 24700Sstevel@tonic-gate 24710Sstevel@tonic-gate /* Copy the message */ 24720Sstevel@tonic-gate if ((mp_copy = copymsg(mp)) == NULL) { 24733448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 24740Sstevel@tonic-gate freemsg(mp); 24750Sstevel@tonic-gate return (-1); 24760Sstevel@tonic-gate } 24770Sstevel@tonic-gate 24780Sstevel@tonic-gate /* 24790Sstevel@tonic-gate * Decapsulate the packet and give it to 24800Sstevel@tonic-gate * register_mforward. 24810Sstevel@tonic-gate */ 24820Sstevel@tonic-gate mp_copy->b_rptr += iphlen + sizeof (pim_t) + 24830Sstevel@tonic-gate sizeof (*reghdr); 24843448Sdh155122 if (register_mforward(q, mp_copy, ill) != 0) { 24850Sstevel@tonic-gate freemsg(mp); 24860Sstevel@tonic-gate return (-1); 24870Sstevel@tonic-gate } 24880Sstevel@tonic-gate } 24890Sstevel@tonic-gate 24900Sstevel@tonic-gate /* 24910Sstevel@tonic-gate * Pass all valid PIM packets up to any process(es) listening on a raw 24920Sstevel@tonic-gate * PIM socket. For Solaris it is done right after pim_input() is 24930Sstevel@tonic-gate * called. 
24940Sstevel@tonic-gate */ 24950Sstevel@tonic-gate return (0); 24960Sstevel@tonic-gate } 24970Sstevel@tonic-gate 24980Sstevel@tonic-gate /* 24990Sstevel@tonic-gate * PIM sparse mode hook. Called by pim_input after decapsulating 25000Sstevel@tonic-gate * the packet. Loop back the packet, as if we have received it. 25010Sstevel@tonic-gate * In pim_input() we have to check if the destination is a multicast address. 25020Sstevel@tonic-gate */ 25030Sstevel@tonic-gate /* ARGSUSED */ 25040Sstevel@tonic-gate static int 25053448Sdh155122 register_mforward(queue_t *q, mblk_t *mp, ill_t *ill) 25060Sstevel@tonic-gate { 25073448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 25085240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 25093448Sdh155122 25103448Sdh155122 ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); 25113448Sdh155122 25123448Sdh155122 if (ipst->ips_ip_mrtdebug > 3) { 25130Sstevel@tonic-gate ipha_t *ipha; 25140Sstevel@tonic-gate 25150Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 25165240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25170Sstevel@tonic-gate "register_mforward: src %x, dst %x\n", 25180Sstevel@tonic-gate ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 25190Sstevel@tonic-gate } 25200Sstevel@tonic-gate /* 25210Sstevel@tonic-gate * Need to pass in to ip_mforward() the information that the 25220Sstevel@tonic-gate * packet has arrived on the register_vif. We use the solution that 25230Sstevel@tonic-gate * ip_mroute_decap() employs: use mp->b_prev to pass some information 25240Sstevel@tonic-gate * to ip_mforward(). Nonzero value means the packet has arrived on a 25250Sstevel@tonic-gate * tunnel (ip_mroute_decap() puts the address of the other side of the 25260Sstevel@tonic-gate * tunnel there.) This is safe since ip_rput() either frees the packet 25270Sstevel@tonic-gate * or passes it to ip_mforward(). We use 25280Sstevel@tonic-gate * PIM_REGISTER_MARKER = 0xffffffff to indicate the has arrived on the 25290Sstevel@tonic-gate * register vif. 
If in the future we have more than one register vifs, 25300Sstevel@tonic-gate * then this will need re-examination. 25310Sstevel@tonic-gate */ 25320Sstevel@tonic-gate mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER; 25333448Sdh155122 ++ipst->ips_mrtstat->mrts_pim_regforwards; 25340Sstevel@tonic-gate ip_rput(q, mp); 25350Sstevel@tonic-gate return (0); 25360Sstevel@tonic-gate } 25370Sstevel@tonic-gate 25380Sstevel@tonic-gate /* 25390Sstevel@tonic-gate * Send an encapsulated packet. 25400Sstevel@tonic-gate * Caller assumes can continue to use mp when routine returns. 25410Sstevel@tonic-gate */ 25420Sstevel@tonic-gate /* ARGSUSED */ 25430Sstevel@tonic-gate static void 25440Sstevel@tonic-gate encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 25450Sstevel@tonic-gate { 25460Sstevel@tonic-gate mblk_t *mp_copy; 25470Sstevel@tonic-gate ipha_t *ipha_copy; 25480Sstevel@tonic-gate size_t len; 25493448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 25505240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 25513448Sdh155122 25523448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 25535240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25543448Sdh155122 "encap_send: vif %ld enter", 25553448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 25560Sstevel@tonic-gate } 25570Sstevel@tonic-gate len = ntohs(ipha->ipha_length); 25580Sstevel@tonic-gate 25590Sstevel@tonic-gate /* 25600Sstevel@tonic-gate * Copy the old packet & pullup it's IP header into the 25610Sstevel@tonic-gate * new mbuf so we can modify it. Try to fill the new 25620Sstevel@tonic-gate * mbuf since if we don't the ethernet driver will. 
25630Sstevel@tonic-gate */ 25640Sstevel@tonic-gate mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED); 25650Sstevel@tonic-gate if (mp_copy == NULL) 25660Sstevel@tonic-gate return; 25670Sstevel@tonic-gate mp_copy->b_rptr += 32; 25680Sstevel@tonic-gate mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr); 25690Sstevel@tonic-gate if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 25700Sstevel@tonic-gate freeb(mp_copy); 25710Sstevel@tonic-gate return; 25720Sstevel@tonic-gate } 25730Sstevel@tonic-gate 25740Sstevel@tonic-gate /* 25750Sstevel@tonic-gate * Fill in the encapsulating IP header. 25760Sstevel@tonic-gate * Remote tunnel dst in rmt_addr, from add_vif(). 25770Sstevel@tonic-gate */ 25780Sstevel@tonic-gate ipha_copy = (ipha_t *)mp_copy->b_rptr; 25790Sstevel@tonic-gate *ipha_copy = multicast_encap_iphdr; 25800Sstevel@tonic-gate ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET); 25810Sstevel@tonic-gate ipha_copy->ipha_length = htons(len + sizeof (ipha_t)); 25820Sstevel@tonic-gate ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr; 25830Sstevel@tonic-gate ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr; 25840Sstevel@tonic-gate ASSERT(ipha_copy->ipha_ident == 0); 25850Sstevel@tonic-gate 25860Sstevel@tonic-gate /* Turn the encapsulated IP header back into a valid one. 
*/ 25870Sstevel@tonic-gate ipha = (ipha_t *)mp_copy->b_cont->b_rptr; 25880Sstevel@tonic-gate ipha->ipha_ttl--; 25890Sstevel@tonic-gate ipha->ipha_hdr_checksum = 0; 25900Sstevel@tonic-gate ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 25910Sstevel@tonic-gate 25923448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 25935240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 25940Sstevel@tonic-gate "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); 25950Sstevel@tonic-gate } 25960Sstevel@tonic-gate if (vifp->v_rate_limit <= 0) 25970Sstevel@tonic-gate tbf_send_packet(vifp, mp_copy); 25980Sstevel@tonic-gate else 25990Sstevel@tonic-gate /* ipha is from the original header */ 26000Sstevel@tonic-gate tbf_control(vifp, mp_copy, ipha); 26010Sstevel@tonic-gate } 26020Sstevel@tonic-gate 26030Sstevel@tonic-gate /* 26040Sstevel@tonic-gate * De-encapsulate a packet and feed it back through IP input. 26050Sstevel@tonic-gate * This routine is called whenever IP gets a packet with prototype 26060Sstevel@tonic-gate * IPPROTO_ENCAP and a local destination address. 26070Sstevel@tonic-gate */ 26080Sstevel@tonic-gate void 26093448Sdh155122 ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill) 26100Sstevel@tonic-gate { 26110Sstevel@tonic-gate ipha_t *ipha = (ipha_t *)mp->b_rptr; 26120Sstevel@tonic-gate ipha_t *ipha_encap; 26130Sstevel@tonic-gate int hlen = IPH_HDR_LENGTH(ipha); 26140Sstevel@tonic-gate ipaddr_t src; 26150Sstevel@tonic-gate struct vif *vifp; 26163448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 26175240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 26180Sstevel@tonic-gate 26190Sstevel@tonic-gate /* 26200Sstevel@tonic-gate * Dump the packet if it's not to a multicast destination or if 26210Sstevel@tonic-gate * we don't have an encapsulating tunnel with the source. 
26220Sstevel@tonic-gate * Note: This code assumes that the remote site IP address 26230Sstevel@tonic-gate * uniquely identifies the tunnel (i.e., that this site has 26240Sstevel@tonic-gate * at most one tunnel with the remote site). 26250Sstevel@tonic-gate */ 26260Sstevel@tonic-gate ipha_encap = (ipha_t *)((char *)ipha + hlen); 26270Sstevel@tonic-gate if (!CLASSD(ipha_encap->ipha_dst)) { 26283448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 26290Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: bad tunnel\n")); 26300Sstevel@tonic-gate freemsg(mp); 26310Sstevel@tonic-gate return; 26320Sstevel@tonic-gate } 26330Sstevel@tonic-gate src = (ipaddr_t)ipha->ipha_src; 26343448Sdh155122 mutex_enter(&ipst->ips_last_encap_lock); 26353448Sdh155122 if (src != ipst->ips_last_encap_src) { 26360Sstevel@tonic-gate struct vif *vife; 26370Sstevel@tonic-gate 26383448Sdh155122 vifp = ipst->ips_vifs; 26393448Sdh155122 vife = vifp + ipst->ips_numvifs; 26403448Sdh155122 ipst->ips_last_encap_src = src; 26413448Sdh155122 ipst->ips_last_encap_vif = 0; 26420Sstevel@tonic-gate for (; vifp < vife; ++vifp) { 26430Sstevel@tonic-gate if (!lock_good_vif(vifp)) 26440Sstevel@tonic-gate continue; 26450Sstevel@tonic-gate if (vifp->v_rmt_addr.s_addr == src) { 26460Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) 26473448Sdh155122 ipst->ips_last_encap_vif = vifp; 26483448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 26495240Snordmark (void) mi_strlog(mrouter->conn_rq, 26500Sstevel@tonic-gate 1, SL_TRACE, 26510Sstevel@tonic-gate "ip_mroute_decap: good tun " 26520Sstevel@tonic-gate "vif %ld with %x", 26533448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 26540Sstevel@tonic-gate ntohl(src)); 26550Sstevel@tonic-gate } 26560Sstevel@tonic-gate unlock_good_vif(vifp); 26570Sstevel@tonic-gate break; 26580Sstevel@tonic-gate } 26590Sstevel@tonic-gate unlock_good_vif(vifp); 26600Sstevel@tonic-gate } 26610Sstevel@tonic-gate } 26623448Sdh155122 if ((vifp = ipst->ips_last_encap_vif) == 0) { 26633448Sdh155122 
mutex_exit(&ipst->ips_last_encap_lock); 26643448Sdh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 26650Sstevel@tonic-gate freemsg(mp); 26660Sstevel@tonic-gate ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 26673448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src))); 26680Sstevel@tonic-gate return; 26690Sstevel@tonic-gate } 26703448Sdh155122 mutex_exit(&ipst->ips_last_encap_lock); 26710Sstevel@tonic-gate 26720Sstevel@tonic-gate /* 26730Sstevel@tonic-gate * Need to pass in the tunnel source to ip_mforward (so that it can 26740Sstevel@tonic-gate * verify that the packet arrived over the correct vif.) We use b_prev 26750Sstevel@tonic-gate * to pass this information. This is safe since the ip_rput either 26760Sstevel@tonic-gate * frees the packet or passes it to ip_mforward. 26770Sstevel@tonic-gate */ 26780Sstevel@tonic-gate mp->b_prev = (mblk_t *)(uintptr_t)src; 26790Sstevel@tonic-gate mp->b_rptr += hlen; 26800Sstevel@tonic-gate /* Feed back into ip_rput as an M_DATA. */ 26810Sstevel@tonic-gate ip_rput(q, mp); 26820Sstevel@tonic-gate } 26830Sstevel@tonic-gate 26840Sstevel@tonic-gate /* 26850Sstevel@tonic-gate * Remove all records with v_ipif == ipif. Called when an interface goes away 26860Sstevel@tonic-gate * (stream closed). Called as writer. 26870Sstevel@tonic-gate */ 26880Sstevel@tonic-gate void 26890Sstevel@tonic-gate reset_mrt_vif_ipif(ipif_t *ipif) 26900Sstevel@tonic-gate { 26910Sstevel@tonic-gate vifi_t vifi, tmp_vifi; 26920Sstevel@tonic-gate vifi_t num_of_vifs; 26933448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 26940Sstevel@tonic-gate 26950Sstevel@tonic-gate /* Can't check vifi >= 0 since vifi_t is unsigned! 
*/ 26960Sstevel@tonic-gate 26973448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 26983448Sdh155122 num_of_vifs = ipst->ips_numvifs; 26993448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 27000Sstevel@tonic-gate 27010Sstevel@tonic-gate for (vifi = num_of_vifs; vifi != 0; vifi--) { 27020Sstevel@tonic-gate tmp_vifi = vifi - 1; 27033448Sdh155122 if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) { 27043448Sdh155122 (void) del_vif(&tmp_vifi, NULL, NULL, ipst); 27050Sstevel@tonic-gate } 27060Sstevel@tonic-gate } 27070Sstevel@tonic-gate } 27080Sstevel@tonic-gate 27090Sstevel@tonic-gate /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */ 27100Sstevel@tonic-gate void 27110Sstevel@tonic-gate reset_mrt_ill(ill_t *ill) 27120Sstevel@tonic-gate { 27130Sstevel@tonic-gate struct mfc *rt; 27140Sstevel@tonic-gate struct rtdetq *rte; 27150Sstevel@tonic-gate int i; 27163448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 27175240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 27180Sstevel@tonic-gate 27190Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 27203448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 27213448Sdh155122 if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { 27223448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 27235240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 27243448Sdh155122 SL_TRACE, 27250Sstevel@tonic-gate "reset_mrt_ill: mfctable [%d]", i); 27260Sstevel@tonic-gate } 27270Sstevel@tonic-gate while (rt != NULL) { 27280Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 27290Sstevel@tonic-gate while ((rte = rt->mfc_rte) != NULL) { 27300Sstevel@tonic-gate if (rte->ill == ill) { 27313448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 27323448Sdh155122 (void) mi_strlog( 27335240Snordmark mrouter->conn_rq, 27343448Sdh155122 1, SL_TRACE, 27353448Sdh155122 "reset_mrt_ill: " 27367240Srh87107 "ill 0x%p", (void *)ill); 27370Sstevel@tonic-gate } 27380Sstevel@tonic-gate rt->mfc_rte = rte->rte_next; 27390Sstevel@tonic-gate freemsg(rte->mp); 27400Sstevel@tonic-gate 
mi_free((char *)rte); 27410Sstevel@tonic-gate } 27420Sstevel@tonic-gate } 27430Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 27440Sstevel@tonic-gate rt = rt->mfc_next; 27450Sstevel@tonic-gate } 27460Sstevel@tonic-gate } 27473448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 27480Sstevel@tonic-gate } 27490Sstevel@tonic-gate } 27500Sstevel@tonic-gate 27510Sstevel@tonic-gate /* 27520Sstevel@tonic-gate * Token bucket filter module. 27530Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 27540Sstevel@tonic-gate */ 27550Sstevel@tonic-gate static void 27560Sstevel@tonic-gate tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) 27570Sstevel@tonic-gate { 27580Sstevel@tonic-gate size_t p_len = msgdsize(mp); 27590Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 27600Sstevel@tonic-gate timeout_id_t id = 0; 27613448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 27625240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 27630Sstevel@tonic-gate 27640Sstevel@tonic-gate /* Drop if packet is too large */ 27650Sstevel@tonic-gate if (p_len > MAX_BKT_SIZE) { 27663448Sdh155122 ipst->ips_mrtstat->mrts_pkt2large++; 27670Sstevel@tonic-gate freemsg(mp); 27680Sstevel@tonic-gate return; 27690Sstevel@tonic-gate } 27703448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 27715240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 27720Sstevel@tonic-gate "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", 27733448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, 27740Sstevel@tonic-gate ntohl(ipha->ipha_dst)); 27750Sstevel@tonic-gate } 27760Sstevel@tonic-gate 27770Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 27780Sstevel@tonic-gate 27790Sstevel@tonic-gate tbf_update_tokens(vifp); 27800Sstevel@tonic-gate 27810Sstevel@tonic-gate /* 27820Sstevel@tonic-gate * If there are enough tokens, 27830Sstevel@tonic-gate * and the queue is empty, send this packet out. 
27840Sstevel@tonic-gate */ 27853448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 27865240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 27870Sstevel@tonic-gate "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", 27883448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, 27890Sstevel@tonic-gate t->tbf_q_len); 27900Sstevel@tonic-gate } 27910Sstevel@tonic-gate /* No packets are queued */ 27920Sstevel@tonic-gate if (t->tbf_q_len == 0) { 27930Sstevel@tonic-gate /* queue empty, send packet if enough tokens */ 27940Sstevel@tonic-gate if (p_len <= t->tbf_n_tok) { 27950Sstevel@tonic-gate t->tbf_n_tok -= p_len; 27960Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 27970Sstevel@tonic-gate tbf_send_packet(vifp, mp); 27980Sstevel@tonic-gate return; 27990Sstevel@tonic-gate } else { 28000Sstevel@tonic-gate /* Queue packet and timeout till later */ 28010Sstevel@tonic-gate tbf_queue(vifp, mp); 28020Sstevel@tonic-gate ASSERT(vifp->v_timeout_id == 0); 28030Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 28040Sstevel@tonic-gate TBF_REPROCESS); 28050Sstevel@tonic-gate } 28060Sstevel@tonic-gate } else if (t->tbf_q_len < t->tbf_max_q_len) { 28070Sstevel@tonic-gate /* Finite queue length, so queue pkts and process queue */ 28080Sstevel@tonic-gate tbf_queue(vifp, mp); 28090Sstevel@tonic-gate tbf_process_q(vifp); 28100Sstevel@tonic-gate } else { 28110Sstevel@tonic-gate /* Check that we have UDP header with IP header */ 28120Sstevel@tonic-gate size_t hdr_length = IPH_HDR_LENGTH(ipha) + 28135240Snordmark sizeof (struct udphdr); 28140Sstevel@tonic-gate 28150Sstevel@tonic-gate if ((mp->b_wptr - mp->b_rptr) < hdr_length) { 28160Sstevel@tonic-gate if (!pullupmsg(mp, hdr_length)) { 28170Sstevel@tonic-gate freemsg(mp); 28180Sstevel@tonic-gate ip1dbg(("tbf_ctl: couldn't pullup udp hdr, " 28190Sstevel@tonic-gate "vif %ld src 0x%x dst 0x%x\n", 28203448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 28210Sstevel@tonic-gate ntohl(ipha->ipha_src), 
28220Sstevel@tonic-gate ntohl(ipha->ipha_dst))); 28230Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 28240Sstevel@tonic-gate return; 28250Sstevel@tonic-gate } else 28260Sstevel@tonic-gate /* Have to reassign ipha after pullupmsg */ 28270Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 28280Sstevel@tonic-gate } 28290Sstevel@tonic-gate /* 28300Sstevel@tonic-gate * Queue length too much, 28310Sstevel@tonic-gate * try to selectively dq, or queue and process 28320Sstevel@tonic-gate */ 28330Sstevel@tonic-gate if (!tbf_dq_sel(vifp, ipha)) { 28343448Sdh155122 ipst->ips_mrtstat->mrts_q_overflow++; 28350Sstevel@tonic-gate freemsg(mp); 28360Sstevel@tonic-gate } else { 28370Sstevel@tonic-gate tbf_queue(vifp, mp); 28380Sstevel@tonic-gate tbf_process_q(vifp); 28390Sstevel@tonic-gate } 28400Sstevel@tonic-gate } 28410Sstevel@tonic-gate if (t->tbf_q_len == 0) { 28420Sstevel@tonic-gate id = vifp->v_timeout_id; 28430Sstevel@tonic-gate vifp->v_timeout_id = 0; 28440Sstevel@tonic-gate } 28450Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 28460Sstevel@tonic-gate if (id != 0) 28470Sstevel@tonic-gate (void) untimeout(id); 28480Sstevel@tonic-gate } 28490Sstevel@tonic-gate 28500Sstevel@tonic-gate /* 28510Sstevel@tonic-gate * Adds a packet to the tbf queue at the interface. 28520Sstevel@tonic-gate * The ipha is for mcastgrp destination for phyint and encap. 
28530Sstevel@tonic-gate */ 28540Sstevel@tonic-gate static void 28550Sstevel@tonic-gate tbf_queue(struct vif *vifp, mblk_t *mp) 28560Sstevel@tonic-gate { 28570Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 28583448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 28595240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 28603448Sdh155122 28613448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28625240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28633448Sdh155122 "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); 28640Sstevel@tonic-gate } 28650Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 28660Sstevel@tonic-gate 28670Sstevel@tonic-gate if (t->tbf_t == NULL) { 28680Sstevel@tonic-gate /* Queue was empty */ 28690Sstevel@tonic-gate t->tbf_q = mp; 28700Sstevel@tonic-gate } else { 28710Sstevel@tonic-gate /* Insert at tail */ 28720Sstevel@tonic-gate t->tbf_t->b_next = mp; 28730Sstevel@tonic-gate } 28740Sstevel@tonic-gate /* set new tail pointer */ 28750Sstevel@tonic-gate t->tbf_t = mp; 28760Sstevel@tonic-gate 28770Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL; 28780Sstevel@tonic-gate 28790Sstevel@tonic-gate t->tbf_q_len++; 28800Sstevel@tonic-gate } 28810Sstevel@tonic-gate 28820Sstevel@tonic-gate /* 28830Sstevel@tonic-gate * Process the queue at the vif interface. 28840Sstevel@tonic-gate * Drops the tbf_lock when sending packets. 28850Sstevel@tonic-gate * 28860Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 
28870Sstevel@tonic-gate */ 28880Sstevel@tonic-gate static void 28890Sstevel@tonic-gate tbf_process_q(struct vif *vifp) 28900Sstevel@tonic-gate { 28910Sstevel@tonic-gate mblk_t *mp; 28920Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 28930Sstevel@tonic-gate size_t len; 28943448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 28955240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 28963448Sdh155122 28973448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 28985240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 28990Sstevel@tonic-gate "tbf_process_q 1: vif %ld qlen = %d", 29003448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len); 29010Sstevel@tonic-gate } 29020Sstevel@tonic-gate 29030Sstevel@tonic-gate /* 29040Sstevel@tonic-gate * Loop through the queue at the interface and send 29050Sstevel@tonic-gate * as many packets as possible. 29060Sstevel@tonic-gate */ 29070Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29080Sstevel@tonic-gate 29090Sstevel@tonic-gate while (t->tbf_q_len > 0) { 29100Sstevel@tonic-gate mp = t->tbf_q; 29110Sstevel@tonic-gate len = (size_t)msgdsize(mp); /* length of ip pkt */ 29120Sstevel@tonic-gate 29130Sstevel@tonic-gate /* Determine if the packet can be sent */ 29140Sstevel@tonic-gate if (len <= t->tbf_n_tok) { 29150Sstevel@tonic-gate /* 29160Sstevel@tonic-gate * If so, reduce no. of tokens, dequeue the packet, 29170Sstevel@tonic-gate * send the packet. 
29180Sstevel@tonic-gate */ 29190Sstevel@tonic-gate t->tbf_n_tok -= len; 29200Sstevel@tonic-gate 29210Sstevel@tonic-gate t->tbf_q = mp->b_next; 29220Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 29230Sstevel@tonic-gate t->tbf_t = NULL; 29240Sstevel@tonic-gate } 29250Sstevel@tonic-gate mp->b_next = NULL; 29260Sstevel@tonic-gate /* Exit mutex before sending packet, then re-enter */ 29270Sstevel@tonic-gate mutex_exit(&t->tbf_lock); 29280Sstevel@tonic-gate tbf_send_packet(vifp, mp); 29290Sstevel@tonic-gate mutex_enter(&t->tbf_lock); 29300Sstevel@tonic-gate } else 29310Sstevel@tonic-gate break; 29320Sstevel@tonic-gate } 29330Sstevel@tonic-gate } 29340Sstevel@tonic-gate 29350Sstevel@tonic-gate /* Called at tbf timeout to update tokens, process q and reset timer. */ 29360Sstevel@tonic-gate static void 29370Sstevel@tonic-gate tbf_reprocess_q(void *arg) 29380Sstevel@tonic-gate { 29390Sstevel@tonic-gate struct vif *vifp = arg; 29403448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 29415240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29420Sstevel@tonic-gate 29430Sstevel@tonic-gate mutex_enter(&vifp->v_tbf->tbf_lock); 29440Sstevel@tonic-gate vifp->v_timeout_id = 0; 29450Sstevel@tonic-gate tbf_update_tokens(vifp); 29460Sstevel@tonic-gate 29470Sstevel@tonic-gate tbf_process_q(vifp); 29480Sstevel@tonic-gate 29490Sstevel@tonic-gate if (vifp->v_tbf->tbf_q_len > 0) { 29500Sstevel@tonic-gate vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 29510Sstevel@tonic-gate TBF_REPROCESS); 29520Sstevel@tonic-gate } 29530Sstevel@tonic-gate mutex_exit(&vifp->v_tbf->tbf_lock); 29540Sstevel@tonic-gate 29553448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 29565240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 29570Sstevel@tonic-gate "tbf_reprcess_q: vif %ld timeout id = %p", 29583448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id); 29590Sstevel@tonic-gate } 29600Sstevel@tonic-gate } 29610Sstevel@tonic-gate 29620Sstevel@tonic-gate /* 
29630Sstevel@tonic-gate * Function that will selectively discard a member of the tbf queue, 29640Sstevel@tonic-gate * based on the precedence value and the priority. 29650Sstevel@tonic-gate * 29660Sstevel@tonic-gate * NOTE : The caller should quntimeout if the queue length is 0. 29670Sstevel@tonic-gate */ 29680Sstevel@tonic-gate static int 29690Sstevel@tonic-gate tbf_dq_sel(struct vif *vifp, ipha_t *ipha) 29700Sstevel@tonic-gate { 29710Sstevel@tonic-gate uint_t p; 29720Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 29730Sstevel@tonic-gate mblk_t **np; 29740Sstevel@tonic-gate mblk_t *last, *mp; 29753448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 29765240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 29773448Sdh155122 29783448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 29795240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 29800Sstevel@tonic-gate "dq_sel: vif %ld dst 0x%x", 29813448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst)); 29820Sstevel@tonic-gate } 29830Sstevel@tonic-gate 29840Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 29850Sstevel@tonic-gate p = priority(vifp, ipha); 29860Sstevel@tonic-gate 29870Sstevel@tonic-gate np = &t->tbf_q; 29880Sstevel@tonic-gate last = NULL; 29890Sstevel@tonic-gate while ((mp = *np) != NULL) { 29900Sstevel@tonic-gate if (p > (priority(vifp, (ipha_t *)mp->b_rptr))) { 29910Sstevel@tonic-gate *np = mp->b_next; 29920Sstevel@tonic-gate /* If removing the last packet, fix the tail pointer */ 29930Sstevel@tonic-gate if (mp == t->tbf_t) 29940Sstevel@tonic-gate t->tbf_t = last; 29950Sstevel@tonic-gate mp->b_prev = mp->b_next = NULL; 29960Sstevel@tonic-gate freemsg(mp); 29970Sstevel@tonic-gate /* 29980Sstevel@tonic-gate * It's impossible for the queue to be empty, but 29990Sstevel@tonic-gate * we check anyway. 
30000Sstevel@tonic-gate */ 30010Sstevel@tonic-gate if (--t->tbf_q_len == 0) { 30020Sstevel@tonic-gate t->tbf_t = NULL; 30030Sstevel@tonic-gate } 30043448Sdh155122 ipst->ips_mrtstat->mrts_drop_sel++; 30050Sstevel@tonic-gate return (1); 30060Sstevel@tonic-gate } 30070Sstevel@tonic-gate np = &mp->b_next; 30080Sstevel@tonic-gate last = mp; 30090Sstevel@tonic-gate } 30100Sstevel@tonic-gate return (0); 30110Sstevel@tonic-gate } 30120Sstevel@tonic-gate 30130Sstevel@tonic-gate /* Sends packet, 2 cases - encap tunnel, phyint. */ 30140Sstevel@tonic-gate static void 30150Sstevel@tonic-gate tbf_send_packet(struct vif *vifp, mblk_t *mp) 30160Sstevel@tonic-gate { 30170Sstevel@tonic-gate ipif_t *ipif; 30183448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 30195240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 30200Sstevel@tonic-gate 30210Sstevel@tonic-gate /* If encap tunnel options */ 30220Sstevel@tonic-gate if (vifp->v_flags & VIFF_TUNNEL) { 30233448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 30245240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 30250Sstevel@tonic-gate "tbf_send_pkt: ENCAP tunnel vif %ld", 30263448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 30270Sstevel@tonic-gate } 30280Sstevel@tonic-gate 30290Sstevel@tonic-gate /* 30300Sstevel@tonic-gate * Feed into ip_wput which will set the ident field and 30310Sstevel@tonic-gate * checksum the encapsulating header. 30320Sstevel@tonic-gate * BSD gets the cached route vifp->v_route from ip_output() 30330Sstevel@tonic-gate * to speed up route table lookups. Not necessary in SunOS 5.x. 30340Sstevel@tonic-gate */ 30350Sstevel@tonic-gate put(vifp->v_ipif->ipif_wq, mp); 30360Sstevel@tonic-gate return; 30370Sstevel@tonic-gate 30380Sstevel@tonic-gate /* phyint */ 30390Sstevel@tonic-gate } else { 30400Sstevel@tonic-gate /* Need to loop back to members on the outgoing interface. 
*/ 30410Sstevel@tonic-gate ipha_t *ipha; 30420Sstevel@tonic-gate ipaddr_t dst; 30430Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 30440Sstevel@tonic-gate dst = ipha->ipha_dst; 30450Sstevel@tonic-gate ipif = vifp->v_ipif; 30460Sstevel@tonic-gate 30470Sstevel@tonic-gate if (ilm_lookup_ipif(ipif, dst) != NULL) { 30480Sstevel@tonic-gate /* 30490Sstevel@tonic-gate * The packet is not yet reassembled, thus we need to 30500Sstevel@tonic-gate * pass it to ip_rput_local for checksum verification 30510Sstevel@tonic-gate * and reassembly (and fanout the user stream). 30520Sstevel@tonic-gate */ 30530Sstevel@tonic-gate mblk_t *mp_loop; 30540Sstevel@tonic-gate ire_t *ire; 30550Sstevel@tonic-gate 30563448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 30575240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 30583448Sdh155122 SL_TRACE, 30590Sstevel@tonic-gate "tbf_send_pkt: loopback vif %ld", 30603448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 30610Sstevel@tonic-gate } 30620Sstevel@tonic-gate mp_loop = copymsg(mp); 30630Sstevel@tonic-gate ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL, 30643448Sdh155122 ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); 30650Sstevel@tonic-gate 30660Sstevel@tonic-gate if (mp_loop != NULL && ire != NULL) { 30670Sstevel@tonic-gate IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop, 30680Sstevel@tonic-gate ((ipha_t *)mp_loop->b_rptr), 30690Sstevel@tonic-gate ire, (ill_t *)ipif->ipif_rq->q_ptr); 30700Sstevel@tonic-gate } else { 30710Sstevel@tonic-gate /* Either copymsg failed or no ire */ 30725240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, 30733448Sdh155122 SL_TRACE, 30740Sstevel@tonic-gate "tbf_send_pkt: mp_loop 0x%p, ire 0x%p " 30757240Srh87107 "vif %ld\n", (void *)mp_loop, (void *)ire, 30763448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 30770Sstevel@tonic-gate } 30780Sstevel@tonic-gate if (ire != NULL) 30790Sstevel@tonic-gate ire_refrele(ire); 30800Sstevel@tonic-gate } 30813448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 30825240Snordmark (void) 
mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 30830Sstevel@tonic-gate "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", 30843448Sdh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst)); 30850Sstevel@tonic-gate } 30860Sstevel@tonic-gate ip_rput_forward_multicast(dst, mp, ipif); 30870Sstevel@tonic-gate } 30880Sstevel@tonic-gate } 30890Sstevel@tonic-gate 30900Sstevel@tonic-gate /* 30910Sstevel@tonic-gate * Determine the current time and then the elapsed time (between the last time 30920Sstevel@tonic-gate * and time now). Update the no. of tokens in the bucket. 30930Sstevel@tonic-gate */ 30940Sstevel@tonic-gate static void 30950Sstevel@tonic-gate tbf_update_tokens(struct vif *vifp) 30960Sstevel@tonic-gate { 30970Sstevel@tonic-gate timespec_t tp; 30980Sstevel@tonic-gate hrtime_t tm; 30990Sstevel@tonic-gate struct tbf *t = vifp->v_tbf; 31003448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 31015240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 31020Sstevel@tonic-gate 31030Sstevel@tonic-gate ASSERT(MUTEX_HELD(&t->tbf_lock)); 31040Sstevel@tonic-gate 31050Sstevel@tonic-gate /* Time in secs and nsecs, rate limit in kbits/sec */ 31060Sstevel@tonic-gate gethrestime(&tp); 31070Sstevel@tonic-gate 31080Sstevel@tonic-gate /*LINTED*/ 31090Sstevel@tonic-gate TV_DELTA(tp, t->tbf_last_pkt_t, tm); 31100Sstevel@tonic-gate 31110Sstevel@tonic-gate /* 31120Sstevel@tonic-gate * This formula is actually 31130Sstevel@tonic-gate * "time in seconds" * "bytes/second". Scaled for nsec. 31140Sstevel@tonic-gate * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8) 31150Sstevel@tonic-gate * 31160Sstevel@tonic-gate * The (1000/1024) was introduced in add_vif to optimize 31170Sstevel@tonic-gate * this divide into a shift. 
31180Sstevel@tonic-gate */ 31190Sstevel@tonic-gate t->tbf_n_tok += (tm/1000) * vifp->v_rate_limit / 1024 / 8; 31200Sstevel@tonic-gate t->tbf_last_pkt_t = tp; 31210Sstevel@tonic-gate 31220Sstevel@tonic-gate if (t->tbf_n_tok > MAX_BKT_SIZE) 31230Sstevel@tonic-gate t->tbf_n_tok = MAX_BKT_SIZE; 31243448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 31255240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 31260Sstevel@tonic-gate "tbf_update_tok: tm %lld tok %d vif %ld", 31273448Sdh155122 tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs)); 31280Sstevel@tonic-gate } 31290Sstevel@tonic-gate } 31300Sstevel@tonic-gate 31310Sstevel@tonic-gate /* 31320Sstevel@tonic-gate * Priority currently is based on port nos. 31330Sstevel@tonic-gate * Different forwarding mechanisms have different ways 31340Sstevel@tonic-gate * of obtaining the port no. Hence, the vif must be 31350Sstevel@tonic-gate * given along with the packet itself. 31360Sstevel@tonic-gate * 31370Sstevel@tonic-gate */ 31380Sstevel@tonic-gate static int 31390Sstevel@tonic-gate priority(struct vif *vifp, ipha_t *ipha) 31400Sstevel@tonic-gate { 31410Sstevel@tonic-gate int prio; 31423448Sdh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 31435240Snordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 31440Sstevel@tonic-gate 31450Sstevel@tonic-gate /* Temporary hack; may add general packet classifier some day */ 31460Sstevel@tonic-gate 31470Sstevel@tonic-gate ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock)); 31480Sstevel@tonic-gate 31490Sstevel@tonic-gate /* 31500Sstevel@tonic-gate * The UDP port space is divided up into four priority ranges: 31510Sstevel@tonic-gate * [0, 16384) : unclassified - lowest priority 31520Sstevel@tonic-gate * [16384, 32768) : audio - highest priority 31530Sstevel@tonic-gate * [32768, 49152) : whiteboard - medium priority 31540Sstevel@tonic-gate * [49152, 65536) : video - low priority 31550Sstevel@tonic-gate */ 31560Sstevel@tonic-gate 31570Sstevel@tonic-gate if (ipha->ipha_protocol == 
IPPROTO_UDP) { 31580Sstevel@tonic-gate struct udphdr *udp = 31590Sstevel@tonic-gate (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha)); 31600Sstevel@tonic-gate switch (ntohs(udp->uh_dport) & 0xc000) { 31610Sstevel@tonic-gate case 0x4000: 31620Sstevel@tonic-gate prio = 70; 31630Sstevel@tonic-gate break; 31640Sstevel@tonic-gate case 0x8000: 31650Sstevel@tonic-gate prio = 60; 31660Sstevel@tonic-gate break; 31670Sstevel@tonic-gate case 0xc000: 31680Sstevel@tonic-gate prio = 55; 31690Sstevel@tonic-gate break; 31700Sstevel@tonic-gate default: 31710Sstevel@tonic-gate prio = 50; 31720Sstevel@tonic-gate break; 31730Sstevel@tonic-gate } 31743448Sdh155122 if (ipst->ips_ip_mrtdebug > 1) { 31755240Snordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 31760Sstevel@tonic-gate "priority: port %x prio %d\n", 31770Sstevel@tonic-gate ntohs(udp->uh_dport), prio); 31780Sstevel@tonic-gate } 31790Sstevel@tonic-gate } else 31800Sstevel@tonic-gate prio = 50; /* default priority */ 31810Sstevel@tonic-gate return (prio); 31820Sstevel@tonic-gate } 31830Sstevel@tonic-gate 31840Sstevel@tonic-gate /* 31850Sstevel@tonic-gate * End of token bucket filter modifications 31860Sstevel@tonic-gate */ 31870Sstevel@tonic-gate 31880Sstevel@tonic-gate 31890Sstevel@tonic-gate 31900Sstevel@tonic-gate /* 31910Sstevel@tonic-gate * Produces data for netstat -M. 
31920Sstevel@tonic-gate */ 31930Sstevel@tonic-gate int 31943448Sdh155122 ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst) 31950Sstevel@tonic-gate { 31963448Sdh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 31973448Sdh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 31983448Sdh155122 if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat, 31993448Sdh155122 sizeof (struct mrtstat))) { 32000Sstevel@tonic-gate ip0dbg(("ip_mroute_stats: failed %ld bytes\n", 32013448Sdh155122 (size_t)sizeof (struct mrtstat))); 32020Sstevel@tonic-gate return (0); 32030Sstevel@tonic-gate } 32040Sstevel@tonic-gate return (1); 32050Sstevel@tonic-gate } 32060Sstevel@tonic-gate 32070Sstevel@tonic-gate /* 32080Sstevel@tonic-gate * Sends info for SNMP's MIB. 32090Sstevel@tonic-gate */ 32100Sstevel@tonic-gate int 32113448Sdh155122 ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst) 32120Sstevel@tonic-gate { 32130Sstevel@tonic-gate struct vifctl vi; 32140Sstevel@tonic-gate vifi_t vifi; 32150Sstevel@tonic-gate 32163448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 32173448Sdh155122 for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) { 32183448Sdh155122 if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0) 32190Sstevel@tonic-gate continue; 32200Sstevel@tonic-gate /* 32210Sstevel@tonic-gate * No locks here, an approximation is fine. 
32220Sstevel@tonic-gate */ 32230Sstevel@tonic-gate vi.vifc_vifi = vifi; 32243448Sdh155122 vi.vifc_flags = ipst->ips_vifs[vifi].v_flags; 32253448Sdh155122 vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold; 32263448Sdh155122 vi.vifc_rate_limit = ipst->ips_vifs[vifi].v_rate_limit; 32273448Sdh155122 vi.vifc_lcl_addr = ipst->ips_vifs[vifi].v_lcl_addr; 32283448Sdh155122 vi.vifc_rmt_addr = ipst->ips_vifs[vifi].v_rmt_addr; 32293448Sdh155122 vi.vifc_pkt_in = ipst->ips_vifs[vifi].v_pkt_in; 32303448Sdh155122 vi.vifc_pkt_out = ipst->ips_vifs[vifi].v_pkt_out; 32310Sstevel@tonic-gate 32320Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { 32330Sstevel@tonic-gate ip0dbg(("ip_mroute_vif: failed %ld bytes\n", 32340Sstevel@tonic-gate (size_t)sizeof (vi))); 32350Sstevel@tonic-gate return (0); 32360Sstevel@tonic-gate } 32370Sstevel@tonic-gate } 32383448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 32390Sstevel@tonic-gate return (1); 32400Sstevel@tonic-gate } 32410Sstevel@tonic-gate 32420Sstevel@tonic-gate /* 32430Sstevel@tonic-gate * Called by ip_snmp_get to send up multicast routing table. 32440Sstevel@tonic-gate */ 32450Sstevel@tonic-gate int 32463448Sdh155122 ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst) 32470Sstevel@tonic-gate { 32480Sstevel@tonic-gate int i, j; 32490Sstevel@tonic-gate struct mfc *rt; 32500Sstevel@tonic-gate struct mfcctl mfcc; 32510Sstevel@tonic-gate 32520Sstevel@tonic-gate /* 32530Sstevel@tonic-gate * Make sure multicast has not been turned off. 
32540Sstevel@tonic-gate */ 32553448Sdh155122 if (is_mrouter_off(ipst)) 32560Sstevel@tonic-gate return (1); 32570Sstevel@tonic-gate 32580Sstevel@tonic-gate /* Loop over all hash buckets and their chains */ 32590Sstevel@tonic-gate for (i = 0; i < MFCTBLSIZ; i++) { 32603448Sdh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 32613448Sdh155122 for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) { 32620Sstevel@tonic-gate mutex_enter(&rt->mfc_mutex); 32630Sstevel@tonic-gate if (rt->mfc_rte != NULL || 32640Sstevel@tonic-gate (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 32650Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 32660Sstevel@tonic-gate continue; 32670Sstevel@tonic-gate } 32680Sstevel@tonic-gate mfcc.mfcc_origin = rt->mfc_origin; 32690Sstevel@tonic-gate mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; 32700Sstevel@tonic-gate mfcc.mfcc_parent = rt->mfc_parent; 32710Sstevel@tonic-gate mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; 32723448Sdh155122 mutex_enter(&ipst->ips_numvifs_mutex); 32733448Sdh155122 for (j = 0; j < (int)ipst->ips_numvifs; j++) 32740Sstevel@tonic-gate mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; 32753448Sdh155122 for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++) 32760Sstevel@tonic-gate mfcc.mfcc_ttls[j] = 0; 32773448Sdh155122 mutex_exit(&ipst->ips_numvifs_mutex); 32780Sstevel@tonic-gate 32790Sstevel@tonic-gate mutex_exit(&rt->mfc_mutex); 32800Sstevel@tonic-gate if (!snmp_append_data(mp, (char *)&mfcc, 32810Sstevel@tonic-gate sizeof (mfcc))) { 32823448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 32830Sstevel@tonic-gate ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", 32840Sstevel@tonic-gate (size_t)sizeof (mfcc))); 32850Sstevel@tonic-gate return (0); 32860Sstevel@tonic-gate } 32870Sstevel@tonic-gate } 32883448Sdh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 32890Sstevel@tonic-gate } 32900Sstevel@tonic-gate return (1); 32910Sstevel@tonic-gate } 3292