/* xref: /onnv-gate/usr/src/uts/common/io/mac/mac_flow.c (revision 11528:31f6cde5edcb) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
218275SEric Cheng 
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
268275SEric Cheng 
278275SEric Cheng #include <sys/strsun.h>
288275SEric Cheng #include <sys/sdt.h>
298275SEric Cheng #include <sys/mac.h>
308275SEric Cheng #include <sys/mac_impl.h>
318275SEric Cheng #include <sys/mac_client_impl.h>
328275SEric Cheng #include <sys/dls.h>
338275SEric Cheng #include <sys/dls_impl.h>
348275SEric Cheng #include <sys/mac_soft_ring.h>
358275SEric Cheng #include <sys/ethernet.h>
368275SEric Cheng #include <sys/vlan.h>
378275SEric Cheng #include <inet/ip.h>
388275SEric Cheng #include <inet/ip6.h>
398275SEric Cheng #include <netinet/tcp.h>
408275SEric Cheng #include <netinet/udp.h>
418275SEric Cheng #include <netinet/sctp.h>
428275SEric Cheng 
438275SEric Cheng /* global flow table, will be a per exclusive-zone table later */
448275SEric Cheng static mod_hash_t	*flow_hash;
458275SEric Cheng static krwlock_t	flow_tab_lock;
468275SEric Cheng 
478275SEric Cheng static kmem_cache_t	*flow_cache;
488275SEric Cheng static kmem_cache_t	*flow_tab_cache;
498275SEric Cheng static flow_ops_t	flow_l2_ops;
508275SEric Cheng 
518275SEric Cheng typedef struct {
528275SEric Cheng 	const char	*fs_name;
538275SEric Cheng 	uint_t		fs_offset;
548275SEric Cheng } flow_stats_info_t;
558275SEric Cheng 
568275SEric Cheng #define	FS_OFF(f)	(offsetof(flow_stats_t, f))
578275SEric Cheng static flow_stats_info_t flow_stats_list[] = {
588275SEric Cheng 	{"rbytes",	FS_OFF(fs_rbytes)},
598275SEric Cheng 	{"ipackets",	FS_OFF(fs_ipackets)},
608275SEric Cheng 	{"ierrors",	FS_OFF(fs_ierrors)},
618275SEric Cheng 	{"obytes",	FS_OFF(fs_obytes)},
628275SEric Cheng 	{"opackets",	FS_OFF(fs_opackets)},
638275SEric Cheng 	{"oerrors",	FS_OFF(fs_oerrors)}
648275SEric Cheng };
658275SEric Cheng #define	FS_SIZE		(sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
668275SEric Cheng 
678275SEric Cheng /*
688275SEric Cheng  * Checks whether a flow mask is legal.
698275SEric Cheng  */
708275SEric Cheng static flow_tab_info_t	*mac_flow_tab_info_get(flow_mask_t);
718275SEric Cheng 
728275SEric Cheng static void
738275SEric Cheng flow_stat_init(kstat_named_t *knp)
748275SEric Cheng {
758275SEric Cheng 	int	i;
768275SEric Cheng 
778275SEric Cheng 	for (i = 0; i < FS_SIZE; i++, knp++) {
788275SEric Cheng 		kstat_named_init(knp, flow_stats_list[i].fs_name,
798275SEric Cheng 		    KSTAT_DATA_UINT64);
808275SEric Cheng 	}
818275SEric Cheng }
828275SEric Cheng 
838275SEric Cheng static int
848275SEric Cheng flow_stat_update(kstat_t *ksp, int rw)
858275SEric Cheng {
8610616SSebastien.Roy@Sun.COM 	flow_entry_t	*fep = ksp->ks_private;
8710616SSebastien.Roy@Sun.COM 	flow_stats_t 	*fsp = &fep->fe_flowstats;
8810616SSebastien.Roy@Sun.COM 	kstat_named_t	*knp = ksp->ks_data;
8910616SSebastien.Roy@Sun.COM 	uint64_t	*statp;
9010616SSebastien.Roy@Sun.COM 	int		i;
918275SEric Cheng 
928275SEric Cheng 	if (rw != KSTAT_READ)
938275SEric Cheng 		return (EACCES);
948275SEric Cheng 
958275SEric Cheng 	for (i = 0; i < FS_SIZE; i++, knp++) {
968275SEric Cheng 		statp = (uint64_t *)
978275SEric Cheng 		    ((uchar_t *)fsp + flow_stats_list[i].fs_offset);
988275SEric Cheng 
998275SEric Cheng 		knp->value.ui64 = *statp;
1008275SEric Cheng 	}
1018275SEric Cheng 	return (0);
1028275SEric Cheng }
1038275SEric Cheng 
1048275SEric Cheng static void
1058275SEric Cheng flow_stat_create(flow_entry_t *fep)
1068275SEric Cheng {
1078275SEric Cheng 	kstat_t		*ksp;
1088275SEric Cheng 	kstat_named_t	*knp;
1098275SEric Cheng 	uint_t		nstats = FS_SIZE;
1108275SEric Cheng 
11110616SSebastien.Roy@Sun.COM 	/*
11210616SSebastien.Roy@Sun.COM 	 * Fow now, flow entries are only manipulated and visible from the
11310616SSebastien.Roy@Sun.COM 	 * global zone.
11410616SSebastien.Roy@Sun.COM 	 */
11510616SSebastien.Roy@Sun.COM 	ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow",
11610616SSebastien.Roy@Sun.COM 	    KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID);
1178275SEric Cheng 	if (ksp == NULL)
1188275SEric Cheng 		return;
1198275SEric Cheng 
1208275SEric Cheng 	ksp->ks_update = flow_stat_update;
1218275SEric Cheng 	ksp->ks_private = fep;
1228275SEric Cheng 	fep->fe_ksp = ksp;
1238275SEric Cheng 
1248275SEric Cheng 	knp = (kstat_named_t *)ksp->ks_data;
1258275SEric Cheng 	flow_stat_init(knp);
1268275SEric Cheng 	kstat_install(ksp);
1278275SEric Cheng }
1288275SEric Cheng 
1298275SEric Cheng void
1308275SEric Cheng flow_stat_destroy(flow_entry_t *fep)
1318275SEric Cheng {
1328275SEric Cheng 	if (fep->fe_ksp != NULL) {
1338275SEric Cheng 		kstat_delete(fep->fe_ksp);
1348275SEric Cheng 		fep->fe_ksp = NULL;
1358275SEric Cheng 	}
1368275SEric Cheng }
1378275SEric Cheng 
1388275SEric Cheng /*
1398275SEric Cheng  * Initialize the flow table
1408275SEric Cheng  */
1418275SEric Cheng void
1428275SEric Cheng mac_flow_init()
1438275SEric Cheng {
1448275SEric Cheng 	flow_cache = kmem_cache_create("flow_entry_cache",
1458275SEric Cheng 	    sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1468275SEric Cheng 	flow_tab_cache = kmem_cache_create("flow_tab_cache",
1478275SEric Cheng 	    sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1488275SEric Cheng 	flow_hash = mod_hash_create_extended("flow_hash",
1498275SEric Cheng 	    100, mod_hash_null_keydtor, mod_hash_null_valdtor,
1508275SEric Cheng 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
1518275SEric Cheng 	rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL);
1528275SEric Cheng }
1538275SEric Cheng 
1548275SEric Cheng /*
1558275SEric Cheng  * Cleanup and release the flow table
1568275SEric Cheng  */
1578275SEric Cheng void
1588275SEric Cheng mac_flow_fini()
1598275SEric Cheng {
1608275SEric Cheng 	kmem_cache_destroy(flow_cache);
1618275SEric Cheng 	kmem_cache_destroy(flow_tab_cache);
1628275SEric Cheng 	mod_hash_destroy_hash(flow_hash);
1638275SEric Cheng 	rw_destroy(&flow_tab_lock);
1648275SEric Cheng }
1658275SEric Cheng 
1668275SEric Cheng /*
1678275SEric Cheng  * mac_create_flow(): create a flow_entry_t.
1688275SEric Cheng  */
1698275SEric Cheng int
1708275SEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name,
1718275SEric Cheng     void *client_cookie, uint_t type, flow_entry_t **flentp)
1728275SEric Cheng {
1738275SEric Cheng 	flow_entry_t	*flent = *flentp;
1748275SEric Cheng 	int		err = 0;
1758275SEric Cheng 
1768275SEric Cheng 	if (mrp != NULL) {
1778275SEric Cheng 		err = mac_validate_props(mrp);
1788275SEric Cheng 		if (err != 0)
1798275SEric Cheng 			return (err);
1808275SEric Cheng 	}
1818275SEric Cheng 
1828275SEric Cheng 	if (flent == NULL) {
1838275SEric Cheng 		flent = kmem_cache_alloc(flow_cache, KM_SLEEP);
1848275SEric Cheng 		bzero(flent, sizeof (*flent));
1858275SEric Cheng 		mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL);
1868275SEric Cheng 		cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL);
1878275SEric Cheng 
1888275SEric Cheng 		/* Initialize the receiver function to a safe routine */
1898275SEric Cheng 		flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
1908275SEric Cheng 		flent->fe_index = -1;
1918275SEric Cheng 	}
1928558SGirish.Moodalbail@Sun.COM 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
1938275SEric Cheng 
1948275SEric Cheng 	/* This is an initial flow, will be configured later */
1958275SEric Cheng 	if (fd == NULL) {
1968275SEric Cheng 		*flentp = flent;
1978275SEric Cheng 		return (0);
1988275SEric Cheng 	}
1998275SEric Cheng 
2008275SEric Cheng 	flent->fe_client_cookie = client_cookie;
2018275SEric Cheng 	flent->fe_type = type;
2028275SEric Cheng 
2038275SEric Cheng 	/* Save flow desc */
2048275SEric Cheng 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
2058275SEric Cheng 
2068275SEric Cheng 	if (mrp != NULL) {
2078275SEric Cheng 		/*
2088275SEric Cheng 		 * We have already set fe_resource_props for a Link.
2098275SEric Cheng 		 */
2108275SEric Cheng 		if (type & FLOW_USER) {
2118275SEric Cheng 			bcopy(mrp, &flent->fe_resource_props,
2128275SEric Cheng 			    sizeof (mac_resource_props_t));
2138275SEric Cheng 		}
2148275SEric Cheng 		/*
2158275SEric Cheng 		 * The effective resource list should reflect the priority
2168275SEric Cheng 		 * that we set implicitly.
2178275SEric Cheng 		 */
2188275SEric Cheng 		if (!(mrp->mrp_mask & MRP_PRIORITY))
2198275SEric Cheng 			mrp->mrp_mask |= MRP_PRIORITY;
2208275SEric Cheng 		if (type & FLOW_USER)
2218275SEric Cheng 			mrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
2228275SEric Cheng 		else
2238275SEric Cheng 			mrp->mrp_priority = MPL_LINK_DEFAULT;
2248275SEric Cheng 		bcopy(mrp, &flent->fe_effective_props,
2258275SEric Cheng 		    sizeof (mac_resource_props_t));
2268275SEric Cheng 	}
2278275SEric Cheng 	flow_stat_create(flent);
2288275SEric Cheng 
2298275SEric Cheng 	*flentp = flent;
2308275SEric Cheng 	return (0);
2318275SEric Cheng }
2328275SEric Cheng 
2338275SEric Cheng /*
2348275SEric Cheng  * Validate flow entry and add it to a flow table.
2358275SEric Cheng  */
2368275SEric Cheng int
2378275SEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent)
2388275SEric Cheng {
2398275SEric Cheng 	flow_entry_t	**headp, **p;
2408275SEric Cheng 	flow_ops_t	*ops = &ft->ft_ops;
2418275SEric Cheng 	flow_mask_t	mask;
2428275SEric Cheng 	uint32_t	index;
2438275SEric Cheng 	int		err;
2448275SEric Cheng 
2458275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
2468275SEric Cheng 
2478275SEric Cheng 	/*
2488275SEric Cheng 	 * Check for invalid bits in mask.
2498275SEric Cheng 	 */
2508275SEric Cheng 	mask = flent->fe_flow_desc.fd_mask;
2518275SEric Cheng 	if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0)
2528275SEric Cheng 		return (EOPNOTSUPP);
2538275SEric Cheng 
2548275SEric Cheng 	/*
2558275SEric Cheng 	 * Validate flent.
2568275SEric Cheng 	 */
2578275SEric Cheng 	if ((err = ops->fo_accept_fe(ft, flent)) != 0) {
2588275SEric Cheng 		DTRACE_PROBE3(accept_failed, flow_tab_t *, ft,
2598275SEric Cheng 		    flow_entry_t *, flent, int, err);
2608275SEric Cheng 		return (err);
2618275SEric Cheng 	}
2628275SEric Cheng 
2638275SEric Cheng 	/*
2648275SEric Cheng 	 * Flent is valid. now calculate hash and insert it
2658275SEric Cheng 	 * into hash table.
2668275SEric Cheng 	 */
2678275SEric Cheng 	index = ops->fo_hash_fe(ft, flent);
2688275SEric Cheng 
2698275SEric Cheng 	/*
2708275SEric Cheng 	 * We do not need a lock up until now because we were
2718275SEric Cheng 	 * not accessing the flow table.
2728275SEric Cheng 	 */
2738275SEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
2748275SEric Cheng 	headp = &ft->ft_table[index];
2758275SEric Cheng 
2768275SEric Cheng 	/*
2778275SEric Cheng 	 * Check for duplicate flow.
2788275SEric Cheng 	 */
2798275SEric Cheng 	for (p = headp; *p != NULL; p = &(*p)->fe_next) {
2808275SEric Cheng 		if ((*p)->fe_flow_desc.fd_mask !=
2818275SEric Cheng 		    flent->fe_flow_desc.fd_mask)
2828275SEric Cheng 			continue;
2838275SEric Cheng 
2848275SEric Cheng 		if (ft->ft_ops.fo_match_fe(ft, *p, flent)) {
2858275SEric Cheng 			rw_exit(&ft->ft_lock);
2868275SEric Cheng 			DTRACE_PROBE3(dup_flow, flow_tab_t *, ft,
2878275SEric Cheng 			    flow_entry_t *, flent, int, err);
2888275SEric Cheng 			return (EALREADY);
2898275SEric Cheng 		}
2908275SEric Cheng 	}
2918275SEric Cheng 
2928275SEric Cheng 	/*
2938275SEric Cheng 	 * Insert flow to hash list.
2948275SEric Cheng 	 */
2958275SEric Cheng 	err = ops->fo_insert_fe(ft, headp, flent);
2968275SEric Cheng 	if (err != 0) {
2978275SEric Cheng 		rw_exit(&ft->ft_lock);
2988275SEric Cheng 		DTRACE_PROBE3(insert_failed, flow_tab_t *, ft,
2998275SEric Cheng 		    flow_entry_t *, flent, int, err);
3008275SEric Cheng 		return (err);
3018275SEric Cheng 	}
3028275SEric Cheng 
3038275SEric Cheng 	/*
3048275SEric Cheng 	 * Save the hash index so it can be used by mac_flow_remove().
3058275SEric Cheng 	 */
3068275SEric Cheng 	flent->fe_index = (int)index;
3078275SEric Cheng 
3088275SEric Cheng 	/*
3098275SEric Cheng 	 * Save the flow tab back reference.
3108275SEric Cheng 	 */
3118275SEric Cheng 	flent->fe_flow_tab = ft;
3128275SEric Cheng 	FLOW_MARK(flent, FE_FLOW_TAB);
3138275SEric Cheng 	ft->ft_flow_count++;
3148275SEric Cheng 	rw_exit(&ft->ft_lock);
3158275SEric Cheng 	return (0);
3168275SEric Cheng }
3178275SEric Cheng 
3188275SEric Cheng /*
3198275SEric Cheng  * Remove a flow from a mac client's subflow table
3208275SEric Cheng  */
3218275SEric Cheng void
3228275SEric Cheng mac_flow_rem_subflow(flow_entry_t *flent)
3238275SEric Cheng {
3248275SEric Cheng 	flow_tab_t		*ft = flent->fe_flow_tab;
3258275SEric Cheng 	mac_client_impl_t	*mcip = ft->ft_mcip;
3269073SCathy.Zhou@Sun.COM 	mac_handle_t		mh = (mac_handle_t)ft->ft_mip;
3278275SEric Cheng 
3289073SCathy.Zhou@Sun.COM 	ASSERT(MAC_PERIM_HELD(mh));
3298275SEric Cheng 
3308275SEric Cheng 	mac_flow_remove(ft, flent, B_FALSE);
3318275SEric Cheng 	if (flent->fe_mcip == NULL) {
3328275SEric Cheng 		/*
3338275SEric Cheng 		 * The interface is not yet plumbed and mac_client_flow_add
3348275SEric Cheng 		 * was not done.
3358275SEric Cheng 		 */
3368275SEric Cheng 		if (FLOW_TAB_EMPTY(ft)) {
3378275SEric Cheng 			mac_flow_tab_destroy(ft);
3388275SEric Cheng 			mcip->mci_subflow_tab = NULL;
3398275SEric Cheng 		}
3409073SCathy.Zhou@Sun.COM 	} else {
3419073SCathy.Zhou@Sun.COM 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3429073SCathy.Zhou@Sun.COM 		mac_link_flow_clean((mac_client_handle_t)mcip, flent);
3438275SEric Cheng 	}
3449073SCathy.Zhou@Sun.COM 	mac_fastpath_enable(mh);
3458275SEric Cheng }
3468275SEric Cheng 
3478275SEric Cheng /*
3488275SEric Cheng  * Add a flow to a mac client's subflow table and instantiate the flow
3498275SEric Cheng  * in the mac by creating the associated SRSs etc.
3508275SEric Cheng  */
3518275SEric Cheng int
3528275SEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
3538275SEric Cheng     boolean_t instantiate_flow)
3548275SEric Cheng {
3558275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
3569073SCathy.Zhou@Sun.COM 	mac_handle_t		mh = (mac_handle_t)mcip->mci_mip;
3578275SEric Cheng 	flow_tab_info_t		*ftinfo;
3588275SEric Cheng 	flow_mask_t		mask;
3598275SEric Cheng 	flow_tab_t		*ft;
3608275SEric Cheng 	int			err;
3618275SEric Cheng 	boolean_t		ft_created = B_FALSE;
3628275SEric Cheng 
3639073SCathy.Zhou@Sun.COM 	ASSERT(MAC_PERIM_HELD(mh));
3649073SCathy.Zhou@Sun.COM 
3659073SCathy.Zhou@Sun.COM 	if ((err = mac_fastpath_disable(mh)) != 0)
3669073SCathy.Zhou@Sun.COM 		return (err);
3678275SEric Cheng 
3688275SEric Cheng 	/*
3698275SEric Cheng 	 * If the subflow table exists already just add the new subflow
3708275SEric Cheng 	 * to the existing table, else we create a new subflow table below.
3718275SEric Cheng 	 */
3728275SEric Cheng 	ft = mcip->mci_subflow_tab;
3738275SEric Cheng 	if (ft == NULL) {
3748275SEric Cheng 		mask = flent->fe_flow_desc.fd_mask;
3758275SEric Cheng 		/*
3768275SEric Cheng 		 * Try to create a new table and then add the subflow to the
3778275SEric Cheng 		 * newly created subflow table
3788275SEric Cheng 		 */
3799073SCathy.Zhou@Sun.COM 		if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
3809073SCathy.Zhou@Sun.COM 			mac_fastpath_enable(mh);
3818275SEric Cheng 			return (EOPNOTSUPP);
3829073SCathy.Zhou@Sun.COM 		}
3838275SEric Cheng 
3848275SEric Cheng 		mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
3858275SEric Cheng 		    mcip->mci_mip, &ft);
3868275SEric Cheng 		ft_created = B_TRUE;
3878275SEric Cheng 	}
3888275SEric Cheng 
3898275SEric Cheng 	err = mac_flow_add(ft, flent);
3908275SEric Cheng 	if (err != 0) {
3918275SEric Cheng 		if (ft_created)
3928275SEric Cheng 			mac_flow_tab_destroy(ft);
3939073SCathy.Zhou@Sun.COM 		mac_fastpath_enable(mh);
3948275SEric Cheng 		return (err);
3958275SEric Cheng 	}
3968275SEric Cheng 
3978275SEric Cheng 	if (instantiate_flow) {
3988275SEric Cheng 		/* Now activate the flow by creating its SRSs */
3998275SEric Cheng 		ASSERT(MCIP_DATAPATH_SETUP(mcip));
4008275SEric Cheng 		err = mac_link_flow_init((mac_client_handle_t)mcip, flent);
4018275SEric Cheng 		if (err != 0) {
4028275SEric Cheng 			mac_flow_remove(ft, flent, B_FALSE);
4038275SEric Cheng 			if (ft_created)
4048275SEric Cheng 				mac_flow_tab_destroy(ft);
4059073SCathy.Zhou@Sun.COM 			mac_fastpath_enable(mh);
4068275SEric Cheng 			return (err);
4078275SEric Cheng 		}
4088275SEric Cheng 	} else {
4098275SEric Cheng 		FLOW_MARK(flent, FE_UF_NO_DATAPATH);
4108275SEric Cheng 	}
4118275SEric Cheng 	if (ft_created) {
4128275SEric Cheng 		ASSERT(mcip->mci_subflow_tab == NULL);
4138275SEric Cheng 		ft->ft_mcip = mcip;
4148275SEric Cheng 		mcip->mci_subflow_tab = ft;
4158275SEric Cheng 		if (instantiate_flow)
4168275SEric Cheng 			mac_client_update_classifier(mcip, B_TRUE);
4178275SEric Cheng 	}
4188275SEric Cheng 	return (0);
4198275SEric Cheng }
4208275SEric Cheng 
4218275SEric Cheng /*
4228275SEric Cheng  * Remove flow entry from flow table.
4238275SEric Cheng  */
4248275SEric Cheng void
4258275SEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp)
4268275SEric Cheng {
4278275SEric Cheng 	flow_entry_t	**fp;
4288275SEric Cheng 
4298275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
4308275SEric Cheng 	if (!(flent->fe_flags & FE_FLOW_TAB))
4318275SEric Cheng 		return;
4328275SEric Cheng 
4338275SEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
4348275SEric Cheng 	/*
4358275SEric Cheng 	 * If this is a permanent removal from the flow table, mark it
4368275SEric Cheng 	 * CONDEMNED to prevent future references. If this is a temporary
4378275SEric Cheng 	 * removal from the table, say to update the flow descriptor then
4388275SEric Cheng 	 * we don't mark it CONDEMNED
4398275SEric Cheng 	 */
4408275SEric Cheng 	if (!temp)
4418275SEric Cheng 		FLOW_MARK(flent, FE_CONDEMNED);
4428275SEric Cheng 	/*
4438275SEric Cheng 	 * Locate the specified flent.
4448275SEric Cheng 	 */
4458275SEric Cheng 	fp = &ft->ft_table[flent->fe_index];
4468275SEric Cheng 	while (*fp != flent)
4478275SEric Cheng 		fp = &(*fp)->fe_next;
4488275SEric Cheng 
4498275SEric Cheng 	/*
4508275SEric Cheng 	 * The flent must exist. Otherwise it's a bug.
4518275SEric Cheng 	 */
4528275SEric Cheng 	ASSERT(fp != NULL);
4538275SEric Cheng 	*fp = flent->fe_next;
4548275SEric Cheng 	flent->fe_next = NULL;
4558275SEric Cheng 
4568275SEric Cheng 	/*
4578275SEric Cheng 	 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
4588275SEric Cheng 	 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
4598275SEric Cheng 	 * will panic.
4608275SEric Cheng 	 */
4618275SEric Cheng 	flent->fe_index = -1;
4628275SEric Cheng 	FLOW_UNMARK(flent, FE_FLOW_TAB);
4638275SEric Cheng 	ft->ft_flow_count--;
4648275SEric Cheng 	rw_exit(&ft->ft_lock);
4658275SEric Cheng }
4668275SEric Cheng 
4678275SEric Cheng /*
4688275SEric Cheng  * This is the flow lookup routine used by the mac sw classifier engine.
4698275SEric Cheng  */
4708275SEric Cheng int
4718275SEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp)
4728275SEric Cheng {
4738275SEric Cheng 	flow_state_t	s;
4748275SEric Cheng 	flow_entry_t	*flent;
4758275SEric Cheng 	flow_ops_t	*ops = &ft->ft_ops;
4768275SEric Cheng 	boolean_t	retried = B_FALSE;
4778275SEric Cheng 	int		i, err;
4788275SEric Cheng 
4798275SEric Cheng 	s.fs_flags = flags;
4808833SVenu.Iyer@Sun.COM retry:
4818275SEric Cheng 	s.fs_mp = mp;
4828275SEric Cheng 
4838275SEric Cheng 	/*
4848275SEric Cheng 	 * Walk the list of predeclared accept functions.
4858275SEric Cheng 	 * Each of these would accumulate enough state to allow the next
4868275SEric Cheng 	 * accept routine to make progress.
4878275SEric Cheng 	 */
4888275SEric Cheng 	for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) {
4898275SEric Cheng 		if ((err = (ops->fo_accept[i])(ft, &s)) != 0) {
4908833SVenu.Iyer@Sun.COM 			mblk_t	*last;
4918833SVenu.Iyer@Sun.COM 
4928275SEric Cheng 			/*
4938275SEric Cheng 			 * ENOBUFS indicates that the mp could be too short
4948275SEric Cheng 			 * and may need a pullup.
4958275SEric Cheng 			 */
4968275SEric Cheng 			if (err != ENOBUFS || retried)
4978275SEric Cheng 				return (err);
4988275SEric Cheng 
4998275SEric Cheng 			/*
5008833SVenu.Iyer@Sun.COM 			 * The pullup is done on the last processed mblk, not
5018833SVenu.Iyer@Sun.COM 			 * the starting one. pullup is not done if the mblk
5028833SVenu.Iyer@Sun.COM 			 * has references or if b_cont is NULL.
5038275SEric Cheng 			 */
5048833SVenu.Iyer@Sun.COM 			last = s.fs_mp;
5058833SVenu.Iyer@Sun.COM 			if (DB_REF(last) > 1 || last->b_cont == NULL ||
5068833SVenu.Iyer@Sun.COM 			    pullupmsg(last, -1) == 0)
5078275SEric Cheng 				return (EINVAL);
5088275SEric Cheng 
5098275SEric Cheng 			retried = B_TRUE;
5108275SEric Cheng 			DTRACE_PROBE2(need_pullup, flow_tab_t *, ft,
5118275SEric Cheng 			    flow_state_t *, &s);
5128275SEric Cheng 			goto retry;
5138275SEric Cheng 		}
5148275SEric Cheng 	}
5158275SEric Cheng 
5168275SEric Cheng 	/*
5178275SEric Cheng 	 * The packet is considered sane. We may now attempt to
5188275SEric Cheng 	 * find the corresponding flent.
5198275SEric Cheng 	 */
5208275SEric Cheng 	rw_enter(&ft->ft_lock, RW_READER);
5218275SEric Cheng 	flent = ft->ft_table[ops->fo_hash(ft, &s)];
5228275SEric Cheng 	for (; flent != NULL; flent = flent->fe_next) {
5238275SEric Cheng 		if (flent->fe_match(ft, flent, &s)) {
5248275SEric Cheng 			FLOW_TRY_REFHOLD(flent, err);
5258275SEric Cheng 			if (err != 0)
5268275SEric Cheng 				continue;
5278275SEric Cheng 			*flentp = flent;
5288275SEric Cheng 			rw_exit(&ft->ft_lock);
5298275SEric Cheng 			return (0);
5308275SEric Cheng 		}
5318275SEric Cheng 	}
5328275SEric Cheng 	rw_exit(&ft->ft_lock);
5338275SEric Cheng 	return (ENOENT);
5348275SEric Cheng }
5358275SEric Cheng 
5368275SEric Cheng /*
5378275SEric Cheng  * Walk flow table.
5388275SEric Cheng  * The caller is assumed to have proper perimeter protection.
5398275SEric Cheng  */
5408275SEric Cheng int
5418275SEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
5428275SEric Cheng     void *arg)
5438275SEric Cheng {
5448275SEric Cheng 	int		err, i, cnt = 0;
5458275SEric Cheng 	flow_entry_t	*flent;
5468275SEric Cheng 
5478275SEric Cheng 	if (ft == NULL)
5488275SEric Cheng 		return (0);
5498275SEric Cheng 
5508275SEric Cheng 	for (i = 0; i < ft->ft_size; i++) {
5518275SEric Cheng 		for (flent = ft->ft_table[i]; flent != NULL;
5528275SEric Cheng 		    flent = flent->fe_next) {
5538275SEric Cheng 			cnt++;
5548275SEric Cheng 			err = (*fn)(flent, arg);
5558275SEric Cheng 			if (err != 0)
5568275SEric Cheng 				return (err);
5578275SEric Cheng 		}
5588275SEric Cheng 	}
5598275SEric Cheng 	VERIFY(cnt == ft->ft_flow_count);
5608275SEric Cheng 	return (0);
5618275SEric Cheng }
5628275SEric Cheng 
5638275SEric Cheng /*
5648275SEric Cheng  * Same as the above except a mutex is used for protection here.
5658275SEric Cheng  */
5668275SEric Cheng int
5678275SEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
5688275SEric Cheng     void *arg)
5698275SEric Cheng {
5708275SEric Cheng 	int		err;
5718275SEric Cheng 
5728275SEric Cheng 	if (ft == NULL)
5738275SEric Cheng 		return (0);
5748275SEric Cheng 
5758275SEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
5768275SEric Cheng 	err = mac_flow_walk_nolock(ft, fn, arg);
5778275SEric Cheng 	rw_exit(&ft->ft_lock);
5788275SEric Cheng 	return (err);
5798275SEric Cheng }
5808275SEric Cheng 
5818275SEric Cheng static boolean_t	mac_flow_clean(flow_entry_t *);
5828275SEric Cheng 
5838275SEric Cheng /*
5848275SEric Cheng  * Destroy a flow entry. Called when the last reference on a flow is released.
5858275SEric Cheng  */
5868275SEric Cheng void
5878275SEric Cheng mac_flow_destroy(flow_entry_t *flent)
5888275SEric Cheng {
5898275SEric Cheng 	ASSERT(flent->fe_refcnt == 0);
5908275SEric Cheng 
5918275SEric Cheng 	if ((flent->fe_type & FLOW_USER) != 0) {
5928275SEric Cheng 		ASSERT(mac_flow_clean(flent));
5938275SEric Cheng 	} else {
5948275SEric Cheng 		mac_flow_cleanup(flent);
5958275SEric Cheng 	}
5968275SEric Cheng 
5978275SEric Cheng 	mutex_destroy(&flent->fe_lock);
5988275SEric Cheng 	cv_destroy(&flent->fe_cv);
5998275SEric Cheng 	flow_stat_destroy(flent);
6008275SEric Cheng 	kmem_cache_free(flow_cache, flent);
6018275SEric Cheng }
6028275SEric Cheng 
6038275SEric Cheng /*
6048275SEric Cheng  * XXX eric
6058275SEric Cheng  * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
6068275SEric Cheng  * mac_link_flow_modify() should really be moved/reworked into the
6078275SEric Cheng  * two functions below. This would consolidate all the mac property
6088275SEric Cheng  * checking in one place. I'm leaving this alone for now since it's
6098275SEric Cheng  * out of scope of the new flows work.
6108275SEric Cheng  */
6118275SEric Cheng /* ARGSUSED */
6128275SEric Cheng uint32_t
6138275SEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp)
6148275SEric Cheng {
6158275SEric Cheng 	uint32_t		changed_mask = 0;
6168275SEric Cheng 	mac_resource_props_t	*fmrp = &flent->fe_effective_props;
6178275SEric Cheng 	int			i;
6188275SEric Cheng 
6198275SEric Cheng 	if ((mrp->mrp_mask & MRP_MAXBW) != 0 &&
6208275SEric Cheng 	    (fmrp->mrp_maxbw != mrp->mrp_maxbw)) {
6218275SEric Cheng 		changed_mask |= MRP_MAXBW;
6228275SEric Cheng 		fmrp->mrp_maxbw = mrp->mrp_maxbw;
6238275SEric Cheng 		if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
6248275SEric Cheng 			fmrp->mrp_mask &= ~MRP_MAXBW;
6258275SEric Cheng 		} else {
6268275SEric Cheng 			fmrp->mrp_mask |= MRP_MAXBW;
6278275SEric Cheng 		}
6288275SEric Cheng 	}
6298275SEric Cheng 
6308275SEric Cheng 	if ((mrp->mrp_mask & MRP_PRIORITY) != 0) {
6318275SEric Cheng 		if (fmrp->mrp_priority != mrp->mrp_priority)
6328275SEric Cheng 			changed_mask |= MRP_PRIORITY;
6338275SEric Cheng 		if (mrp->mrp_priority == MPL_RESET) {
6348275SEric Cheng 			fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
6358275SEric Cheng 			fmrp->mrp_mask &= ~MRP_PRIORITY;
6368275SEric Cheng 		} else {
6378275SEric Cheng 			fmrp->mrp_priority = mrp->mrp_priority;
6388275SEric Cheng 			fmrp->mrp_mask |= MRP_PRIORITY;
6398275SEric Cheng 		}
6408275SEric Cheng 	}
6418275SEric Cheng 
6428275SEric Cheng 	/* modify fanout */
6438275SEric Cheng 	if ((mrp->mrp_mask & MRP_CPUS) != 0) {
6448275SEric Cheng 		if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) &&
6458275SEric Cheng 		    (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) {
6468275SEric Cheng 			for (i = 0; i < mrp->mrp_ncpus; i++) {
6478275SEric Cheng 				if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i])
6488275SEric Cheng 					break;
6498275SEric Cheng 			}
6508275SEric Cheng 			if (i == mrp->mrp_ncpus) {
6518275SEric Cheng 				/*
6528275SEric Cheng 				 * The new set of cpus passed is exactly
6538275SEric Cheng 				 * the same as the existing set.
6548275SEric Cheng 				 */
6558275SEric Cheng 				return (changed_mask);
6568275SEric Cheng 			}
6578275SEric Cheng 		}
6588275SEric Cheng 		changed_mask |= MRP_CPUS;
6598275SEric Cheng 		MAC_COPY_CPUS(mrp, fmrp);
6608275SEric Cheng 	}
6618275SEric Cheng 	return (changed_mask);
6628275SEric Cheng }
6638275SEric Cheng 
6648275SEric Cheng void
6658275SEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp)
6668275SEric Cheng {
6678275SEric Cheng 	uint32_t changed_mask;
6688275SEric Cheng 	mac_client_impl_t *mcip = flent->fe_mcip;
6698275SEric Cheng 	mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
6708275SEric Cheng 
6718275SEric Cheng 	ASSERT(flent != NULL);
6728275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
6738275SEric Cheng 
6748275SEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
6758275SEric Cheng 
6768275SEric Cheng 	/* Update the cached values inside the subflow entry */
6778275SEric Cheng 	changed_mask = mac_flow_modify_props(flent, mrp);
6788275SEric Cheng 	rw_exit(&ft->ft_lock);
6798275SEric Cheng 	/*
6808275SEric Cheng 	 * Push the changed parameters to the scheduling code in the
6818275SEric Cheng 	 * SRS's, to take effect right away.
6828275SEric Cheng 	 */
6838275SEric Cheng 	if (changed_mask & MRP_MAXBW) {
6848275SEric Cheng 		mac_srs_update_bwlimit(flent, mrp);
6858275SEric Cheng 		/*
6868275SEric Cheng 		 * If bandwidth is changed, we may have to change
6878275SEric Cheng 		 * the number of soft ring to be used for fanout.
6888275SEric Cheng 		 * Call mac_flow_update_fanout() if MAC_BIND_CPU
6898275SEric Cheng 		 * is not set and there is no user supplied cpu
6908275SEric Cheng 		 * info. This applies only to link at this time.
6918275SEric Cheng 		 */
6928275SEric Cheng 		if (!(flent->fe_type & FLOW_USER) &&
6938275SEric Cheng 		    !(changed_mask & MRP_CPUS) &&
6948275SEric Cheng 		    !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) {
6958275SEric Cheng 			mac_fanout_setup(mcip, flent, mcip_mrp,
6968275SEric Cheng 			    mac_rx_deliver, mcip, NULL);
6978275SEric Cheng 		}
6988275SEric Cheng 	}
6998275SEric Cheng 	if (mrp->mrp_mask & MRP_PRIORITY)
7008275SEric Cheng 		mac_flow_update_priority(mcip, flent);
7018275SEric Cheng 
7028275SEric Cheng 	if (changed_mask & MRP_CPUS)
7038275SEric Cheng 		mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL);
7048275SEric Cheng }
7058275SEric Cheng 
7068275SEric Cheng /*
7078275SEric Cheng  * This function waits for a certain condition to be met and is generally
7088275SEric Cheng  * used before a destructive or quiescing operation.
7098275SEric Cheng  */
7108275SEric Cheng void
7118275SEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event)
7128275SEric Cheng {
7138275SEric Cheng 	mutex_enter(&flent->fe_lock);
7148275SEric Cheng 	flent->fe_flags |= FE_WAITER;
7158275SEric Cheng 
7168275SEric Cheng 	switch (event) {
7178275SEric Cheng 	case FLOW_DRIVER_UPCALL:
7188275SEric Cheng 		/*
7198275SEric Cheng 		 * We want to make sure the driver upcalls have finished before
7208275SEric Cheng 		 * we signal the Rx SRS worker to quit.
7218275SEric Cheng 		 */
7228275SEric Cheng 		while (flent->fe_refcnt != 1)
7238275SEric Cheng 			cv_wait(&flent->fe_cv, &flent->fe_lock);
7248275SEric Cheng 		break;
7258275SEric Cheng 
7268275SEric Cheng 	case FLOW_USER_REF:
7278275SEric Cheng 		/*
7288275SEric Cheng 		 * Wait for the fe_user_refcnt to drop to 0. The flow has
7298275SEric Cheng 		 * been removed from the global flow hash.
7308275SEric Cheng 		 */
7318275SEric Cheng 		ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH));
7328275SEric Cheng 		while (flent->fe_user_refcnt != 0)
7338275SEric Cheng 			cv_wait(&flent->fe_cv, &flent->fe_lock);
7348275SEric Cheng 		break;
7358275SEric Cheng 
7368275SEric Cheng 	default:
7378275SEric Cheng 		ASSERT(0);
7388275SEric Cheng 	}
7398275SEric Cheng 
7408275SEric Cheng 	flent->fe_flags &= ~FE_WAITER;
7418275SEric Cheng 	mutex_exit(&flent->fe_lock);
7428275SEric Cheng }
7438275SEric Cheng 
7448275SEric Cheng static boolean_t
7458275SEric Cheng mac_flow_clean(flow_entry_t *flent)
7468275SEric Cheng {
7478275SEric Cheng 	ASSERT(flent->fe_next == NULL);
7488275SEric Cheng 	ASSERT(flent->fe_tx_srs == NULL);
7498275SEric Cheng 	ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL);
7508275SEric Cheng 	ASSERT(flent->fe_mbg == NULL);
7518275SEric Cheng 
7528275SEric Cheng 	return (B_TRUE);
7538275SEric Cheng }
7548275SEric Cheng 
7558275SEric Cheng void
7568275SEric Cheng mac_flow_cleanup(flow_entry_t *flent)
7578275SEric Cheng {
7588275SEric Cheng 	if ((flent->fe_type & FLOW_USER) == 0) {
7598275SEric Cheng 		ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) ||
7608275SEric Cheng 		    (flent->fe_mbg != NULL && flent->fe_mcip == NULL));
7618275SEric Cheng 		ASSERT(flent->fe_refcnt == 0);
7628275SEric Cheng 	} else {
7638275SEric Cheng 		ASSERT(flent->fe_refcnt == 1);
7648275SEric Cheng 	}
7658275SEric Cheng 
7668275SEric Cheng 	if (flent->fe_mbg != NULL) {
7678275SEric Cheng 		ASSERT(flent->fe_tx_srs == NULL);
7688275SEric Cheng 		/* This is a multicast or broadcast flow entry */
7698275SEric Cheng 		mac_bcast_grp_free(flent->fe_mbg);
7708275SEric Cheng 		flent->fe_mbg = NULL;
7718275SEric Cheng 	}
7728275SEric Cheng 
7738275SEric Cheng 	if (flent->fe_tx_srs != NULL) {
7748275SEric Cheng 		ASSERT(flent->fe_mbg == NULL);
7758275SEric Cheng 		mac_srs_free(flent->fe_tx_srs);
7768275SEric Cheng 		flent->fe_tx_srs = NULL;
7778275SEric Cheng 	}
7788275SEric Cheng 
7798275SEric Cheng 	/*
7808275SEric Cheng 	 * In the normal case fe_rx_srs_cnt is 1. However in the error case
7818275SEric Cheng 	 * when mac_unicast_add fails we may not have set up any SRS
7828275SEric Cheng 	 * in which case fe_rx_srs_cnt will be zero.
7838275SEric Cheng 	 */
7848275SEric Cheng 	if (flent->fe_rx_srs_cnt != 0) {
7858275SEric Cheng 		ASSERT(flent->fe_rx_srs_cnt == 1);
7868275SEric Cheng 		mac_srs_free(flent->fe_rx_srs[0]);
7878275SEric Cheng 		flent->fe_rx_srs[0] = NULL;
7888275SEric Cheng 		flent->fe_rx_srs_cnt = 0;
7898275SEric Cheng 	}
7908275SEric Cheng 	ASSERT(flent->fe_rx_srs[0] == NULL);
7918275SEric Cheng }
7928275SEric Cheng 
7938275SEric Cheng void
7948275SEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd)
7958275SEric Cheng {
7968275SEric Cheng 	/*
7978275SEric Cheng 	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
7988275SEric Cheng 	 * Updates to the fe_flow_desc happen under the fe_lock
7998275SEric Cheng 	 * after removing the flent from the flow table
8008275SEric Cheng 	 */
8018275SEric Cheng 	mutex_enter(&flent->fe_lock);
8028275SEric Cheng 	bcopy(&flent->fe_flow_desc, fd, sizeof (*fd));
8038275SEric Cheng 	mutex_exit(&flent->fe_lock);
8048275SEric Cheng }
8058275SEric Cheng 
8068275SEric Cheng /*
8078275SEric Cheng  * Update a field of a flow entry. The mac perimeter ensures that
8088275SEric Cheng  * this is the only thread doing a modify operation on this mac end point.
8098275SEric Cheng  * So the flow table can't change or disappear. The ft_lock protects access
8108275SEric Cheng  * to the flow entry, and holding the lock ensures that there isn't any thread
8118275SEric Cheng  * accessing the flow entry or attempting a flow table lookup. However
8128275SEric Cheng  * data threads that are using the flow entry based on the old descriptor
8138275SEric Cheng  * will continue to use the flow entry. If strong coherence is required
8148275SEric Cheng  * then the flow will have to be quiesced before the descriptor can be
8158275SEric Cheng  * changed.
8168275SEric Cheng  */
8178275SEric Cheng void
8188275SEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd)
8198275SEric Cheng {
8208275SEric Cheng 	flow_tab_t	*ft = flent->fe_flow_tab;
8218275SEric Cheng 	flow_desc_t	old_desc;
8228275SEric Cheng 	int		err;
8238275SEric Cheng 
8248275SEric Cheng 	if (ft == NULL) {
8258275SEric Cheng 		/*
8268275SEric Cheng 		 * The flow hasn't yet been inserted into the table,
8278275SEric Cheng 		 * so only the caller knows about this flow, however for
8288275SEric Cheng 		 * uniformity we grab the fe_lock here.
8298275SEric Cheng 		 */
8308275SEric Cheng 		mutex_enter(&flent->fe_lock);
8318275SEric Cheng 		bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
8328275SEric Cheng 		mutex_exit(&flent->fe_lock);
8338275SEric Cheng 	}
8348275SEric Cheng 
8358275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
8368275SEric Cheng 
8378275SEric Cheng 	/*
8388275SEric Cheng 	 * Need to remove the flow entry from the table and reinsert it,
8398275SEric Cheng 	 * into a potentially diference hash line. The hash depends on
8408275SEric Cheng 	 * the new descriptor fields. However access to fe_desc itself
8418275SEric Cheng 	 * is always under the fe_lock. This helps log and stat functions
8428275SEric Cheng 	 * see a self-consistent fe_flow_desc.
8438275SEric Cheng 	 */
8448275SEric Cheng 	mac_flow_remove(ft, flent, B_TRUE);
8458275SEric Cheng 	old_desc = flent->fe_flow_desc;
8468275SEric Cheng 
8478275SEric Cheng 	mutex_enter(&flent->fe_lock);
8488275SEric Cheng 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
8498275SEric Cheng 	mutex_exit(&flent->fe_lock);
8508275SEric Cheng 
8518275SEric Cheng 	if (mac_flow_add(ft, flent) != 0) {
8528275SEric Cheng 		/*
8538275SEric Cheng 		 * The add failed say due to an invalid flow descriptor.
8548275SEric Cheng 		 * Undo the update
8558275SEric Cheng 		 */
8568275SEric Cheng 		flent->fe_flow_desc = old_desc;
8578275SEric Cheng 		err = mac_flow_add(ft, flent);
8588275SEric Cheng 		ASSERT(err == 0);
8598275SEric Cheng 	}
8608275SEric Cheng }
8618275SEric Cheng 
8628275SEric Cheng void
8638275SEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name)
8648275SEric Cheng {
8658275SEric Cheng 	flow_tab_t	*ft = flent->fe_flow_tab;
8668275SEric Cheng 
8678275SEric Cheng 	if (ft == NULL) {
8688275SEric Cheng 		/*
8698275SEric Cheng 		 *  The flow hasn't yet been inserted into the table,
8708275SEric Cheng 		 * so only the caller knows about this flow
8718275SEric Cheng 		 */
8728558SGirish.Moodalbail@Sun.COM 		(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
8738275SEric Cheng 	} else {
8748275SEric Cheng 		ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
8758275SEric Cheng 	}
8768275SEric Cheng 
8778275SEric Cheng 	mutex_enter(&flent->fe_lock);
8788558SGirish.Moodalbail@Sun.COM 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
8798275SEric Cheng 	mutex_exit(&flent->fe_lock);
8808275SEric Cheng }
8818275SEric Cheng 
8828275SEric Cheng /*
8838275SEric Cheng  * Return the client-private cookie that was associated with
8848275SEric Cheng  * the flow when it was created.
8858275SEric Cheng  */
8868275SEric Cheng void *
8878275SEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent)
8888275SEric Cheng {
8898275SEric Cheng 	return (flent->fe_client_cookie);
8908275SEric Cheng }
8918275SEric Cheng 
8928275SEric Cheng /*
8938275SEric Cheng  * Forward declarations.
8948275SEric Cheng  */
8958275SEric Cheng static uint32_t	flow_l2_hash(flow_tab_t *, flow_state_t *);
89610616SSebastien.Roy@Sun.COM static uint32_t	flow_l2_hash_fe(flow_tab_t *, flow_entry_t *);
8978275SEric Cheng static int	flow_l2_accept(flow_tab_t *, flow_state_t *);
8988275SEric Cheng static uint32_t	flow_ether_hash(flow_tab_t *, flow_state_t *);
89910616SSebastien.Roy@Sun.COM static uint32_t	flow_ether_hash_fe(flow_tab_t *, flow_entry_t *);
9008275SEric Cheng static int	flow_ether_accept(flow_tab_t *, flow_state_t *);
9018275SEric Cheng 
9028275SEric Cheng /*
9038275SEric Cheng  * Create flow table.
9048275SEric Cheng  */
9058275SEric Cheng void
9068275SEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size,
9078275SEric Cheng     mac_impl_t *mip, flow_tab_t **ftp)
9088275SEric Cheng {
9098275SEric Cheng 	flow_tab_t	*ft;
9108275SEric Cheng 	flow_ops_t	*new_ops;
9118275SEric Cheng 
9128275SEric Cheng 	ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP);
9138275SEric Cheng 	bzero(ft, sizeof (*ft));
9148275SEric Cheng 
9158275SEric Cheng 	ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP);
9168275SEric Cheng 
9178275SEric Cheng 	/*
9188275SEric Cheng 	 * We make a copy of the ops vector instead of just pointing to it
9198275SEric Cheng 	 * because we might want to customize the ops vector on a per table
9208275SEric Cheng 	 * basis (e.g. for optimization).
9218275SEric Cheng 	 */
9228275SEric Cheng 	new_ops = &ft->ft_ops;
9238275SEric Cheng 	bcopy(ops, new_ops, sizeof (*ops));
9248275SEric Cheng 	ft->ft_mask = mask;
9258275SEric Cheng 	ft->ft_size = size;
9268275SEric Cheng 	ft->ft_mip = mip;
9278275SEric Cheng 
9288275SEric Cheng 	/*
92910616SSebastien.Roy@Sun.COM 	 * Optimizations for DL_ETHER media.
9308275SEric Cheng 	 */
9318275SEric Cheng 	if (mip->mi_info.mi_nativemedia == DL_ETHER) {
9328275SEric Cheng 		if (new_ops->fo_hash == flow_l2_hash)
9338275SEric Cheng 			new_ops->fo_hash = flow_ether_hash;
93410616SSebastien.Roy@Sun.COM 		if (new_ops->fo_hash_fe == flow_l2_hash_fe)
93510616SSebastien.Roy@Sun.COM 			new_ops->fo_hash_fe = flow_ether_hash_fe;
9368275SEric Cheng 		if (new_ops->fo_accept[0] == flow_l2_accept)
9378275SEric Cheng 			new_ops->fo_accept[0] = flow_ether_accept;
9388275SEric Cheng 	}
9398275SEric Cheng 	*ftp = ft;
9408275SEric Cheng }
9418275SEric Cheng 
9428275SEric Cheng void
9438275SEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp)
9448275SEric Cheng {
9458275SEric Cheng 	mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID,
9468275SEric Cheng 	    1024, mip, ftp);
9478275SEric Cheng }
9488275SEric Cheng 
9498275SEric Cheng /*
9508275SEric Cheng  * Destroy flow table.
9518275SEric Cheng  */
9528275SEric Cheng void
9538275SEric Cheng mac_flow_tab_destroy(flow_tab_t *ft)
9548275SEric Cheng {
9558275SEric Cheng 	if (ft == NULL)
9568275SEric Cheng 		return;
9578275SEric Cheng 
9588275SEric Cheng 	ASSERT(ft->ft_flow_count == 0);
9598275SEric Cheng 	kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *));
9608275SEric Cheng 	bzero(ft, sizeof (*ft));
9618275SEric Cheng 	kmem_cache_free(flow_tab_cache, ft);
9628275SEric Cheng }
9638275SEric Cheng 
9648275SEric Cheng /*
9658275SEric Cheng  * Add a new flow entry to the global flow hash table
9668275SEric Cheng  */
9678275SEric Cheng int
9688275SEric Cheng mac_flow_hash_add(flow_entry_t *flent)
9698275SEric Cheng {
9708275SEric Cheng 	int	err;
9718275SEric Cheng 
9728275SEric Cheng 	rw_enter(&flow_tab_lock, RW_WRITER);
9738275SEric Cheng 	err = mod_hash_insert(flow_hash,
9748275SEric Cheng 	    (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent);
9758275SEric Cheng 	if (err != 0) {
9768275SEric Cheng 		rw_exit(&flow_tab_lock);
9778275SEric Cheng 		return (EEXIST);
9788275SEric Cheng 	}
9798275SEric Cheng 	/* Mark as inserted into the global flow hash table */
9808275SEric Cheng 	FLOW_MARK(flent, FE_G_FLOW_HASH);
9818275SEric Cheng 	rw_exit(&flow_tab_lock);
9828275SEric Cheng 	return (err);
9838275SEric Cheng }
9848275SEric Cheng 
9858275SEric Cheng /*
9868275SEric Cheng  * Remove a flow entry from the global flow hash table
9878275SEric Cheng  */
9888275SEric Cheng void
9898275SEric Cheng mac_flow_hash_remove(flow_entry_t *flent)
9908275SEric Cheng {
9918275SEric Cheng 	mod_hash_val_t	val;
9928275SEric Cheng 
9938275SEric Cheng 	rw_enter(&flow_tab_lock, RW_WRITER);
9948275SEric Cheng 	VERIFY(mod_hash_remove(flow_hash,
9958275SEric Cheng 	    (mod_hash_key_t)flent->fe_flow_name, &val) == 0);
9968275SEric Cheng 
9978275SEric Cheng 	/* Clear the mark that says inserted into the global flow hash table */
9988275SEric Cheng 	FLOW_UNMARK(flent, FE_G_FLOW_HASH);
9998275SEric Cheng 	rw_exit(&flow_tab_lock);
10008275SEric Cheng }
10018275SEric Cheng 
10028275SEric Cheng /*
10038275SEric Cheng  * Retrieve a flow entry from the global flow hash table.
10048275SEric Cheng  */
10058275SEric Cheng int
10068275SEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp)
10078275SEric Cheng {
10088275SEric Cheng 	int		err;
10098275SEric Cheng 	flow_entry_t	*flent;
10108275SEric Cheng 
10118275SEric Cheng 	rw_enter(&flow_tab_lock, RW_READER);
10128275SEric Cheng 	err = mod_hash_find(flow_hash, (mod_hash_key_t)name,
10138275SEric Cheng 	    (mod_hash_val_t *)&flent);
10148275SEric Cheng 	if (err != 0) {
10158275SEric Cheng 		rw_exit(&flow_tab_lock);
10168275SEric Cheng 		return (ENOENT);
10178275SEric Cheng 	}
10188275SEric Cheng 	ASSERT(flent != NULL);
10198275SEric Cheng 	FLOW_USER_REFHOLD(flent);
10208275SEric Cheng 	rw_exit(&flow_tab_lock);
10218275SEric Cheng 
10228275SEric Cheng 	*flentp = flent;
10238275SEric Cheng 	return (0);
10248275SEric Cheng }
10258275SEric Cheng 
10268275SEric Cheng /*
10278275SEric Cheng  * Initialize or release mac client flows by walking the subflow table.
10288275SEric Cheng  * These are typically invoked during plumb/unplumb of links.
10298275SEric Cheng  */
10308275SEric Cheng 
10318275SEric Cheng static int
10328275SEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg)
10338275SEric Cheng {
10348275SEric Cheng 	mac_client_impl_t	*mcip = arg;
10358275SEric Cheng 
10368275SEric Cheng 	if (mac_link_flow_init(arg, flent) != 0) {
10378275SEric Cheng 		cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'",
10388275SEric Cheng 		    flent->fe_flow_name, mcip->mci_name);
10398275SEric Cheng 	} else {
10408275SEric Cheng 		FLOW_UNMARK(flent, FE_UF_NO_DATAPATH);
10418275SEric Cheng 	}
10428275SEric Cheng 	return (0);
10438275SEric Cheng }
10448275SEric Cheng 
10458275SEric Cheng void
10468275SEric Cheng mac_link_init_flows(mac_client_handle_t mch)
10478275SEric Cheng {
10488275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
10498275SEric Cheng 
10508275SEric Cheng 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
10518275SEric Cheng 	    mac_link_init_flows_cb, mcip);
10528275SEric Cheng 	/*
10538275SEric Cheng 	 * If mac client had subflow(s) configured before plumb, change
10548275SEric Cheng 	 * function to mac_rx_srs_subflow_process and in case of hardware
10558275SEric Cheng 	 * classification, disable polling.
10568275SEric Cheng 	 */
10578275SEric Cheng 	mac_client_update_classifier(mcip, B_TRUE);
10588275SEric Cheng 
10598275SEric Cheng }
10608275SEric Cheng 
10618275SEric Cheng boolean_t
10628275SEric Cheng mac_link_has_flows(mac_client_handle_t mch)
10638275SEric Cheng {
10648275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
10658275SEric Cheng 
10668275SEric Cheng 	if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
10678275SEric Cheng 		return (B_TRUE);
10688275SEric Cheng 
10698275SEric Cheng 	return (B_FALSE);
10708275SEric Cheng }
10718275SEric Cheng 
10728275SEric Cheng static int
10738275SEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg)
10748275SEric Cheng {
10758275SEric Cheng 	FLOW_MARK(flent, FE_UF_NO_DATAPATH);
10768275SEric Cheng 	mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
10778275SEric Cheng 	mac_link_flow_clean(arg, flent);
10788275SEric Cheng 	return (0);
10798275SEric Cheng }
10808275SEric Cheng 
10818275SEric Cheng void
10828275SEric Cheng mac_link_release_flows(mac_client_handle_t mch)
10838275SEric Cheng {
10848275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
10858275SEric Cheng 
10868275SEric Cheng 	/*
10878275SEric Cheng 	 * Change the mci_flent callback back to mac_rx_srs_process()
10888275SEric Cheng 	 * because flows are about to be deactivated.
10898275SEric Cheng 	 */
10908275SEric Cheng 	mac_client_update_classifier(mcip, B_FALSE);
10918275SEric Cheng 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
10928275SEric Cheng 	    mac_link_release_flows_cb, mcip);
10938275SEric Cheng }
10948275SEric Cheng 
10958275SEric Cheng void
10968275SEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name)
10978275SEric Cheng {
10988275SEric Cheng 	mac_flow_set_name(fep, new_name);
10998275SEric Cheng 	if (fep->fe_ksp != NULL) {
11008275SEric Cheng 		flow_stat_destroy(fep);
11018275SEric Cheng 		flow_stat_create(fep);
11028275SEric Cheng 	}
11038275SEric Cheng }
11048275SEric Cheng 
11058275SEric Cheng /*
11068275SEric Cheng  * mac_link_flow_init()
11078275SEric Cheng  * Internal flow interface used for allocating SRSs and related
11088275SEric Cheng  * data structures. Not meant to be used by mac clients.
11098275SEric Cheng  */
11108275SEric Cheng int
11118275SEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow)
11128275SEric Cheng {
11138275SEric Cheng 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
11148275SEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
11158275SEric Cheng 	int			err;
11168275SEric Cheng 
11178275SEric Cheng 	ASSERT(mch != NULL);
11188275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
11198275SEric Cheng 
11208275SEric Cheng 	if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0)
11218275SEric Cheng 		return (err);
11228275SEric Cheng 
11238275SEric Cheng 	sub_flow->fe_mcip = mcip;
11248275SEric Cheng 
11258275SEric Cheng 	return (0);
11268275SEric Cheng }
11278275SEric Cheng 
11288275SEric Cheng /*
11298275SEric Cheng  * mac_link_flow_add()
11308275SEric Cheng  * Used by flowadm(1m) or kernel mac clients for creating flows.
11318275SEric Cheng  */
11328275SEric Cheng int
11338275SEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name,
11348275SEric Cheng     flow_desc_t *flow_desc, mac_resource_props_t *mrp)
11358275SEric Cheng {
11368275SEric Cheng 	flow_entry_t		*flent = NULL;
11378275SEric Cheng 	int			err;
11388275SEric Cheng 	dls_dl_handle_t		dlh;
11398275SEric Cheng 	dls_link_t		*dlp;
11408275SEric Cheng 	boolean_t		link_held = B_FALSE;
11418275SEric Cheng 	boolean_t		hash_added = B_FALSE;
11428275SEric Cheng 	mac_perim_handle_t	mph;
11438275SEric Cheng 
11448275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
11458275SEric Cheng 	if (err == 0) {
11468275SEric Cheng 		FLOW_USER_REFRELE(flent);
11478275SEric Cheng 		return (EEXIST);
11488275SEric Cheng 	}
11498275SEric Cheng 
11508275SEric Cheng 	/*
11518275SEric Cheng 	 * First create a flow entry given the description provided
11528275SEric Cheng 	 * by the caller.
11538275SEric Cheng 	 */
11548275SEric Cheng 	err = mac_flow_create(flow_desc, mrp, flow_name, NULL,
11558275SEric Cheng 	    FLOW_USER | FLOW_OTHER, &flent);
11568275SEric Cheng 
11578275SEric Cheng 	if (err != 0)
11588275SEric Cheng 		return (err);
11598275SEric Cheng 
11608275SEric Cheng 	/*
11618275SEric Cheng 	 * We've got a local variable referencing this flow now, so we need
11628275SEric Cheng 	 * to hold it. We'll release this flow before returning.
11638275SEric Cheng 	 * All failures until we return will undo any action that may internally
11648275SEric Cheng 	 * held the flow, so the last REFRELE will assure a clean freeing
11658275SEric Cheng 	 * of resources.
11668275SEric Cheng 	 */
11678275SEric Cheng 	FLOW_REFHOLD(flent);
11688275SEric Cheng 
11698275SEric Cheng 	flent->fe_link_id = linkid;
11708275SEric Cheng 	FLOW_MARK(flent, FE_INCIPIENT);
11718275SEric Cheng 
11728275SEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
11738275SEric Cheng 	if (err != 0) {
11748275SEric Cheng 		FLOW_FINAL_REFRELE(flent);
11758275SEric Cheng 		return (err);
11768275SEric Cheng 	}
11778275SEric Cheng 
11788275SEric Cheng 	/*
11798275SEric Cheng 	 * dls will eventually be merged with mac so it's ok
11808275SEric Cheng 	 * to call dls' internal functions.
11818275SEric Cheng 	 */
11828275SEric Cheng 	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
11838275SEric Cheng 	if (err != 0)
11848275SEric Cheng 		goto bail;
11858275SEric Cheng 
11868275SEric Cheng 	link_held = B_TRUE;
11878275SEric Cheng 
11888275SEric Cheng 	/*
11898275SEric Cheng 	 * Add the flow to the global flow table, this table will be per
11908275SEric Cheng 	 * exclusive zone so each zone can have its own flow namespace.
11918275SEric Cheng 	 * RFE 6625651 will fix this.
11928275SEric Cheng 	 *
11938275SEric Cheng 	 */
11948275SEric Cheng 	if ((err = mac_flow_hash_add(flent)) != 0)
11958275SEric Cheng 		goto bail;
11968275SEric Cheng 
11978275SEric Cheng 	hash_added = B_TRUE;
11988275SEric Cheng 
11998275SEric Cheng 	/*
12008275SEric Cheng 	 * do not allow flows to be configured on an anchor VNIC
12018275SEric Cheng 	 */
12028275SEric Cheng 	if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
12038275SEric Cheng 		err = ENOTSUP;
12048275SEric Cheng 		goto bail;
12058275SEric Cheng 	}
12068275SEric Cheng 
12078275SEric Cheng 	/*
12088275SEric Cheng 	 * Add the subflow to the subflow table. Also instantiate the flow
12098833SVenu.Iyer@Sun.COM 	 * in the mac if there is an active user (we check if the MAC client's
12108833SVenu.Iyer@Sun.COM 	 * datapath has been setup).
12118275SEric Cheng 	 */
12128833SVenu.Iyer@Sun.COM 	err = mac_flow_add_subflow(dlp->dl_mch, flent,
12138833SVenu.Iyer@Sun.COM 	    MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch));
12148275SEric Cheng 	if (err != 0)
12158275SEric Cheng 		goto bail;
12168275SEric Cheng 
12178275SEric Cheng 	FLOW_UNMARK(flent, FE_INCIPIENT);
12188275SEric Cheng 	dls_devnet_rele_link(dlh, dlp);
12198275SEric Cheng 	mac_perim_exit(mph);
12208275SEric Cheng 	return (0);
12218275SEric Cheng 
12228275SEric Cheng bail:
12238275SEric Cheng 	if (hash_added)
12248275SEric Cheng 		mac_flow_hash_remove(flent);
12258275SEric Cheng 
12268275SEric Cheng 	if (link_held)
12278275SEric Cheng 		dls_devnet_rele_link(dlh, dlp);
12288275SEric Cheng 
12298275SEric Cheng 	/*
12308275SEric Cheng 	 * Wait for any transient global flow hash refs to clear
12318275SEric Cheng 	 * and then release the creation reference on the flow
12328275SEric Cheng 	 */
12338275SEric Cheng 	mac_flow_wait(flent, FLOW_USER_REF);
12348275SEric Cheng 	FLOW_FINAL_REFRELE(flent);
12358275SEric Cheng 	mac_perim_exit(mph);
12368275SEric Cheng 	return (err);
12378275SEric Cheng }
12388275SEric Cheng 
12398275SEric Cheng /*
12408275SEric Cheng  * mac_link_flow_clean()
12418275SEric Cheng  * Internal flow interface used for freeing SRSs and related
12428275SEric Cheng  * data structures. Not meant to be used by mac clients.
12438275SEric Cheng  */
12448275SEric Cheng void
12458275SEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow)
12468275SEric Cheng {
12478275SEric Cheng 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
12488275SEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
12498275SEric Cheng 	boolean_t		last_subflow;
12508275SEric Cheng 
12518275SEric Cheng 	ASSERT(mch != NULL);
12528275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
12538275SEric Cheng 
12548275SEric Cheng 	/*
12558275SEric Cheng 	 * This sub flow entry may fail to be fully initialized by
12568275SEric Cheng 	 * mac_link_flow_init(). If so, simply return.
12578275SEric Cheng 	 */
12588275SEric Cheng 	if (sub_flow->fe_mcip == NULL)
12598275SEric Cheng 		return;
12608275SEric Cheng 
12618275SEric Cheng 	last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab);
12628275SEric Cheng 	/*
12638275SEric Cheng 	 * Tear down the data path
12648275SEric Cheng 	 */
12658275SEric Cheng 	mac_datapath_teardown(mcip, sub_flow, SRST_FLOW);
12668275SEric Cheng 	sub_flow->fe_mcip = NULL;
12678275SEric Cheng 
12688275SEric Cheng 	/*
12698275SEric Cheng 	 * Delete the SRSs associated with this subflow. If this is being
12708275SEric Cheng 	 * driven by flowadm(1M) then the subflow will be deleted by
12718275SEric Cheng 	 * dls_rem_flow. However if this is a result of the interface being
12728275SEric Cheng 	 * unplumbed then the subflow itself won't be deleted.
12738275SEric Cheng 	 */
12748275SEric Cheng 	mac_flow_cleanup(sub_flow);
12758275SEric Cheng 
12768275SEric Cheng 	/*
12778275SEric Cheng 	 * If all the subflows are gone, renable some of the stuff
12788275SEric Cheng 	 * we disabled when adding a subflow, polling etc.
12798275SEric Cheng 	 */
12808275SEric Cheng 	if (last_subflow) {
12818275SEric Cheng 		/*
12828275SEric Cheng 		 * The subflow table itself is not protected by any locks or
12838275SEric Cheng 		 * refcnts. Hence quiesce the client upfront before clearing
12848275SEric Cheng 		 * mci_subflow_tab.
12858275SEric Cheng 		 */
12868275SEric Cheng 		mac_client_quiesce(mcip);
12878275SEric Cheng 		mac_client_update_classifier(mcip, B_FALSE);
12888275SEric Cheng 		mac_flow_tab_destroy(mcip->mci_subflow_tab);
12898275SEric Cheng 		mcip->mci_subflow_tab = NULL;
12908275SEric Cheng 		mac_client_restart(mcip);
12918275SEric Cheng 	}
12928275SEric Cheng }
12938275SEric Cheng 
12948275SEric Cheng /*
12958275SEric Cheng  * mac_link_flow_remove()
12968275SEric Cheng  * Used by flowadm(1m) or kernel mac clients for removing flows.
12978275SEric Cheng  */
12988275SEric Cheng int
12998275SEric Cheng mac_link_flow_remove(char *flow_name)
13008275SEric Cheng {
13018275SEric Cheng 	flow_entry_t		*flent;
13028275SEric Cheng 	mac_perim_handle_t	mph;
13038275SEric Cheng 	int			err;
13048275SEric Cheng 	datalink_id_t		linkid;
13058275SEric Cheng 
13068275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
13078275SEric Cheng 	if (err != 0)
13088275SEric Cheng 		return (err);
13098275SEric Cheng 
13108275SEric Cheng 	linkid = flent->fe_link_id;
13118275SEric Cheng 	FLOW_USER_REFRELE(flent);
13128275SEric Cheng 
13138275SEric Cheng 	/*
13148275SEric Cheng 	 * The perim must be acquired before acquiring any other references
13158275SEric Cheng 	 * to maintain the lock and perimeter hierarchy. Please note the
13168275SEric Cheng 	 * FLOW_REFRELE above.
13178275SEric Cheng 	 */
13188275SEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
13198275SEric Cheng 	if (err != 0)
13208275SEric Cheng 		return (err);
13218275SEric Cheng 
13228275SEric Cheng 	/*
13238275SEric Cheng 	 * Note the second lookup of the flow, because a concurrent thread
13248275SEric Cheng 	 * may have removed it already while we were waiting to enter the
13258275SEric Cheng 	 * link's perimeter.
13268275SEric Cheng 	 */
13278275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
13288275SEric Cheng 	if (err != 0) {
13298275SEric Cheng 		mac_perim_exit(mph);
13308275SEric Cheng 		return (err);
13318275SEric Cheng 	}
13328275SEric Cheng 	FLOW_USER_REFRELE(flent);
13338275SEric Cheng 
13348275SEric Cheng 	/*
13358275SEric Cheng 	 * Remove the flow from the subflow table and deactivate the flow
13368275SEric Cheng 	 * by quiescing and removings its SRSs
13378275SEric Cheng 	 */
13388275SEric Cheng 	mac_flow_rem_subflow(flent);
13398275SEric Cheng 
13408275SEric Cheng 	/*
13418275SEric Cheng 	 * Finally, remove the flow from the global table.
13428275SEric Cheng 	 */
13438275SEric Cheng 	mac_flow_hash_remove(flent);
13448275SEric Cheng 
13458275SEric Cheng 	/*
13468275SEric Cheng 	 * Wait for any transient global flow hash refs to clear
13478275SEric Cheng 	 * and then release the creation reference on the flow
13488275SEric Cheng 	 */
13498275SEric Cheng 	mac_flow_wait(flent, FLOW_USER_REF);
13508275SEric Cheng 	FLOW_FINAL_REFRELE(flent);
13518275SEric Cheng 
13528275SEric Cheng 	mac_perim_exit(mph);
13538275SEric Cheng 
13548275SEric Cheng 	return (0);
13558275SEric Cheng }
13568275SEric Cheng 
13578275SEric Cheng /*
13588275SEric Cheng  * mac_link_flow_modify()
13598275SEric Cheng  * Modifies the properties of a flow identified by its name.
13608275SEric Cheng  */
13618275SEric Cheng int
13628275SEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp)
13638275SEric Cheng {
13648275SEric Cheng 	flow_entry_t		*flent;
13658275SEric Cheng 	mac_client_impl_t 	*mcip;
13668275SEric Cheng 	int			err = 0;
13678275SEric Cheng 	mac_perim_handle_t	mph;
13688275SEric Cheng 	datalink_id_t		linkid;
13698275SEric Cheng 	flow_tab_t		*flow_tab;
13708275SEric Cheng 
13718275SEric Cheng 	err = mac_validate_props(mrp);
13728275SEric Cheng 	if (err != 0)
13738275SEric Cheng 		return (err);
13748275SEric Cheng 
13758275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
13768275SEric Cheng 	if (err != 0)
13778275SEric Cheng 		return (err);
13788275SEric Cheng 
13798275SEric Cheng 	linkid = flent->fe_link_id;
13808275SEric Cheng 	FLOW_USER_REFRELE(flent);
13818275SEric Cheng 
13828275SEric Cheng 	/*
13838275SEric Cheng 	 * The perim must be acquired before acquiring any other references
13848275SEric Cheng 	 * to maintain the lock and perimeter hierarchy. Please note the
13858275SEric Cheng 	 * FLOW_REFRELE above.
13868275SEric Cheng 	 */
13878275SEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
13888275SEric Cheng 	if (err != 0)
13898275SEric Cheng 		return (err);
13908275SEric Cheng 
13918275SEric Cheng 	/*
13928275SEric Cheng 	 * Note the second lookup of the flow, because a concurrent thread
13938275SEric Cheng 	 * may have removed it already while we were waiting to enter the
13948275SEric Cheng 	 * link's perimeter.
13958275SEric Cheng 	 */
13968275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
13978275SEric Cheng 	if (err != 0) {
13988275SEric Cheng 		mac_perim_exit(mph);
13998275SEric Cheng 		return (err);
14008275SEric Cheng 	}
14018275SEric Cheng 	FLOW_USER_REFRELE(flent);
14028275SEric Cheng 
14038275SEric Cheng 	/*
14048275SEric Cheng 	 * If this flow is attached to a MAC client, then pass the request
14058275SEric Cheng 	 * along to the client.
14068275SEric Cheng 	 * Otherwise, just update the cached values.
14078275SEric Cheng 	 */
14088275SEric Cheng 	mcip = flent->fe_mcip;
14098275SEric Cheng 	mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE);
14108275SEric Cheng 	if (mcip != NULL) {
14118275SEric Cheng 		if ((flow_tab = mcip->mci_subflow_tab) == NULL) {
14128275SEric Cheng 			err = ENOENT;
14138275SEric Cheng 		} else {
14148275SEric Cheng 			mac_flow_modify(flow_tab, flent, mrp);
14158275SEric Cheng 		}
14168275SEric Cheng 	} else {
14178275SEric Cheng 		(void) mac_flow_modify_props(flent, mrp);
14188275SEric Cheng 	}
14198275SEric Cheng 
14208275SEric Cheng done:
14218275SEric Cheng 	mac_perim_exit(mph);
14228275SEric Cheng 	return (err);
14238275SEric Cheng }
14248275SEric Cheng 
14258275SEric Cheng 
14268275SEric Cheng /*
14278275SEric Cheng  * State structure and misc functions used by mac_link_flow_walk().
14288275SEric Cheng  */
14298275SEric Cheng typedef struct {
14308275SEric Cheng 	int	(*ws_func)(mac_flowinfo_t *, void *);
14318275SEric Cheng 	void	*ws_arg;
14328275SEric Cheng } flow_walk_state_t;
14338275SEric Cheng 
14348275SEric Cheng static void
14358275SEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent)
14368275SEric Cheng {
14378558SGirish.Moodalbail@Sun.COM 	(void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name,
14388558SGirish.Moodalbail@Sun.COM 	    MAXFLOWNAMELEN);
14398275SEric Cheng 	finfop->fi_link_id = flent->fe_link_id;
14408275SEric Cheng 	finfop->fi_flow_desc = flent->fe_flow_desc;
14418275SEric Cheng 	finfop->fi_resource_props = flent->fe_resource_props;
14428275SEric Cheng }
14438275SEric Cheng 
14448275SEric Cheng static int
14458275SEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg)
14468275SEric Cheng {
14478275SEric Cheng 	flow_walk_state_t	*statep = arg;
14488275SEric Cheng 	mac_flowinfo_t		finfo;
14498275SEric Cheng 
14508275SEric Cheng 	mac_link_flowinfo_copy(&finfo, flent);
14518275SEric Cheng 	return (statep->ws_func(&finfo, statep->ws_arg));
14528275SEric Cheng }
14538275SEric Cheng 
14548275SEric Cheng /*
14558275SEric Cheng  * mac_link_flow_walk()
14568275SEric Cheng  * Invokes callback 'func' for all flows belonging to the specified link.
14578275SEric Cheng  */
14588275SEric Cheng int
14598275SEric Cheng mac_link_flow_walk(datalink_id_t linkid,
14608275SEric Cheng     int (*func)(mac_flowinfo_t *, void *), void *arg)
14618275SEric Cheng {
14628275SEric Cheng 	mac_client_impl_t	*mcip;
14638275SEric Cheng 	mac_perim_handle_t	mph;
14648275SEric Cheng 	flow_walk_state_t	state;
14658275SEric Cheng 	dls_dl_handle_t		dlh;
14668275SEric Cheng 	dls_link_t		*dlp;
14678275SEric Cheng 	int			err;
14688275SEric Cheng 
14698275SEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
14708275SEric Cheng 	if (err != 0)
14718275SEric Cheng 		return (err);
14728275SEric Cheng 
14738275SEric Cheng 	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
14748275SEric Cheng 	if (err != 0) {
14758275SEric Cheng 		mac_perim_exit(mph);
14768275SEric Cheng 		return (err);
14778275SEric Cheng 	}
14788275SEric Cheng 
14798275SEric Cheng 	mcip = (mac_client_impl_t *)dlp->dl_mch;
14808275SEric Cheng 	state.ws_func = func;
14818275SEric Cheng 	state.ws_arg = arg;
14828275SEric Cheng 
14838275SEric Cheng 	err = mac_flow_walk_nolock(mcip->mci_subflow_tab,
14848275SEric Cheng 	    mac_link_flow_walk_cb, &state);
14858275SEric Cheng 
14868275SEric Cheng 	dls_devnet_rele_link(dlh, dlp);
14878275SEric Cheng 	mac_perim_exit(mph);
14888275SEric Cheng 	return (err);
14898275SEric Cheng }
14908275SEric Cheng 
14918275SEric Cheng /*
14928275SEric Cheng  * mac_link_flow_info()
14938275SEric Cheng  * Retrieves information about a specific flow.
14948275SEric Cheng  */
14958275SEric Cheng int
14968275SEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo)
14978275SEric Cheng {
14988275SEric Cheng 	flow_entry_t	*flent;
14998275SEric Cheng 	int		err;
15008275SEric Cheng 
15018275SEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
15028275SEric Cheng 	if (err != 0)
15038275SEric Cheng 		return (err);
15048275SEric Cheng 
15058275SEric Cheng 	mac_link_flowinfo_copy(finfo, flent);
15068275SEric Cheng 	FLOW_USER_REFRELE(flent);
15078275SEric Cheng 	return (0);
15088275SEric Cheng }
15098275SEric Cheng 
/*
 * Hash macro for Ethernet flows.  'a' is a 6-byte Ethernet address, 'v' a
 * VLAN id and 's' the hash table size.  The last three address bytes are
 * summed as a uint32_t, XORed with the VLAN id and reduced modulo the
 * table size.
 */
#define	HASH_ETHER_VID(a, v, s)	\
	(((v) ^ ((uint32_t)(a)[3] + (a)[4] + (a)[5])) % (s))
15158275SEric Cheng 
/*
 * Generic layer-2 address hash (the DJB hash).  Starting from 5381, each
 * of the 'addrlen' bytes at 'addr' is folded in as hash = hash * 33 + byte
 * using 32-bit unsigned arithmetic.  The result is reduced modulo
 * 'htsize', which therefore must be non-zero.
 */
static uint32_t
flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize)
{
	uint8_t		*bytep = addr;
	size_t		remaining = addrlen;
	uint32_t	h = 5381;

	while (remaining-- != 0)
		h = h * 33 + *bytep++;

	return (h % htsize);
}
153010616SSebastien.Roy@Sun.COM 
/*
 * True when 'end' lies beyond the write pointer of the mblk currently
 * referenced by flow state 's', i.e. the data up to 'end' is not all
 * present in that mblk.
 */
#define	PKT_TOO_SMALL(s, end) ((end) > (s)->fs_mp->b_wptr)
15328275SEric Cheng 
/*
 * If 'start' sits exactly at the write pointer of the current mblk of
 * flow state 's', advance the state to the continuation mblk and reset
 * 'start' to that mblk's read pointer.
 *
 * NOTE: this macro contains a hidden "return (EINVAL)" that fires when
 * there is no continuation mblk, so it may only be used inside functions
 * that return an int error code.
 */
#define	CHECK_AND_ADJUST_START_PTR(s, start) {		\
	if ((start) == (s)->fs_mp->b_wptr) {		\
		mblk_t	*cont = (s)->fs_mp->b_cont;	\
							\
		if (cont == NULL)			\
			return (EINVAL);		\
		(s)->fs_mp = cont;			\
		(start) = cont->b_rptr;			\
	}						\
}
15438833SVenu.Iyer@Sun.COM 
15448275SEric Cheng /* ARGSUSED */
15458275SEric Cheng static boolean_t
15468275SEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
15478275SEric Cheng {
15488275SEric Cheng 	flow_l2info_t		*l2 = &s->fs_l2info;
15498275SEric Cheng 	flow_desc_t		*fd = &flent->fe_flow_desc;
15508275SEric Cheng 
15518275SEric Cheng 	return (l2->l2_vid == fd->fd_vid &&
15528275SEric Cheng 	    bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0);
15538275SEric Cheng }
15548275SEric Cheng 
15558275SEric Cheng /*
15568275SEric Cheng  * Layer 2 hash function.
15578275SEric Cheng  * Must be paired with flow_l2_accept() within a set of flow_ops
15588275SEric Cheng  * because it assumes the dest address is already extracted.
15598275SEric Cheng  */
15608275SEric Cheng static uint32_t
15618275SEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s)
15628275SEric Cheng {
156310616SSebastien.Roy@Sun.COM 	return (flow_l2_addrhash(s->fs_l2info.l2_daddr,
156410616SSebastien.Roy@Sun.COM 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
15658275SEric Cheng }
15668275SEric Cheng 
15678275SEric Cheng /*
15688275SEric Cheng  * This is the generic layer 2 accept function.
15698275SEric Cheng  * It makes use of mac_header_info() to extract the header length,
15708275SEric Cheng  * sap, vlan ID and destination address.
15718275SEric Cheng  */
15728275SEric Cheng static int
15738275SEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s)
15748275SEric Cheng {
15758275SEric Cheng 	boolean_t		is_ether;
15768275SEric Cheng 	flow_l2info_t		*l2 = &s->fs_l2info;
15778275SEric Cheng 	mac_header_info_t	mhi;
15788275SEric Cheng 	int			err;
15798275SEric Cheng 
15808275SEric Cheng 	is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER);
15818275SEric Cheng 	if ((err = mac_header_info((mac_handle_t)ft->ft_mip,
15828275SEric Cheng 	    s->fs_mp, &mhi)) != 0) {
15838275SEric Cheng 		if (err == EINVAL)
15848275SEric Cheng 			err = ENOBUFS;
15858275SEric Cheng 
15868275SEric Cheng 		return (err);
15878275SEric Cheng 	}
15888275SEric Cheng 
15898275SEric Cheng 	l2->l2_start = s->fs_mp->b_rptr;
15908275SEric Cheng 	l2->l2_daddr = (uint8_t *)mhi.mhi_daddr;
15918275SEric Cheng 
15928275SEric Cheng 	if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN &&
15938275SEric Cheng 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
15948275SEric Cheng 		struct ether_vlan_header	*evhp =
15958275SEric Cheng 		    (struct ether_vlan_header *)l2->l2_start;
15968275SEric Cheng 
15978275SEric Cheng 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
15988275SEric Cheng 			return (ENOBUFS);
15998275SEric Cheng 
16008275SEric Cheng 		l2->l2_sap = ntohs(evhp->ether_type);
16018275SEric Cheng 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
16028275SEric Cheng 		l2->l2_hdrsize = sizeof (*evhp);
16038275SEric Cheng 	} else {
16048275SEric Cheng 		l2->l2_sap = mhi.mhi_bindsap;
16058275SEric Cheng 		l2->l2_vid = 0;
16068275SEric Cheng 		l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize;
16078275SEric Cheng 	}
16088275SEric Cheng 	return (0);
16098275SEric Cheng }
16108275SEric Cheng 
16118275SEric Cheng /*
16128275SEric Cheng  * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
16138275SEric Cheng  * accept(). The notable difference is that dest address is now extracted
16148275SEric Cheng  * by hash() rather than by accept(). This saves a few memory references
16158275SEric Cheng  * for flow tables that do not care about mac addresses.
16168275SEric Cheng  */
16178275SEric Cheng static uint32_t
16188275SEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s)
16198275SEric Cheng {
16208275SEric Cheng 	flow_l2info_t			*l2 = &s->fs_l2info;
16218275SEric Cheng 	struct ether_vlan_header	*evhp;
16228275SEric Cheng 
16238275SEric Cheng 	evhp = (struct ether_vlan_header *)l2->l2_start;
16248275SEric Cheng 	l2->l2_daddr = evhp->ether_dhost.ether_addr_octet;
162510616SSebastien.Roy@Sun.COM 	return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size));
162610616SSebastien.Roy@Sun.COM }
162710616SSebastien.Roy@Sun.COM 
162810616SSebastien.Roy@Sun.COM static uint32_t
162910616SSebastien.Roy@Sun.COM flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
163010616SSebastien.Roy@Sun.COM {
163110616SSebastien.Roy@Sun.COM 	flow_desc_t	*fd = &flent->fe_flow_desc;
163210616SSebastien.Roy@Sun.COM 
163310616SSebastien.Roy@Sun.COM 	ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0);
163410616SSebastien.Roy@Sun.COM 	return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size));
16358275SEric Cheng }
16368275SEric Cheng 
16378275SEric Cheng /* ARGSUSED */
16388275SEric Cheng static int
16398275SEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s)
16408275SEric Cheng {
16418275SEric Cheng 	flow_l2info_t			*l2 = &s->fs_l2info;
16428275SEric Cheng 	struct ether_vlan_header	*evhp;
16438275SEric Cheng 	uint16_t			sap;
16448275SEric Cheng 
16458275SEric Cheng 	evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr;
16468275SEric Cheng 	l2->l2_start = (uchar_t *)evhp;
16478275SEric Cheng 
16488275SEric Cheng 	if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header)))
16498275SEric Cheng 		return (ENOBUFS);
16508275SEric Cheng 
16518275SEric Cheng 	if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN &&
16528275SEric Cheng 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
16538275SEric Cheng 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
16548275SEric Cheng 			return (ENOBUFS);
16558275SEric Cheng 
16568275SEric Cheng 		l2->l2_sap = ntohs(evhp->ether_type);
16578275SEric Cheng 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
16588275SEric Cheng 		l2->l2_hdrsize = sizeof (struct ether_vlan_header);
16598275SEric Cheng 	} else {
16608275SEric Cheng 		l2->l2_sap = sap;
16618275SEric Cheng 		l2->l2_vid = 0;
16628275SEric Cheng 		l2->l2_hdrsize = sizeof (struct ether_header);
16638275SEric Cheng 	}
16648275SEric Cheng 	return (0);
16658275SEric Cheng }
16668275SEric Cheng 
16678275SEric Cheng /*
16688275SEric Cheng  * Validates a layer 2 flow entry.
16698275SEric Cheng  */
16708275SEric Cheng static int
16718275SEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
16728275SEric Cheng {
16738275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
16748275SEric Cheng 
16758275SEric Cheng 	/*
167610616SSebastien.Roy@Sun.COM 	 * Dest address is mandatory, and 0 length addresses are not yet
167710616SSebastien.Roy@Sun.COM 	 * supported.
16788275SEric Cheng 	 */
167910616SSebastien.Roy@Sun.COM 	if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0)
16808275SEric Cheng 		return (EINVAL);
16818275SEric Cheng 
16828275SEric Cheng 	if ((fd->fd_mask & FLOW_LINK_VID) != 0) {
16838275SEric Cheng 		/*
16848275SEric Cheng 		 * VLAN flows are only supported over ethernet macs.
16858275SEric Cheng 		 */
16868275SEric Cheng 		if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER)
16878275SEric Cheng 			return (EINVAL);
16888275SEric Cheng 
16898275SEric Cheng 		if (fd->fd_vid == 0)
16908275SEric Cheng 			return (EINVAL);
16918275SEric Cheng 
16928275SEric Cheng 	}
16938275SEric Cheng 	flent->fe_match = flow_l2_match;
16948275SEric Cheng 	return (0);
16958275SEric Cheng }
16968275SEric Cheng 
16978275SEric Cheng /*
16988275SEric Cheng  * Calculates hash index of flow entry.
16998275SEric Cheng  */
17008275SEric Cheng static uint32_t
17018275SEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
17028275SEric Cheng {
17038275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
17048275SEric Cheng 
170510616SSebastien.Roy@Sun.COM 	ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0);
170610616SSebastien.Roy@Sun.COM 	return (flow_l2_addrhash(fd->fd_dst_mac,
170710616SSebastien.Roy@Sun.COM 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
17088275SEric Cheng }
17098275SEric Cheng 
17108275SEric Cheng /*
17118275SEric Cheng  * This is used for duplicate flow checking.
17128275SEric Cheng  */
17138275SEric Cheng /* ARGSUSED */
17148275SEric Cheng static boolean_t
17158275SEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
17168275SEric Cheng {
17178275SEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
17188275SEric Cheng 
17198275SEric Cheng 	ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0);
17208275SEric Cheng 	return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac,
17218275SEric Cheng 	    fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid);
17228275SEric Cheng }
17238275SEric Cheng 
17248275SEric Cheng /*
17258275SEric Cheng  * Generic flow entry insertion function.
17268275SEric Cheng  * Used by flow tables that do not have ordering requirements.
17278275SEric Cheng  */
17288275SEric Cheng /* ARGSUSED */
17298275SEric Cheng static int
17308275SEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
17318275SEric Cheng     flow_entry_t *flent)
17328275SEric Cheng {
17338275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
17348275SEric Cheng 
17358275SEric Cheng 	if (*headp != NULL) {
17368275SEric Cheng 		ASSERT(flent->fe_next == NULL);
17378275SEric Cheng 		flent->fe_next = *headp;
17388275SEric Cheng 	}
17398275SEric Cheng 	*headp = flent;
17408275SEric Cheng 	return (0);
17418275SEric Cheng }
17428275SEric Cheng 
17438275SEric Cheng /*
17448275SEric Cheng  * IP version independent DSField matching function.
17458275SEric Cheng  */
17468275SEric Cheng /* ARGSUSED */
17478275SEric Cheng static boolean_t
17488275SEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
17498275SEric Cheng {
17508275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
17518275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
17528275SEric Cheng 
17538275SEric Cheng 	switch (l3info->l3_version) {
17548275SEric Cheng 	case IPV4_VERSION: {
17558275SEric Cheng 		ipha_t		*ipha = (ipha_t *)l3info->l3_start;
17568275SEric Cheng 
17578275SEric Cheng 		return ((ipha->ipha_type_of_service &
17588275SEric Cheng 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
17598275SEric Cheng 	}
17608275SEric Cheng 	case IPV6_VERSION: {
17618275SEric Cheng 		ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
17628275SEric Cheng 
17638275SEric Cheng 		return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) &
17648275SEric Cheng 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
17658275SEric Cheng 	}
17668275SEric Cheng 	default:
17678275SEric Cheng 		return (B_FALSE);
17688275SEric Cheng 	}
17698275SEric Cheng }
17708275SEric Cheng 
17718275SEric Cheng /*
17728275SEric Cheng  * IP v4 and v6 address matching.
17738275SEric Cheng  * The netmask only needs to be applied on the packet but not on the
17748275SEric Cheng  * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
17758275SEric Cheng  */
17768275SEric Cheng 
17778275SEric Cheng /* ARGSUSED */
17788275SEric Cheng static boolean_t
17798275SEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
17808275SEric Cheng {
17818275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
17828275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
17838275SEric Cheng 	ipha_t		*ipha = (ipha_t *)l3info->l3_start;
17848275SEric Cheng 	in_addr_t	addr;
17858275SEric Cheng 
17868275SEric Cheng 	addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src);
17878275SEric Cheng 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
17888275SEric Cheng 		return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) ==
17898275SEric Cheng 		    V4_PART_OF_V6(fd->fd_local_addr));
17908275SEric Cheng 	}
17918275SEric Cheng 	return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) ==
17928275SEric Cheng 	    V4_PART_OF_V6(fd->fd_remote_addr));
17938275SEric Cheng }
17948275SEric Cheng 
17958275SEric Cheng /* ARGSUSED */
17968275SEric Cheng static boolean_t
17978275SEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
17988275SEric Cheng {
17998275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
18008275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
18018275SEric Cheng 	ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
18028275SEric Cheng 	in6_addr_t	*addrp;
18038275SEric Cheng 
18048275SEric Cheng 	addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src);
18058275SEric Cheng 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
18068275SEric Cheng 		return (V6_MASK_EQ(*addrp, fd->fd_local_netmask,
18078275SEric Cheng 		    fd->fd_local_addr));
18088275SEric Cheng 	}
18098275SEric Cheng 	return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr));
18108275SEric Cheng }
18118275SEric Cheng 
18128275SEric Cheng /* ARGSUSED */
18138275SEric Cheng static boolean_t
18148275SEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
18158275SEric Cheng {
18168275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
18178275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
18188275SEric Cheng 
18198275SEric Cheng 	return (l3info->l3_protocol == fd->fd_protocol);
18208275SEric Cheng }
18218275SEric Cheng 
18228275SEric Cheng static uint32_t
18238275SEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s)
18248275SEric Cheng {
18258275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
18268275SEric Cheng 	flow_mask_t	mask = ft->ft_mask;
18278275SEric Cheng 
18288275SEric Cheng 	if ((mask & FLOW_IP_LOCAL) != 0) {
18298275SEric Cheng 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
18308275SEric Cheng 	} else if ((mask & FLOW_IP_REMOTE) != 0) {
18318275SEric Cheng 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
18328275SEric Cheng 	} else if ((mask & FLOW_IP_DSFIELD) != 0) {
18338275SEric Cheng 		/*
18348275SEric Cheng 		 * DSField flents are arranged as a single list.
18358275SEric Cheng 		 */
18368275SEric Cheng 		return (0);
18378275SEric Cheng 	}
18388275SEric Cheng 	/*
18398275SEric Cheng 	 * IP addr flents are hashed into two lists, v4 or v6.
18408275SEric Cheng 	 */
18418275SEric Cheng 	ASSERT(ft->ft_size >= 2);
18428275SEric Cheng 	return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1);
18438275SEric Cheng }
18448275SEric Cheng 
18458275SEric Cheng static uint32_t
18468275SEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s)
18478275SEric Cheng {
18488275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
18498275SEric Cheng 
18508275SEric Cheng 	return (l3info->l3_protocol % ft->ft_size);
18518275SEric Cheng }
18528275SEric Cheng 
18538275SEric Cheng /* ARGSUSED */
18548275SEric Cheng static int
18558275SEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s)
18568275SEric Cheng {
18578275SEric Cheng 	flow_l2info_t	*l2info = &s->fs_l2info;
18588275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
18598275SEric Cheng 	uint16_t	sap = l2info->l2_sap;
18608275SEric Cheng 	uchar_t		*l3_start;
18618275SEric Cheng 
18628833SVenu.Iyer@Sun.COM 	l3_start = l2info->l2_start + l2info->l2_hdrsize;
18638833SVenu.Iyer@Sun.COM 
18648833SVenu.Iyer@Sun.COM 	/*
18658833SVenu.Iyer@Sun.COM 	 * Adjust start pointer if we're at the end of an mblk.
18668833SVenu.Iyer@Sun.COM 	 */
18678833SVenu.Iyer@Sun.COM 	CHECK_AND_ADJUST_START_PTR(s, l3_start);
18688833SVenu.Iyer@Sun.COM 
18698833SVenu.Iyer@Sun.COM 	l3info->l3_start = l3_start;
18708275SEric Cheng 	if (!OK_32PTR(l3_start))
18718275SEric Cheng 		return (EINVAL);
18728275SEric Cheng 
18738275SEric Cheng 	switch (sap) {
18748275SEric Cheng 	case ETHERTYPE_IP: {
18758275SEric Cheng 		ipha_t	*ipha = (ipha_t *)l3_start;
18768275SEric Cheng 
18778275SEric Cheng 		if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH))
18788275SEric Cheng 			return (ENOBUFS);
18798275SEric Cheng 
18808275SEric Cheng 		l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha);
18818275SEric Cheng 		l3info->l3_protocol = ipha->ipha_protocol;
18828275SEric Cheng 		l3info->l3_version = IPV4_VERSION;
18838275SEric Cheng 		l3info->l3_fragmented =
18848275SEric Cheng 		    IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags);
18858275SEric Cheng 		break;
18868275SEric Cheng 	}
18878275SEric Cheng 	case ETHERTYPE_IPV6: {
18888275SEric Cheng 		ip6_t   *ip6h = (ip6_t *)l3_start;
18898275SEric Cheng 		uint16_t ip6_hdrlen;
18908275SEric Cheng 		uint8_t	 nexthdr;
18918275SEric Cheng 
18928275SEric Cheng 		if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen,
1893*11528SBaban.Kenkre@Sun.COM 		    &nexthdr, NULL, NULL)) {
18948275SEric Cheng 			return (ENOBUFS);
18958275SEric Cheng 		}
18968275SEric Cheng 		l3info->l3_hdrsize = ip6_hdrlen;
18978275SEric Cheng 		l3info->l3_protocol = nexthdr;
18988275SEric Cheng 		l3info->l3_version = IPV6_VERSION;
18998275SEric Cheng 		l3info->l3_fragmented = B_FALSE;
19008275SEric Cheng 		break;
19018275SEric Cheng 	}
19028275SEric Cheng 	default:
19038275SEric Cheng 		return (EINVAL);
19048275SEric Cheng 	}
19058275SEric Cheng 	return (0);
19068275SEric Cheng }
19078275SEric Cheng 
19088275SEric Cheng /* ARGSUSED */
19098275SEric Cheng static int
19108275SEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
19118275SEric Cheng {
19128275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
19138275SEric Cheng 
19148275SEric Cheng 	switch (fd->fd_protocol) {
19158275SEric Cheng 	case IPPROTO_TCP:
19168275SEric Cheng 	case IPPROTO_UDP:
19178275SEric Cheng 	case IPPROTO_SCTP:
19188275SEric Cheng 	case IPPROTO_ICMP:
19198275SEric Cheng 	case IPPROTO_ICMPV6:
19208275SEric Cheng 		flent->fe_match = flow_ip_proto_match;
19218275SEric Cheng 		return (0);
19228275SEric Cheng 	default:
19238275SEric Cheng 		return (EINVAL);
19248275SEric Cheng 	}
19258275SEric Cheng }
19268275SEric Cheng 
19278275SEric Cheng /* ARGSUSED */
19288275SEric Cheng static int
19298275SEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
19308275SEric Cheng {
19318275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
19328275SEric Cheng 	flow_mask_t	mask;
19338275SEric Cheng 	uint8_t		version;
19348275SEric Cheng 	in6_addr_t	*addr, *netmask;
19358275SEric Cheng 
19368275SEric Cheng 	/*
19378275SEric Cheng 	 * DSField does not require a IP version.
19388275SEric Cheng 	 */
19398275SEric Cheng 	if (fd->fd_mask == FLOW_IP_DSFIELD) {
19408275SEric Cheng 		if (fd->fd_dsfield_mask == 0)
19418275SEric Cheng 			return (EINVAL);
19428275SEric Cheng 
19438275SEric Cheng 		flent->fe_match = flow_ip_dsfield_match;
19448275SEric Cheng 		return (0);
19458275SEric Cheng 	}
19468275SEric Cheng 
19478275SEric Cheng 	/*
19488275SEric Cheng 	 * IP addresses must come with a version to avoid ambiguity.
19498275SEric Cheng 	 */
19508275SEric Cheng 	if ((fd->fd_mask & FLOW_IP_VERSION) == 0)
19518275SEric Cheng 		return (EINVAL);
19528275SEric Cheng 
19538275SEric Cheng 	version = fd->fd_ipversion;
19548275SEric Cheng 	if (version != IPV4_VERSION && version != IPV6_VERSION)
19558275SEric Cheng 		return (EINVAL);
19568275SEric Cheng 
19578275SEric Cheng 	mask = fd->fd_mask & ~FLOW_IP_VERSION;
19588275SEric Cheng 	switch (mask) {
19598275SEric Cheng 	case FLOW_IP_LOCAL:
19608275SEric Cheng 		addr = &fd->fd_local_addr;
19618275SEric Cheng 		netmask = &fd->fd_local_netmask;
19628275SEric Cheng 		break;
19638275SEric Cheng 	case FLOW_IP_REMOTE:
19648275SEric Cheng 		addr = &fd->fd_remote_addr;
19658275SEric Cheng 		netmask = &fd->fd_remote_netmask;
19668275SEric Cheng 		break;
19678275SEric Cheng 	default:
19688275SEric Cheng 		return (EINVAL);
19698275SEric Cheng 	}
19708275SEric Cheng 
19718275SEric Cheng 	/*
19728275SEric Cheng 	 * Apply netmask onto specified address.
19738275SEric Cheng 	 */
19748275SEric Cheng 	V6_MASK_COPY(*addr, *netmask, *addr);
19758275SEric Cheng 	if (version == IPV4_VERSION) {
19768275SEric Cheng 		ipaddr_t	v4addr = V4_PART_OF_V6((*addr));
19778275SEric Cheng 		ipaddr_t	v4mask = V4_PART_OF_V6((*netmask));
19788275SEric Cheng 
19798275SEric Cheng 		if (v4addr == 0 || v4mask == 0)
19808275SEric Cheng 			return (EINVAL);
19818275SEric Cheng 		flent->fe_match = flow_ip_v4_match;
19828275SEric Cheng 	} else {
19838275SEric Cheng 		if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
19848275SEric Cheng 		    IN6_IS_ADDR_UNSPECIFIED(netmask))
19858275SEric Cheng 			return (EINVAL);
19868275SEric Cheng 		flent->fe_match = flow_ip_v6_match;
19878275SEric Cheng 	}
19888275SEric Cheng 	return (0);
19898275SEric Cheng }
19908275SEric Cheng 
19918275SEric Cheng static uint32_t
19928275SEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
19938275SEric Cheng {
19948275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
19958275SEric Cheng 
19968275SEric Cheng 	return (fd->fd_protocol % ft->ft_size);
19978275SEric Cheng }
19988275SEric Cheng 
19998275SEric Cheng static uint32_t
20008275SEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
20018275SEric Cheng {
20028275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
20038275SEric Cheng 
20048275SEric Cheng 	/*
20058275SEric Cheng 	 * DSField flents are arranged as a single list.
20068275SEric Cheng 	 */
20078275SEric Cheng 	if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
20088275SEric Cheng 		return (0);
20098275SEric Cheng 
20108275SEric Cheng 	/*
20118275SEric Cheng 	 * IP addr flents are hashed into two lists, v4 or v6.
20128275SEric Cheng 	 */
20138275SEric Cheng 	ASSERT(ft->ft_size >= 2);
20148275SEric Cheng 	return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1);
20158275SEric Cheng }
20168275SEric Cheng 
20178275SEric Cheng /* ARGSUSED */
20188275SEric Cheng static boolean_t
20198275SEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
20208275SEric Cheng {
20218275SEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
20228275SEric Cheng 
20238275SEric Cheng 	return (fd1->fd_protocol == fd2->fd_protocol);
20248275SEric Cheng }
20258275SEric Cheng 
20268275SEric Cheng /* ARGSUSED */
20278275SEric Cheng static boolean_t
20288275SEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
20298275SEric Cheng {
20308275SEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
20318275SEric Cheng 	in6_addr_t	*a1, *m1, *a2, *m2;
20328275SEric Cheng 
20338275SEric Cheng 	ASSERT(fd1->fd_mask == fd2->fd_mask);
20348275SEric Cheng 	if (fd1->fd_mask == FLOW_IP_DSFIELD) {
20358275SEric Cheng 		return (fd1->fd_dsfield == fd2->fd_dsfield &&
20368275SEric Cheng 		    fd1->fd_dsfield_mask == fd2->fd_dsfield_mask);
20378275SEric Cheng 	}
20388275SEric Cheng 
20398275SEric Cheng 	/*
20408275SEric Cheng 	 * flow_ip_accept_fe() already validated the version.
20418275SEric Cheng 	 */
20428275SEric Cheng 	ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0);
20438275SEric Cheng 	if (fd1->fd_ipversion != fd2->fd_ipversion)
20448275SEric Cheng 		return (B_FALSE);
20458275SEric Cheng 
20468275SEric Cheng 	switch (fd1->fd_mask & ~FLOW_IP_VERSION) {
20478275SEric Cheng 	case FLOW_IP_LOCAL:
20488275SEric Cheng 		a1 = &fd1->fd_local_addr;
20498275SEric Cheng 		m1 = &fd1->fd_local_netmask;
20508275SEric Cheng 		a2 = &fd2->fd_local_addr;
20518275SEric Cheng 		m2 = &fd2->fd_local_netmask;
20528275SEric Cheng 		break;
20538275SEric Cheng 	case FLOW_IP_REMOTE:
20548275SEric Cheng 		a1 = &fd1->fd_remote_addr;
20558275SEric Cheng 		m1 = &fd1->fd_remote_netmask;
20568275SEric Cheng 		a2 = &fd2->fd_remote_addr;
20578275SEric Cheng 		m2 = &fd2->fd_remote_netmask;
20588275SEric Cheng 		break;
20598275SEric Cheng 	default:
20608275SEric Cheng 		/*
20618275SEric Cheng 		 * This is unreachable given the checks in
20628275SEric Cheng 		 * flow_ip_accept_fe().
20638275SEric Cheng 		 */
20648275SEric Cheng 		return (B_FALSE);
20658275SEric Cheng 	}
20668275SEric Cheng 
20678275SEric Cheng 	if (fd1->fd_ipversion == IPV4_VERSION) {
20688275SEric Cheng 		return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) &&
20698275SEric Cheng 		    V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2)));
20708275SEric Cheng 
20718275SEric Cheng 	} else {
20728275SEric Cheng 		return (IN6_ARE_ADDR_EQUAL(a1, a2) &&
20738275SEric Cheng 		    IN6_ARE_ADDR_EQUAL(m1, m2));
20748275SEric Cheng 	}
20758275SEric Cheng }
20768275SEric Cheng 
20778275SEric Cheng static int
20788275SEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask)
20798275SEric Cheng {
20808275SEric Cheng 	int		bits;
20818275SEric Cheng 	int		plen = IPV6_ABITS;
20828275SEric Cheng 	int		i;
20838275SEric Cheng 
20848275SEric Cheng 	for (i = 3; i >= 0; i--) {
20858275SEric Cheng 		if (v6mask->s6_addr32[i] == 0) {
20868275SEric Cheng 			plen -= 32;
20878275SEric Cheng 			continue;
20888275SEric Cheng 		}
20898275SEric Cheng 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
20908275SEric Cheng 		if (bits == 0)
20918275SEric Cheng 			break;
20928275SEric Cheng 		plen -= bits;
20938275SEric Cheng 	}
20948275SEric Cheng 	return (plen);
20958275SEric Cheng }
20968275SEric Cheng 
20978275SEric Cheng /* ARGSUSED */
20988275SEric Cheng static int
20998275SEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
21008275SEric Cheng     flow_entry_t *flent)
21018275SEric Cheng {
21028275SEric Cheng 	flow_entry_t	**p = headp;
21038275SEric Cheng 	flow_desc_t	*fd0, *fd;
21048275SEric Cheng 	in6_addr_t	*m0, *m;
21058275SEric Cheng 	int		plen0, plen;
21068275SEric Cheng 
21078275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
21088275SEric Cheng 
21098275SEric Cheng 	/*
21108275SEric Cheng 	 * No special ordering needed for dsfield.
21118275SEric Cheng 	 */
21128275SEric Cheng 	fd0 = &flent->fe_flow_desc;
21138275SEric Cheng 	if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) {
21148275SEric Cheng 		if (*p != NULL) {
21158275SEric Cheng 			ASSERT(flent->fe_next == NULL);
21168275SEric Cheng 			flent->fe_next = *p;
21178275SEric Cheng 		}
21188275SEric Cheng 		*p = flent;
21198275SEric Cheng 		return (0);
21208275SEric Cheng 	}
21218275SEric Cheng 
21228275SEric Cheng 	/*
21238275SEric Cheng 	 * IP address flows are arranged in descending prefix length order.
21248275SEric Cheng 	 */
21258275SEric Cheng 	m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ?
21268275SEric Cheng 	    &fd0->fd_local_netmask : &fd0->fd_remote_netmask;
21278275SEric Cheng 	plen0 = flow_ip_mask2plen(m0);
21288275SEric Cheng 	ASSERT(plen0 != 0);
21298275SEric Cheng 
21308275SEric Cheng 	for (; *p != NULL; p = &(*p)->fe_next) {
21318275SEric Cheng 		fd = &(*p)->fe_flow_desc;
21328275SEric Cheng 
21338275SEric Cheng 		/*
21348275SEric Cheng 		 * Normally a dsfield flent shouldn't end up on the same
21358275SEric Cheng 		 * list as an IP address because flow tables are (for now)
21368275SEric Cheng 		 * disjoint. If we decide to support both IP and dsfield
21378275SEric Cheng 		 * in the same table in the future, this check will allow
21388275SEric Cheng 		 * for that.
21398275SEric Cheng 		 */
21408275SEric Cheng 		if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
21418275SEric Cheng 			continue;
21428275SEric Cheng 
21438275SEric Cheng 		/*
21448275SEric Cheng 		 * We also allow for the mixing of local and remote address
21458275SEric Cheng 		 * flents within one list.
21468275SEric Cheng 		 */
21478275SEric Cheng 		m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ?
21488275SEric Cheng 		    &fd->fd_local_netmask : &fd->fd_remote_netmask;
21498275SEric Cheng 		plen = flow_ip_mask2plen(m);
21508275SEric Cheng 
21518275SEric Cheng 		if (plen <= plen0)
21528275SEric Cheng 			break;
21538275SEric Cheng 	}
21548275SEric Cheng 	if (*p != NULL) {
21558275SEric Cheng 		ASSERT(flent->fe_next == NULL);
21568275SEric Cheng 		flent->fe_next = *p;
21578275SEric Cheng 	}
21588275SEric Cheng 	*p = flent;
21598275SEric Cheng 	return (0);
21608275SEric Cheng }
21618275SEric Cheng 
21628275SEric Cheng /*
21638275SEric Cheng  * Transport layer protocol and port matching functions.
21648275SEric Cheng  */
21658275SEric Cheng 
21668275SEric Cheng /* ARGSUSED */
21678275SEric Cheng static boolean_t
21688275SEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
21698275SEric Cheng {
21708275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
21718275SEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
21728275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
21738275SEric Cheng 
21748275SEric Cheng 	return (fd->fd_protocol == l3info->l3_protocol &&
21758275SEric Cheng 	    fd->fd_local_port == l4info->l4_hash_port);
21768275SEric Cheng }
21778275SEric Cheng 
21788275SEric Cheng /* ARGSUSED */
21798275SEric Cheng static boolean_t
21808275SEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
21818275SEric Cheng {
21828275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
21838275SEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
21848275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
21858275SEric Cheng 
21868275SEric Cheng 	return (fd->fd_protocol == l3info->l3_protocol &&
21878275SEric Cheng 	    fd->fd_remote_port == l4info->l4_hash_port);
21888275SEric Cheng }
21898275SEric Cheng 
21908275SEric Cheng /*
21918275SEric Cheng  * Transport hash function.
21928275SEric Cheng  * Since we only support either local or remote port flows,
21938275SEric Cheng  * we only need to extract one of the ports to be used for
21948275SEric Cheng  * matching.
21958275SEric Cheng  */
21968275SEric Cheng static uint32_t
21978275SEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s)
21988275SEric Cheng {
21998275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
22008275SEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
22018275SEric Cheng 	uint8_t		proto = l3info->l3_protocol;
22028275SEric Cheng 	boolean_t	dst_or_src;
22038275SEric Cheng 
22048275SEric Cheng 	if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) {
22058275SEric Cheng 		dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
22068275SEric Cheng 	} else {
22078275SEric Cheng 		dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
22088275SEric Cheng 	}
22098275SEric Cheng 
22108275SEric Cheng 	l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port :
22118275SEric Cheng 	    l4info->l4_src_port;
22128275SEric Cheng 
22138275SEric Cheng 	return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size);
22148275SEric Cheng }
22158275SEric Cheng 
22168275SEric Cheng /*
22178275SEric Cheng  * Unlike other accept() functions above, we do not need to get the header
22188275SEric Cheng  * size because this is our highest layer so far. If we want to do support
22198275SEric Cheng  * other higher layer protocols, we would need to save the l4_hdrsize
22208275SEric Cheng  * in the code below.
22218275SEric Cheng  */
22228275SEric Cheng 
22238275SEric Cheng /* ARGSUSED */
22248275SEric Cheng static int
22258275SEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s)
22268275SEric Cheng {
22278275SEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
22288275SEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
22298275SEric Cheng 	uint8_t		proto = l3info->l3_protocol;
22308275SEric Cheng 	uchar_t		*l4_start;
22318275SEric Cheng 
22328833SVenu.Iyer@Sun.COM 	l4_start = l3info->l3_start + l3info->l3_hdrsize;
22338833SVenu.Iyer@Sun.COM 
22348833SVenu.Iyer@Sun.COM 	/*
22358833SVenu.Iyer@Sun.COM 	 * Adjust start pointer if we're at the end of an mblk.
22368833SVenu.Iyer@Sun.COM 	 */
22378833SVenu.Iyer@Sun.COM 	CHECK_AND_ADJUST_START_PTR(s, l4_start);
22388833SVenu.Iyer@Sun.COM 
22398833SVenu.Iyer@Sun.COM 	l4info->l4_start = l4_start;
22408275SEric Cheng 	if (!OK_32PTR(l4_start))
22418275SEric Cheng 		return (EINVAL);
22428275SEric Cheng 
22438275SEric Cheng 	if (l3info->l3_fragmented == B_TRUE)
22448275SEric Cheng 		return (EINVAL);
22458275SEric Cheng 
22468275SEric Cheng 	switch (proto) {
22478275SEric Cheng 	case IPPROTO_TCP: {
22488275SEric Cheng 		struct tcphdr	*tcph = (struct tcphdr *)l4_start;
22498275SEric Cheng 
22508275SEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph)))
22518275SEric Cheng 			return (ENOBUFS);
22528275SEric Cheng 
22538275SEric Cheng 		l4info->l4_src_port = tcph->th_sport;
22548275SEric Cheng 		l4info->l4_dst_port = tcph->th_dport;
22558275SEric Cheng 		break;
22568275SEric Cheng 	}
22578275SEric Cheng 	case IPPROTO_UDP: {
22588275SEric Cheng 		struct udphdr	*udph = (struct udphdr *)l4_start;
22598275SEric Cheng 
22608275SEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph)))
22618275SEric Cheng 			return (ENOBUFS);
22628275SEric Cheng 
22638275SEric Cheng 		l4info->l4_src_port = udph->uh_sport;
22648275SEric Cheng 		l4info->l4_dst_port = udph->uh_dport;
22658275SEric Cheng 		break;
22668275SEric Cheng 	}
22678275SEric Cheng 	case IPPROTO_SCTP: {
22688275SEric Cheng 		sctp_hdr_t	*sctph = (sctp_hdr_t *)l4_start;
22698275SEric Cheng 
22708275SEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph)))
22718275SEric Cheng 			return (ENOBUFS);
22728275SEric Cheng 
22738275SEric Cheng 		l4info->l4_src_port = sctph->sh_sport;
22748275SEric Cheng 		l4info->l4_dst_port = sctph->sh_dport;
22758275SEric Cheng 		break;
22768275SEric Cheng 	}
22778275SEric Cheng 	default:
22788275SEric Cheng 		return (EINVAL);
22798275SEric Cheng 	}
22808275SEric Cheng 
22818275SEric Cheng 	return (0);
22828275SEric Cheng }
22838275SEric Cheng 
22848275SEric Cheng /*
22858275SEric Cheng  * Validates transport flow entry.
22868275SEric Cheng  * The protocol field must be present.
22878275SEric Cheng  */
22888275SEric Cheng 
22898275SEric Cheng /* ARGSUSED */
22908275SEric Cheng static int
22918275SEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
22928275SEric Cheng {
22938275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
22948275SEric Cheng 	flow_mask_t	mask = fd->fd_mask;
22958275SEric Cheng 
22968275SEric Cheng 	if ((mask & FLOW_IP_PROTOCOL) == 0)
22978275SEric Cheng 		return (EINVAL);
22988275SEric Cheng 
22998275SEric Cheng 	switch (fd->fd_protocol) {
23008275SEric Cheng 	case IPPROTO_TCP:
23018275SEric Cheng 	case IPPROTO_UDP:
23028275SEric Cheng 	case IPPROTO_SCTP:
23038275SEric Cheng 		break;
23048275SEric Cheng 	default:
23058275SEric Cheng 		return (EINVAL);
23068275SEric Cheng 	}
23078275SEric Cheng 
23088275SEric Cheng 	switch (mask & ~FLOW_IP_PROTOCOL) {
23098275SEric Cheng 	case FLOW_ULP_PORT_LOCAL:
23108275SEric Cheng 		if (fd->fd_local_port == 0)
23118275SEric Cheng 			return (EINVAL);
23128275SEric Cheng 
23138275SEric Cheng 		flent->fe_match = flow_transport_lport_match;
23148275SEric Cheng 		break;
23158275SEric Cheng 	case FLOW_ULP_PORT_REMOTE:
23168275SEric Cheng 		if (fd->fd_remote_port == 0)
23178275SEric Cheng 			return (EINVAL);
23188275SEric Cheng 
23198275SEric Cheng 		flent->fe_match = flow_transport_rport_match;
23208275SEric Cheng 		break;
23218275SEric Cheng 	case 0:
23228275SEric Cheng 		/*
23238275SEric Cheng 		 * transport-only flows conflicts with our table type.
23248275SEric Cheng 		 */
23258275SEric Cheng 		return (EOPNOTSUPP);
23268275SEric Cheng 	default:
23278275SEric Cheng 		return (EINVAL);
23288275SEric Cheng 	}
23298275SEric Cheng 
23308275SEric Cheng 	return (0);
23318275SEric Cheng }
23328275SEric Cheng 
23338275SEric Cheng static uint32_t
23348275SEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
23358275SEric Cheng {
23368275SEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
23378275SEric Cheng 	uint16_t	port = 0;
23388275SEric Cheng 
23398275SEric Cheng 	port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ?
23408275SEric Cheng 	    fd->fd_local_port : fd->fd_remote_port;
23418275SEric Cheng 
23428275SEric Cheng 	return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size);
23438275SEric Cheng }
23448275SEric Cheng 
23458275SEric Cheng /* ARGSUSED */
23468275SEric Cheng static boolean_t
23478275SEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
23488275SEric Cheng {
23498275SEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
23508275SEric Cheng 
23518275SEric Cheng 	if (fd1->fd_protocol != fd2->fd_protocol)
23528275SEric Cheng 		return (B_FALSE);
23538275SEric Cheng 
23548275SEric Cheng 	if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0)
23558275SEric Cheng 		return (fd1->fd_local_port == fd2->fd_local_port);
23568275SEric Cheng 
235710734SEric Cheng 	if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0)
235810734SEric Cheng 		return (fd1->fd_remote_port == fd2->fd_remote_port);
235910734SEric Cheng 
236010734SEric Cheng 	return (B_TRUE);
23618275SEric Cheng }
23628275SEric Cheng 
23638275SEric Cheng static flow_ops_t flow_l2_ops = {
23648275SEric Cheng 	flow_l2_accept_fe,
23658275SEric Cheng 	flow_l2_hash_fe,
23668275SEric Cheng 	flow_l2_match_fe,
23678275SEric Cheng 	flow_generic_insert_fe,
23688275SEric Cheng 	flow_l2_hash,
23698275SEric Cheng 	{flow_l2_accept}
23708275SEric Cheng };
23718275SEric Cheng 
23728275SEric Cheng static flow_ops_t flow_ip_ops = {
23738275SEric Cheng 	flow_ip_accept_fe,
23748275SEric Cheng 	flow_ip_hash_fe,
23758275SEric Cheng 	flow_ip_match_fe,
23768275SEric Cheng 	flow_ip_insert_fe,
23778275SEric Cheng 	flow_ip_hash,
23788275SEric Cheng 	{flow_l2_accept, flow_ip_accept}
23798275SEric Cheng };
23808275SEric Cheng 
23818275SEric Cheng static flow_ops_t flow_ip_proto_ops = {
23828275SEric Cheng 	flow_ip_proto_accept_fe,
23838275SEric Cheng 	flow_ip_proto_hash_fe,
23848275SEric Cheng 	flow_ip_proto_match_fe,
23858275SEric Cheng 	flow_generic_insert_fe,
23868275SEric Cheng 	flow_ip_proto_hash,
23878275SEric Cheng 	{flow_l2_accept, flow_ip_accept}
23888275SEric Cheng };
23898275SEric Cheng 
23908275SEric Cheng static flow_ops_t flow_transport_ops = {
23918275SEric Cheng 	flow_transport_accept_fe,
23928275SEric Cheng 	flow_transport_hash_fe,
23938275SEric Cheng 	flow_transport_match_fe,
23948275SEric Cheng 	flow_generic_insert_fe,
23958275SEric Cheng 	flow_transport_hash,
23968275SEric Cheng 	{flow_l2_accept, flow_ip_accept, flow_transport_accept}
23978275SEric Cheng };
23988275SEric Cheng 
23998275SEric Cheng static flow_tab_info_t flow_tab_info_list[] = {
24008275SEric Cheng 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2},
24018275SEric Cheng 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2},
24028275SEric Cheng 	{&flow_ip_ops, FLOW_IP_DSFIELD, 1},
24038275SEric Cheng 	{&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256},
240410734SEric Cheng 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024},
240510734SEric Cheng 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024}
24068275SEric Cheng };
24078275SEric Cheng 
/* Number of entries in flow_tab_info_list[]. */
#define	FLOW_MAX_TAB_INFO \
	(sizeof (flow_tab_info_list) / sizeof (flow_tab_info_list[0]))
24108275SEric Cheng 
24118275SEric Cheng static flow_tab_info_t *
24128275SEric Cheng mac_flow_tab_info_get(flow_mask_t mask)
24138275SEric Cheng {
24148275SEric Cheng 	int	i;
24158275SEric Cheng 
24168275SEric Cheng 	for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
24178275SEric Cheng 		if (mask == flow_tab_info_list[i].fti_mask)
24188275SEric Cheng 			return (&flow_tab_info_list[i]);
24198275SEric Cheng 	}
24208275SEric Cheng 	return (NULL);
24218275SEric Cheng }
2422