/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
258275SEric Cheng */ 268275SEric Cheng 278275SEric Cheng #include <sys/strsun.h> 288275SEric Cheng #include <sys/sdt.h> 298275SEric Cheng #include <sys/mac.h> 308275SEric Cheng #include <sys/mac_impl.h> 318275SEric Cheng #include <sys/mac_client_impl.h> 328275SEric Cheng #include <sys/dls.h> 338275SEric Cheng #include <sys/dls_impl.h> 348275SEric Cheng #include <sys/mac_soft_ring.h> 358275SEric Cheng #include <sys/ethernet.h> 368275SEric Cheng #include <sys/vlan.h> 378275SEric Cheng #include <inet/ip.h> 388275SEric Cheng #include <inet/ip6.h> 398275SEric Cheng #include <netinet/tcp.h> 408275SEric Cheng #include <netinet/udp.h> 418275SEric Cheng #include <netinet/sctp.h> 428275SEric Cheng 438275SEric Cheng /* global flow table, will be a per exclusive-zone table later */ 448275SEric Cheng static mod_hash_t *flow_hash; 458275SEric Cheng static krwlock_t flow_tab_lock; 468275SEric Cheng 478275SEric Cheng static kmem_cache_t *flow_cache; 488275SEric Cheng static kmem_cache_t *flow_tab_cache; 498275SEric Cheng static flow_ops_t flow_l2_ops; 508275SEric Cheng 518275SEric Cheng typedef struct { 528275SEric Cheng const char *fs_name; 538275SEric Cheng uint_t fs_offset; 548275SEric Cheng } flow_stats_info_t; 558275SEric Cheng 568275SEric Cheng #define FS_OFF(f) (offsetof(flow_stats_t, f)) 578275SEric Cheng static flow_stats_info_t flow_stats_list[] = { 588275SEric Cheng {"rbytes", FS_OFF(fs_rbytes)}, 598275SEric Cheng {"ipackets", FS_OFF(fs_ipackets)}, 608275SEric Cheng {"ierrors", FS_OFF(fs_ierrors)}, 618275SEric Cheng {"obytes", FS_OFF(fs_obytes)}, 628275SEric Cheng {"opackets", FS_OFF(fs_opackets)}, 638275SEric Cheng {"oerrors", FS_OFF(fs_oerrors)} 648275SEric Cheng }; 658275SEric Cheng #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t)) 668275SEric Cheng 678275SEric Cheng /* 688275SEric Cheng * Checks whether a flow mask is legal. 
698275SEric Cheng */ 708275SEric Cheng static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t); 718275SEric Cheng 728275SEric Cheng static void 738275SEric Cheng flow_stat_init(kstat_named_t *knp) 748275SEric Cheng { 758275SEric Cheng int i; 768275SEric Cheng 778275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 788275SEric Cheng kstat_named_init(knp, flow_stats_list[i].fs_name, 798275SEric Cheng KSTAT_DATA_UINT64); 808275SEric Cheng } 818275SEric Cheng } 828275SEric Cheng 838275SEric Cheng static int 848275SEric Cheng flow_stat_update(kstat_t *ksp, int rw) 858275SEric Cheng { 868275SEric Cheng flow_entry_t *fep = ksp->ks_private; 878275SEric Cheng flow_stats_t *fsp = &fep->fe_flowstats; 888275SEric Cheng kstat_named_t *knp = ksp->ks_data; 898275SEric Cheng uint64_t *statp; 908275SEric Cheng zoneid_t zid; 918275SEric Cheng int i; 928275SEric Cheng 938275SEric Cheng if (rw != KSTAT_READ) 948275SEric Cheng return (EACCES); 958275SEric Cheng 968275SEric Cheng zid = getzoneid(); 978275SEric Cheng if (zid != GLOBAL_ZONEID && zid != fep->fe_zoneid) { 988275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) 998275SEric Cheng knp->value.ui64 = 0; 1008275SEric Cheng 1018275SEric Cheng return (0); 1028275SEric Cheng } 1038275SEric Cheng 1048275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 1058275SEric Cheng statp = (uint64_t *) 1068275SEric Cheng ((uchar_t *)fsp + flow_stats_list[i].fs_offset); 1078275SEric Cheng 1088275SEric Cheng knp->value.ui64 = *statp; 1098275SEric Cheng } 1108275SEric Cheng return (0); 1118275SEric Cheng } 1128275SEric Cheng 1138275SEric Cheng static void 1148275SEric Cheng flow_stat_create(flow_entry_t *fep) 1158275SEric Cheng { 1168275SEric Cheng kstat_t *ksp; 1178275SEric Cheng kstat_named_t *knp; 1188275SEric Cheng uint_t nstats = FS_SIZE; 1198275SEric Cheng 1208275SEric Cheng ksp = kstat_create("unix", 0, (char *)fep->fe_flow_name, "flow", 1218275SEric Cheng KSTAT_TYPE_NAMED, nstats, 0); 1228275SEric Cheng if (ksp == NULL) 1238275SEric 
Cheng return; 1248275SEric Cheng 1258275SEric Cheng ksp->ks_update = flow_stat_update; 1268275SEric Cheng ksp->ks_private = fep; 1278275SEric Cheng fep->fe_ksp = ksp; 1288275SEric Cheng 1298275SEric Cheng knp = (kstat_named_t *)ksp->ks_data; 1308275SEric Cheng flow_stat_init(knp); 1318275SEric Cheng kstat_install(ksp); 1328275SEric Cheng } 1338275SEric Cheng 1348275SEric Cheng void 1358275SEric Cheng flow_stat_destroy(flow_entry_t *fep) 1368275SEric Cheng { 1378275SEric Cheng if (fep->fe_ksp != NULL) { 1388275SEric Cheng kstat_delete(fep->fe_ksp); 1398275SEric Cheng fep->fe_ksp = NULL; 1408275SEric Cheng } 1418275SEric Cheng } 1428275SEric Cheng 1438275SEric Cheng /* 1448275SEric Cheng * Initialize the flow table 1458275SEric Cheng */ 1468275SEric Cheng void 1478275SEric Cheng mac_flow_init() 1488275SEric Cheng { 1498275SEric Cheng flow_cache = kmem_cache_create("flow_entry_cache", 1508275SEric Cheng sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1518275SEric Cheng flow_tab_cache = kmem_cache_create("flow_tab_cache", 1528275SEric Cheng sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1538275SEric Cheng flow_hash = mod_hash_create_extended("flow_hash", 1548275SEric Cheng 100, mod_hash_null_keydtor, mod_hash_null_valdtor, 1558275SEric Cheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 1568275SEric Cheng rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL); 1578275SEric Cheng } 1588275SEric Cheng 1598275SEric Cheng /* 1608275SEric Cheng * Cleanup and release the flow table 1618275SEric Cheng */ 1628275SEric Cheng void 1638275SEric Cheng mac_flow_fini() 1648275SEric Cheng { 1658275SEric Cheng kmem_cache_destroy(flow_cache); 1668275SEric Cheng kmem_cache_destroy(flow_tab_cache); 1678275SEric Cheng mod_hash_destroy_hash(flow_hash); 1688275SEric Cheng rw_destroy(&flow_tab_lock); 1698275SEric Cheng } 1708275SEric Cheng 1718275SEric Cheng /* 1728275SEric Cheng * mac_create_flow(): create a flow_entry_t. 
1738275SEric Cheng */ 1748275SEric Cheng int 1758275SEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, 1768275SEric Cheng void *client_cookie, uint_t type, flow_entry_t **flentp) 1778275SEric Cheng { 1788275SEric Cheng flow_entry_t *flent = *flentp; 1798275SEric Cheng int err = 0; 1808275SEric Cheng 1818275SEric Cheng if (mrp != NULL) { 1828275SEric Cheng err = mac_validate_props(mrp); 1838275SEric Cheng if (err != 0) 1848275SEric Cheng return (err); 1858275SEric Cheng } 1868275SEric Cheng 1878275SEric Cheng if (flent == NULL) { 1888275SEric Cheng flent = kmem_cache_alloc(flow_cache, KM_SLEEP); 1898275SEric Cheng bzero(flent, sizeof (*flent)); 1908275SEric Cheng mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL); 1918275SEric Cheng cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL); 1928275SEric Cheng 1938275SEric Cheng /* Initialize the receiver function to a safe routine */ 1948275SEric Cheng flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; 1958275SEric Cheng flent->fe_index = -1; 1968275SEric Cheng } 1978558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 1988275SEric Cheng 1998275SEric Cheng /* This is an initial flow, will be configured later */ 2008275SEric Cheng if (fd == NULL) { 2018275SEric Cheng *flentp = flent; 2028275SEric Cheng return (0); 2038275SEric Cheng } 2048275SEric Cheng 2058275SEric Cheng flent->fe_client_cookie = client_cookie; 2068275SEric Cheng flent->fe_type = type; 2078275SEric Cheng 2088275SEric Cheng /* 2098275SEric Cheng * As flow creation is only allowed in global zone, this will 2108275SEric Cheng * always set fe_zoneid to GLOBAL_ZONEID, and dls_add_flow() will 2118275SEric Cheng * later set the right value. 
2128275SEric Cheng */ 2138275SEric Cheng flent->fe_zoneid = getzoneid(); 2148275SEric Cheng 2158275SEric Cheng /* Save flow desc */ 2168275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 2178275SEric Cheng 2188275SEric Cheng if (mrp != NULL) { 2198275SEric Cheng /* 2208275SEric Cheng * We have already set fe_resource_props for a Link. 2218275SEric Cheng */ 2228275SEric Cheng if (type & FLOW_USER) { 2238275SEric Cheng bcopy(mrp, &flent->fe_resource_props, 2248275SEric Cheng sizeof (mac_resource_props_t)); 2258275SEric Cheng } 2268275SEric Cheng /* 2278275SEric Cheng * The effective resource list should reflect the priority 2288275SEric Cheng * that we set implicitly. 2298275SEric Cheng */ 2308275SEric Cheng if (!(mrp->mrp_mask & MRP_PRIORITY)) 2318275SEric Cheng mrp->mrp_mask |= MRP_PRIORITY; 2328275SEric Cheng if (type & FLOW_USER) 2338275SEric Cheng mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 2348275SEric Cheng else 2358275SEric Cheng mrp->mrp_priority = MPL_LINK_DEFAULT; 2368275SEric Cheng bcopy(mrp, &flent->fe_effective_props, 2378275SEric Cheng sizeof (mac_resource_props_t)); 2388275SEric Cheng } 2398275SEric Cheng flow_stat_create(flent); 2408275SEric Cheng 2418275SEric Cheng *flentp = flent; 2428275SEric Cheng return (0); 2438275SEric Cheng } 2448275SEric Cheng 2458275SEric Cheng /* 2468275SEric Cheng * Validate flow entry and add it to a flow table. 2478275SEric Cheng */ 2488275SEric Cheng int 2498275SEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent) 2508275SEric Cheng { 2518275SEric Cheng flow_entry_t **headp, **p; 2528275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 2538275SEric Cheng flow_mask_t mask; 2548275SEric Cheng uint32_t index; 2558275SEric Cheng int err; 2568275SEric Cheng 2578275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 2588275SEric Cheng 2598275SEric Cheng /* 2608275SEric Cheng * Check for invalid bits in mask. 
2618275SEric Cheng */ 2628275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 2638275SEric Cheng if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0) 2648275SEric Cheng return (EOPNOTSUPP); 2658275SEric Cheng 2668275SEric Cheng /* 2678275SEric Cheng * Validate flent. 2688275SEric Cheng */ 2698275SEric Cheng if ((err = ops->fo_accept_fe(ft, flent)) != 0) { 2708275SEric Cheng DTRACE_PROBE3(accept_failed, flow_tab_t *, ft, 2718275SEric Cheng flow_entry_t *, flent, int, err); 2728275SEric Cheng return (err); 2738275SEric Cheng } 2748275SEric Cheng 2758275SEric Cheng /* 2768275SEric Cheng * Flent is valid. now calculate hash and insert it 2778275SEric Cheng * into hash table. 2788275SEric Cheng */ 2798275SEric Cheng index = ops->fo_hash_fe(ft, flent); 2808275SEric Cheng 2818275SEric Cheng /* 2828275SEric Cheng * We do not need a lock up until now because we were 2838275SEric Cheng * not accessing the flow table. 2848275SEric Cheng */ 2858275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 2868275SEric Cheng headp = &ft->ft_table[index]; 2878275SEric Cheng 2888275SEric Cheng /* 2898275SEric Cheng * Check for duplicate flow. 2908275SEric Cheng */ 2918275SEric Cheng for (p = headp; *p != NULL; p = &(*p)->fe_next) { 2928275SEric Cheng if ((*p)->fe_flow_desc.fd_mask != 2938275SEric Cheng flent->fe_flow_desc.fd_mask) 2948275SEric Cheng continue; 2958275SEric Cheng 2968275SEric Cheng if (ft->ft_ops.fo_match_fe(ft, *p, flent)) { 2978275SEric Cheng rw_exit(&ft->ft_lock); 2988275SEric Cheng DTRACE_PROBE3(dup_flow, flow_tab_t *, ft, 2998275SEric Cheng flow_entry_t *, flent, int, err); 3008275SEric Cheng return (EALREADY); 3018275SEric Cheng } 3028275SEric Cheng } 3038275SEric Cheng 3048275SEric Cheng /* 3058275SEric Cheng * Insert flow to hash list. 
3068275SEric Cheng */ 3078275SEric Cheng err = ops->fo_insert_fe(ft, headp, flent); 3088275SEric Cheng if (err != 0) { 3098275SEric Cheng rw_exit(&ft->ft_lock); 3108275SEric Cheng DTRACE_PROBE3(insert_failed, flow_tab_t *, ft, 3118275SEric Cheng flow_entry_t *, flent, int, err); 3128275SEric Cheng return (err); 3138275SEric Cheng } 3148275SEric Cheng 3158275SEric Cheng /* 3168275SEric Cheng * Save the hash index so it can be used by mac_flow_remove(). 3178275SEric Cheng */ 3188275SEric Cheng flent->fe_index = (int)index; 3198275SEric Cheng 3208275SEric Cheng /* 3218275SEric Cheng * Save the flow tab back reference. 3228275SEric Cheng */ 3238275SEric Cheng flent->fe_flow_tab = ft; 3248275SEric Cheng FLOW_MARK(flent, FE_FLOW_TAB); 3258275SEric Cheng ft->ft_flow_count++; 3268275SEric Cheng rw_exit(&ft->ft_lock); 3278275SEric Cheng return (0); 3288275SEric Cheng } 3298275SEric Cheng 3308275SEric Cheng /* 3318275SEric Cheng * Remove a flow from a mac client's subflow table 3328275SEric Cheng */ 3338275SEric Cheng void 3348275SEric Cheng mac_flow_rem_subflow(flow_entry_t *flent) 3358275SEric Cheng { 3368275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 3378275SEric Cheng mac_client_impl_t *mcip = ft->ft_mcip; 3388275SEric Cheng 3398275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 3408275SEric Cheng 3418275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 3428275SEric Cheng if (flent->fe_mcip == NULL) { 3438275SEric Cheng /* 3448275SEric Cheng * The interface is not yet plumbed and mac_client_flow_add 3458275SEric Cheng * was not done. 
3468275SEric Cheng */ 3478275SEric Cheng if (FLOW_TAB_EMPTY(ft)) { 3488275SEric Cheng mac_flow_tab_destroy(ft); 3498275SEric Cheng mcip->mci_subflow_tab = NULL; 3508275SEric Cheng } 3518275SEric Cheng return; 3528275SEric Cheng } 3538275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 3548275SEric Cheng mac_link_flow_clean((mac_client_handle_t)mcip, flent); 3558275SEric Cheng } 3568275SEric Cheng 3578275SEric Cheng /* 3588275SEric Cheng * Add a flow to a mac client's subflow table and instantiate the flow 3598275SEric Cheng * in the mac by creating the associated SRSs etc. 3608275SEric Cheng */ 3618275SEric Cheng int 3628275SEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, 3638275SEric Cheng boolean_t instantiate_flow) 3648275SEric Cheng { 3658275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 3668275SEric Cheng flow_tab_info_t *ftinfo; 3678275SEric Cheng flow_mask_t mask; 3688275SEric Cheng flow_tab_t *ft; 3698275SEric Cheng int err; 3708275SEric Cheng boolean_t ft_created = B_FALSE; 3718275SEric Cheng 3728275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 3738275SEric Cheng 3748275SEric Cheng /* 3758275SEric Cheng * If the subflow table exists already just add the new subflow 3768275SEric Cheng * to the existing table, else we create a new subflow table below. 
3778275SEric Cheng */ 3788275SEric Cheng ft = mcip->mci_subflow_tab; 3798275SEric Cheng if (ft == NULL) { 3808275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 3818275SEric Cheng /* 3828275SEric Cheng * Try to create a new table and then add the subflow to the 3838275SEric Cheng * newly created subflow table 3848275SEric Cheng */ 3858275SEric Cheng if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) 3868275SEric Cheng return (EOPNOTSUPP); 3878275SEric Cheng 3888275SEric Cheng mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size, 3898275SEric Cheng mcip->mci_mip, &ft); 3908275SEric Cheng ft_created = B_TRUE; 3918275SEric Cheng } 3928275SEric Cheng 3938275SEric Cheng err = mac_flow_add(ft, flent); 3948275SEric Cheng if (err != 0) { 3958275SEric Cheng if (ft_created) 3968275SEric Cheng mac_flow_tab_destroy(ft); 3978275SEric Cheng return (err); 3988275SEric Cheng } 3998275SEric Cheng 4008275SEric Cheng if (instantiate_flow) { 4018275SEric Cheng /* Now activate the flow by creating its SRSs */ 4028275SEric Cheng ASSERT(MCIP_DATAPATH_SETUP(mcip)); 4038275SEric Cheng err = mac_link_flow_init((mac_client_handle_t)mcip, flent); 4048275SEric Cheng if (err != 0) { 4058275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 4068275SEric Cheng if (ft_created) 4078275SEric Cheng mac_flow_tab_destroy(ft); 4088275SEric Cheng return (err); 4098275SEric Cheng } 4108275SEric Cheng } else { 4118275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 4128275SEric Cheng } 4138275SEric Cheng if (ft_created) { 4148275SEric Cheng ASSERT(mcip->mci_subflow_tab == NULL); 4158275SEric Cheng ft->ft_mcip = mcip; 4168275SEric Cheng mcip->mci_subflow_tab = ft; 4178275SEric Cheng if (instantiate_flow) 4188275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 4198275SEric Cheng } 4208275SEric Cheng return (0); 4218275SEric Cheng } 4228275SEric Cheng 4238275SEric Cheng /* 4248275SEric Cheng * Remove flow entry from flow table. 
4258275SEric Cheng */ 4268275SEric Cheng void 4278275SEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp) 4288275SEric Cheng { 4298275SEric Cheng flow_entry_t **fp; 4308275SEric Cheng 4318275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 4328275SEric Cheng if (!(flent->fe_flags & FE_FLOW_TAB)) 4338275SEric Cheng return; 4348275SEric Cheng 4358275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 4368275SEric Cheng /* 4378275SEric Cheng * If this is a permanent removal from the flow table, mark it 4388275SEric Cheng * CONDEMNED to prevent future references. If this is a temporary 4398275SEric Cheng * removal from the table, say to update the flow descriptor then 4408275SEric Cheng * we don't mark it CONDEMNED 4418275SEric Cheng */ 4428275SEric Cheng if (!temp) 4438275SEric Cheng FLOW_MARK(flent, FE_CONDEMNED); 4448275SEric Cheng /* 4458275SEric Cheng * Locate the specified flent. 4468275SEric Cheng */ 4478275SEric Cheng fp = &ft->ft_table[flent->fe_index]; 4488275SEric Cheng while (*fp != flent) 4498275SEric Cheng fp = &(*fp)->fe_next; 4508275SEric Cheng 4518275SEric Cheng /* 4528275SEric Cheng * The flent must exist. Otherwise it's a bug. 4538275SEric Cheng */ 4548275SEric Cheng ASSERT(fp != NULL); 4558275SEric Cheng *fp = flent->fe_next; 4568275SEric Cheng flent->fe_next = NULL; 4578275SEric Cheng 4588275SEric Cheng /* 4598275SEric Cheng * Reset fe_index to -1 so any attempt to call mac_flow_remove() 4608275SEric Cheng * on a flent that is supposed to be in the table (FE_FLOW_TAB) 4618275SEric Cheng * will panic. 4628275SEric Cheng */ 4638275SEric Cheng flent->fe_index = -1; 4648275SEric Cheng FLOW_UNMARK(flent, FE_FLOW_TAB); 4658275SEric Cheng ft->ft_flow_count--; 4668275SEric Cheng rw_exit(&ft->ft_lock); 4678275SEric Cheng } 4688275SEric Cheng 4698275SEric Cheng /* 4708275SEric Cheng * This is the flow lookup routine used by the mac sw classifier engine. 
4718275SEric Cheng */ 4728275SEric Cheng int 4738275SEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp) 4748275SEric Cheng { 4758275SEric Cheng flow_state_t s; 4768275SEric Cheng flow_entry_t *flent; 4778275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 4788275SEric Cheng boolean_t retried = B_FALSE; 4798275SEric Cheng int i, err; 4808275SEric Cheng 4818275SEric Cheng s.fs_flags = flags; 482*8833SVenu.Iyer@Sun.COM retry: 4838275SEric Cheng s.fs_mp = mp; 4848275SEric Cheng 4858275SEric Cheng /* 4868275SEric Cheng * Walk the list of predeclared accept functions. 4878275SEric Cheng * Each of these would accumulate enough state to allow the next 4888275SEric Cheng * accept routine to make progress. 4898275SEric Cheng */ 4908275SEric Cheng for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) { 4918275SEric Cheng if ((err = (ops->fo_accept[i])(ft, &s)) != 0) { 492*8833SVenu.Iyer@Sun.COM mblk_t *last; 493*8833SVenu.Iyer@Sun.COM 4948275SEric Cheng /* 4958275SEric Cheng * ENOBUFS indicates that the mp could be too short 4968275SEric Cheng * and may need a pullup. 4978275SEric Cheng */ 4988275SEric Cheng if (err != ENOBUFS || retried) 4998275SEric Cheng return (err); 5008275SEric Cheng 5018275SEric Cheng /* 502*8833SVenu.Iyer@Sun.COM * The pullup is done on the last processed mblk, not 503*8833SVenu.Iyer@Sun.COM * the starting one. pullup is not done if the mblk 504*8833SVenu.Iyer@Sun.COM * has references or if b_cont is NULL. 
5058275SEric Cheng */ 506*8833SVenu.Iyer@Sun.COM last = s.fs_mp; 507*8833SVenu.Iyer@Sun.COM if (DB_REF(last) > 1 || last->b_cont == NULL || 508*8833SVenu.Iyer@Sun.COM pullupmsg(last, -1) == 0) 5098275SEric Cheng return (EINVAL); 5108275SEric Cheng 5118275SEric Cheng retried = B_TRUE; 5128275SEric Cheng DTRACE_PROBE2(need_pullup, flow_tab_t *, ft, 5138275SEric Cheng flow_state_t *, &s); 5148275SEric Cheng goto retry; 5158275SEric Cheng } 5168275SEric Cheng } 5178275SEric Cheng 5188275SEric Cheng /* 5198275SEric Cheng * The packet is considered sane. We may now attempt to 5208275SEric Cheng * find the corresponding flent. 5218275SEric Cheng */ 5228275SEric Cheng rw_enter(&ft->ft_lock, RW_READER); 5238275SEric Cheng flent = ft->ft_table[ops->fo_hash(ft, &s)]; 5248275SEric Cheng for (; flent != NULL; flent = flent->fe_next) { 5258275SEric Cheng if (flent->fe_match(ft, flent, &s)) { 5268275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 5278275SEric Cheng if (err != 0) 5288275SEric Cheng continue; 5298275SEric Cheng *flentp = flent; 5308275SEric Cheng rw_exit(&ft->ft_lock); 5318275SEric Cheng return (0); 5328275SEric Cheng } 5338275SEric Cheng } 5348275SEric Cheng rw_exit(&ft->ft_lock); 5358275SEric Cheng return (ENOENT); 5368275SEric Cheng } 5378275SEric Cheng 5388275SEric Cheng /* 5398275SEric Cheng * Walk flow table. 5408275SEric Cheng * The caller is assumed to have proper perimeter protection. 
5418275SEric Cheng */ 5428275SEric Cheng int 5438275SEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5448275SEric Cheng void *arg) 5458275SEric Cheng { 5468275SEric Cheng int err, i, cnt = 0; 5478275SEric Cheng flow_entry_t *flent; 5488275SEric Cheng 5498275SEric Cheng if (ft == NULL) 5508275SEric Cheng return (0); 5518275SEric Cheng 5528275SEric Cheng for (i = 0; i < ft->ft_size; i++) { 5538275SEric Cheng for (flent = ft->ft_table[i]; flent != NULL; 5548275SEric Cheng flent = flent->fe_next) { 5558275SEric Cheng cnt++; 5568275SEric Cheng err = (*fn)(flent, arg); 5578275SEric Cheng if (err != 0) 5588275SEric Cheng return (err); 5598275SEric Cheng } 5608275SEric Cheng } 5618275SEric Cheng VERIFY(cnt == ft->ft_flow_count); 5628275SEric Cheng return (0); 5638275SEric Cheng } 5648275SEric Cheng 5658275SEric Cheng /* 5668275SEric Cheng * Same as the above except a mutex is used for protection here. 5678275SEric Cheng */ 5688275SEric Cheng int 5698275SEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5708275SEric Cheng void *arg) 5718275SEric Cheng { 5728275SEric Cheng int err; 5738275SEric Cheng 5748275SEric Cheng if (ft == NULL) 5758275SEric Cheng return (0); 5768275SEric Cheng 5778275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 5788275SEric Cheng err = mac_flow_walk_nolock(ft, fn, arg); 5798275SEric Cheng rw_exit(&ft->ft_lock); 5808275SEric Cheng return (err); 5818275SEric Cheng } 5828275SEric Cheng 5838275SEric Cheng static boolean_t mac_flow_clean(flow_entry_t *); 5848275SEric Cheng 5858275SEric Cheng /* 5868275SEric Cheng * Destroy a flow entry. Called when the last reference on a flow is released. 
5878275SEric Cheng */ 5888275SEric Cheng void 5898275SEric Cheng mac_flow_destroy(flow_entry_t *flent) 5908275SEric Cheng { 5918275SEric Cheng ASSERT(flent->fe_refcnt == 0); 5928275SEric Cheng 5938275SEric Cheng if ((flent->fe_type & FLOW_USER) != 0) { 5948275SEric Cheng ASSERT(mac_flow_clean(flent)); 5958275SEric Cheng } else { 5968275SEric Cheng mac_flow_cleanup(flent); 5978275SEric Cheng } 5988275SEric Cheng 5998275SEric Cheng mutex_destroy(&flent->fe_lock); 6008275SEric Cheng cv_destroy(&flent->fe_cv); 6018275SEric Cheng flow_stat_destroy(flent); 6028275SEric Cheng kmem_cache_free(flow_cache, flent); 6038275SEric Cheng } 6048275SEric Cheng 6058275SEric Cheng /* 6068275SEric Cheng * XXX eric 6078275SEric Cheng * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and 6088275SEric Cheng * mac_link_flow_modify() should really be moved/reworked into the 6098275SEric Cheng * two functions below. This would consolidate all the mac property 6108275SEric Cheng * checking in one place. I'm leaving this alone for now since it's 6118275SEric Cheng * out of scope of the new flows work. 
6128275SEric Cheng */ 6138275SEric Cheng /* ARGSUSED */ 6148275SEric Cheng uint32_t 6158275SEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) 6168275SEric Cheng { 6178275SEric Cheng uint32_t changed_mask = 0; 6188275SEric Cheng mac_resource_props_t *fmrp = &flent->fe_effective_props; 6198275SEric Cheng int i; 6208275SEric Cheng 6218275SEric Cheng if ((mrp->mrp_mask & MRP_MAXBW) != 0 && 6228275SEric Cheng (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { 6238275SEric Cheng changed_mask |= MRP_MAXBW; 6248275SEric Cheng fmrp->mrp_maxbw = mrp->mrp_maxbw; 6258275SEric Cheng if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { 6268275SEric Cheng fmrp->mrp_mask &= ~MRP_MAXBW; 6278275SEric Cheng } else { 6288275SEric Cheng fmrp->mrp_mask |= MRP_MAXBW; 6298275SEric Cheng } 6308275SEric Cheng } 6318275SEric Cheng 6328275SEric Cheng if ((mrp->mrp_mask & MRP_PRIORITY) != 0) { 6338275SEric Cheng if (fmrp->mrp_priority != mrp->mrp_priority) 6348275SEric Cheng changed_mask |= MRP_PRIORITY; 6358275SEric Cheng if (mrp->mrp_priority == MPL_RESET) { 6368275SEric Cheng fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 6378275SEric Cheng fmrp->mrp_mask &= ~MRP_PRIORITY; 6388275SEric Cheng } else { 6398275SEric Cheng fmrp->mrp_priority = mrp->mrp_priority; 6408275SEric Cheng fmrp->mrp_mask |= MRP_PRIORITY; 6418275SEric Cheng } 6428275SEric Cheng } 6438275SEric Cheng 6448275SEric Cheng /* modify fanout */ 6458275SEric Cheng if ((mrp->mrp_mask & MRP_CPUS) != 0) { 6468275SEric Cheng if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) && 6478275SEric Cheng (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) { 6488275SEric Cheng for (i = 0; i < mrp->mrp_ncpus; i++) { 6498275SEric Cheng if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i]) 6508275SEric Cheng break; 6518275SEric Cheng } 6528275SEric Cheng if (i == mrp->mrp_ncpus) { 6538275SEric Cheng /* 6548275SEric Cheng * The new set of cpus passed is exactly 6558275SEric Cheng * the same as the existing set. 
6568275SEric Cheng */ 6578275SEric Cheng return (changed_mask); 6588275SEric Cheng } 6598275SEric Cheng } 6608275SEric Cheng changed_mask |= MRP_CPUS; 6618275SEric Cheng MAC_COPY_CPUS(mrp, fmrp); 6628275SEric Cheng } 6638275SEric Cheng return (changed_mask); 6648275SEric Cheng } 6658275SEric Cheng 6668275SEric Cheng void 6678275SEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) 6688275SEric Cheng { 6698275SEric Cheng uint32_t changed_mask; 6708275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 6718275SEric Cheng mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); 6728275SEric Cheng 6738275SEric Cheng ASSERT(flent != NULL); 6748275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 6758275SEric Cheng 6768275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 6778275SEric Cheng 6788275SEric Cheng /* Update the cached values inside the subflow entry */ 6798275SEric Cheng changed_mask = mac_flow_modify_props(flent, mrp); 6808275SEric Cheng rw_exit(&ft->ft_lock); 6818275SEric Cheng /* 6828275SEric Cheng * Push the changed parameters to the scheduling code in the 6838275SEric Cheng * SRS's, to take effect right away. 6848275SEric Cheng */ 6858275SEric Cheng if (changed_mask & MRP_MAXBW) { 6868275SEric Cheng mac_srs_update_bwlimit(flent, mrp); 6878275SEric Cheng /* 6888275SEric Cheng * If bandwidth is changed, we may have to change 6898275SEric Cheng * the number of soft ring to be used for fanout. 6908275SEric Cheng * Call mac_flow_update_fanout() if MAC_BIND_CPU 6918275SEric Cheng * is not set and there is no user supplied cpu 6928275SEric Cheng * info. This applies only to link at this time. 
6938275SEric Cheng */ 6948275SEric Cheng if (!(flent->fe_type & FLOW_USER) && 6958275SEric Cheng !(changed_mask & MRP_CPUS) && 6968275SEric Cheng !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { 6978275SEric Cheng mac_fanout_setup(mcip, flent, mcip_mrp, 6988275SEric Cheng mac_rx_deliver, mcip, NULL); 6998275SEric Cheng } 7008275SEric Cheng } 7018275SEric Cheng if (mrp->mrp_mask & MRP_PRIORITY) 7028275SEric Cheng mac_flow_update_priority(mcip, flent); 7038275SEric Cheng 7048275SEric Cheng if (changed_mask & MRP_CPUS) 7058275SEric Cheng mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); 7068275SEric Cheng } 7078275SEric Cheng 7088275SEric Cheng /* 7098275SEric Cheng * This function waits for a certain condition to be met and is generally 7108275SEric Cheng * used before a destructive or quiescing operation. 7118275SEric Cheng */ 7128275SEric Cheng void 7138275SEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event) 7148275SEric Cheng { 7158275SEric Cheng mutex_enter(&flent->fe_lock); 7168275SEric Cheng flent->fe_flags |= FE_WAITER; 7178275SEric Cheng 7188275SEric Cheng switch (event) { 7198275SEric Cheng case FLOW_DRIVER_UPCALL: 7208275SEric Cheng /* 7218275SEric Cheng * We want to make sure the driver upcalls have finished before 7228275SEric Cheng * we signal the Rx SRS worker to quit. 7238275SEric Cheng */ 7248275SEric Cheng while (flent->fe_refcnt != 1) 7258275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7268275SEric Cheng break; 7278275SEric Cheng 7288275SEric Cheng case FLOW_USER_REF: 7298275SEric Cheng /* 7308275SEric Cheng * Wait for the fe_user_refcnt to drop to 0. The flow has 7318275SEric Cheng * been removed from the global flow hash. 
7328275SEric Cheng */ 7338275SEric Cheng ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH)); 7348275SEric Cheng while (flent->fe_user_refcnt != 0) 7358275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7368275SEric Cheng break; 7378275SEric Cheng 7388275SEric Cheng default: 7398275SEric Cheng ASSERT(0); 7408275SEric Cheng } 7418275SEric Cheng 7428275SEric Cheng flent->fe_flags &= ~FE_WAITER; 7438275SEric Cheng mutex_exit(&flent->fe_lock); 7448275SEric Cheng } 7458275SEric Cheng 7468275SEric Cheng static boolean_t 7478275SEric Cheng mac_flow_clean(flow_entry_t *flent) 7488275SEric Cheng { 7498275SEric Cheng ASSERT(flent->fe_next == NULL); 7508275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7518275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL); 7528275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7538275SEric Cheng 7548275SEric Cheng return (B_TRUE); 7558275SEric Cheng } 7568275SEric Cheng 7578275SEric Cheng void 7588275SEric Cheng mac_flow_cleanup(flow_entry_t *flent) 7598275SEric Cheng { 7608275SEric Cheng if ((flent->fe_type & FLOW_USER) == 0) { 7618275SEric Cheng ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) || 7628275SEric Cheng (flent->fe_mbg != NULL && flent->fe_mcip == NULL)); 7638275SEric Cheng ASSERT(flent->fe_refcnt == 0); 7648275SEric Cheng } else { 7658275SEric Cheng ASSERT(flent->fe_refcnt == 1); 7668275SEric Cheng } 7678275SEric Cheng 7688275SEric Cheng if (flent->fe_mbg != NULL) { 7698275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7708275SEric Cheng /* This is a multicast or broadcast flow entry */ 7718275SEric Cheng mac_bcast_grp_free(flent->fe_mbg); 7728275SEric Cheng flent->fe_mbg = NULL; 7738275SEric Cheng } 7748275SEric Cheng 7758275SEric Cheng if (flent->fe_tx_srs != NULL) { 7768275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7778275SEric Cheng mac_srs_free(flent->fe_tx_srs); 7788275SEric Cheng flent->fe_tx_srs = NULL; 7798275SEric Cheng } 7808275SEric Cheng 7818275SEric Cheng /* 7828275SEric Cheng * In the 
normal case fe_rx_srs_cnt is 1. However in the error case 7838275SEric Cheng * when mac_unicast_add fails we may not have set up any SRS 7848275SEric Cheng * in which case fe_rx_srs_cnt will be zero. 7858275SEric Cheng */ 7868275SEric Cheng if (flent->fe_rx_srs_cnt != 0) { 7878275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 1); 7888275SEric Cheng mac_srs_free(flent->fe_rx_srs[0]); 7898275SEric Cheng flent->fe_rx_srs[0] = NULL; 7908275SEric Cheng flent->fe_rx_srs_cnt = 0; 7918275SEric Cheng } 7928275SEric Cheng ASSERT(flent->fe_rx_srs[0] == NULL); 7938275SEric Cheng } 7948275SEric Cheng 7958275SEric Cheng void 7968275SEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd) 7978275SEric Cheng { 7988275SEric Cheng /* 7998275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 8008275SEric Cheng * Updates to the fe_flow_desc happen under the fe_lock 8018275SEric Cheng * after removing the flent from the flow table 8028275SEric Cheng */ 8038275SEric Cheng mutex_enter(&flent->fe_lock); 8048275SEric Cheng bcopy(&flent->fe_flow_desc, fd, sizeof (*fd)); 8058275SEric Cheng mutex_exit(&flent->fe_lock); 8068275SEric Cheng } 8078275SEric Cheng 8088275SEric Cheng /* 8098275SEric Cheng * Update a field of a flow entry. The mac perimeter ensures that 8108275SEric Cheng * this is the only thread doing a modify operation on this mac end point. 8118275SEric Cheng * So the flow table can't change or disappear. The ft_lock protects access 8128275SEric Cheng * to the flow entry, and holding the lock ensures that there isn't any thread 8138275SEric Cheng * accessing the flow entry or attempting a flow table lookup. However 8148275SEric Cheng * data threads that are using the flow entry based on the old descriptor 8158275SEric Cheng * will continue to use the flow entry. If strong coherence is required 8168275SEric Cheng * then the flow will have to be quiesced before the descriptor can be 8178275SEric Cheng * changed. 
8188275SEric Cheng */ 8198275SEric Cheng void 8208275SEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd) 8218275SEric Cheng { 8228275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8238275SEric Cheng flow_desc_t old_desc; 8248275SEric Cheng int err; 8258275SEric Cheng 8268275SEric Cheng if (ft == NULL) { 8278275SEric Cheng /* 8288275SEric Cheng * The flow hasn't yet been inserted into the table, 8298275SEric Cheng * so only the caller knows about this flow, however for 8308275SEric Cheng * uniformity we grab the fe_lock here. 8318275SEric Cheng */ 8328275SEric Cheng mutex_enter(&flent->fe_lock); 8338275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8348275SEric Cheng mutex_exit(&flent->fe_lock); 8358275SEric Cheng } 8368275SEric Cheng 8378275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8388275SEric Cheng 8398275SEric Cheng /* 8408275SEric Cheng * Need to remove the flow entry from the table and reinsert it, 8418275SEric Cheng * into a potentially diference hash line. The hash depends on 8428275SEric Cheng * the new descriptor fields. However access to fe_desc itself 8438275SEric Cheng * is always under the fe_lock. This helps log and stat functions 8448275SEric Cheng * see a self-consistent fe_flow_desc. 8458275SEric Cheng */ 8468275SEric Cheng mac_flow_remove(ft, flent, B_TRUE); 8478275SEric Cheng old_desc = flent->fe_flow_desc; 8488275SEric Cheng 8498275SEric Cheng mutex_enter(&flent->fe_lock); 8508275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8518275SEric Cheng mutex_exit(&flent->fe_lock); 8528275SEric Cheng 8538275SEric Cheng if (mac_flow_add(ft, flent) != 0) { 8548275SEric Cheng /* 8558275SEric Cheng * The add failed say due to an invalid flow descriptor. 
8568275SEric Cheng * Undo the update 8578275SEric Cheng */ 8588275SEric Cheng flent->fe_flow_desc = old_desc; 8598275SEric Cheng err = mac_flow_add(ft, flent); 8608275SEric Cheng ASSERT(err == 0); 8618275SEric Cheng } 8628275SEric Cheng } 8638275SEric Cheng 8648275SEric Cheng void 8658275SEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name) 8668275SEric Cheng { 8678275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8688275SEric Cheng 8698275SEric Cheng if (ft == NULL) { 8708275SEric Cheng /* 8718275SEric Cheng * The flow hasn't yet been inserted into the table, 8728275SEric Cheng * so only the caller knows about this flow 8738275SEric Cheng */ 8748558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8758275SEric Cheng } else { 8768275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8778275SEric Cheng } 8788275SEric Cheng 8798275SEric Cheng mutex_enter(&flent->fe_lock); 8808558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8818275SEric Cheng mutex_exit(&flent->fe_lock); 8828275SEric Cheng } 8838275SEric Cheng 8848275SEric Cheng /* 8858275SEric Cheng * Return the client-private cookie that was associated with 8868275SEric Cheng * the flow when it was created. 8878275SEric Cheng */ 8888275SEric Cheng void * 8898275SEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent) 8908275SEric Cheng { 8918275SEric Cheng return (flent->fe_client_cookie); 8928275SEric Cheng } 8938275SEric Cheng 8948275SEric Cheng /* 8958275SEric Cheng * Forward declarations. 8968275SEric Cheng */ 8978275SEric Cheng static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *); 8988275SEric Cheng static int flow_l2_accept(flow_tab_t *, flow_state_t *); 8998275SEric Cheng static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *); 9008275SEric Cheng static int flow_ether_accept(flow_tab_t *, flow_state_t *); 9018275SEric Cheng 9028275SEric Cheng /* 9038275SEric Cheng * Create flow table. 
9048275SEric Cheng */ 9058275SEric Cheng void 9068275SEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size, 9078275SEric Cheng mac_impl_t *mip, flow_tab_t **ftp) 9088275SEric Cheng { 9098275SEric Cheng flow_tab_t *ft; 9108275SEric Cheng flow_ops_t *new_ops; 9118275SEric Cheng 9128275SEric Cheng ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP); 9138275SEric Cheng bzero(ft, sizeof (*ft)); 9148275SEric Cheng 9158275SEric Cheng ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP); 9168275SEric Cheng 9178275SEric Cheng /* 9188275SEric Cheng * We make a copy of the ops vector instead of just pointing to it 9198275SEric Cheng * because we might want to customize the ops vector on a per table 9208275SEric Cheng * basis (e.g. for optimization). 9218275SEric Cheng */ 9228275SEric Cheng new_ops = &ft->ft_ops; 9238275SEric Cheng bcopy(ops, new_ops, sizeof (*ops)); 9248275SEric Cheng ft->ft_mask = mask; 9258275SEric Cheng ft->ft_size = size; 9268275SEric Cheng ft->ft_mip = mip; 9278275SEric Cheng 9288275SEric Cheng /* 9298275SEric Cheng * Optimization for DL_ETHER media. 9308275SEric Cheng */ 9318275SEric Cheng if (mip->mi_info.mi_nativemedia == DL_ETHER) { 9328275SEric Cheng if (new_ops->fo_hash == flow_l2_hash) 9338275SEric Cheng new_ops->fo_hash = flow_ether_hash; 9348275SEric Cheng 9358275SEric Cheng if (new_ops->fo_accept[0] == flow_l2_accept) 9368275SEric Cheng new_ops->fo_accept[0] = flow_ether_accept; 9378275SEric Cheng 9388275SEric Cheng } 9398275SEric Cheng *ftp = ft; 9408275SEric Cheng } 9418275SEric Cheng 9428275SEric Cheng void 9438275SEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp) 9448275SEric Cheng { 9458275SEric Cheng mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID, 9468275SEric Cheng 1024, mip, ftp); 9478275SEric Cheng } 9488275SEric Cheng 9498275SEric Cheng /* 9508275SEric Cheng * Destroy flow table. 
9518275SEric Cheng */ 9528275SEric Cheng void 9538275SEric Cheng mac_flow_tab_destroy(flow_tab_t *ft) 9548275SEric Cheng { 9558275SEric Cheng if (ft == NULL) 9568275SEric Cheng return; 9578275SEric Cheng 9588275SEric Cheng ASSERT(ft->ft_flow_count == 0); 9598275SEric Cheng kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *)); 9608275SEric Cheng bzero(ft, sizeof (*ft)); 9618275SEric Cheng kmem_cache_free(flow_tab_cache, ft); 9628275SEric Cheng } 9638275SEric Cheng 9648275SEric Cheng /* 9658275SEric Cheng * Add a new flow entry to the global flow hash table 9668275SEric Cheng */ 9678275SEric Cheng int 9688275SEric Cheng mac_flow_hash_add(flow_entry_t *flent) 9698275SEric Cheng { 9708275SEric Cheng int err; 9718275SEric Cheng 9728275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 9738275SEric Cheng err = mod_hash_insert(flow_hash, 9748275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent); 9758275SEric Cheng if (err != 0) { 9768275SEric Cheng rw_exit(&flow_tab_lock); 9778275SEric Cheng return (EEXIST); 9788275SEric Cheng } 9798275SEric Cheng /* Mark as inserted into the global flow hash table */ 9808275SEric Cheng FLOW_MARK(flent, FE_G_FLOW_HASH); 9818275SEric Cheng rw_exit(&flow_tab_lock); 9828275SEric Cheng return (err); 9838275SEric Cheng } 9848275SEric Cheng 9858275SEric Cheng /* 9868275SEric Cheng * Remove a flow entry from the global flow hash table 9878275SEric Cheng */ 9888275SEric Cheng void 9898275SEric Cheng mac_flow_hash_remove(flow_entry_t *flent) 9908275SEric Cheng { 9918275SEric Cheng mod_hash_val_t val; 9928275SEric Cheng 9938275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 9948275SEric Cheng VERIFY(mod_hash_remove(flow_hash, 9958275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, &val) == 0); 9968275SEric Cheng 9978275SEric Cheng /* Clear the mark that says inserted into the global flow hash table */ 9988275SEric Cheng FLOW_UNMARK(flent, FE_G_FLOW_HASH); 9998275SEric Cheng rw_exit(&flow_tab_lock); 10008275SEric Cheng 
} 10018275SEric Cheng 10028275SEric Cheng /* 10038275SEric Cheng * Retrieve a flow entry from the global flow hash table. 10048275SEric Cheng */ 10058275SEric Cheng int 10068275SEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp) 10078275SEric Cheng { 10088275SEric Cheng int err; 10098275SEric Cheng flow_entry_t *flent; 10108275SEric Cheng 10118275SEric Cheng rw_enter(&flow_tab_lock, RW_READER); 10128275SEric Cheng err = mod_hash_find(flow_hash, (mod_hash_key_t)name, 10138275SEric Cheng (mod_hash_val_t *)&flent); 10148275SEric Cheng if (err != 0) { 10158275SEric Cheng rw_exit(&flow_tab_lock); 10168275SEric Cheng return (ENOENT); 10178275SEric Cheng } 10188275SEric Cheng ASSERT(flent != NULL); 10198275SEric Cheng FLOW_USER_REFHOLD(flent); 10208275SEric Cheng rw_exit(&flow_tab_lock); 10218275SEric Cheng 10228275SEric Cheng *flentp = flent; 10238275SEric Cheng return (0); 10248275SEric Cheng } 10258275SEric Cheng 10268275SEric Cheng /* 10278275SEric Cheng * Initialize or release mac client flows by walking the subflow table. 10288275SEric Cheng * These are typically invoked during plumb/unplumb of links. 
10298275SEric Cheng */ 10308275SEric Cheng 10318275SEric Cheng static int 10328275SEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg) 10338275SEric Cheng { 10348275SEric Cheng mac_client_impl_t *mcip = arg; 10358275SEric Cheng 10368275SEric Cheng if (mac_link_flow_init(arg, flent) != 0) { 10378275SEric Cheng cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'", 10388275SEric Cheng flent->fe_flow_name, mcip->mci_name); 10398275SEric Cheng } else { 10408275SEric Cheng FLOW_UNMARK(flent, FE_UF_NO_DATAPATH); 10418275SEric Cheng } 10428275SEric Cheng return (0); 10438275SEric Cheng } 10448275SEric Cheng 10458275SEric Cheng void 10468275SEric Cheng mac_link_init_flows(mac_client_handle_t mch) 10478275SEric Cheng { 10488275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10498275SEric Cheng 10508275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 10518275SEric Cheng mac_link_init_flows_cb, mcip); 10528275SEric Cheng /* 10538275SEric Cheng * If mac client had subflow(s) configured before plumb, change 10548275SEric Cheng * function to mac_rx_srs_subflow_process and in case of hardware 10558275SEric Cheng * classification, disable polling. 
10568275SEric Cheng */ 10578275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 10588275SEric Cheng 10598275SEric Cheng } 10608275SEric Cheng 10618275SEric Cheng boolean_t 10628275SEric Cheng mac_link_has_flows(mac_client_handle_t mch) 10638275SEric Cheng { 10648275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10658275SEric Cheng 10668275SEric Cheng if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab)) 10678275SEric Cheng return (B_TRUE); 10688275SEric Cheng 10698275SEric Cheng return (B_FALSE); 10708275SEric Cheng } 10718275SEric Cheng 10728275SEric Cheng static int 10738275SEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg) 10748275SEric Cheng { 10758275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 10768275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 10778275SEric Cheng mac_link_flow_clean(arg, flent); 10788275SEric Cheng return (0); 10798275SEric Cheng } 10808275SEric Cheng 10818275SEric Cheng void 10828275SEric Cheng mac_link_release_flows(mac_client_handle_t mch) 10838275SEric Cheng { 10848275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10858275SEric Cheng 10868275SEric Cheng /* 10878275SEric Cheng * Change the mci_flent callback back to mac_rx_srs_process() 10888275SEric Cheng * because flows are about to be deactivated. 
10898275SEric Cheng */ 10908275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 10918275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 10928275SEric Cheng mac_link_release_flows_cb, mcip); 10938275SEric Cheng } 10948275SEric Cheng 10958275SEric Cheng void 10968275SEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name) 10978275SEric Cheng { 10988275SEric Cheng mac_flow_set_name(fep, new_name); 10998275SEric Cheng if (fep->fe_ksp != NULL) { 11008275SEric Cheng flow_stat_destroy(fep); 11018275SEric Cheng flow_stat_create(fep); 11028275SEric Cheng } 11038275SEric Cheng } 11048275SEric Cheng 11058275SEric Cheng /* 11068275SEric Cheng * mac_link_flow_init() 11078275SEric Cheng * Internal flow interface used for allocating SRSs and related 11088275SEric Cheng * data structures. Not meant to be used by mac clients. 11098275SEric Cheng */ 11108275SEric Cheng int 11118275SEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow) 11128275SEric Cheng { 11138275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 11148275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 11158275SEric Cheng int err; 11168275SEric Cheng 11178275SEric Cheng ASSERT(mch != NULL); 11188275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11198275SEric Cheng 11208275SEric Cheng if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0) 11218275SEric Cheng return (err); 11228275SEric Cheng 11238275SEric Cheng sub_flow->fe_mcip = mcip; 11248275SEric Cheng 11258275SEric Cheng return (0); 11268275SEric Cheng } 11278275SEric Cheng 11288275SEric Cheng /* 11298275SEric Cheng * mac_link_flow_add() 11308275SEric Cheng * Used by flowadm(1m) or kernel mac clients for creating flows. 
11318275SEric Cheng */ 11328275SEric Cheng int 11338275SEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name, 11348275SEric Cheng flow_desc_t *flow_desc, mac_resource_props_t *mrp) 11358275SEric Cheng { 11368275SEric Cheng flow_entry_t *flent = NULL; 11378275SEric Cheng int err; 11388275SEric Cheng dls_dl_handle_t dlh; 11398275SEric Cheng dls_link_t *dlp; 11408275SEric Cheng boolean_t link_held = B_FALSE; 11418275SEric Cheng boolean_t hash_added = B_FALSE; 11428275SEric Cheng mac_perim_handle_t mph; 11438275SEric Cheng 11448275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 11458275SEric Cheng if (err == 0) { 11468275SEric Cheng FLOW_USER_REFRELE(flent); 11478275SEric Cheng return (EEXIST); 11488275SEric Cheng } 11498275SEric Cheng 11508275SEric Cheng /* 11518275SEric Cheng * First create a flow entry given the description provided 11528275SEric Cheng * by the caller. 11538275SEric Cheng */ 11548275SEric Cheng err = mac_flow_create(flow_desc, mrp, flow_name, NULL, 11558275SEric Cheng FLOW_USER | FLOW_OTHER, &flent); 11568275SEric Cheng 11578275SEric Cheng if (err != 0) 11588275SEric Cheng return (err); 11598275SEric Cheng 11608275SEric Cheng /* 11618275SEric Cheng * We've got a local variable referencing this flow now, so we need 11628275SEric Cheng * to hold it. We'll release this flow before returning. 11638275SEric Cheng * All failures until we return will undo any action that may internally 11648275SEric Cheng * held the flow, so the last REFRELE will assure a clean freeing 11658275SEric Cheng * of resources. 
11668275SEric Cheng */ 11678275SEric Cheng FLOW_REFHOLD(flent); 11688275SEric Cheng 11698275SEric Cheng flent->fe_link_id = linkid; 11708275SEric Cheng FLOW_MARK(flent, FE_INCIPIENT); 11718275SEric Cheng 11728275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 11738275SEric Cheng if (err != 0) { 11748275SEric Cheng FLOW_FINAL_REFRELE(flent); 11758275SEric Cheng return (err); 11768275SEric Cheng } 11778275SEric Cheng 11788275SEric Cheng /* 11798275SEric Cheng * dls will eventually be merged with mac so it's ok 11808275SEric Cheng * to call dls' internal functions. 11818275SEric Cheng */ 11828275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 11838275SEric Cheng if (err != 0) 11848275SEric Cheng goto bail; 11858275SEric Cheng 11868275SEric Cheng link_held = B_TRUE; 11878275SEric Cheng 11888275SEric Cheng /* 11898275SEric Cheng * Add the flow to the global flow table, this table will be per 11908275SEric Cheng * exclusive zone so each zone can have its own flow namespace. 11918275SEric Cheng * RFE 6625651 will fix this. 11928275SEric Cheng * 11938275SEric Cheng */ 11948275SEric Cheng if ((err = mac_flow_hash_add(flent)) != 0) 11958275SEric Cheng goto bail; 11968275SEric Cheng 11978275SEric Cheng hash_added = B_TRUE; 11988275SEric Cheng 11998275SEric Cheng /* 12008275SEric Cheng * do not allow flows to be configured on an anchor VNIC 12018275SEric Cheng */ 12028275SEric Cheng if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) { 12038275SEric Cheng err = ENOTSUP; 12048275SEric Cheng goto bail; 12058275SEric Cheng } 12068275SEric Cheng 12078275SEric Cheng /* 12088275SEric Cheng * Save the zoneid of the underlying link in the flow entry, 12098275SEric Cheng * this is needed to prevent non-global zone from getting 12108275SEric Cheng * statistics information of global zone. 
12118275SEric Cheng */ 12128275SEric Cheng flent->fe_zoneid = dlp->dl_zid; 12138275SEric Cheng 12148275SEric Cheng /* 12158275SEric Cheng * Add the subflow to the subflow table. Also instantiate the flow 1216*8833SVenu.Iyer@Sun.COM * in the mac if there is an active user (we check if the MAC client's 1217*8833SVenu.Iyer@Sun.COM * datapath has been setup). 12188275SEric Cheng */ 1219*8833SVenu.Iyer@Sun.COM err = mac_flow_add_subflow(dlp->dl_mch, flent, 1220*8833SVenu.Iyer@Sun.COM MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch)); 12218275SEric Cheng if (err != 0) 12228275SEric Cheng goto bail; 12238275SEric Cheng 12248275SEric Cheng FLOW_UNMARK(flent, FE_INCIPIENT); 12258275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12268275SEric Cheng mac_perim_exit(mph); 12278275SEric Cheng return (0); 12288275SEric Cheng 12298275SEric Cheng bail: 12308275SEric Cheng if (hash_added) 12318275SEric Cheng mac_flow_hash_remove(flent); 12328275SEric Cheng 12338275SEric Cheng if (link_held) 12348275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12358275SEric Cheng 12368275SEric Cheng /* 12378275SEric Cheng * Wait for any transient global flow hash refs to clear 12388275SEric Cheng * and then release the creation reference on the flow 12398275SEric Cheng */ 12408275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 12418275SEric Cheng FLOW_FINAL_REFRELE(flent); 12428275SEric Cheng mac_perim_exit(mph); 12438275SEric Cheng return (err); 12448275SEric Cheng } 12458275SEric Cheng 12468275SEric Cheng /* 12478275SEric Cheng * mac_link_flow_clean() 12488275SEric Cheng * Internal flow interface used for freeing SRSs and related 12498275SEric Cheng * data structures. Not meant to be used by mac clients. 
12508275SEric Cheng */ 12518275SEric Cheng void 12528275SEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow) 12538275SEric Cheng { 12548275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 12558275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 12568275SEric Cheng boolean_t last_subflow; 12578275SEric Cheng 12588275SEric Cheng ASSERT(mch != NULL); 12598275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 12608275SEric Cheng 12618275SEric Cheng /* 12628275SEric Cheng * This sub flow entry may fail to be fully initialized by 12638275SEric Cheng * mac_link_flow_init(). If so, simply return. 12648275SEric Cheng */ 12658275SEric Cheng if (sub_flow->fe_mcip == NULL) 12668275SEric Cheng return; 12678275SEric Cheng 12688275SEric Cheng last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab); 12698275SEric Cheng /* 12708275SEric Cheng * Tear down the data path 12718275SEric Cheng */ 12728275SEric Cheng mac_datapath_teardown(mcip, sub_flow, SRST_FLOW); 12738275SEric Cheng sub_flow->fe_mcip = NULL; 12748275SEric Cheng 12758275SEric Cheng /* 12768275SEric Cheng * Delete the SRSs associated with this subflow. If this is being 12778275SEric Cheng * driven by flowadm(1M) then the subflow will be deleted by 12788275SEric Cheng * dls_rem_flow. However if this is a result of the interface being 12798275SEric Cheng * unplumbed then the subflow itself won't be deleted. 12808275SEric Cheng */ 12818275SEric Cheng mac_flow_cleanup(sub_flow); 12828275SEric Cheng 12838275SEric Cheng /* 12848275SEric Cheng * If all the subflows are gone, renable some of the stuff 12858275SEric Cheng * we disabled when adding a subflow, polling etc. 12868275SEric Cheng */ 12878275SEric Cheng if (last_subflow) { 12888275SEric Cheng /* 12898275SEric Cheng * The subflow table itself is not protected by any locks or 12908275SEric Cheng * refcnts. Hence quiesce the client upfront before clearing 12918275SEric Cheng * mci_subflow_tab. 
12928275SEric Cheng */ 12938275SEric Cheng mac_client_quiesce(mcip); 12948275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 12958275SEric Cheng mac_flow_tab_destroy(mcip->mci_subflow_tab); 12968275SEric Cheng mcip->mci_subflow_tab = NULL; 12978275SEric Cheng mac_client_restart(mcip); 12988275SEric Cheng } 12998275SEric Cheng } 13008275SEric Cheng 13018275SEric Cheng /* 13028275SEric Cheng * mac_link_flow_remove() 13038275SEric Cheng * Used by flowadm(1m) or kernel mac clients for removing flows. 13048275SEric Cheng */ 13058275SEric Cheng int 13068275SEric Cheng mac_link_flow_remove(char *flow_name) 13078275SEric Cheng { 13088275SEric Cheng flow_entry_t *flent; 13098275SEric Cheng mac_perim_handle_t mph; 13108275SEric Cheng int err; 13118275SEric Cheng datalink_id_t linkid; 13128275SEric Cheng 13138275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13148275SEric Cheng if (err != 0) 13158275SEric Cheng return (err); 13168275SEric Cheng 13178275SEric Cheng linkid = flent->fe_link_id; 13188275SEric Cheng FLOW_USER_REFRELE(flent); 13198275SEric Cheng 13208275SEric Cheng /* 13218275SEric Cheng * The perim must be acquired before acquiring any other references 13228275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 13238275SEric Cheng * FLOW_REFRELE above. 13248275SEric Cheng */ 13258275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 13268275SEric Cheng if (err != 0) 13278275SEric Cheng return (err); 13288275SEric Cheng 13298275SEric Cheng /* 13308275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 13318275SEric Cheng * may have removed it already while we were waiting to enter the 13328275SEric Cheng * link's perimeter. 
13338275SEric Cheng */ 13348275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13358275SEric Cheng if (err != 0) { 13368275SEric Cheng mac_perim_exit(mph); 13378275SEric Cheng return (err); 13388275SEric Cheng } 13398275SEric Cheng FLOW_USER_REFRELE(flent); 13408275SEric Cheng 13418275SEric Cheng /* 13428275SEric Cheng * Remove the flow from the subflow table and deactivate the flow 13438275SEric Cheng * by quiescing and removings its SRSs 13448275SEric Cheng */ 13458275SEric Cheng mac_flow_rem_subflow(flent); 13468275SEric Cheng 13478275SEric Cheng /* 13488275SEric Cheng * Finally, remove the flow from the global table. 13498275SEric Cheng */ 13508275SEric Cheng mac_flow_hash_remove(flent); 13518275SEric Cheng 13528275SEric Cheng /* 13538275SEric Cheng * Wait for any transient global flow hash refs to clear 13548275SEric Cheng * and then release the creation reference on the flow 13558275SEric Cheng */ 13568275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 13578275SEric Cheng FLOW_FINAL_REFRELE(flent); 13588275SEric Cheng 13598275SEric Cheng mac_perim_exit(mph); 13608275SEric Cheng 13618275SEric Cheng return (0); 13628275SEric Cheng } 13638275SEric Cheng 13648275SEric Cheng /* 13658275SEric Cheng * mac_link_flow_modify() 13668275SEric Cheng * Modifies the properties of a flow identified by its name. 
13678275SEric Cheng */ 13688275SEric Cheng int 13698275SEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) 13708275SEric Cheng { 13718275SEric Cheng flow_entry_t *flent; 13728275SEric Cheng mac_client_impl_t *mcip; 13738275SEric Cheng int err = 0; 13748275SEric Cheng mac_perim_handle_t mph; 13758275SEric Cheng datalink_id_t linkid; 13768275SEric Cheng flow_tab_t *flow_tab; 13778275SEric Cheng 13788275SEric Cheng err = mac_validate_props(mrp); 13798275SEric Cheng if (err != 0) 13808275SEric Cheng return (err); 13818275SEric Cheng 13828275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13838275SEric Cheng if (err != 0) 13848275SEric Cheng return (err); 13858275SEric Cheng 13868275SEric Cheng linkid = flent->fe_link_id; 13878275SEric Cheng FLOW_USER_REFRELE(flent); 13888275SEric Cheng 13898275SEric Cheng /* 13908275SEric Cheng * The perim must be acquired before acquiring any other references 13918275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 13928275SEric Cheng * FLOW_REFRELE above. 13938275SEric Cheng */ 13948275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 13958275SEric Cheng if (err != 0) 13968275SEric Cheng return (err); 13978275SEric Cheng 13988275SEric Cheng /* 13998275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 14008275SEric Cheng * may have removed it already while we were waiting to enter the 14018275SEric Cheng * link's perimeter. 14028275SEric Cheng */ 14038275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 14048275SEric Cheng if (err != 0) { 14058275SEric Cheng mac_perim_exit(mph); 14068275SEric Cheng return (err); 14078275SEric Cheng } 14088275SEric Cheng FLOW_USER_REFRELE(flent); 14098275SEric Cheng 14108275SEric Cheng /* 14118275SEric Cheng * If this flow is attached to a MAC client, then pass the request 14128275SEric Cheng * along to the client. 14138275SEric Cheng * Otherwise, just update the cached values. 
14148275SEric Cheng */ 14158275SEric Cheng mcip = flent->fe_mcip; 14168275SEric Cheng mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE); 14178275SEric Cheng if (mcip != NULL) { 14188275SEric Cheng if ((flow_tab = mcip->mci_subflow_tab) == NULL) { 14198275SEric Cheng err = ENOENT; 14208275SEric Cheng } else { 14218275SEric Cheng mac_flow_modify(flow_tab, flent, mrp); 14228275SEric Cheng } 14238275SEric Cheng } else { 14248275SEric Cheng (void) mac_flow_modify_props(flent, mrp); 14258275SEric Cheng } 14268275SEric Cheng 14278275SEric Cheng done: 14288275SEric Cheng mac_perim_exit(mph); 14298275SEric Cheng return (err); 14308275SEric Cheng } 14318275SEric Cheng 14328275SEric Cheng 14338275SEric Cheng /* 14348275SEric Cheng * State structure and misc functions used by mac_link_flow_walk(). 14358275SEric Cheng */ 14368275SEric Cheng typedef struct { 14378275SEric Cheng int (*ws_func)(mac_flowinfo_t *, void *); 14388275SEric Cheng void *ws_arg; 14398275SEric Cheng } flow_walk_state_t; 14408275SEric Cheng 14418275SEric Cheng static void 14428275SEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent) 14438275SEric Cheng { 14448558SGirish.Moodalbail@Sun.COM (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name, 14458558SGirish.Moodalbail@Sun.COM MAXFLOWNAMELEN); 14468275SEric Cheng finfop->fi_link_id = flent->fe_link_id; 14478275SEric Cheng finfop->fi_flow_desc = flent->fe_flow_desc; 14488275SEric Cheng finfop->fi_resource_props = flent->fe_resource_props; 14498275SEric Cheng } 14508275SEric Cheng 14518275SEric Cheng static int 14528275SEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) 14538275SEric Cheng { 14548275SEric Cheng flow_walk_state_t *statep = arg; 14558275SEric Cheng mac_flowinfo_t finfo; 14568275SEric Cheng 14578275SEric Cheng mac_link_flowinfo_copy(&finfo, flent); 14588275SEric Cheng return (statep->ws_func(&finfo, statep->ws_arg)); 14598275SEric Cheng } 14608275SEric Cheng 14618275SEric Cheng /* 
14628275SEric Cheng * mac_link_flow_walk() 14638275SEric Cheng * Invokes callback 'func' for all flows belonging to the specified link. 14648275SEric Cheng */ 14658275SEric Cheng int 14668275SEric Cheng mac_link_flow_walk(datalink_id_t linkid, 14678275SEric Cheng int (*func)(mac_flowinfo_t *, void *), void *arg) 14688275SEric Cheng { 14698275SEric Cheng mac_client_impl_t *mcip; 14708275SEric Cheng mac_perim_handle_t mph; 14718275SEric Cheng flow_walk_state_t state; 14728275SEric Cheng dls_dl_handle_t dlh; 14738275SEric Cheng dls_link_t *dlp; 14748275SEric Cheng int err; 14758275SEric Cheng 14768275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 14778275SEric Cheng if (err != 0) 14788275SEric Cheng return (err); 14798275SEric Cheng 14808275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 14818275SEric Cheng if (err != 0) { 14828275SEric Cheng mac_perim_exit(mph); 14838275SEric Cheng return (err); 14848275SEric Cheng } 14858275SEric Cheng 14868275SEric Cheng mcip = (mac_client_impl_t *)dlp->dl_mch; 14878275SEric Cheng state.ws_func = func; 14888275SEric Cheng state.ws_arg = arg; 14898275SEric Cheng 14908275SEric Cheng err = mac_flow_walk_nolock(mcip->mci_subflow_tab, 14918275SEric Cheng mac_link_flow_walk_cb, &state); 14928275SEric Cheng 14938275SEric Cheng dls_devnet_rele_link(dlh, dlp); 14948275SEric Cheng mac_perim_exit(mph); 14958275SEric Cheng return (err); 14968275SEric Cheng } 14978275SEric Cheng 14988275SEric Cheng /* 14998275SEric Cheng * mac_link_flow_info() 15008275SEric Cheng * Retrieves information about a specific flow. 
15018275SEric Cheng */ 15028275SEric Cheng int 15038275SEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo) 15048275SEric Cheng { 15058275SEric Cheng flow_entry_t *flent; 15068275SEric Cheng int err; 15078275SEric Cheng 15088275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 15098275SEric Cheng if (err != 0) 15108275SEric Cheng return (err); 15118275SEric Cheng 15128275SEric Cheng mac_link_flowinfo_copy(finfo, flent); 15138275SEric Cheng FLOW_USER_REFRELE(flent); 15148275SEric Cheng return (0); 15158275SEric Cheng } 15168275SEric Cheng 15178275SEric Cheng #define HASH_MAC_VID(a, v, s) \ 15188275SEric Cheng ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s)) 15198275SEric Cheng 15208275SEric Cheng #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end)) 15218275SEric Cheng 1522*8833SVenu.Iyer@Sun.COM #define CHECK_AND_ADJUST_START_PTR(s, start) { \ 1523*8833SVenu.Iyer@Sun.COM if ((s)->fs_mp->b_wptr == (start)) { \ 1524*8833SVenu.Iyer@Sun.COM mblk_t *next = (s)->fs_mp->b_cont; \ 1525*8833SVenu.Iyer@Sun.COM if (next == NULL) \ 1526*8833SVenu.Iyer@Sun.COM return (EINVAL); \ 1527*8833SVenu.Iyer@Sun.COM \ 1528*8833SVenu.Iyer@Sun.COM (s)->fs_mp = next; \ 1529*8833SVenu.Iyer@Sun.COM (start) = next->b_rptr; \ 1530*8833SVenu.Iyer@Sun.COM } \ 1531*8833SVenu.Iyer@Sun.COM } 1532*8833SVenu.Iyer@Sun.COM 15338275SEric Cheng /* ARGSUSED */ 15348275SEric Cheng static boolean_t 15358275SEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 15368275SEric Cheng { 15378275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15388275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 15398275SEric Cheng 15408275SEric Cheng return (l2->l2_vid == fd->fd_vid && 15418275SEric Cheng bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0); 15428275SEric Cheng } 15438275SEric Cheng 15448275SEric Cheng /* 15458275SEric Cheng * Layer 2 hash function. 
15468275SEric Cheng * Must be paired with flow_l2_accept() within a set of flow_ops 15478275SEric Cheng * because it assumes the dest address is already extracted. 15488275SEric Cheng */ 15498275SEric Cheng static uint32_t 15508275SEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s) 15518275SEric Cheng { 15528275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15538275SEric Cheng 15548275SEric Cheng return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 15558275SEric Cheng } 15568275SEric Cheng 15578275SEric Cheng /* 15588275SEric Cheng * This is the generic layer 2 accept function. 15598275SEric Cheng * It makes use of mac_header_info() to extract the header length, 15608275SEric Cheng * sap, vlan ID and destination address. 15618275SEric Cheng */ 15628275SEric Cheng static int 15638275SEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s) 15648275SEric Cheng { 15658275SEric Cheng boolean_t is_ether; 15668275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15678275SEric Cheng mac_header_info_t mhi; 15688275SEric Cheng int err; 15698275SEric Cheng 15708275SEric Cheng is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER); 15718275SEric Cheng if ((err = mac_header_info((mac_handle_t)ft->ft_mip, 15728275SEric Cheng s->fs_mp, &mhi)) != 0) { 15738275SEric Cheng if (err == EINVAL) 15748275SEric Cheng err = ENOBUFS; 15758275SEric Cheng 15768275SEric Cheng return (err); 15778275SEric Cheng } 15788275SEric Cheng 15798275SEric Cheng l2->l2_start = s->fs_mp->b_rptr; 15808275SEric Cheng l2->l2_daddr = (uint8_t *)mhi.mhi_daddr; 15818275SEric Cheng 15828275SEric Cheng if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN && 15838275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 15848275SEric Cheng struct ether_vlan_header *evhp = 15858275SEric Cheng (struct ether_vlan_header *)l2->l2_start; 15868275SEric Cheng 15878275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 15888275SEric Cheng return (ENOBUFS); 15898275SEric Cheng 15908275SEric 
Cheng l2->l2_sap = ntohs(evhp->ether_type); 15918275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 15928275SEric Cheng l2->l2_hdrsize = sizeof (*evhp); 15938275SEric Cheng } else { 15948275SEric Cheng l2->l2_sap = mhi.mhi_bindsap; 15958275SEric Cheng l2->l2_vid = 0; 15968275SEric Cheng l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize; 15978275SEric Cheng } 15988275SEric Cheng return (0); 15998275SEric Cheng } 16008275SEric Cheng 16018275SEric Cheng /* 16028275SEric Cheng * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/ 16038275SEric Cheng * accept(). The notable difference is that dest address is now extracted 16048275SEric Cheng * by hash() rather than by accept(). This saves a few memory references 16058275SEric Cheng * for flow tables that do not care about mac addresses. 16068275SEric Cheng */ 16078275SEric Cheng static uint32_t 16088275SEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s) 16098275SEric Cheng { 16108275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16118275SEric Cheng struct ether_vlan_header *evhp; 16128275SEric Cheng 16138275SEric Cheng evhp = (struct ether_vlan_header *)l2->l2_start; 16148275SEric Cheng l2->l2_daddr = evhp->ether_dhost.ether_addr_octet; 16158275SEric Cheng return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 16168275SEric Cheng } 16178275SEric Cheng 16188275SEric Cheng /* ARGSUSED */ 16198275SEric Cheng static int 16208275SEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s) 16218275SEric Cheng { 16228275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16238275SEric Cheng struct ether_vlan_header *evhp; 16248275SEric Cheng uint16_t sap; 16258275SEric Cheng 16268275SEric Cheng evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr; 16278275SEric Cheng l2->l2_start = (uchar_t *)evhp; 16288275SEric Cheng 16298275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header))) 16308275SEric Cheng return (ENOBUFS); 16318275SEric Cheng 16328275SEric Cheng if ((sap = 
ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN && 16338275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 16348275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 16358275SEric Cheng return (ENOBUFS); 16368275SEric Cheng 16378275SEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 16388275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 16398275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_vlan_header); 16408275SEric Cheng } else { 16418275SEric Cheng l2->l2_sap = sap; 16428275SEric Cheng l2->l2_vid = 0; 16438275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_header); 16448275SEric Cheng } 16458275SEric Cheng return (0); 16468275SEric Cheng } 16478275SEric Cheng 16488275SEric Cheng /* 16498275SEric Cheng * Validates a layer 2 flow entry. 16508275SEric Cheng */ 16518275SEric Cheng static int 16528275SEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 16538275SEric Cheng { 16548275SEric Cheng int i; 16558275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 16568275SEric Cheng 16578275SEric Cheng /* 16588275SEric Cheng * Dest address is mandatory. 16598275SEric Cheng */ 16608275SEric Cheng if ((fd->fd_mask & FLOW_LINK_DST) == 0) 16618275SEric Cheng return (EINVAL); 16628275SEric Cheng 16638275SEric Cheng for (i = 0; i < fd->fd_mac_len; i++) { 16648275SEric Cheng if (fd->fd_dst_mac[i] != 0) 16658275SEric Cheng break; 16668275SEric Cheng } 16678275SEric Cheng if (i == fd->fd_mac_len || fd->fd_mac_len < ETHERADDRL) 16688275SEric Cheng return (EINVAL); 16698275SEric Cheng 16708275SEric Cheng if ((fd->fd_mask & FLOW_LINK_VID) != 0) { 16718275SEric Cheng /* 16728275SEric Cheng * VLAN flows are only supported over ethernet macs. 
16738275SEric Cheng */ 16748275SEric Cheng if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER) 16758275SEric Cheng return (EINVAL); 16768275SEric Cheng 16778275SEric Cheng if (fd->fd_vid == 0) 16788275SEric Cheng return (EINVAL); 16798275SEric Cheng 16808275SEric Cheng } 16818275SEric Cheng flent->fe_match = flow_l2_match; 16828275SEric Cheng return (0); 16838275SEric Cheng } 16848275SEric Cheng 16858275SEric Cheng /* 16868275SEric Cheng * Calculates hash index of flow entry. 16878275SEric Cheng */ 16888275SEric Cheng static uint32_t 16898275SEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 16908275SEric Cheng { 16918275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 16928275SEric Cheng 16938275SEric Cheng ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); 16948275SEric Cheng return (HASH_MAC_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); 16958275SEric Cheng } 16968275SEric Cheng 16978275SEric Cheng /* 16988275SEric Cheng * This is used for duplicate flow checking. 16998275SEric Cheng */ 17008275SEric Cheng /* ARGSUSED */ 17018275SEric Cheng static boolean_t 17028275SEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 17038275SEric Cheng { 17048275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 17058275SEric Cheng 17068275SEric Cheng ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0); 17078275SEric Cheng return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac, 17088275SEric Cheng fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid); 17098275SEric Cheng } 17108275SEric Cheng 17118275SEric Cheng /* 17128275SEric Cheng * Generic flow entry insertion function. 17138275SEric Cheng * Used by flow tables that do not have ordering requirements. 
17148275SEric Cheng */ 17158275SEric Cheng /* ARGSUSED */ 17168275SEric Cheng static int 17178275SEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 17188275SEric Cheng flow_entry_t *flent) 17198275SEric Cheng { 17208275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 17218275SEric Cheng 17228275SEric Cheng if (*headp != NULL) { 17238275SEric Cheng ASSERT(flent->fe_next == NULL); 17248275SEric Cheng flent->fe_next = *headp; 17258275SEric Cheng } 17268275SEric Cheng *headp = flent; 17278275SEric Cheng return (0); 17288275SEric Cheng } 17298275SEric Cheng 17308275SEric Cheng /* 17318275SEric Cheng * IP version independent DSField matching function. 17328275SEric Cheng */ 17338275SEric Cheng /* ARGSUSED */ 17348275SEric Cheng static boolean_t 17358275SEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17368275SEric Cheng { 17378275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17388275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17398275SEric Cheng 17408275SEric Cheng switch (l3info->l3_version) { 17418275SEric Cheng case IPV4_VERSION: { 17428275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17438275SEric Cheng 17448275SEric Cheng return ((ipha->ipha_type_of_service & 17458275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17468275SEric Cheng } 17478275SEric Cheng case IPV6_VERSION: { 17488275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 17498275SEric Cheng 17508275SEric Cheng return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) & 17518275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17528275SEric Cheng } 17538275SEric Cheng default: 17548275SEric Cheng return (B_FALSE); 17558275SEric Cheng } 17568275SEric Cheng } 17578275SEric Cheng 17588275SEric Cheng /* 17598275SEric Cheng * IP v4 and v6 address matching. 
17608275SEric Cheng * The netmask only needs to be applied on the packet but not on the 17618275SEric Cheng * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets. 17628275SEric Cheng */ 17638275SEric Cheng 17648275SEric Cheng /* ARGSUSED */ 17658275SEric Cheng static boolean_t 17668275SEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17678275SEric Cheng { 17688275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17698275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17708275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17718275SEric Cheng in_addr_t addr; 17728275SEric Cheng 17738275SEric Cheng addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src); 17748275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 17758275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) == 17768275SEric Cheng V4_PART_OF_V6(fd->fd_local_addr)); 17778275SEric Cheng } 17788275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) == 17798275SEric Cheng V4_PART_OF_V6(fd->fd_remote_addr)); 17808275SEric Cheng } 17818275SEric Cheng 17828275SEric Cheng /* ARGSUSED */ 17838275SEric Cheng static boolean_t 17848275SEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17858275SEric Cheng { 17868275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17878275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17888275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 17898275SEric Cheng in6_addr_t *addrp; 17908275SEric Cheng 17918275SEric Cheng addrp = (l3info->l3_dst_or_src ? 
&ip6h->ip6_dst : &ip6h->ip6_src); 17928275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 17938275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_local_netmask, 17948275SEric Cheng fd->fd_local_addr)); 17958275SEric Cheng } 17968275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr)); 17978275SEric Cheng } 17988275SEric Cheng 17998275SEric Cheng /* ARGSUSED */ 18008275SEric Cheng static boolean_t 18018275SEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 18028275SEric Cheng { 18038275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18048275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 18058275SEric Cheng 18068275SEric Cheng return (l3info->l3_protocol == fd->fd_protocol); 18078275SEric Cheng } 18088275SEric Cheng 18098275SEric Cheng static uint32_t 18108275SEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s) 18118275SEric Cheng { 18128275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18138275SEric Cheng flow_mask_t mask = ft->ft_mask; 18148275SEric Cheng 18158275SEric Cheng if ((mask & FLOW_IP_LOCAL) != 0) { 18168275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 18178275SEric Cheng } else if ((mask & FLOW_IP_REMOTE) != 0) { 18188275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 18198275SEric Cheng } else if ((mask & FLOW_IP_DSFIELD) != 0) { 18208275SEric Cheng /* 18218275SEric Cheng * DSField flents are arranged as a single list. 18228275SEric Cheng */ 18238275SEric Cheng return (0); 18248275SEric Cheng } 18258275SEric Cheng /* 18268275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 18278275SEric Cheng */ 18288275SEric Cheng ASSERT(ft->ft_size >= 2); 18298275SEric Cheng return ((l3info->l3_version == IPV4_VERSION) ? 
0 : 1); 18308275SEric Cheng } 18318275SEric Cheng 18328275SEric Cheng static uint32_t 18338275SEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s) 18348275SEric Cheng { 18358275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18368275SEric Cheng 18378275SEric Cheng return (l3info->l3_protocol % ft->ft_size); 18388275SEric Cheng } 18398275SEric Cheng 18408275SEric Cheng /* ARGSUSED */ 18418275SEric Cheng static int 18428275SEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s) 18438275SEric Cheng { 18448275SEric Cheng flow_l2info_t *l2info = &s->fs_l2info; 18458275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18468275SEric Cheng uint16_t sap = l2info->l2_sap; 18478275SEric Cheng uchar_t *l3_start; 18488275SEric Cheng 1849*8833SVenu.Iyer@Sun.COM l3_start = l2info->l2_start + l2info->l2_hdrsize; 1850*8833SVenu.Iyer@Sun.COM 1851*8833SVenu.Iyer@Sun.COM /* 1852*8833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 1853*8833SVenu.Iyer@Sun.COM */ 1854*8833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l3_start); 1855*8833SVenu.Iyer@Sun.COM 1856*8833SVenu.Iyer@Sun.COM l3info->l3_start = l3_start; 18578275SEric Cheng if (!OK_32PTR(l3_start)) 18588275SEric Cheng return (EINVAL); 18598275SEric Cheng 18608275SEric Cheng switch (sap) { 18618275SEric Cheng case ETHERTYPE_IP: { 18628275SEric Cheng ipha_t *ipha = (ipha_t *)l3_start; 18638275SEric Cheng 18648275SEric Cheng if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH)) 18658275SEric Cheng return (ENOBUFS); 18668275SEric Cheng 18678275SEric Cheng l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha); 18688275SEric Cheng l3info->l3_protocol = ipha->ipha_protocol; 18698275SEric Cheng l3info->l3_version = IPV4_VERSION; 18708275SEric Cheng l3info->l3_fragmented = 18718275SEric Cheng IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags); 18728275SEric Cheng break; 18738275SEric Cheng } 18748275SEric Cheng case ETHERTYPE_IPV6: { 18758275SEric Cheng ip6_t *ip6h = (ip6_t *)l3_start; 18768275SEric 
Cheng uint16_t ip6_hdrlen; 18778275SEric Cheng uint8_t nexthdr; 18788275SEric Cheng 18798275SEric Cheng if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, 18808275SEric Cheng &nexthdr)) { 18818275SEric Cheng return (ENOBUFS); 18828275SEric Cheng } 18838275SEric Cheng l3info->l3_hdrsize = ip6_hdrlen; 18848275SEric Cheng l3info->l3_protocol = nexthdr; 18858275SEric Cheng l3info->l3_version = IPV6_VERSION; 18868275SEric Cheng l3info->l3_fragmented = B_FALSE; 18878275SEric Cheng break; 18888275SEric Cheng } 18898275SEric Cheng default: 18908275SEric Cheng return (EINVAL); 18918275SEric Cheng } 18928275SEric Cheng return (0); 18938275SEric Cheng } 18948275SEric Cheng 18958275SEric Cheng /* ARGSUSED */ 18968275SEric Cheng static int 18978275SEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 18988275SEric Cheng { 18998275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19008275SEric Cheng 19018275SEric Cheng switch (fd->fd_protocol) { 19028275SEric Cheng case IPPROTO_TCP: 19038275SEric Cheng case IPPROTO_UDP: 19048275SEric Cheng case IPPROTO_SCTP: 19058275SEric Cheng case IPPROTO_ICMP: 19068275SEric Cheng case IPPROTO_ICMPV6: 19078275SEric Cheng flent->fe_match = flow_ip_proto_match; 19088275SEric Cheng return (0); 19098275SEric Cheng default: 19108275SEric Cheng return (EINVAL); 19118275SEric Cheng } 19128275SEric Cheng } 19138275SEric Cheng 19148275SEric Cheng /* ARGSUSED */ 19158275SEric Cheng static int 19168275SEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 19178275SEric Cheng { 19188275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19198275SEric Cheng flow_mask_t mask; 19208275SEric Cheng uint8_t version; 19218275SEric Cheng in6_addr_t *addr, *netmask; 19228275SEric Cheng 19238275SEric Cheng /* 19248275SEric Cheng * DSField does not require a IP version. 
19258275SEric Cheng */ 19268275SEric Cheng if (fd->fd_mask == FLOW_IP_DSFIELD) { 19278275SEric Cheng if (fd->fd_dsfield_mask == 0) 19288275SEric Cheng return (EINVAL); 19298275SEric Cheng 19308275SEric Cheng flent->fe_match = flow_ip_dsfield_match; 19318275SEric Cheng return (0); 19328275SEric Cheng } 19338275SEric Cheng 19348275SEric Cheng /* 19358275SEric Cheng * IP addresses must come with a version to avoid ambiguity. 19368275SEric Cheng */ 19378275SEric Cheng if ((fd->fd_mask & FLOW_IP_VERSION) == 0) 19388275SEric Cheng return (EINVAL); 19398275SEric Cheng 19408275SEric Cheng version = fd->fd_ipversion; 19418275SEric Cheng if (version != IPV4_VERSION && version != IPV6_VERSION) 19428275SEric Cheng return (EINVAL); 19438275SEric Cheng 19448275SEric Cheng mask = fd->fd_mask & ~FLOW_IP_VERSION; 19458275SEric Cheng switch (mask) { 19468275SEric Cheng case FLOW_IP_LOCAL: 19478275SEric Cheng addr = &fd->fd_local_addr; 19488275SEric Cheng netmask = &fd->fd_local_netmask; 19498275SEric Cheng break; 19508275SEric Cheng case FLOW_IP_REMOTE: 19518275SEric Cheng addr = &fd->fd_remote_addr; 19528275SEric Cheng netmask = &fd->fd_remote_netmask; 19538275SEric Cheng break; 19548275SEric Cheng default: 19558275SEric Cheng return (EINVAL); 19568275SEric Cheng } 19578275SEric Cheng 19588275SEric Cheng /* 19598275SEric Cheng * Apply netmask onto specified address. 
19608275SEric Cheng */ 19618275SEric Cheng V6_MASK_COPY(*addr, *netmask, *addr); 19628275SEric Cheng if (version == IPV4_VERSION) { 19638275SEric Cheng ipaddr_t v4addr = V4_PART_OF_V6((*addr)); 19648275SEric Cheng ipaddr_t v4mask = V4_PART_OF_V6((*netmask)); 19658275SEric Cheng 19668275SEric Cheng if (v4addr == 0 || v4mask == 0) 19678275SEric Cheng return (EINVAL); 19688275SEric Cheng flent->fe_match = flow_ip_v4_match; 19698275SEric Cheng } else { 19708275SEric Cheng if (IN6_IS_ADDR_UNSPECIFIED(addr) || 19718275SEric Cheng IN6_IS_ADDR_UNSPECIFIED(netmask)) 19728275SEric Cheng return (EINVAL); 19738275SEric Cheng flent->fe_match = flow_ip_v6_match; 19748275SEric Cheng } 19758275SEric Cheng return (0); 19768275SEric Cheng } 19778275SEric Cheng 19788275SEric Cheng static uint32_t 19798275SEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 19808275SEric Cheng { 19818275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19828275SEric Cheng 19838275SEric Cheng return (fd->fd_protocol % ft->ft_size); 19848275SEric Cheng } 19858275SEric Cheng 19868275SEric Cheng static uint32_t 19878275SEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 19888275SEric Cheng { 19898275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19908275SEric Cheng 19918275SEric Cheng /* 19928275SEric Cheng * DSField flents are arranged as a single list. 19938275SEric Cheng */ 19948275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 19958275SEric Cheng return (0); 19968275SEric Cheng 19978275SEric Cheng /* 19988275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 19998275SEric Cheng */ 20008275SEric Cheng ASSERT(ft->ft_size >= 2); 20018275SEric Cheng return ((fd->fd_ipversion == IPV4_VERSION) ? 
0 : 1); 20028275SEric Cheng } 20038275SEric Cheng 20048275SEric Cheng /* ARGSUSED */ 20058275SEric Cheng static boolean_t 20068275SEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20078275SEric Cheng { 20088275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20098275SEric Cheng 20108275SEric Cheng return (fd1->fd_protocol == fd2->fd_protocol); 20118275SEric Cheng } 20128275SEric Cheng 20138275SEric Cheng /* ARGSUSED */ 20148275SEric Cheng static boolean_t 20158275SEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20168275SEric Cheng { 20178275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20188275SEric Cheng in6_addr_t *a1, *m1, *a2, *m2; 20198275SEric Cheng 20208275SEric Cheng ASSERT(fd1->fd_mask == fd2->fd_mask); 20218275SEric Cheng if (fd1->fd_mask == FLOW_IP_DSFIELD) { 20228275SEric Cheng return (fd1->fd_dsfield == fd2->fd_dsfield && 20238275SEric Cheng fd1->fd_dsfield_mask == fd2->fd_dsfield_mask); 20248275SEric Cheng } 20258275SEric Cheng 20268275SEric Cheng /* 20278275SEric Cheng * flow_ip_accept_fe() already validated the version. 
20288275SEric Cheng */ 20298275SEric Cheng ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0); 20308275SEric Cheng if (fd1->fd_ipversion != fd2->fd_ipversion) 20318275SEric Cheng return (B_FALSE); 20328275SEric Cheng 20338275SEric Cheng switch (fd1->fd_mask & ~FLOW_IP_VERSION) { 20348275SEric Cheng case FLOW_IP_LOCAL: 20358275SEric Cheng a1 = &fd1->fd_local_addr; 20368275SEric Cheng m1 = &fd1->fd_local_netmask; 20378275SEric Cheng a2 = &fd2->fd_local_addr; 20388275SEric Cheng m2 = &fd2->fd_local_netmask; 20398275SEric Cheng break; 20408275SEric Cheng case FLOW_IP_REMOTE: 20418275SEric Cheng a1 = &fd1->fd_remote_addr; 20428275SEric Cheng m1 = &fd1->fd_remote_netmask; 20438275SEric Cheng a2 = &fd2->fd_remote_addr; 20448275SEric Cheng m2 = &fd2->fd_remote_netmask; 20458275SEric Cheng break; 20468275SEric Cheng default: 20478275SEric Cheng /* 20488275SEric Cheng * This is unreachable given the checks in 20498275SEric Cheng * flow_ip_accept_fe(). 20508275SEric Cheng */ 20518275SEric Cheng return (B_FALSE); 20528275SEric Cheng } 20538275SEric Cheng 20548275SEric Cheng if (fd1->fd_ipversion == IPV4_VERSION) { 20558275SEric Cheng return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) && 20568275SEric Cheng V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2))); 20578275SEric Cheng 20588275SEric Cheng } else { 20598275SEric Cheng return (IN6_ARE_ADDR_EQUAL(a1, a2) && 20608275SEric Cheng IN6_ARE_ADDR_EQUAL(m1, m2)); 20618275SEric Cheng } 20628275SEric Cheng } 20638275SEric Cheng 20648275SEric Cheng static int 20658275SEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask) 20668275SEric Cheng { 20678275SEric Cheng int bits; 20688275SEric Cheng int plen = IPV6_ABITS; 20698275SEric Cheng int i; 20708275SEric Cheng 20718275SEric Cheng for (i = 3; i >= 0; i--) { 20728275SEric Cheng if (v6mask->s6_addr32[i] == 0) { 20738275SEric Cheng plen -= 32; 20748275SEric Cheng continue; 20758275SEric Cheng } 20768275SEric Cheng bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 20778275SEric Cheng if (bits == 0) 
20788275SEric Cheng break; 20798275SEric Cheng plen -= bits; 20808275SEric Cheng } 20818275SEric Cheng return (plen); 20828275SEric Cheng } 20838275SEric Cheng 20848275SEric Cheng /* ARGSUSED */ 20858275SEric Cheng static int 20868275SEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 20878275SEric Cheng flow_entry_t *flent) 20888275SEric Cheng { 20898275SEric Cheng flow_entry_t **p = headp; 20908275SEric Cheng flow_desc_t *fd0, *fd; 20918275SEric Cheng in6_addr_t *m0, *m; 20928275SEric Cheng int plen0, plen; 20938275SEric Cheng 20948275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 20958275SEric Cheng 20968275SEric Cheng /* 20978275SEric Cheng * No special ordering needed for dsfield. 20988275SEric Cheng */ 20998275SEric Cheng fd0 = &flent->fe_flow_desc; 21008275SEric Cheng if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) { 21018275SEric Cheng if (*p != NULL) { 21028275SEric Cheng ASSERT(flent->fe_next == NULL); 21038275SEric Cheng flent->fe_next = *p; 21048275SEric Cheng } 21058275SEric Cheng *p = flent; 21068275SEric Cheng return (0); 21078275SEric Cheng } 21088275SEric Cheng 21098275SEric Cheng /* 21108275SEric Cheng * IP address flows are arranged in descending prefix length order. 21118275SEric Cheng */ 21128275SEric Cheng m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ? 21138275SEric Cheng &fd0->fd_local_netmask : &fd0->fd_remote_netmask; 21148275SEric Cheng plen0 = flow_ip_mask2plen(m0); 21158275SEric Cheng ASSERT(plen0 != 0); 21168275SEric Cheng 21178275SEric Cheng for (; *p != NULL; p = &(*p)->fe_next) { 21188275SEric Cheng fd = &(*p)->fe_flow_desc; 21198275SEric Cheng 21208275SEric Cheng /* 21218275SEric Cheng * Normally a dsfield flent shouldn't end up on the same 21228275SEric Cheng * list as an IP address because flow tables are (for now) 21238275SEric Cheng * disjoint. If we decide to support both IP and dsfield 21248275SEric Cheng * in the same table in the future, this check will allow 21258275SEric Cheng * for that. 
21268275SEric Cheng */ 21278275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 21288275SEric Cheng continue; 21298275SEric Cheng 21308275SEric Cheng /* 21318275SEric Cheng * We also allow for the mixing of local and remote address 21328275SEric Cheng * flents within one list. 21338275SEric Cheng */ 21348275SEric Cheng m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ? 21358275SEric Cheng &fd->fd_local_netmask : &fd->fd_remote_netmask; 21368275SEric Cheng plen = flow_ip_mask2plen(m); 21378275SEric Cheng 21388275SEric Cheng if (plen <= plen0) 21398275SEric Cheng break; 21408275SEric Cheng } 21418275SEric Cheng if (*p != NULL) { 21428275SEric Cheng ASSERT(flent->fe_next == NULL); 21438275SEric Cheng flent->fe_next = *p; 21448275SEric Cheng } 21458275SEric Cheng *p = flent; 21468275SEric Cheng return (0); 21478275SEric Cheng } 21488275SEric Cheng 21498275SEric Cheng /* 21508275SEric Cheng * Transport layer protocol and port matching functions. 21518275SEric Cheng */ 21528275SEric Cheng 21538275SEric Cheng /* ARGSUSED */ 21548275SEric Cheng static boolean_t 21558275SEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21568275SEric Cheng { 21578275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21588275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21598275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21608275SEric Cheng 21618275SEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 21628275SEric Cheng fd->fd_local_port == l4info->l4_hash_port); 21638275SEric Cheng } 21648275SEric Cheng 21658275SEric Cheng /* ARGSUSED */ 21668275SEric Cheng static boolean_t 21678275SEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21688275SEric Cheng { 21698275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21708275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21718275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21728275SEric Cheng 21738275SEric Cheng return 
(fd->fd_protocol == l3info->l3_protocol && 21748275SEric Cheng fd->fd_remote_port == l4info->l4_hash_port); 21758275SEric Cheng } 21768275SEric Cheng 21778275SEric Cheng /* 21788275SEric Cheng * Transport hash function. 21798275SEric Cheng * Since we only support either local or remote port flows, 21808275SEric Cheng * we only need to extract one of the ports to be used for 21818275SEric Cheng * matching. 21828275SEric Cheng */ 21838275SEric Cheng static uint32_t 21848275SEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s) 21858275SEric Cheng { 21868275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21878275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21888275SEric Cheng uint8_t proto = l3info->l3_protocol; 21898275SEric Cheng boolean_t dst_or_src; 21908275SEric Cheng 21918275SEric Cheng if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) { 21928275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 21938275SEric Cheng } else { 21948275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 21958275SEric Cheng } 21968275SEric Cheng 21978275SEric Cheng l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port : 21988275SEric Cheng l4info->l4_src_port; 21998275SEric Cheng 22008275SEric Cheng return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size); 22018275SEric Cheng } 22028275SEric Cheng 22038275SEric Cheng /* 22048275SEric Cheng * Unlike other accept() functions above, we do not need to get the header 22058275SEric Cheng * size because this is our highest layer so far. If we want to do support 22068275SEric Cheng * other higher layer protocols, we would need to save the l4_hdrsize 22078275SEric Cheng * in the code below. 
22088275SEric Cheng */ 22098275SEric Cheng 22108275SEric Cheng /* ARGSUSED */ 22118275SEric Cheng static int 22128275SEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s) 22138275SEric Cheng { 22148275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 22158275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 22168275SEric Cheng uint8_t proto = l3info->l3_protocol; 22178275SEric Cheng uchar_t *l4_start; 22188275SEric Cheng 2219*8833SVenu.Iyer@Sun.COM l4_start = l3info->l3_start + l3info->l3_hdrsize; 2220*8833SVenu.Iyer@Sun.COM 2221*8833SVenu.Iyer@Sun.COM /* 2222*8833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 2223*8833SVenu.Iyer@Sun.COM */ 2224*8833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l4_start); 2225*8833SVenu.Iyer@Sun.COM 2226*8833SVenu.Iyer@Sun.COM l4info->l4_start = l4_start; 22278275SEric Cheng if (!OK_32PTR(l4_start)) 22288275SEric Cheng return (EINVAL); 22298275SEric Cheng 22308275SEric Cheng if (l3info->l3_fragmented == B_TRUE) 22318275SEric Cheng return (EINVAL); 22328275SEric Cheng 22338275SEric Cheng switch (proto) { 22348275SEric Cheng case IPPROTO_TCP: { 22358275SEric Cheng struct tcphdr *tcph = (struct tcphdr *)l4_start; 22368275SEric Cheng 22378275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph))) 22388275SEric Cheng return (ENOBUFS); 22398275SEric Cheng 22408275SEric Cheng l4info->l4_src_port = tcph->th_sport; 22418275SEric Cheng l4info->l4_dst_port = tcph->th_dport; 22428275SEric Cheng break; 22438275SEric Cheng } 22448275SEric Cheng case IPPROTO_UDP: { 22458275SEric Cheng struct udphdr *udph = (struct udphdr *)l4_start; 22468275SEric Cheng 22478275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph))) 22488275SEric Cheng return (ENOBUFS); 22498275SEric Cheng 22508275SEric Cheng l4info->l4_src_port = udph->uh_sport; 22518275SEric Cheng l4info->l4_dst_port = udph->uh_dport; 22528275SEric Cheng break; 22538275SEric Cheng } 22548275SEric Cheng case IPPROTO_SCTP: { 22558275SEric 
Cheng sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start; 22568275SEric Cheng 22578275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph))) 22588275SEric Cheng return (ENOBUFS); 22598275SEric Cheng 22608275SEric Cheng l4info->l4_src_port = sctph->sh_sport; 22618275SEric Cheng l4info->l4_dst_port = sctph->sh_dport; 22628275SEric Cheng break; 22638275SEric Cheng } 22648275SEric Cheng default: 22658275SEric Cheng return (EINVAL); 22668275SEric Cheng } 22678275SEric Cheng 22688275SEric Cheng return (0); 22698275SEric Cheng } 22708275SEric Cheng 22718275SEric Cheng /* 22728275SEric Cheng * Validates transport flow entry. 22738275SEric Cheng * The protocol field must be present. 22748275SEric Cheng */ 22758275SEric Cheng 22768275SEric Cheng /* ARGSUSED */ 22778275SEric Cheng static int 22788275SEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 22798275SEric Cheng { 22808275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 22818275SEric Cheng flow_mask_t mask = fd->fd_mask; 22828275SEric Cheng 22838275SEric Cheng if ((mask & FLOW_IP_PROTOCOL) == 0) 22848275SEric Cheng return (EINVAL); 22858275SEric Cheng 22868275SEric Cheng switch (fd->fd_protocol) { 22878275SEric Cheng case IPPROTO_TCP: 22888275SEric Cheng case IPPROTO_UDP: 22898275SEric Cheng case IPPROTO_SCTP: 22908275SEric Cheng break; 22918275SEric Cheng default: 22928275SEric Cheng return (EINVAL); 22938275SEric Cheng } 22948275SEric Cheng 22958275SEric Cheng switch (mask & ~FLOW_IP_PROTOCOL) { 22968275SEric Cheng case FLOW_ULP_PORT_LOCAL: 22978275SEric Cheng if (fd->fd_local_port == 0) 22988275SEric Cheng return (EINVAL); 22998275SEric Cheng 23008275SEric Cheng flent->fe_match = flow_transport_lport_match; 23018275SEric Cheng break; 23028275SEric Cheng case FLOW_ULP_PORT_REMOTE: 23038275SEric Cheng if (fd->fd_remote_port == 0) 23048275SEric Cheng return (EINVAL); 23058275SEric Cheng 23068275SEric Cheng flent->fe_match = flow_transport_rport_match; 23078275SEric Cheng break; 23088275SEric 
Cheng case 0: 23098275SEric Cheng /* 23108275SEric Cheng * transport-only flows conflicts with our table type. 23118275SEric Cheng */ 23128275SEric Cheng return (EOPNOTSUPP); 23138275SEric Cheng default: 23148275SEric Cheng return (EINVAL); 23158275SEric Cheng } 23168275SEric Cheng 23178275SEric Cheng return (0); 23188275SEric Cheng } 23198275SEric Cheng 23208275SEric Cheng static uint32_t 23218275SEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 23228275SEric Cheng { 23238275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 23248275SEric Cheng uint16_t port = 0; 23258275SEric Cheng 23268275SEric Cheng port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ? 23278275SEric Cheng fd->fd_local_port : fd->fd_remote_port; 23288275SEric Cheng 23298275SEric Cheng return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size); 23308275SEric Cheng } 23318275SEric Cheng 23328275SEric Cheng /* ARGSUSED */ 23338275SEric Cheng static boolean_t 23348275SEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 23358275SEric Cheng { 23368275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 23378275SEric Cheng 23388275SEric Cheng if (fd1->fd_protocol != fd2->fd_protocol) 23398275SEric Cheng return (B_FALSE); 23408275SEric Cheng 23418275SEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) 23428275SEric Cheng return (fd1->fd_local_port == fd2->fd_local_port); 23438275SEric Cheng 23448275SEric Cheng return (fd1->fd_remote_port == fd2->fd_remote_port); 23458275SEric Cheng } 23468275SEric Cheng 23478275SEric Cheng static flow_ops_t flow_l2_ops = { 23488275SEric Cheng flow_l2_accept_fe, 23498275SEric Cheng flow_l2_hash_fe, 23508275SEric Cheng flow_l2_match_fe, 23518275SEric Cheng flow_generic_insert_fe, 23528275SEric Cheng flow_l2_hash, 23538275SEric Cheng {flow_l2_accept} 23548275SEric Cheng }; 23558275SEric Cheng 23568275SEric Cheng static flow_ops_t flow_ip_ops = { 23578275SEric Cheng flow_ip_accept_fe, 
23588275SEric Cheng flow_ip_hash_fe, 23598275SEric Cheng flow_ip_match_fe, 23608275SEric Cheng flow_ip_insert_fe, 23618275SEric Cheng flow_ip_hash, 23628275SEric Cheng {flow_l2_accept, flow_ip_accept} 23638275SEric Cheng }; 23648275SEric Cheng 23658275SEric Cheng static flow_ops_t flow_ip_proto_ops = { 23668275SEric Cheng flow_ip_proto_accept_fe, 23678275SEric Cheng flow_ip_proto_hash_fe, 23688275SEric Cheng flow_ip_proto_match_fe, 23698275SEric Cheng flow_generic_insert_fe, 23708275SEric Cheng flow_ip_proto_hash, 23718275SEric Cheng {flow_l2_accept, flow_ip_accept} 23728275SEric Cheng }; 23738275SEric Cheng 23748275SEric Cheng static flow_ops_t flow_transport_ops = { 23758275SEric Cheng flow_transport_accept_fe, 23768275SEric Cheng flow_transport_hash_fe, 23778275SEric Cheng flow_transport_match_fe, 23788275SEric Cheng flow_generic_insert_fe, 23798275SEric Cheng flow_transport_hash, 23808275SEric Cheng {flow_l2_accept, flow_ip_accept, flow_transport_accept} 23818275SEric Cheng }; 23828275SEric Cheng 23838275SEric Cheng static flow_tab_info_t flow_tab_info_list[] = { 23848275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2}, 23858275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2}, 23868275SEric Cheng {&flow_ip_ops, FLOW_IP_DSFIELD, 1}, 23878275SEric Cheng {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256}, 23888275SEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024} 23898275SEric Cheng }; 23908275SEric Cheng 23918275SEric Cheng #define FLOW_MAX_TAB_INFO \ 23928275SEric Cheng ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t)) 23938275SEric Cheng 23948275SEric Cheng static flow_tab_info_t * 23958275SEric Cheng mac_flow_tab_info_get(flow_mask_t mask) 23968275SEric Cheng { 23978275SEric Cheng int i; 23988275SEric Cheng 23998275SEric Cheng for (i = 0; i < FLOW_MAX_TAB_INFO; i++) { 24008275SEric Cheng if (mask == flow_tab_info_list[i].fti_mask) 24018275SEric Cheng return (&flow_tab_info_list[i]); 24028275SEric 
Cheng } 24038275SEric Cheng return (NULL); 24048275SEric Cheng } 2405