18275SEric Cheng /* 28275SEric Cheng * CDDL HEADER START 38275SEric Cheng * 48275SEric Cheng * The contents of this file are subject to the terms of the 58275SEric Cheng * Common Development and Distribution License (the "License"). 68275SEric Cheng * You may not use this file except in compliance with the License. 78275SEric Cheng * 88275SEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 98275SEric Cheng * or http://www.opensolaris.org/os/licensing. 108275SEric Cheng * See the License for the specific language governing permissions 118275SEric Cheng * and limitations under the License. 128275SEric Cheng * 138275SEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 148275SEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 158275SEric Cheng * If applicable, add the following below this CDDL HEADER, with the 168275SEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 178275SEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 188275SEric Cheng * 198275SEric Cheng * CDDL HEADER END 208275SEric Cheng */ 218275SEric Cheng 228275SEric Cheng /* 238558SGirish.Moodalbail@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 248275SEric Cheng * Use is subject to license terms. 258275SEric Cheng */ 268275SEric Cheng 278275SEric Cheng #include <sys/strsun.h> 288275SEric Cheng #include <sys/sdt.h> 298275SEric Cheng #include <sys/mac.h> 308275SEric Cheng #include <sys/mac_impl.h> 318275SEric Cheng #include <sys/mac_client_impl.h> 328275SEric Cheng #include <sys/dls.h> 338275SEric Cheng #include <sys/dls_impl.h> 348275SEric Cheng #include <sys/mac_soft_ring.h> 358275SEric Cheng #include <sys/ethernet.h> 368275SEric Cheng #include <sys/vlan.h> 378275SEric Cheng #include <inet/ip.h> 388275SEric Cheng #include <inet/ip6.h> 398275SEric Cheng #include <netinet/tcp.h> 408275SEric Cheng #include <netinet/udp.h> 418275SEric Cheng #include <netinet/sctp.h> 428275SEric Cheng 438275SEric Cheng /* global flow table, will be a per exclusive-zone table later */ 448275SEric Cheng static mod_hash_t *flow_hash; 458275SEric Cheng static krwlock_t flow_tab_lock; 468275SEric Cheng 478275SEric Cheng static kmem_cache_t *flow_cache; 488275SEric Cheng static kmem_cache_t *flow_tab_cache; 498275SEric Cheng static flow_ops_t flow_l2_ops; 508275SEric Cheng 518275SEric Cheng typedef struct { 528275SEric Cheng const char *fs_name; 538275SEric Cheng uint_t fs_offset; 548275SEric Cheng } flow_stats_info_t; 558275SEric Cheng 568275SEric Cheng #define FS_OFF(f) (offsetof(flow_stats_t, f)) 578275SEric Cheng static flow_stats_info_t flow_stats_list[] = { 588275SEric Cheng {"rbytes", FS_OFF(fs_rbytes)}, 598275SEric Cheng {"ipackets", FS_OFF(fs_ipackets)}, 608275SEric Cheng {"ierrors", FS_OFF(fs_ierrors)}, 618275SEric Cheng {"obytes", FS_OFF(fs_obytes)}, 628275SEric Cheng {"opackets", FS_OFF(fs_opackets)}, 638275SEric Cheng {"oerrors", FS_OFF(fs_oerrors)} 648275SEric Cheng }; 658275SEric Cheng #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t)) 668275SEric Cheng 678275SEric Cheng /* 688275SEric Cheng * Checks whether a flow mask is legal. 698275SEric Cheng */ 708275SEric Cheng static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t); 718275SEric Cheng 728275SEric Cheng static void 738275SEric Cheng flow_stat_init(kstat_named_t *knp) 748275SEric Cheng { 758275SEric Cheng int i; 768275SEric Cheng 778275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 788275SEric Cheng kstat_named_init(knp, flow_stats_list[i].fs_name, 798275SEric Cheng KSTAT_DATA_UINT64); 808275SEric Cheng } 818275SEric Cheng } 828275SEric Cheng 838275SEric Cheng static int 848275SEric Cheng flow_stat_update(kstat_t *ksp, int rw) 858275SEric Cheng { 868275SEric Cheng flow_entry_t *fep = ksp->ks_private; 878275SEric Cheng flow_stats_t *fsp = &fep->fe_flowstats; 888275SEric Cheng kstat_named_t *knp = ksp->ks_data; 898275SEric Cheng uint64_t *statp; 908275SEric Cheng zoneid_t zid; 918275SEric Cheng int i; 928275SEric Cheng 938275SEric Cheng if (rw != KSTAT_READ) 948275SEric Cheng return (EACCES); 958275SEric Cheng 968275SEric Cheng zid = getzoneid(); 978275SEric Cheng if (zid != GLOBAL_ZONEID && zid != fep->fe_zoneid) { 988275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) 998275SEric Cheng knp->value.ui64 = 0; 1008275SEric Cheng 1018275SEric Cheng return (0); 1028275SEric Cheng } 1038275SEric Cheng 1048275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 1058275SEric Cheng statp = (uint64_t *) 1068275SEric Cheng ((uchar_t *)fsp + flow_stats_list[i].fs_offset); 1078275SEric Cheng 1088275SEric Cheng knp->value.ui64 = *statp; 1098275SEric Cheng } 1108275SEric Cheng return (0); 1118275SEric Cheng } 1128275SEric Cheng 1138275SEric Cheng static void 1148275SEric Cheng flow_stat_create(flow_entry_t *fep) 1158275SEric Cheng { 1168275SEric Cheng kstat_t *ksp; 1178275SEric Cheng kstat_named_t *knp; 1188275SEric Cheng uint_t nstats = FS_SIZE; 1198275SEric Cheng 1208275SEric Cheng ksp = kstat_create("unix", 0, (char *)fep->fe_flow_name, "flow", 1218275SEric Cheng KSTAT_TYPE_NAMED, nstats, 0); 1228275SEric Cheng if (ksp == NULL) 1238275SEric Cheng return; 1248275SEric Cheng 1258275SEric Cheng ksp->ks_update = flow_stat_update; 1268275SEric Cheng ksp->ks_private = fep; 1278275SEric Cheng fep->fe_ksp = ksp; 1288275SEric Cheng 1298275SEric Cheng knp = (kstat_named_t *)ksp->ks_data; 1308275SEric Cheng flow_stat_init(knp); 1318275SEric Cheng kstat_install(ksp); 1328275SEric Cheng } 1338275SEric Cheng 1348275SEric Cheng void 1358275SEric Cheng flow_stat_destroy(flow_entry_t *fep) 1368275SEric Cheng { 1378275SEric Cheng if (fep->fe_ksp != NULL) { 1388275SEric Cheng kstat_delete(fep->fe_ksp); 1398275SEric Cheng fep->fe_ksp = NULL; 1408275SEric Cheng } 1418275SEric Cheng } 1428275SEric Cheng 1438275SEric Cheng /* 1448275SEric Cheng * Initialize the flow table 1458275SEric Cheng */ 1468275SEric Cheng void 1478275SEric Cheng mac_flow_init() 1488275SEric Cheng { 1498275SEric Cheng flow_cache = kmem_cache_create("flow_entry_cache", 1508275SEric Cheng sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1518275SEric Cheng flow_tab_cache = kmem_cache_create("flow_tab_cache", 1528275SEric Cheng sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1538275SEric Cheng flow_hash = mod_hash_create_extended("flow_hash", 1548275SEric Cheng 100, mod_hash_null_keydtor, mod_hash_null_valdtor, 1558275SEric Cheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 1568275SEric Cheng rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL); 1578275SEric Cheng } 1588275SEric Cheng 1598275SEric Cheng /* 1608275SEric Cheng * Cleanup and release the flow table 1618275SEric Cheng */ 1628275SEric Cheng void 1638275SEric Cheng mac_flow_fini() 1648275SEric Cheng { 1658275SEric Cheng kmem_cache_destroy(flow_cache); 1668275SEric Cheng kmem_cache_destroy(flow_tab_cache); 1678275SEric Cheng mod_hash_destroy_hash(flow_hash); 1688275SEric Cheng rw_destroy(&flow_tab_lock); 1698275SEric Cheng } 1708275SEric Cheng 1718275SEric Cheng /* 1728275SEric Cheng * mac_create_flow(): create a flow_entry_t. 1738275SEric Cheng */ 1748275SEric Cheng int 1758275SEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, 1768275SEric Cheng void *client_cookie, uint_t type, flow_entry_t **flentp) 1778275SEric Cheng { 1788275SEric Cheng flow_entry_t *flent = *flentp; 1798275SEric Cheng int err = 0; 1808275SEric Cheng 1818275SEric Cheng if (mrp != NULL) { 1828275SEric Cheng err = mac_validate_props(mrp); 1838275SEric Cheng if (err != 0) 1848275SEric Cheng return (err); 1858275SEric Cheng } 1868275SEric Cheng 1878275SEric Cheng if (flent == NULL) { 1888275SEric Cheng flent = kmem_cache_alloc(flow_cache, KM_SLEEP); 1898275SEric Cheng bzero(flent, sizeof (*flent)); 1908275SEric Cheng mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL); 1918275SEric Cheng cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL); 1928275SEric Cheng 1938275SEric Cheng /* Initialize the receiver function to a safe routine */ 1948275SEric Cheng flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; 1958275SEric Cheng flent->fe_index = -1; 1968275SEric Cheng } 1978558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 1988275SEric Cheng 1998275SEric Cheng /* This is an initial flow, will be configured later */ 2008275SEric Cheng if (fd == NULL) { 2018275SEric Cheng *flentp = flent; 2028275SEric Cheng return (0); 2038275SEric Cheng } 2048275SEric Cheng 2058275SEric Cheng flent->fe_client_cookie = client_cookie; 2068275SEric Cheng flent->fe_type = type; 2078275SEric Cheng 2088275SEric Cheng /* 2098275SEric Cheng * As flow creation is only allowed in global zone, this will 2108275SEric Cheng * always set fe_zoneid to GLOBAL_ZONEID, and dls_add_flow() will 2118275SEric Cheng * later set the right value. 2128275SEric Cheng */ 2138275SEric Cheng flent->fe_zoneid = getzoneid(); 2148275SEric Cheng 2158275SEric Cheng /* Save flow desc */ 2168275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 2178275SEric Cheng 2188275SEric Cheng if (mrp != NULL) { 2198275SEric Cheng /* 2208275SEric Cheng * We have already set fe_resource_props for a Link. 2218275SEric Cheng */ 2228275SEric Cheng if (type & FLOW_USER) { 2238275SEric Cheng bcopy(mrp, &flent->fe_resource_props, 2248275SEric Cheng sizeof (mac_resource_props_t)); 2258275SEric Cheng } 2268275SEric Cheng /* 2278275SEric Cheng * The effective resource list should reflect the priority 2288275SEric Cheng * that we set implicitly. 2298275SEric Cheng */ 2308275SEric Cheng if (!(mrp->mrp_mask & MRP_PRIORITY)) 2318275SEric Cheng mrp->mrp_mask |= MRP_PRIORITY; 2328275SEric Cheng if (type & FLOW_USER) 2338275SEric Cheng mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 2348275SEric Cheng else 2358275SEric Cheng mrp->mrp_priority = MPL_LINK_DEFAULT; 2368275SEric Cheng bcopy(mrp, &flent->fe_effective_props, 2378275SEric Cheng sizeof (mac_resource_props_t)); 2388275SEric Cheng } 2398275SEric Cheng flow_stat_create(flent); 2408275SEric Cheng 2418275SEric Cheng *flentp = flent; 2428275SEric Cheng return (0); 2438275SEric Cheng } 2448275SEric Cheng 2458275SEric Cheng /* 2468275SEric Cheng * Validate flow entry and add it to a flow table. 2478275SEric Cheng */ 2488275SEric Cheng int 2498275SEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent) 2508275SEric Cheng { 2518275SEric Cheng flow_entry_t **headp, **p; 2528275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 2538275SEric Cheng flow_mask_t mask; 2548275SEric Cheng uint32_t index; 2558275SEric Cheng int err; 2568275SEric Cheng 2578275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 2588275SEric Cheng 2598275SEric Cheng /* 2608275SEric Cheng * Check for invalid bits in mask. 2618275SEric Cheng */ 2628275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 2638275SEric Cheng if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0) 2648275SEric Cheng return (EOPNOTSUPP); 2658275SEric Cheng 2668275SEric Cheng /* 2678275SEric Cheng * Validate flent. 2688275SEric Cheng */ 2698275SEric Cheng if ((err = ops->fo_accept_fe(ft, flent)) != 0) { 2708275SEric Cheng DTRACE_PROBE3(accept_failed, flow_tab_t *, ft, 2718275SEric Cheng flow_entry_t *, flent, int, err); 2728275SEric Cheng return (err); 2738275SEric Cheng } 2748275SEric Cheng 2758275SEric Cheng /* 2768275SEric Cheng * Flent is valid. now calculate hash and insert it 2778275SEric Cheng * into hash table. 2788275SEric Cheng */ 2798275SEric Cheng index = ops->fo_hash_fe(ft, flent); 2808275SEric Cheng 2818275SEric Cheng /* 2828275SEric Cheng * We do not need a lock up until now because we were 2838275SEric Cheng * not accessing the flow table. 2848275SEric Cheng */ 2858275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 2868275SEric Cheng headp = &ft->ft_table[index]; 2878275SEric Cheng 2888275SEric Cheng /* 2898275SEric Cheng * Check for duplicate flow. 2908275SEric Cheng */ 2918275SEric Cheng for (p = headp; *p != NULL; p = &(*p)->fe_next) { 2928275SEric Cheng if ((*p)->fe_flow_desc.fd_mask != 2938275SEric Cheng flent->fe_flow_desc.fd_mask) 2948275SEric Cheng continue; 2958275SEric Cheng 2968275SEric Cheng if (ft->ft_ops.fo_match_fe(ft, *p, flent)) { 2978275SEric Cheng rw_exit(&ft->ft_lock); 2988275SEric Cheng DTRACE_PROBE3(dup_flow, flow_tab_t *, ft, 2998275SEric Cheng flow_entry_t *, flent, int, err); 3008275SEric Cheng return (EALREADY); 3018275SEric Cheng } 3028275SEric Cheng } 3038275SEric Cheng 3048275SEric Cheng /* 3058275SEric Cheng * Insert flow to hash list. 3068275SEric Cheng */ 3078275SEric Cheng err = ops->fo_insert_fe(ft, headp, flent); 3088275SEric Cheng if (err != 0) { 3098275SEric Cheng rw_exit(&ft->ft_lock); 3108275SEric Cheng DTRACE_PROBE3(insert_failed, flow_tab_t *, ft, 3118275SEric Cheng flow_entry_t *, flent, int, err); 3128275SEric Cheng return (err); 3138275SEric Cheng } 3148275SEric Cheng 3158275SEric Cheng /* 3168275SEric Cheng * Save the hash index so it can be used by mac_flow_remove(). 3178275SEric Cheng */ 3188275SEric Cheng flent->fe_index = (int)index; 3198275SEric Cheng 3208275SEric Cheng /* 3218275SEric Cheng * Save the flow tab back reference. 3228275SEric Cheng */ 3238275SEric Cheng flent->fe_flow_tab = ft; 3248275SEric Cheng FLOW_MARK(flent, FE_FLOW_TAB); 3258275SEric Cheng ft->ft_flow_count++; 3268275SEric Cheng rw_exit(&ft->ft_lock); 3278275SEric Cheng return (0); 3288275SEric Cheng } 3298275SEric Cheng 3308275SEric Cheng /* 3318275SEric Cheng * Remove a flow from a mac client's subflow table 3328275SEric Cheng */ 3338275SEric Cheng void 3348275SEric Cheng mac_flow_rem_subflow(flow_entry_t *flent) 3358275SEric Cheng { 3368275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 3378275SEric Cheng mac_client_impl_t *mcip = ft->ft_mcip; 338*9073SCathy.Zhou@Sun.COM mac_handle_t mh = (mac_handle_t)ft->ft_mip; 3398275SEric Cheng 340*9073SCathy.Zhou@Sun.COM ASSERT(MAC_PERIM_HELD(mh)); 3418275SEric Cheng 3428275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 3438275SEric Cheng if (flent->fe_mcip == NULL) { 3448275SEric Cheng /* 3458275SEric Cheng * The interface is not yet plumbed and mac_client_flow_add 3468275SEric Cheng * was not done. 3478275SEric Cheng */ 3488275SEric Cheng if (FLOW_TAB_EMPTY(ft)) { 3498275SEric Cheng mac_flow_tab_destroy(ft); 3508275SEric Cheng mcip->mci_subflow_tab = NULL; 3518275SEric Cheng } 352*9073SCathy.Zhou@Sun.COM } else { 353*9073SCathy.Zhou@Sun.COM mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 354*9073SCathy.Zhou@Sun.COM mac_link_flow_clean((mac_client_handle_t)mcip, flent); 3558275SEric Cheng } 356*9073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 3578275SEric Cheng } 3588275SEric Cheng 3598275SEric Cheng /* 3608275SEric Cheng * Add a flow to a mac client's subflow table and instantiate the flow 3618275SEric Cheng * in the mac by creating the associated SRSs etc. 3628275SEric Cheng */ 3638275SEric Cheng int 3648275SEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, 3658275SEric Cheng boolean_t instantiate_flow) 3668275SEric Cheng { 3678275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 368*9073SCathy.Zhou@Sun.COM mac_handle_t mh = (mac_handle_t)mcip->mci_mip; 3698275SEric Cheng flow_tab_info_t *ftinfo; 3708275SEric Cheng flow_mask_t mask; 3718275SEric Cheng flow_tab_t *ft; 3728275SEric Cheng int err; 3738275SEric Cheng boolean_t ft_created = B_FALSE; 3748275SEric Cheng 375*9073SCathy.Zhou@Sun.COM ASSERT(MAC_PERIM_HELD(mh)); 376*9073SCathy.Zhou@Sun.COM 377*9073SCathy.Zhou@Sun.COM if ((err = mac_fastpath_disable(mh)) != 0) 378*9073SCathy.Zhou@Sun.COM return (err); 3798275SEric Cheng 3808275SEric Cheng /* 3818275SEric Cheng * If the subflow table exists already just add the new subflow 3828275SEric Cheng * to the existing table, else we create a new subflow table below. 3838275SEric Cheng */ 3848275SEric Cheng ft = mcip->mci_subflow_tab; 3858275SEric Cheng if (ft == NULL) { 3868275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 3878275SEric Cheng /* 3888275SEric Cheng * Try to create a new table and then add the subflow to the 3898275SEric Cheng * newly created subflow table 3908275SEric Cheng */ 391*9073SCathy.Zhou@Sun.COM if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) { 392*9073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 3938275SEric Cheng return (EOPNOTSUPP); 394*9073SCathy.Zhou@Sun.COM } 3958275SEric Cheng 3968275SEric Cheng mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size, 3978275SEric Cheng mcip->mci_mip, &ft); 3988275SEric Cheng ft_created = B_TRUE; 3998275SEric Cheng } 4008275SEric Cheng 4018275SEric Cheng err = mac_flow_add(ft, flent); 4028275SEric Cheng if (err != 0) { 4038275SEric Cheng if (ft_created) 4048275SEric Cheng mac_flow_tab_destroy(ft); 405*9073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 4068275SEric Cheng return (err); 4078275SEric Cheng } 4088275SEric Cheng 4098275SEric Cheng if (instantiate_flow) { 4108275SEric Cheng /* Now activate the flow by creating its SRSs */ 4118275SEric Cheng ASSERT(MCIP_DATAPATH_SETUP(mcip)); 4128275SEric Cheng err = mac_link_flow_init((mac_client_handle_t)mcip, flent); 4138275SEric Cheng if (err != 0) { 4148275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 4158275SEric Cheng if (ft_created) 4168275SEric Cheng mac_flow_tab_destroy(ft); 417*9073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 4188275SEric Cheng return (err); 4198275SEric Cheng } 4208275SEric Cheng } else { 4218275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 4228275SEric Cheng } 4238275SEric Cheng if (ft_created) { 4248275SEric Cheng ASSERT(mcip->mci_subflow_tab == NULL); 4258275SEric Cheng ft->ft_mcip = mcip; 4268275SEric Cheng mcip->mci_subflow_tab = ft; 4278275SEric Cheng if (instantiate_flow) 4288275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 4298275SEric Cheng } 4308275SEric Cheng return (0); 4318275SEric Cheng } 4328275SEric Cheng 4338275SEric Cheng /* 4348275SEric Cheng * Remove flow entry from flow table. 4358275SEric Cheng */ 4368275SEric Cheng void 4378275SEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp) 4388275SEric Cheng { 4398275SEric Cheng flow_entry_t **fp; 4408275SEric Cheng 4418275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 4428275SEric Cheng if (!(flent->fe_flags & FE_FLOW_TAB)) 4438275SEric Cheng return; 4448275SEric Cheng 4458275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 4468275SEric Cheng /* 4478275SEric Cheng * If this is a permanent removal from the flow table, mark it 4488275SEric Cheng * CONDEMNED to prevent future references. If this is a temporary 4498275SEric Cheng * removal from the table, say to update the flow descriptor then 4508275SEric Cheng * we don't mark it CONDEMNED 4518275SEric Cheng */ 4528275SEric Cheng if (!temp) 4538275SEric Cheng FLOW_MARK(flent, FE_CONDEMNED); 4548275SEric Cheng /* 4558275SEric Cheng * Locate the specified flent. 4568275SEric Cheng */ 4578275SEric Cheng fp = &ft->ft_table[flent->fe_index]; 4588275SEric Cheng while (*fp != flent) 4598275SEric Cheng fp = &(*fp)->fe_next; 4608275SEric Cheng 4618275SEric Cheng /* 4628275SEric Cheng * The flent must exist. Otherwise it's a bug. 4638275SEric Cheng */ 4648275SEric Cheng ASSERT(fp != NULL); 4658275SEric Cheng *fp = flent->fe_next; 4668275SEric Cheng flent->fe_next = NULL; 4678275SEric Cheng 4688275SEric Cheng /* 4698275SEric Cheng * Reset fe_index to -1 so any attempt to call mac_flow_remove() 4708275SEric Cheng * on a flent that is supposed to be in the table (FE_FLOW_TAB) 4718275SEric Cheng * will panic. 4728275SEric Cheng */ 4738275SEric Cheng flent->fe_index = -1; 4748275SEric Cheng FLOW_UNMARK(flent, FE_FLOW_TAB); 4758275SEric Cheng ft->ft_flow_count--; 4768275SEric Cheng rw_exit(&ft->ft_lock); 4778275SEric Cheng } 4788275SEric Cheng 4798275SEric Cheng /* 4808275SEric Cheng * This is the flow lookup routine used by the mac sw classifier engine. 4818275SEric Cheng */ 4828275SEric Cheng int 4838275SEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp) 4848275SEric Cheng { 4858275SEric Cheng flow_state_t s; 4868275SEric Cheng flow_entry_t *flent; 4878275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 4888275SEric Cheng boolean_t retried = B_FALSE; 4898275SEric Cheng int i, err; 4908275SEric Cheng 4918275SEric Cheng s.fs_flags = flags; 4928833SVenu.Iyer@Sun.COM retry: 4938275SEric Cheng s.fs_mp = mp; 4948275SEric Cheng 4958275SEric Cheng /* 4968275SEric Cheng * Walk the list of predeclared accept functions. 4978275SEric Cheng * Each of these would accumulate enough state to allow the next 4988275SEric Cheng * accept routine to make progress. 4998275SEric Cheng */ 5008275SEric Cheng for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) { 5018275SEric Cheng if ((err = (ops->fo_accept[i])(ft, &s)) != 0) { 5028833SVenu.Iyer@Sun.COM mblk_t *last; 5038833SVenu.Iyer@Sun.COM 5048275SEric Cheng /* 5058275SEric Cheng * ENOBUFS indicates that the mp could be too short 5068275SEric Cheng * and may need a pullup. 5078275SEric Cheng */ 5088275SEric Cheng if (err != ENOBUFS || retried) 5098275SEric Cheng return (err); 5108275SEric Cheng 5118275SEric Cheng /* 5128833SVenu.Iyer@Sun.COM * The pullup is done on the last processed mblk, not 5138833SVenu.Iyer@Sun.COM * the starting one. pullup is not done if the mblk 5148833SVenu.Iyer@Sun.COM * has references or if b_cont is NULL. 5158275SEric Cheng */ 5168833SVenu.Iyer@Sun.COM last = s.fs_mp; 5178833SVenu.Iyer@Sun.COM if (DB_REF(last) > 1 || last->b_cont == NULL || 5188833SVenu.Iyer@Sun.COM pullupmsg(last, -1) == 0) 5198275SEric Cheng return (EINVAL); 5208275SEric Cheng 5218275SEric Cheng retried = B_TRUE; 5228275SEric Cheng DTRACE_PROBE2(need_pullup, flow_tab_t *, ft, 5238275SEric Cheng flow_state_t *, &s); 5248275SEric Cheng goto retry; 5258275SEric Cheng } 5268275SEric Cheng } 5278275SEric Cheng 5288275SEric Cheng /* 5298275SEric Cheng * The packet is considered sane. We may now attempt to 5308275SEric Cheng * find the corresponding flent. 5318275SEric Cheng */ 5328275SEric Cheng rw_enter(&ft->ft_lock, RW_READER); 5338275SEric Cheng flent = ft->ft_table[ops->fo_hash(ft, &s)]; 5348275SEric Cheng for (; flent != NULL; flent = flent->fe_next) { 5358275SEric Cheng if (flent->fe_match(ft, flent, &s)) { 5368275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 5378275SEric Cheng if (err != 0) 5388275SEric Cheng continue; 5398275SEric Cheng *flentp = flent; 5408275SEric Cheng rw_exit(&ft->ft_lock); 5418275SEric Cheng return (0); 5428275SEric Cheng } 5438275SEric Cheng } 5448275SEric Cheng rw_exit(&ft->ft_lock); 5458275SEric Cheng return (ENOENT); 5468275SEric Cheng } 5478275SEric Cheng 5488275SEric Cheng /* 5498275SEric Cheng * Walk flow table. 5508275SEric Cheng * The caller is assumed to have proper perimeter protection. 5518275SEric Cheng */ 5528275SEric Cheng int 5538275SEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5548275SEric Cheng void *arg) 5558275SEric Cheng { 5568275SEric Cheng int err, i, cnt = 0; 5578275SEric Cheng flow_entry_t *flent; 5588275SEric Cheng 5598275SEric Cheng if (ft == NULL) 5608275SEric Cheng return (0); 5618275SEric Cheng 5628275SEric Cheng for (i = 0; i < ft->ft_size; i++) { 5638275SEric Cheng for (flent = ft->ft_table[i]; flent != NULL; 5648275SEric Cheng flent = flent->fe_next) { 5658275SEric Cheng cnt++; 5668275SEric Cheng err = (*fn)(flent, arg); 5678275SEric Cheng if (err != 0) 5688275SEric Cheng return (err); 5698275SEric Cheng } 5708275SEric Cheng } 5718275SEric Cheng VERIFY(cnt == ft->ft_flow_count); 5728275SEric Cheng return (0); 5738275SEric Cheng } 5748275SEric Cheng 5758275SEric Cheng /* 5768275SEric Cheng * Same as the above except a mutex is used for protection here. 5778275SEric Cheng */ 5788275SEric Cheng int 5798275SEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5808275SEric Cheng void *arg) 5818275SEric Cheng { 5828275SEric Cheng int err; 5838275SEric Cheng 5848275SEric Cheng if (ft == NULL) 5858275SEric Cheng return (0); 5868275SEric Cheng 5878275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 5888275SEric Cheng err = mac_flow_walk_nolock(ft, fn, arg); 5898275SEric Cheng rw_exit(&ft->ft_lock); 5908275SEric Cheng return (err); 5918275SEric Cheng } 5928275SEric Cheng 5938275SEric Cheng static boolean_t mac_flow_clean(flow_entry_t *); 5948275SEric Cheng 5958275SEric Cheng /* 5968275SEric Cheng * Destroy a flow entry. Called when the last reference on a flow is released. 5978275SEric Cheng */ 5988275SEric Cheng void 5998275SEric Cheng mac_flow_destroy(flow_entry_t *flent) 6008275SEric Cheng { 6018275SEric Cheng ASSERT(flent->fe_refcnt == 0); 6028275SEric Cheng 6038275SEric Cheng if ((flent->fe_type & FLOW_USER) != 0) { 6048275SEric Cheng ASSERT(mac_flow_clean(flent)); 6058275SEric Cheng } else { 6068275SEric Cheng mac_flow_cleanup(flent); 6078275SEric Cheng } 6088275SEric Cheng 6098275SEric Cheng mutex_destroy(&flent->fe_lock); 6108275SEric Cheng cv_destroy(&flent->fe_cv); 6118275SEric Cheng flow_stat_destroy(flent); 6128275SEric Cheng kmem_cache_free(flow_cache, flent); 6138275SEric Cheng } 6148275SEric Cheng 6158275SEric Cheng /* 6168275SEric Cheng * XXX eric 6178275SEric Cheng * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and 6188275SEric Cheng * mac_link_flow_modify() should really be moved/reworked into the 6198275SEric Cheng * two functions below. This would consolidate all the mac property 6208275SEric Cheng * checking in one place. I'm leaving this alone for now since it's 6218275SEric Cheng * out of scope of the new flows work. 6228275SEric Cheng */ 6238275SEric Cheng /* ARGSUSED */ 6248275SEric Cheng uint32_t 6258275SEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) 6268275SEric Cheng { 6278275SEric Cheng uint32_t changed_mask = 0; 6288275SEric Cheng mac_resource_props_t *fmrp = &flent->fe_effective_props; 6298275SEric Cheng int i; 6308275SEric Cheng 6318275SEric Cheng if ((mrp->mrp_mask & MRP_MAXBW) != 0 && 6328275SEric Cheng (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { 6338275SEric Cheng changed_mask |= MRP_MAXBW; 6348275SEric Cheng fmrp->mrp_maxbw = mrp->mrp_maxbw; 6358275SEric Cheng if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { 6368275SEric Cheng fmrp->mrp_mask &= ~MRP_MAXBW; 6378275SEric Cheng } else { 6388275SEric Cheng fmrp->mrp_mask |= MRP_MAXBW; 6398275SEric Cheng } 6408275SEric Cheng } 6418275SEric Cheng 6428275SEric Cheng if ((mrp->mrp_mask & MRP_PRIORITY) != 0) { 6438275SEric Cheng if (fmrp->mrp_priority != mrp->mrp_priority) 6448275SEric Cheng changed_mask |= MRP_PRIORITY; 6458275SEric Cheng if (mrp->mrp_priority == MPL_RESET) { 6468275SEric Cheng fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 6478275SEric Cheng fmrp->mrp_mask &= ~MRP_PRIORITY; 6488275SEric Cheng } else { 6498275SEric Cheng fmrp->mrp_priority = mrp->mrp_priority; 6508275SEric Cheng fmrp->mrp_mask |= MRP_PRIORITY; 6518275SEric Cheng } 6528275SEric Cheng } 6538275SEric Cheng 6548275SEric Cheng /* modify fanout */ 6558275SEric Cheng if ((mrp->mrp_mask & MRP_CPUS) != 0) { 6568275SEric Cheng if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) && 6578275SEric Cheng (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) { 6588275SEric Cheng for (i = 0; i < mrp->mrp_ncpus; i++) { 6598275SEric Cheng if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i]) 6608275SEric Cheng break; 6618275SEric Cheng } 6628275SEric Cheng if (i == mrp->mrp_ncpus) { 6638275SEric Cheng /* 6648275SEric Cheng * The new set of cpus passed is exactly 6658275SEric Cheng * the same as the existing set. 6668275SEric Cheng */ 6678275SEric Cheng return (changed_mask); 6688275SEric Cheng } 6698275SEric Cheng } 6708275SEric Cheng changed_mask |= MRP_CPUS; 6718275SEric Cheng MAC_COPY_CPUS(mrp, fmrp); 6728275SEric Cheng } 6738275SEric Cheng return (changed_mask); 6748275SEric Cheng } 6758275SEric Cheng 6768275SEric Cheng void 6778275SEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) 6788275SEric Cheng { 6798275SEric Cheng uint32_t changed_mask; 6808275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 6818275SEric Cheng mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); 6828275SEric Cheng 6838275SEric Cheng ASSERT(flent != NULL); 6848275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 6858275SEric Cheng 6868275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 6878275SEric Cheng 6888275SEric Cheng /* Update the cached values inside the subflow entry */ 6898275SEric Cheng changed_mask = mac_flow_modify_props(flent, mrp); 6908275SEric Cheng rw_exit(&ft->ft_lock); 6918275SEric Cheng /* 6928275SEric Cheng * Push the changed parameters to the scheduling code in the 6938275SEric Cheng * SRS's, to take effect right away. 6948275SEric Cheng */ 6958275SEric Cheng if (changed_mask & MRP_MAXBW) { 6968275SEric Cheng mac_srs_update_bwlimit(flent, mrp); 6978275SEric Cheng /* 6988275SEric Cheng * If bandwidth is changed, we may have to change 6998275SEric Cheng * the number of soft ring to be used for fanout. 7008275SEric Cheng * Call mac_flow_update_fanout() if MAC_BIND_CPU 7018275SEric Cheng * is not set and there is no user supplied cpu 7028275SEric Cheng * info. This applies only to link at this time. 7038275SEric Cheng */ 7048275SEric Cheng if (!(flent->fe_type & FLOW_USER) && 7058275SEric Cheng !(changed_mask & MRP_CPUS) && 7068275SEric Cheng !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { 7078275SEric Cheng mac_fanout_setup(mcip, flent, mcip_mrp, 7088275SEric Cheng mac_rx_deliver, mcip, NULL); 7098275SEric Cheng } 7108275SEric Cheng } 7118275SEric Cheng if (mrp->mrp_mask & MRP_PRIORITY) 7128275SEric Cheng mac_flow_update_priority(mcip, flent); 7138275SEric Cheng 7148275SEric Cheng if (changed_mask & MRP_CPUS) 7158275SEric Cheng mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); 7168275SEric Cheng } 7178275SEric Cheng 7188275SEric Cheng /* 7198275SEric Cheng * This function waits for a certain condition to be met and is generally 7208275SEric Cheng * used before a destructive or quiescing operation. 7218275SEric Cheng */ 7228275SEric Cheng void 7238275SEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event) 7248275SEric Cheng { 7258275SEric Cheng mutex_enter(&flent->fe_lock); 7268275SEric Cheng flent->fe_flags |= FE_WAITER; 7278275SEric Cheng 7288275SEric Cheng switch (event) { 7298275SEric Cheng case FLOW_DRIVER_UPCALL: 7308275SEric Cheng /* 7318275SEric Cheng * We want to make sure the driver upcalls have finished before 7328275SEric Cheng * we signal the Rx SRS worker to quit. 7338275SEric Cheng */ 7348275SEric Cheng while (flent->fe_refcnt != 1) 7358275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7368275SEric Cheng break; 7378275SEric Cheng 7388275SEric Cheng case FLOW_USER_REF: 7398275SEric Cheng /* 7408275SEric Cheng * Wait for the fe_user_refcnt to drop to 0. The flow has 7418275SEric Cheng * been removed from the global flow hash. 7428275SEric Cheng */ 7438275SEric Cheng ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH)); 7448275SEric Cheng while (flent->fe_user_refcnt != 0) 7458275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7468275SEric Cheng break; 7478275SEric Cheng 7488275SEric Cheng default: 7498275SEric Cheng ASSERT(0); 7508275SEric Cheng } 7518275SEric Cheng 7528275SEric Cheng flent->fe_flags &= ~FE_WAITER; 7538275SEric Cheng mutex_exit(&flent->fe_lock); 7548275SEric Cheng } 7558275SEric Cheng 7568275SEric Cheng static boolean_t 7578275SEric Cheng mac_flow_clean(flow_entry_t *flent) 7588275SEric Cheng { 7598275SEric Cheng ASSERT(flent->fe_next == NULL); 7608275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7618275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL); 7628275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7638275SEric Cheng 7648275SEric Cheng return (B_TRUE); 7658275SEric Cheng } 7668275SEric Cheng 7678275SEric Cheng void 7688275SEric Cheng mac_flow_cleanup(flow_entry_t *flent) 7698275SEric Cheng { 7708275SEric Cheng if ((flent->fe_type & FLOW_USER) == 0) { 7718275SEric Cheng ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) || 7728275SEric Cheng (flent->fe_mbg != NULL && flent->fe_mcip == NULL)); 7738275SEric Cheng ASSERT(flent->fe_refcnt == 0); 7748275SEric Cheng } else { 7758275SEric Cheng ASSERT(flent->fe_refcnt == 1); 7768275SEric Cheng } 7778275SEric Cheng 7788275SEric Cheng if (flent->fe_mbg != NULL) { 7798275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7808275SEric Cheng /* This is a multicast or broadcast flow entry */ 7818275SEric Cheng mac_bcast_grp_free(flent->fe_mbg); 7828275SEric Cheng flent->fe_mbg = NULL; 7838275SEric Cheng } 7848275SEric Cheng 7858275SEric Cheng if (flent->fe_tx_srs != NULL) { 7868275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7878275SEric Cheng mac_srs_free(flent->fe_tx_srs); 7888275SEric Cheng flent->fe_tx_srs = NULL; 7898275SEric Cheng } 7908275SEric Cheng 7918275SEric Cheng /* 7928275SEric Cheng * In the normal case fe_rx_srs_cnt is 1. However in the error case 7938275SEric Cheng * when mac_unicast_add fails we may not have set up any SRS 7948275SEric Cheng * in which case fe_rx_srs_cnt will be zero. 7958275SEric Cheng */ 7968275SEric Cheng if (flent->fe_rx_srs_cnt != 0) { 7978275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 1); 7988275SEric Cheng mac_srs_free(flent->fe_rx_srs[0]); 7998275SEric Cheng flent->fe_rx_srs[0] = NULL; 8008275SEric Cheng flent->fe_rx_srs_cnt = 0; 8018275SEric Cheng } 8028275SEric Cheng ASSERT(flent->fe_rx_srs[0] == NULL); 8038275SEric Cheng } 8048275SEric Cheng 8058275SEric Cheng void 8068275SEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd) 8078275SEric Cheng { 8088275SEric Cheng /* 8098275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 8108275SEric Cheng * Updates to the fe_flow_desc happen under the fe_lock 8118275SEric Cheng * after removing the flent from the flow table 8128275SEric Cheng */ 8138275SEric Cheng mutex_enter(&flent->fe_lock); 8148275SEric Cheng bcopy(&flent->fe_flow_desc, fd, sizeof (*fd)); 8158275SEric Cheng mutex_exit(&flent->fe_lock); 8168275SEric Cheng } 8178275SEric Cheng 8188275SEric Cheng /* 8198275SEric Cheng * Update a field of a flow entry. The mac perimeter ensures that 8208275SEric Cheng * this is the only thread doing a modify operation on this mac end point. 8218275SEric Cheng * So the flow table can't change or disappear. The ft_lock protects access 8228275SEric Cheng * to the flow entry, and holding the lock ensures that there isn't any thread 8238275SEric Cheng * accessing the flow entry or attempting a flow table lookup. However 8248275SEric Cheng * data threads that are using the flow entry based on the old descriptor 8258275SEric Cheng * will continue to use the flow entry. If strong coherence is required 8268275SEric Cheng * then the flow will have to be quiesced before the descriptor can be 8278275SEric Cheng * changed. 8288275SEric Cheng */ 8298275SEric Cheng void 8308275SEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd) 8318275SEric Cheng { 8328275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8338275SEric Cheng flow_desc_t old_desc; 8348275SEric Cheng int err; 8358275SEric Cheng 8368275SEric Cheng if (ft == NULL) { 8378275SEric Cheng /* 8388275SEric Cheng * The flow hasn't yet been inserted into the table, 8398275SEric Cheng * so only the caller knows about this flow, however for 8408275SEric Cheng * uniformity we grab the fe_lock here. 8418275SEric Cheng */ 8428275SEric Cheng mutex_enter(&flent->fe_lock); 8438275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8448275SEric Cheng mutex_exit(&flent->fe_lock); 8458275SEric Cheng } 8468275SEric Cheng 8478275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8488275SEric Cheng 8498275SEric Cheng /* 8508275SEric Cheng * Need to remove the flow entry from the table and reinsert it, 8518275SEric Cheng * into a potentially diference hash line. The hash depends on 8528275SEric Cheng * the new descriptor fields. However access to fe_desc itself 8538275SEric Cheng * is always under the fe_lock. This helps log and stat functions 8548275SEric Cheng * see a self-consistent fe_flow_desc. 8558275SEric Cheng */ 8568275SEric Cheng mac_flow_remove(ft, flent, B_TRUE); 8578275SEric Cheng old_desc = flent->fe_flow_desc; 8588275SEric Cheng 8598275SEric Cheng mutex_enter(&flent->fe_lock); 8608275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8618275SEric Cheng mutex_exit(&flent->fe_lock); 8628275SEric Cheng 8638275SEric Cheng if (mac_flow_add(ft, flent) != 0) { 8648275SEric Cheng /* 8658275SEric Cheng * The add failed say due to an invalid flow descriptor. 8668275SEric Cheng * Undo the update 8678275SEric Cheng */ 8688275SEric Cheng flent->fe_flow_desc = old_desc; 8698275SEric Cheng err = mac_flow_add(ft, flent); 8708275SEric Cheng ASSERT(err == 0); 8718275SEric Cheng } 8728275SEric Cheng } 8738275SEric Cheng 8748275SEric Cheng void 8758275SEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name) 8768275SEric Cheng { 8778275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8788275SEric Cheng 8798275SEric Cheng if (ft == NULL) { 8808275SEric Cheng /* 8818275SEric Cheng * The flow hasn't yet been inserted into the table, 8828275SEric Cheng * so only the caller knows about this flow 8838275SEric Cheng */ 8848558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8858275SEric Cheng } else { 8868275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8878275SEric Cheng } 8888275SEric Cheng 8898275SEric Cheng mutex_enter(&flent->fe_lock); 8908558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8918275SEric Cheng mutex_exit(&flent->fe_lock); 8928275SEric Cheng } 8938275SEric Cheng 8948275SEric Cheng /* 8958275SEric Cheng * Return the client-private cookie that was associated with 8968275SEric Cheng * the flow when it was created. 8978275SEric Cheng */ 8988275SEric Cheng void * 8998275SEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent) 9008275SEric Cheng { 9018275SEric Cheng return (flent->fe_client_cookie); 9028275SEric Cheng } 9038275SEric Cheng 9048275SEric Cheng /* 9058275SEric Cheng * Forward declarations. 9068275SEric Cheng */ 9078275SEric Cheng static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *); 9088275SEric Cheng static int flow_l2_accept(flow_tab_t *, flow_state_t *); 9098275SEric Cheng static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *); 9108275SEric Cheng static int flow_ether_accept(flow_tab_t *, flow_state_t *); 9118275SEric Cheng 9128275SEric Cheng /* 9138275SEric Cheng * Create flow table. 9148275SEric Cheng */ 9158275SEric Cheng void 9168275SEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size, 9178275SEric Cheng mac_impl_t *mip, flow_tab_t **ftp) 9188275SEric Cheng { 9198275SEric Cheng flow_tab_t *ft; 9208275SEric Cheng flow_ops_t *new_ops; 9218275SEric Cheng 9228275SEric Cheng ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP); 9238275SEric Cheng bzero(ft, sizeof (*ft)); 9248275SEric Cheng 9258275SEric Cheng ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP); 9268275SEric Cheng 9278275SEric Cheng /* 9288275SEric Cheng * We make a copy of the ops vector instead of just pointing to it 9298275SEric Cheng * because we might want to customize the ops vector on a per table 9308275SEric Cheng * basis (e.g. for optimization). 9318275SEric Cheng */ 9328275SEric Cheng new_ops = &ft->ft_ops; 9338275SEric Cheng bcopy(ops, new_ops, sizeof (*ops)); 9348275SEric Cheng ft->ft_mask = mask; 9358275SEric Cheng ft->ft_size = size; 9368275SEric Cheng ft->ft_mip = mip; 9378275SEric Cheng 9388275SEric Cheng /* 9398275SEric Cheng * Optimization for DL_ETHER media. 9408275SEric Cheng */ 9418275SEric Cheng if (mip->mi_info.mi_nativemedia == DL_ETHER) { 9428275SEric Cheng if (new_ops->fo_hash == flow_l2_hash) 9438275SEric Cheng new_ops->fo_hash = flow_ether_hash; 9448275SEric Cheng 9458275SEric Cheng if (new_ops->fo_accept[0] == flow_l2_accept) 9468275SEric Cheng new_ops->fo_accept[0] = flow_ether_accept; 9478275SEric Cheng 9488275SEric Cheng } 9498275SEric Cheng *ftp = ft; 9508275SEric Cheng } 9518275SEric Cheng 9528275SEric Cheng void 9538275SEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp) 9548275SEric Cheng { 9558275SEric Cheng mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID, 9568275SEric Cheng 1024, mip, ftp); 9578275SEric Cheng } 9588275SEric Cheng 9598275SEric Cheng /* 9608275SEric Cheng * Destroy flow table. 9618275SEric Cheng */ 9628275SEric Cheng void 9638275SEric Cheng mac_flow_tab_destroy(flow_tab_t *ft) 9648275SEric Cheng { 9658275SEric Cheng if (ft == NULL) 9668275SEric Cheng return; 9678275SEric Cheng 9688275SEric Cheng ASSERT(ft->ft_flow_count == 0); 9698275SEric Cheng kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *)); 9708275SEric Cheng bzero(ft, sizeof (*ft)); 9718275SEric Cheng kmem_cache_free(flow_tab_cache, ft); 9728275SEric Cheng } 9738275SEric Cheng 9748275SEric Cheng /* 9758275SEric Cheng * Add a new flow entry to the global flow hash table 9768275SEric Cheng */ 9778275SEric Cheng int 9788275SEric Cheng mac_flow_hash_add(flow_entry_t *flent) 9798275SEric Cheng { 9808275SEric Cheng int err; 9818275SEric Cheng 9828275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 9838275SEric Cheng err = mod_hash_insert(flow_hash, 9848275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent); 9858275SEric Cheng if (err != 0) { 9868275SEric Cheng rw_exit(&flow_tab_lock); 9878275SEric Cheng return (EEXIST); 9888275SEric Cheng } 9898275SEric Cheng /* Mark as inserted into the global flow hash table */ 9908275SEric Cheng FLOW_MARK(flent, FE_G_FLOW_HASH); 9918275SEric Cheng rw_exit(&flow_tab_lock); 9928275SEric Cheng return (err); 9938275SEric Cheng } 9948275SEric Cheng 9958275SEric Cheng /* 9968275SEric Cheng * Remove a flow entry from the global flow hash table 9978275SEric Cheng */ 9988275SEric Cheng void 9998275SEric Cheng mac_flow_hash_remove(flow_entry_t *flent) 10008275SEric Cheng { 10018275SEric Cheng mod_hash_val_t val; 10028275SEric Cheng 10038275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 10048275SEric Cheng VERIFY(mod_hash_remove(flow_hash, 10058275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, &val) == 0); 10068275SEric Cheng 10078275SEric Cheng /* Clear the mark that says inserted into the global flow hash table */ 10088275SEric Cheng FLOW_UNMARK(flent, FE_G_FLOW_HASH); 10098275SEric Cheng rw_exit(&flow_tab_lock); 10108275SEric Cheng } 10118275SEric Cheng 10128275SEric Cheng /* 10138275SEric Cheng * Retrieve a flow entry from the global flow hash table. 10148275SEric Cheng */ 10158275SEric Cheng int 10168275SEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp) 10178275SEric Cheng { 10188275SEric Cheng int err; 10198275SEric Cheng flow_entry_t *flent; 10208275SEric Cheng 10218275SEric Cheng rw_enter(&flow_tab_lock, RW_READER); 10228275SEric Cheng err = mod_hash_find(flow_hash, (mod_hash_key_t)name, 10238275SEric Cheng (mod_hash_val_t *)&flent); 10248275SEric Cheng if (err != 0) { 10258275SEric Cheng rw_exit(&flow_tab_lock); 10268275SEric Cheng return (ENOENT); 10278275SEric Cheng } 10288275SEric Cheng ASSERT(flent != NULL); 10298275SEric Cheng FLOW_USER_REFHOLD(flent); 10308275SEric Cheng rw_exit(&flow_tab_lock); 10318275SEric Cheng 10328275SEric Cheng *flentp = flent; 10338275SEric Cheng return (0); 10348275SEric Cheng } 10358275SEric Cheng 10368275SEric Cheng /* 10378275SEric Cheng * Initialize or release mac client flows by walking the subflow table. 10388275SEric Cheng * These are typically invoked during plumb/unplumb of links. 10398275SEric Cheng */ 10408275SEric Cheng 10418275SEric Cheng static int 10428275SEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg) 10438275SEric Cheng { 10448275SEric Cheng mac_client_impl_t *mcip = arg; 10458275SEric Cheng 10468275SEric Cheng if (mac_link_flow_init(arg, flent) != 0) { 10478275SEric Cheng cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'", 10488275SEric Cheng flent->fe_flow_name, mcip->mci_name); 10498275SEric Cheng } else { 10508275SEric Cheng FLOW_UNMARK(flent, FE_UF_NO_DATAPATH); 10518275SEric Cheng } 10528275SEric Cheng return (0); 10538275SEric Cheng } 10548275SEric Cheng 10558275SEric Cheng void 10568275SEric Cheng mac_link_init_flows(mac_client_handle_t mch) 10578275SEric Cheng { 10588275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10598275SEric Cheng 10608275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 10618275SEric Cheng mac_link_init_flows_cb, mcip); 10628275SEric Cheng /* 10638275SEric Cheng * If mac client had subflow(s) configured before plumb, change 10648275SEric Cheng * function to mac_rx_srs_subflow_process and in case of hardware 10658275SEric Cheng * classification, disable polling. 10668275SEric Cheng */ 10678275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 10688275SEric Cheng 10698275SEric Cheng } 10708275SEric Cheng 10718275SEric Cheng boolean_t 10728275SEric Cheng mac_link_has_flows(mac_client_handle_t mch) 10738275SEric Cheng { 10748275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10758275SEric Cheng 10768275SEric Cheng if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab)) 10778275SEric Cheng return (B_TRUE); 10788275SEric Cheng 10798275SEric Cheng return (B_FALSE); 10808275SEric Cheng } 10818275SEric Cheng 10828275SEric Cheng static int 10838275SEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg) 10848275SEric Cheng { 10858275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 10868275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 10878275SEric Cheng mac_link_flow_clean(arg, flent); 10888275SEric Cheng return (0); 10898275SEric Cheng } 10908275SEric Cheng 10918275SEric Cheng void 10928275SEric Cheng mac_link_release_flows(mac_client_handle_t mch) 10938275SEric Cheng { 10948275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10958275SEric Cheng 10968275SEric Cheng /* 10978275SEric Cheng * Change the mci_flent callback back to mac_rx_srs_process() 10988275SEric Cheng * because flows are about to be deactivated. 10998275SEric Cheng */ 11008275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 11018275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 11028275SEric Cheng mac_link_release_flows_cb, mcip); 11038275SEric Cheng } 11048275SEric Cheng 11058275SEric Cheng void 11068275SEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name) 11078275SEric Cheng { 11088275SEric Cheng mac_flow_set_name(fep, new_name); 11098275SEric Cheng if (fep->fe_ksp != NULL) { 11108275SEric Cheng flow_stat_destroy(fep); 11118275SEric Cheng flow_stat_create(fep); 11128275SEric Cheng } 11138275SEric Cheng } 11148275SEric Cheng 11158275SEric Cheng /* 11168275SEric Cheng * mac_link_flow_init() 11178275SEric Cheng * Internal flow interface used for allocating SRSs and related 11188275SEric Cheng * data structures. Not meant to be used by mac clients. 11198275SEric Cheng */ 11208275SEric Cheng int 11218275SEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow) 11228275SEric Cheng { 11238275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 11248275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 11258275SEric Cheng int err; 11268275SEric Cheng 11278275SEric Cheng ASSERT(mch != NULL); 11288275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11298275SEric Cheng 11308275SEric Cheng if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0) 11318275SEric Cheng return (err); 11328275SEric Cheng 11338275SEric Cheng sub_flow->fe_mcip = mcip; 11348275SEric Cheng 11358275SEric Cheng return (0); 11368275SEric Cheng } 11378275SEric Cheng 11388275SEric Cheng /* 11398275SEric Cheng * mac_link_flow_add() 11408275SEric Cheng * Used by flowadm(1m) or kernel mac clients for creating flows. 11418275SEric Cheng */ 11428275SEric Cheng int 11438275SEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name, 11448275SEric Cheng flow_desc_t *flow_desc, mac_resource_props_t *mrp) 11458275SEric Cheng { 11468275SEric Cheng flow_entry_t *flent = NULL; 11478275SEric Cheng int err; 11488275SEric Cheng dls_dl_handle_t dlh; 11498275SEric Cheng dls_link_t *dlp; 11508275SEric Cheng boolean_t link_held = B_FALSE; 11518275SEric Cheng boolean_t hash_added = B_FALSE; 11528275SEric Cheng mac_perim_handle_t mph; 11538275SEric Cheng 11548275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 11558275SEric Cheng if (err == 0) { 11568275SEric Cheng FLOW_USER_REFRELE(flent); 11578275SEric Cheng return (EEXIST); 11588275SEric Cheng } 11598275SEric Cheng 11608275SEric Cheng /* 11618275SEric Cheng * First create a flow entry given the description provided 11628275SEric Cheng * by the caller. 11638275SEric Cheng */ 11648275SEric Cheng err = mac_flow_create(flow_desc, mrp, flow_name, NULL, 11658275SEric Cheng FLOW_USER | FLOW_OTHER, &flent); 11668275SEric Cheng 11678275SEric Cheng if (err != 0) 11688275SEric Cheng return (err); 11698275SEric Cheng 11708275SEric Cheng /* 11718275SEric Cheng * We've got a local variable referencing this flow now, so we need 11728275SEric Cheng * to hold it. We'll release this flow before returning. 11738275SEric Cheng * All failures until we return will undo any action that may internally 11748275SEric Cheng * held the flow, so the last REFRELE will assure a clean freeing 11758275SEric Cheng * of resources. 11768275SEric Cheng */ 11778275SEric Cheng FLOW_REFHOLD(flent); 11788275SEric Cheng 11798275SEric Cheng flent->fe_link_id = linkid; 11808275SEric Cheng FLOW_MARK(flent, FE_INCIPIENT); 11818275SEric Cheng 11828275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 11838275SEric Cheng if (err != 0) { 11848275SEric Cheng FLOW_FINAL_REFRELE(flent); 11858275SEric Cheng return (err); 11868275SEric Cheng } 11878275SEric Cheng 11888275SEric Cheng /* 11898275SEric Cheng * dls will eventually be merged with mac so it's ok 11908275SEric Cheng * to call dls' internal functions. 11918275SEric Cheng */ 11928275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 11938275SEric Cheng if (err != 0) 11948275SEric Cheng goto bail; 11958275SEric Cheng 11968275SEric Cheng link_held = B_TRUE; 11978275SEric Cheng 11988275SEric Cheng /* 11998275SEric Cheng * Add the flow to the global flow table, this table will be per 12008275SEric Cheng * exclusive zone so each zone can have its own flow namespace. 12018275SEric Cheng * RFE 6625651 will fix this. 12028275SEric Cheng * 12038275SEric Cheng */ 12048275SEric Cheng if ((err = mac_flow_hash_add(flent)) != 0) 12058275SEric Cheng goto bail; 12068275SEric Cheng 12078275SEric Cheng hash_added = B_TRUE; 12088275SEric Cheng 12098275SEric Cheng /* 12108275SEric Cheng * do not allow flows to be configured on an anchor VNIC 12118275SEric Cheng */ 12128275SEric Cheng if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) { 12138275SEric Cheng err = ENOTSUP; 12148275SEric Cheng goto bail; 12158275SEric Cheng } 12168275SEric Cheng 12178275SEric Cheng /* 12188275SEric Cheng * Save the zoneid of the underlying link in the flow entry, 12198275SEric Cheng * this is needed to prevent non-global zone from getting 12208275SEric Cheng * statistics information of global zone. 12218275SEric Cheng */ 12228275SEric Cheng flent->fe_zoneid = dlp->dl_zid; 12238275SEric Cheng 12248275SEric Cheng /* 12258275SEric Cheng * Add the subflow to the subflow table. Also instantiate the flow 12268833SVenu.Iyer@Sun.COM * in the mac if there is an active user (we check if the MAC client's 12278833SVenu.Iyer@Sun.COM * datapath has been setup). 12288275SEric Cheng */ 12298833SVenu.Iyer@Sun.COM err = mac_flow_add_subflow(dlp->dl_mch, flent, 12308833SVenu.Iyer@Sun.COM MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch)); 12318275SEric Cheng if (err != 0) 12328275SEric Cheng goto bail; 12338275SEric Cheng 12348275SEric Cheng FLOW_UNMARK(flent, FE_INCIPIENT); 12358275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12368275SEric Cheng mac_perim_exit(mph); 12378275SEric Cheng return (0); 12388275SEric Cheng 12398275SEric Cheng bail: 12408275SEric Cheng if (hash_added) 12418275SEric Cheng mac_flow_hash_remove(flent); 12428275SEric Cheng 12438275SEric Cheng if (link_held) 12448275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12458275SEric Cheng 12468275SEric Cheng /* 12478275SEric Cheng * Wait for any transient global flow hash refs to clear 12488275SEric Cheng * and then release the creation reference on the flow 12498275SEric Cheng */ 12508275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 12518275SEric Cheng FLOW_FINAL_REFRELE(flent); 12528275SEric Cheng mac_perim_exit(mph); 12538275SEric Cheng return (err); 12548275SEric Cheng } 12558275SEric Cheng 12568275SEric Cheng /* 12578275SEric Cheng * mac_link_flow_clean() 12588275SEric Cheng * Internal flow interface used for freeing SRSs and related 12598275SEric Cheng * data structures. Not meant to be used by mac clients. 12608275SEric Cheng */ 12618275SEric Cheng void 12628275SEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow) 12638275SEric Cheng { 12648275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 12658275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 12668275SEric Cheng boolean_t last_subflow; 12678275SEric Cheng 12688275SEric Cheng ASSERT(mch != NULL); 12698275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 12708275SEric Cheng 12718275SEric Cheng /* 12728275SEric Cheng * This sub flow entry may fail to be fully initialized by 12738275SEric Cheng * mac_link_flow_init(). If so, simply return. 12748275SEric Cheng */ 12758275SEric Cheng if (sub_flow->fe_mcip == NULL) 12768275SEric Cheng return; 12778275SEric Cheng 12788275SEric Cheng last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab); 12798275SEric Cheng /* 12808275SEric Cheng * Tear down the data path 12818275SEric Cheng */ 12828275SEric Cheng mac_datapath_teardown(mcip, sub_flow, SRST_FLOW); 12838275SEric Cheng sub_flow->fe_mcip = NULL; 12848275SEric Cheng 12858275SEric Cheng /* 12868275SEric Cheng * Delete the SRSs associated with this subflow. If this is being 12878275SEric Cheng * driven by flowadm(1M) then the subflow will be deleted by 12888275SEric Cheng * dls_rem_flow. However if this is a result of the interface being 12898275SEric Cheng * unplumbed then the subflow itself won't be deleted. 12908275SEric Cheng */ 12918275SEric Cheng mac_flow_cleanup(sub_flow); 12928275SEric Cheng 12938275SEric Cheng /* 12948275SEric Cheng * If all the subflows are gone, renable some of the stuff 12958275SEric Cheng * we disabled when adding a subflow, polling etc. 12968275SEric Cheng */ 12978275SEric Cheng if (last_subflow) { 12988275SEric Cheng /* 12998275SEric Cheng * The subflow table itself is not protected by any locks or 13008275SEric Cheng * refcnts. Hence quiesce the client upfront before clearing 13018275SEric Cheng * mci_subflow_tab. 13028275SEric Cheng */ 13038275SEric Cheng mac_client_quiesce(mcip); 13048275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 13058275SEric Cheng mac_flow_tab_destroy(mcip->mci_subflow_tab); 13068275SEric Cheng mcip->mci_subflow_tab = NULL; 13078275SEric Cheng mac_client_restart(mcip); 13088275SEric Cheng } 13098275SEric Cheng } 13108275SEric Cheng 13118275SEric Cheng /* 13128275SEric Cheng * mac_link_flow_remove() 13138275SEric Cheng * Used by flowadm(1m) or kernel mac clients for removing flows. 13148275SEric Cheng */ 13158275SEric Cheng int 13168275SEric Cheng mac_link_flow_remove(char *flow_name) 13178275SEric Cheng { 13188275SEric Cheng flow_entry_t *flent; 13198275SEric Cheng mac_perim_handle_t mph; 13208275SEric Cheng int err; 13218275SEric Cheng datalink_id_t linkid; 13228275SEric Cheng 13238275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13248275SEric Cheng if (err != 0) 13258275SEric Cheng return (err); 13268275SEric Cheng 13278275SEric Cheng linkid = flent->fe_link_id; 13288275SEric Cheng FLOW_USER_REFRELE(flent); 13298275SEric Cheng 13308275SEric Cheng /* 13318275SEric Cheng * The perim must be acquired before acquiring any other references 13328275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 13338275SEric Cheng * FLOW_REFRELE above. 13348275SEric Cheng */ 13358275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 13368275SEric Cheng if (err != 0) 13378275SEric Cheng return (err); 13388275SEric Cheng 13398275SEric Cheng /* 13408275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 13418275SEric Cheng * may have removed it already while we were waiting to enter the 13428275SEric Cheng * link's perimeter. 13438275SEric Cheng */ 13448275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13458275SEric Cheng if (err != 0) { 13468275SEric Cheng mac_perim_exit(mph); 13478275SEric Cheng return (err); 13488275SEric Cheng } 13498275SEric Cheng FLOW_USER_REFRELE(flent); 13508275SEric Cheng 13518275SEric Cheng /* 13528275SEric Cheng * Remove the flow from the subflow table and deactivate the flow 13538275SEric Cheng * by quiescing and removings its SRSs 13548275SEric Cheng */ 13558275SEric Cheng mac_flow_rem_subflow(flent); 13568275SEric Cheng 13578275SEric Cheng /* 13588275SEric Cheng * Finally, remove the flow from the global table. 13598275SEric Cheng */ 13608275SEric Cheng mac_flow_hash_remove(flent); 13618275SEric Cheng 13628275SEric Cheng /* 13638275SEric Cheng * Wait for any transient global flow hash refs to clear 13648275SEric Cheng * and then release the creation reference on the flow 13658275SEric Cheng */ 13668275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 13678275SEric Cheng FLOW_FINAL_REFRELE(flent); 13688275SEric Cheng 13698275SEric Cheng mac_perim_exit(mph); 13708275SEric Cheng 13718275SEric Cheng return (0); 13728275SEric Cheng } 13738275SEric Cheng 13748275SEric Cheng /* 13758275SEric Cheng * mac_link_flow_modify() 13768275SEric Cheng * Modifies the properties of a flow identified by its name. 13778275SEric Cheng */ 13788275SEric Cheng int 13798275SEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) 13808275SEric Cheng { 13818275SEric Cheng flow_entry_t *flent; 13828275SEric Cheng mac_client_impl_t *mcip; 13838275SEric Cheng int err = 0; 13848275SEric Cheng mac_perim_handle_t mph; 13858275SEric Cheng datalink_id_t linkid; 13868275SEric Cheng flow_tab_t *flow_tab; 13878275SEric Cheng 13888275SEric Cheng err = mac_validate_props(mrp); 13898275SEric Cheng if (err != 0) 13908275SEric Cheng return (err); 13918275SEric Cheng 13928275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13938275SEric Cheng if (err != 0) 13948275SEric Cheng return (err); 13958275SEric Cheng 13968275SEric Cheng linkid = flent->fe_link_id; 13978275SEric Cheng FLOW_USER_REFRELE(flent); 13988275SEric Cheng 13998275SEric Cheng /* 14008275SEric Cheng * The perim must be acquired before acquiring any other references 14018275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 14028275SEric Cheng * FLOW_REFRELE above. 14038275SEric Cheng */ 14048275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 14058275SEric Cheng if (err != 0) 14068275SEric Cheng return (err); 14078275SEric Cheng 14088275SEric Cheng /* 14098275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 14108275SEric Cheng * may have removed it already while we were waiting to enter the 14118275SEric Cheng * link's perimeter. 14128275SEric Cheng */ 14138275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 14148275SEric Cheng if (err != 0) { 14158275SEric Cheng mac_perim_exit(mph); 14168275SEric Cheng return (err); 14178275SEric Cheng } 14188275SEric Cheng FLOW_USER_REFRELE(flent); 14198275SEric Cheng 14208275SEric Cheng /* 14218275SEric Cheng * If this flow is attached to a MAC client, then pass the request 14228275SEric Cheng * along to the client. 14238275SEric Cheng * Otherwise, just update the cached values. 14248275SEric Cheng */ 14258275SEric Cheng mcip = flent->fe_mcip; 14268275SEric Cheng mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE); 14278275SEric Cheng if (mcip != NULL) { 14288275SEric Cheng if ((flow_tab = mcip->mci_subflow_tab) == NULL) { 14298275SEric Cheng err = ENOENT; 14308275SEric Cheng } else { 14318275SEric Cheng mac_flow_modify(flow_tab, flent, mrp); 14328275SEric Cheng } 14338275SEric Cheng } else { 14348275SEric Cheng (void) mac_flow_modify_props(flent, mrp); 14358275SEric Cheng } 14368275SEric Cheng 14378275SEric Cheng done: 14388275SEric Cheng mac_perim_exit(mph); 14398275SEric Cheng return (err); 14408275SEric Cheng } 14418275SEric Cheng 14428275SEric Cheng 14438275SEric Cheng /* 14448275SEric Cheng * State structure and misc functions used by mac_link_flow_walk(). 14458275SEric Cheng */ 14468275SEric Cheng typedef struct { 14478275SEric Cheng int (*ws_func)(mac_flowinfo_t *, void *); 14488275SEric Cheng void *ws_arg; 14498275SEric Cheng } flow_walk_state_t; 14508275SEric Cheng 14518275SEric Cheng static void 14528275SEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent) 14538275SEric Cheng { 14548558SGirish.Moodalbail@Sun.COM (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name, 14558558SGirish.Moodalbail@Sun.COM MAXFLOWNAMELEN); 14568275SEric Cheng finfop->fi_link_id = flent->fe_link_id; 14578275SEric Cheng finfop->fi_flow_desc = flent->fe_flow_desc; 14588275SEric Cheng finfop->fi_resource_props = flent->fe_resource_props; 14598275SEric Cheng } 14608275SEric Cheng 14618275SEric Cheng static int 14628275SEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) 14638275SEric Cheng { 14648275SEric Cheng flow_walk_state_t *statep = arg; 14658275SEric Cheng mac_flowinfo_t finfo; 14668275SEric Cheng 14678275SEric Cheng mac_link_flowinfo_copy(&finfo, flent); 14688275SEric Cheng return (statep->ws_func(&finfo, statep->ws_arg)); 14698275SEric Cheng } 14708275SEric Cheng 14718275SEric Cheng /* 14728275SEric Cheng * mac_link_flow_walk() 14738275SEric Cheng * Invokes callback 'func' for all flows belonging to the specified link. 14748275SEric Cheng */ 14758275SEric Cheng int 14768275SEric Cheng mac_link_flow_walk(datalink_id_t linkid, 14778275SEric Cheng int (*func)(mac_flowinfo_t *, void *), void *arg) 14788275SEric Cheng { 14798275SEric Cheng mac_client_impl_t *mcip; 14808275SEric Cheng mac_perim_handle_t mph; 14818275SEric Cheng flow_walk_state_t state; 14828275SEric Cheng dls_dl_handle_t dlh; 14838275SEric Cheng dls_link_t *dlp; 14848275SEric Cheng int err; 14858275SEric Cheng 14868275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 14878275SEric Cheng if (err != 0) 14888275SEric Cheng return (err); 14898275SEric Cheng 14908275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 14918275SEric Cheng if (err != 0) { 14928275SEric Cheng mac_perim_exit(mph); 14938275SEric Cheng return (err); 14948275SEric Cheng } 14958275SEric Cheng 14968275SEric Cheng mcip = (mac_client_impl_t *)dlp->dl_mch; 14978275SEric Cheng state.ws_func = func; 14988275SEric Cheng state.ws_arg = arg; 14998275SEric Cheng 15008275SEric Cheng err = mac_flow_walk_nolock(mcip->mci_subflow_tab, 15018275SEric Cheng mac_link_flow_walk_cb, &state); 15028275SEric Cheng 15038275SEric Cheng dls_devnet_rele_link(dlh, dlp); 15048275SEric Cheng mac_perim_exit(mph); 15058275SEric Cheng return (err); 15068275SEric Cheng } 15078275SEric Cheng 15088275SEric Cheng /* 15098275SEric Cheng * mac_link_flow_info() 15108275SEric Cheng * Retrieves information about a specific flow. 15118275SEric Cheng */ 15128275SEric Cheng int 15138275SEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo) 15148275SEric Cheng { 15158275SEric Cheng flow_entry_t *flent; 15168275SEric Cheng int err; 15178275SEric Cheng 15188275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 15198275SEric Cheng if (err != 0) 15208275SEric Cheng return (err); 15218275SEric Cheng 15228275SEric Cheng mac_link_flowinfo_copy(finfo, flent); 15238275SEric Cheng FLOW_USER_REFRELE(flent); 15248275SEric Cheng return (0); 15258275SEric Cheng } 15268275SEric Cheng 15278275SEric Cheng #define HASH_MAC_VID(a, v, s) \ 15288275SEric Cheng ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s)) 15298275SEric Cheng 15308275SEric Cheng #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end)) 15318275SEric Cheng 15328833SVenu.Iyer@Sun.COM #define CHECK_AND_ADJUST_START_PTR(s, start) { \ 15338833SVenu.Iyer@Sun.COM if ((s)->fs_mp->b_wptr == (start)) { \ 15348833SVenu.Iyer@Sun.COM mblk_t *next = (s)->fs_mp->b_cont; \ 15358833SVenu.Iyer@Sun.COM if (next == NULL) \ 15368833SVenu.Iyer@Sun.COM return (EINVAL); \ 15378833SVenu.Iyer@Sun.COM \ 15388833SVenu.Iyer@Sun.COM (s)->fs_mp = next; \ 15398833SVenu.Iyer@Sun.COM (start) = next->b_rptr; \ 15408833SVenu.Iyer@Sun.COM } \ 15418833SVenu.Iyer@Sun.COM } 15428833SVenu.Iyer@Sun.COM 15438275SEric Cheng /* ARGSUSED */ 15448275SEric Cheng static boolean_t 15458275SEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 15468275SEric Cheng { 15478275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15488275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 15498275SEric Cheng 15508275SEric Cheng return (l2->l2_vid == fd->fd_vid && 15518275SEric Cheng bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0); 15528275SEric Cheng } 15538275SEric Cheng 15548275SEric Cheng /* 15558275SEric Cheng * Layer 2 hash function. 15568275SEric Cheng * Must be paired with flow_l2_accept() within a set of flow_ops 15578275SEric Cheng * because it assumes the dest address is already extracted. 15588275SEric Cheng */ 15598275SEric Cheng static uint32_t 15608275SEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s) 15618275SEric Cheng { 15628275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15638275SEric Cheng 15648275SEric Cheng return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 15658275SEric Cheng } 15668275SEric Cheng 15678275SEric Cheng /* 15688275SEric Cheng * This is the generic layer 2 accept function. 15698275SEric Cheng * It makes use of mac_header_info() to extract the header length, 15708275SEric Cheng * sap, vlan ID and destination address. 15718275SEric Cheng */ 15728275SEric Cheng static int 15738275SEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s) 15748275SEric Cheng { 15758275SEric Cheng boolean_t is_ether; 15768275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15778275SEric Cheng mac_header_info_t mhi; 15788275SEric Cheng int err; 15798275SEric Cheng 15808275SEric Cheng is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER); 15818275SEric Cheng if ((err = mac_header_info((mac_handle_t)ft->ft_mip, 15828275SEric Cheng s->fs_mp, &mhi)) != 0) { 15838275SEric Cheng if (err == EINVAL) 15848275SEric Cheng err = ENOBUFS; 15858275SEric Cheng 15868275SEric Cheng return (err); 15878275SEric Cheng } 15888275SEric Cheng 15898275SEric Cheng l2->l2_start = s->fs_mp->b_rptr; 15908275SEric Cheng l2->l2_daddr = (uint8_t *)mhi.mhi_daddr; 15918275SEric Cheng 15928275SEric Cheng if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN && 15938275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 15948275SEric Cheng struct ether_vlan_header *evhp = 15958275SEric Cheng (struct ether_vlan_header *)l2->l2_start; 15968275SEric Cheng 15978275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 15988275SEric Cheng return (ENOBUFS); 15998275SEric Cheng 16008275SEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 16018275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 16028275SEric Cheng l2->l2_hdrsize = sizeof (*evhp); 16038275SEric Cheng } else { 16048275SEric Cheng l2->l2_sap = mhi.mhi_bindsap; 16058275SEric Cheng l2->l2_vid = 0; 16068275SEric Cheng l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize; 16078275SEric Cheng } 16088275SEric Cheng return (0); 16098275SEric Cheng } 16108275SEric Cheng 16118275SEric Cheng /* 16128275SEric Cheng * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/ 16138275SEric Cheng * accept(). The notable difference is that dest address is now extracted 16148275SEric Cheng * by hash() rather than by accept(). This saves a few memory references 16158275SEric Cheng * for flow tables that do not care about mac addresses. 16168275SEric Cheng */ 16178275SEric Cheng static uint32_t 16188275SEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s) 16198275SEric Cheng { 16208275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16218275SEric Cheng struct ether_vlan_header *evhp; 16228275SEric Cheng 16238275SEric Cheng evhp = (struct ether_vlan_header *)l2->l2_start; 16248275SEric Cheng l2->l2_daddr = evhp->ether_dhost.ether_addr_octet; 16258275SEric Cheng return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 16268275SEric Cheng } 16278275SEric Cheng 16288275SEric Cheng /* ARGSUSED */ 16298275SEric Cheng static int 16308275SEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s) 16318275SEric Cheng { 16328275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16338275SEric Cheng struct ether_vlan_header *evhp; 16348275SEric Cheng uint16_t sap; 16358275SEric Cheng 16368275SEric Cheng evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr; 16378275SEric Cheng l2->l2_start = (uchar_t *)evhp; 16388275SEric Cheng 16398275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header))) 16408275SEric Cheng return (ENOBUFS); 16418275SEric Cheng 16428275SEric Cheng if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN && 16438275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 16448275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 16458275SEric Cheng return (ENOBUFS); 16468275SEric Cheng 16478275SEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 16488275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 16498275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_vlan_header); 16508275SEric Cheng } else { 16518275SEric Cheng l2->l2_sap = sap; 16528275SEric Cheng l2->l2_vid = 0; 16538275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_header); 16548275SEric Cheng } 16558275SEric Cheng return (0); 16568275SEric Cheng } 16578275SEric Cheng 16588275SEric Cheng /* 16598275SEric Cheng * Validates a layer 2 flow entry. 16608275SEric Cheng */ 16618275SEric Cheng static int 16628275SEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 16638275SEric Cheng { 16648275SEric Cheng int i; 16658275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 16668275SEric Cheng 16678275SEric Cheng /* 16688275SEric Cheng * Dest address is mandatory. 16698275SEric Cheng */ 16708275SEric Cheng if ((fd->fd_mask & FLOW_LINK_DST) == 0) 16718275SEric Cheng return (EINVAL); 16728275SEric Cheng 16738275SEric Cheng for (i = 0; i < fd->fd_mac_len; i++) { 16748275SEric Cheng if (fd->fd_dst_mac[i] != 0) 16758275SEric Cheng break; 16768275SEric Cheng } 16778275SEric Cheng if (i == fd->fd_mac_len || fd->fd_mac_len < ETHERADDRL) 16788275SEric Cheng return (EINVAL); 16798275SEric Cheng 16808275SEric Cheng if ((fd->fd_mask & FLOW_LINK_VID) != 0) { 16818275SEric Cheng /* 16828275SEric Cheng * VLAN flows are only supported over ethernet macs. 16838275SEric Cheng */ 16848275SEric Cheng if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER) 16858275SEric Cheng return (EINVAL); 16868275SEric Cheng 16878275SEric Cheng if (fd->fd_vid == 0) 16888275SEric Cheng return (EINVAL); 16898275SEric Cheng 16908275SEric Cheng } 16918275SEric Cheng flent->fe_match = flow_l2_match; 16928275SEric Cheng return (0); 16938275SEric Cheng } 16948275SEric Cheng 16958275SEric Cheng /* 16968275SEric Cheng * Calculates hash index of flow entry. 16978275SEric Cheng */ 16988275SEric Cheng static uint32_t 16998275SEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 17008275SEric Cheng { 17018275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17028275SEric Cheng 17038275SEric Cheng ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); 17048275SEric Cheng return (HASH_MAC_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); 17058275SEric Cheng } 17068275SEric Cheng 17078275SEric Cheng /* 17088275SEric Cheng * This is used for duplicate flow checking. 17098275SEric Cheng */ 17108275SEric Cheng /* ARGSUSED */ 17118275SEric Cheng static boolean_t 17128275SEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 17138275SEric Cheng { 17148275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 17158275SEric Cheng 17168275SEric Cheng ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0); 17178275SEric Cheng return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac, 17188275SEric Cheng fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid); 17198275SEric Cheng } 17208275SEric Cheng 17218275SEric Cheng /* 17228275SEric Cheng * Generic flow entry insertion function. 17238275SEric Cheng * Used by flow tables that do not have ordering requirements. 17248275SEric Cheng */ 17258275SEric Cheng /* ARGSUSED */ 17268275SEric Cheng static int 17278275SEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 17288275SEric Cheng flow_entry_t *flent) 17298275SEric Cheng { 17308275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 17318275SEric Cheng 17328275SEric Cheng if (*headp != NULL) { 17338275SEric Cheng ASSERT(flent->fe_next == NULL); 17348275SEric Cheng flent->fe_next = *headp; 17358275SEric Cheng } 17368275SEric Cheng *headp = flent; 17378275SEric Cheng return (0); 17388275SEric Cheng } 17398275SEric Cheng 17408275SEric Cheng /* 17418275SEric Cheng * IP version independent DSField matching function. 17428275SEric Cheng */ 17438275SEric Cheng /* ARGSUSED */ 17448275SEric Cheng static boolean_t 17458275SEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17468275SEric Cheng { 17478275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17488275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17498275SEric Cheng 17508275SEric Cheng switch (l3info->l3_version) { 17518275SEric Cheng case IPV4_VERSION: { 17528275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17538275SEric Cheng 17548275SEric Cheng return ((ipha->ipha_type_of_service & 17558275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17568275SEric Cheng } 17578275SEric Cheng case IPV6_VERSION: { 17588275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 17598275SEric Cheng 17608275SEric Cheng return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) & 17618275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17628275SEric Cheng } 17638275SEric Cheng default: 17648275SEric Cheng return (B_FALSE); 17658275SEric Cheng } 17668275SEric Cheng } 17678275SEric Cheng 17688275SEric Cheng /* 17698275SEric Cheng * IP v4 and v6 address matching. 17708275SEric Cheng * The netmask only needs to be applied on the packet but not on the 17718275SEric Cheng * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets. 17728275SEric Cheng */ 17738275SEric Cheng 17748275SEric Cheng /* ARGSUSED */ 17758275SEric Cheng static boolean_t 17768275SEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17778275SEric Cheng { 17788275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17798275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17808275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17818275SEric Cheng in_addr_t addr; 17828275SEric Cheng 17838275SEric Cheng addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src); 17848275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 17858275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) == 17868275SEric Cheng V4_PART_OF_V6(fd->fd_local_addr)); 17878275SEric Cheng } 17888275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) == 17898275SEric Cheng V4_PART_OF_V6(fd->fd_remote_addr)); 17908275SEric Cheng } 17918275SEric Cheng 17928275SEric Cheng /* ARGSUSED */ 17938275SEric Cheng static boolean_t 17948275SEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17958275SEric Cheng { 17968275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17978275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17988275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 17998275SEric Cheng in6_addr_t *addrp; 18008275SEric Cheng 18018275SEric Cheng addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src); 18028275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 18038275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_local_netmask, 18048275SEric Cheng fd->fd_local_addr)); 18058275SEric Cheng } 18068275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr)); 18078275SEric Cheng } 18088275SEric Cheng 18098275SEric Cheng /* ARGSUSED */ 18108275SEric Cheng static boolean_t 18118275SEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 18128275SEric Cheng { 18138275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18148275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 18158275SEric Cheng 18168275SEric Cheng return (l3info->l3_protocol == fd->fd_protocol); 18178275SEric Cheng } 18188275SEric Cheng 18198275SEric Cheng static uint32_t 18208275SEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s) 18218275SEric Cheng { 18228275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18238275SEric Cheng flow_mask_t mask = ft->ft_mask; 18248275SEric Cheng 18258275SEric Cheng if ((mask & FLOW_IP_LOCAL) != 0) { 18268275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 18278275SEric Cheng } else if ((mask & FLOW_IP_REMOTE) != 0) { 18288275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 18298275SEric Cheng } else if ((mask & FLOW_IP_DSFIELD) != 0) { 18308275SEric Cheng /* 18318275SEric Cheng * DSField flents are arranged as a single list. 18328275SEric Cheng */ 18338275SEric Cheng return (0); 18348275SEric Cheng } 18358275SEric Cheng /* 18368275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 18378275SEric Cheng */ 18388275SEric Cheng ASSERT(ft->ft_size >= 2); 18398275SEric Cheng return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1); 18408275SEric Cheng } 18418275SEric Cheng 18428275SEric Cheng static uint32_t 18438275SEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s) 18448275SEric Cheng { 18458275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18468275SEric Cheng 18478275SEric Cheng return (l3info->l3_protocol % ft->ft_size); 18488275SEric Cheng } 18498275SEric Cheng 18508275SEric Cheng /* ARGSUSED */ 18518275SEric Cheng static int 18528275SEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s) 18538275SEric Cheng { 18548275SEric Cheng flow_l2info_t *l2info = &s->fs_l2info; 18558275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18568275SEric Cheng uint16_t sap = l2info->l2_sap; 18578275SEric Cheng uchar_t *l3_start; 18588275SEric Cheng 18598833SVenu.Iyer@Sun.COM l3_start = l2info->l2_start + l2info->l2_hdrsize; 18608833SVenu.Iyer@Sun.COM 18618833SVenu.Iyer@Sun.COM /* 18628833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 18638833SVenu.Iyer@Sun.COM */ 18648833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l3_start); 18658833SVenu.Iyer@Sun.COM 18668833SVenu.Iyer@Sun.COM l3info->l3_start = l3_start; 18678275SEric Cheng if (!OK_32PTR(l3_start)) 18688275SEric Cheng return (EINVAL); 18698275SEric Cheng 18708275SEric Cheng switch (sap) { 18718275SEric Cheng case ETHERTYPE_IP: { 18728275SEric Cheng ipha_t *ipha = (ipha_t *)l3_start; 18738275SEric Cheng 18748275SEric Cheng if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH)) 18758275SEric Cheng return (ENOBUFS); 18768275SEric Cheng 18778275SEric Cheng l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha); 18788275SEric Cheng l3info->l3_protocol = ipha->ipha_protocol; 18798275SEric Cheng l3info->l3_version = IPV4_VERSION; 18808275SEric Cheng l3info->l3_fragmented = 18818275SEric Cheng IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags); 18828275SEric Cheng break; 18838275SEric Cheng } 18848275SEric Cheng case ETHERTYPE_IPV6: { 18858275SEric Cheng ip6_t *ip6h = (ip6_t *)l3_start; 18868275SEric Cheng uint16_t ip6_hdrlen; 18878275SEric Cheng uint8_t nexthdr; 18888275SEric Cheng 18898275SEric Cheng if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, 18908275SEric Cheng &nexthdr)) { 18918275SEric Cheng return (ENOBUFS); 18928275SEric Cheng } 18938275SEric Cheng l3info->l3_hdrsize = ip6_hdrlen; 18948275SEric Cheng l3info->l3_protocol = nexthdr; 18958275SEric Cheng l3info->l3_version = IPV6_VERSION; 18968275SEric Cheng l3info->l3_fragmented = B_FALSE; 18978275SEric Cheng break; 18988275SEric Cheng } 18998275SEric Cheng default: 19008275SEric Cheng return (EINVAL); 19018275SEric Cheng } 19028275SEric Cheng return (0); 19038275SEric Cheng } 19048275SEric Cheng 19058275SEric Cheng /* ARGSUSED */ 19068275SEric Cheng static int 19078275SEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 19088275SEric Cheng { 19098275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19108275SEric Cheng 19118275SEric Cheng switch (fd->fd_protocol) { 19128275SEric Cheng case IPPROTO_TCP: 19138275SEric Cheng case IPPROTO_UDP: 19148275SEric Cheng case IPPROTO_SCTP: 19158275SEric Cheng case IPPROTO_ICMP: 19168275SEric Cheng case IPPROTO_ICMPV6: 19178275SEric Cheng flent->fe_match = flow_ip_proto_match; 19188275SEric Cheng return (0); 19198275SEric Cheng default: 19208275SEric Cheng return (EINVAL); 19218275SEric Cheng } 19228275SEric Cheng } 19238275SEric Cheng 19248275SEric Cheng /* ARGSUSED */ 19258275SEric Cheng static int 19268275SEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 19278275SEric Cheng { 19288275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19298275SEric Cheng flow_mask_t mask; 19308275SEric Cheng uint8_t version; 19318275SEric Cheng in6_addr_t *addr, *netmask; 19328275SEric Cheng 19338275SEric Cheng /* 19348275SEric Cheng * DSField does not require a IP version. 19358275SEric Cheng */ 19368275SEric Cheng if (fd->fd_mask == FLOW_IP_DSFIELD) { 19378275SEric Cheng if (fd->fd_dsfield_mask == 0) 19388275SEric Cheng return (EINVAL); 19398275SEric Cheng 19408275SEric Cheng flent->fe_match = flow_ip_dsfield_match; 19418275SEric Cheng return (0); 19428275SEric Cheng } 19438275SEric Cheng 19448275SEric Cheng /* 19458275SEric Cheng * IP addresses must come with a version to avoid ambiguity. 19468275SEric Cheng */ 19478275SEric Cheng if ((fd->fd_mask & FLOW_IP_VERSION) == 0) 19488275SEric Cheng return (EINVAL); 19498275SEric Cheng 19508275SEric Cheng version = fd->fd_ipversion; 19518275SEric Cheng if (version != IPV4_VERSION && version != IPV6_VERSION) 19528275SEric Cheng return (EINVAL); 19538275SEric Cheng 19548275SEric Cheng mask = fd->fd_mask & ~FLOW_IP_VERSION; 19558275SEric Cheng switch (mask) { 19568275SEric Cheng case FLOW_IP_LOCAL: 19578275SEric Cheng addr = &fd->fd_local_addr; 19588275SEric Cheng netmask = &fd->fd_local_netmask; 19598275SEric Cheng break; 19608275SEric Cheng case FLOW_IP_REMOTE: 19618275SEric Cheng addr = &fd->fd_remote_addr; 19628275SEric Cheng netmask = &fd->fd_remote_netmask; 19638275SEric Cheng break; 19648275SEric Cheng default: 19658275SEric Cheng return (EINVAL); 19668275SEric Cheng } 19678275SEric Cheng 19688275SEric Cheng /* 19698275SEric Cheng * Apply netmask onto specified address. 19708275SEric Cheng */ 19718275SEric Cheng V6_MASK_COPY(*addr, *netmask, *addr); 19728275SEric Cheng if (version == IPV4_VERSION) { 19738275SEric Cheng ipaddr_t v4addr = V4_PART_OF_V6((*addr)); 19748275SEric Cheng ipaddr_t v4mask = V4_PART_OF_V6((*netmask)); 19758275SEric Cheng 19768275SEric Cheng if (v4addr == 0 || v4mask == 0) 19778275SEric Cheng return (EINVAL); 19788275SEric Cheng flent->fe_match = flow_ip_v4_match; 19798275SEric Cheng } else { 19808275SEric Cheng if (IN6_IS_ADDR_UNSPECIFIED(addr) || 19818275SEric Cheng IN6_IS_ADDR_UNSPECIFIED(netmask)) 19828275SEric Cheng return (EINVAL); 19838275SEric Cheng flent->fe_match = flow_ip_v6_match; 19848275SEric Cheng } 19858275SEric Cheng return (0); 19868275SEric Cheng } 19878275SEric Cheng 19888275SEric Cheng static uint32_t 19898275SEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 19908275SEric Cheng { 19918275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19928275SEric Cheng 19938275SEric Cheng return (fd->fd_protocol % ft->ft_size); 19948275SEric Cheng } 19958275SEric Cheng 19968275SEric Cheng static uint32_t 19978275SEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 19988275SEric Cheng { 19998275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 20008275SEric Cheng 20018275SEric Cheng /* 20028275SEric Cheng * DSField flents are arranged as a single list. 20038275SEric Cheng */ 20048275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 20058275SEric Cheng return (0); 20068275SEric Cheng 20078275SEric Cheng /* 20088275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 20098275SEric Cheng */ 20108275SEric Cheng ASSERT(ft->ft_size >= 2); 20118275SEric Cheng return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1); 20128275SEric Cheng } 20138275SEric Cheng 20148275SEric Cheng /* ARGSUSED */ 20158275SEric Cheng static boolean_t 20168275SEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20178275SEric Cheng { 20188275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20198275SEric Cheng 20208275SEric Cheng return (fd1->fd_protocol == fd2->fd_protocol); 20218275SEric Cheng } 20228275SEric Cheng 20238275SEric Cheng /* ARGSUSED */ 20248275SEric Cheng static boolean_t 20258275SEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20268275SEric Cheng { 20278275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20288275SEric Cheng in6_addr_t *a1, *m1, *a2, *m2; 20298275SEric Cheng 20308275SEric Cheng ASSERT(fd1->fd_mask == fd2->fd_mask); 20318275SEric Cheng if (fd1->fd_mask == FLOW_IP_DSFIELD) { 20328275SEric Cheng return (fd1->fd_dsfield == fd2->fd_dsfield && 20338275SEric Cheng fd1->fd_dsfield_mask == fd2->fd_dsfield_mask); 20348275SEric Cheng } 20358275SEric Cheng 20368275SEric Cheng /* 20378275SEric Cheng * flow_ip_accept_fe() already validated the version. 20388275SEric Cheng */ 20398275SEric Cheng ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0); 20408275SEric Cheng if (fd1->fd_ipversion != fd2->fd_ipversion) 20418275SEric Cheng return (B_FALSE); 20428275SEric Cheng 20438275SEric Cheng switch (fd1->fd_mask & ~FLOW_IP_VERSION) { 20448275SEric Cheng case FLOW_IP_LOCAL: 20458275SEric Cheng a1 = &fd1->fd_local_addr; 20468275SEric Cheng m1 = &fd1->fd_local_netmask; 20478275SEric Cheng a2 = &fd2->fd_local_addr; 20488275SEric Cheng m2 = &fd2->fd_local_netmask; 20498275SEric Cheng break; 20508275SEric Cheng case FLOW_IP_REMOTE: 20518275SEric Cheng a1 = &fd1->fd_remote_addr; 20528275SEric Cheng m1 = &fd1->fd_remote_netmask; 20538275SEric Cheng a2 = &fd2->fd_remote_addr; 20548275SEric Cheng m2 = &fd2->fd_remote_netmask; 20558275SEric Cheng break; 20568275SEric Cheng default: 20578275SEric Cheng /* 20588275SEric Cheng * This is unreachable given the checks in 20598275SEric Cheng * flow_ip_accept_fe(). 20608275SEric Cheng */ 20618275SEric Cheng return (B_FALSE); 20628275SEric Cheng } 20638275SEric Cheng 20648275SEric Cheng if (fd1->fd_ipversion == IPV4_VERSION) { 20658275SEric Cheng return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) && 20668275SEric Cheng V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2))); 20678275SEric Cheng 20688275SEric Cheng } else { 20698275SEric Cheng return (IN6_ARE_ADDR_EQUAL(a1, a2) && 20708275SEric Cheng IN6_ARE_ADDR_EQUAL(m1, m2)); 20718275SEric Cheng } 20728275SEric Cheng } 20738275SEric Cheng 20748275SEric Cheng static int 20758275SEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask) 20768275SEric Cheng { 20778275SEric Cheng int bits; 20788275SEric Cheng int plen = IPV6_ABITS; 20798275SEric Cheng int i; 20808275SEric Cheng 20818275SEric Cheng for (i = 3; i >= 0; i--) { 20828275SEric Cheng if (v6mask->s6_addr32[i] == 0) { 20838275SEric Cheng plen -= 32; 20848275SEric Cheng continue; 20858275SEric Cheng } 20868275SEric Cheng bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 20878275SEric Cheng if (bits == 0) 20888275SEric Cheng break; 20898275SEric Cheng plen -= bits; 20908275SEric Cheng } 20918275SEric Cheng return (plen); 20928275SEric Cheng } 20938275SEric Cheng 20948275SEric Cheng /* ARGSUSED */ 20958275SEric Cheng static int 20968275SEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 20978275SEric Cheng flow_entry_t *flent) 20988275SEric Cheng { 20998275SEric Cheng flow_entry_t **p = headp; 21008275SEric Cheng flow_desc_t *fd0, *fd; 21018275SEric Cheng in6_addr_t *m0, *m; 21028275SEric Cheng int plen0, plen; 21038275SEric Cheng 21048275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 21058275SEric Cheng 21068275SEric Cheng /* 21078275SEric Cheng * No special ordering needed for dsfield. 21088275SEric Cheng */ 21098275SEric Cheng fd0 = &flent->fe_flow_desc; 21108275SEric Cheng if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) { 21118275SEric Cheng if (*p != NULL) { 21128275SEric Cheng ASSERT(flent->fe_next == NULL); 21138275SEric Cheng flent->fe_next = *p; 21148275SEric Cheng } 21158275SEric Cheng *p = flent; 21168275SEric Cheng return (0); 21178275SEric Cheng } 21188275SEric Cheng 21198275SEric Cheng /* 21208275SEric Cheng * IP address flows are arranged in descending prefix length order. 21218275SEric Cheng */ 21228275SEric Cheng m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ? 21238275SEric Cheng &fd0->fd_local_netmask : &fd0->fd_remote_netmask; 21248275SEric Cheng plen0 = flow_ip_mask2plen(m0); 21258275SEric Cheng ASSERT(plen0 != 0); 21268275SEric Cheng 21278275SEric Cheng for (; *p != NULL; p = &(*p)->fe_next) { 21288275SEric Cheng fd = &(*p)->fe_flow_desc; 21298275SEric Cheng 21308275SEric Cheng /* 21318275SEric Cheng * Normally a dsfield flent shouldn't end up on the same 21328275SEric Cheng * list as an IP address because flow tables are (for now) 21338275SEric Cheng * disjoint. If we decide to support both IP and dsfield 21348275SEric Cheng * in the same table in the future, this check will allow 21358275SEric Cheng * for that. 21368275SEric Cheng */ 21378275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 21388275SEric Cheng continue; 21398275SEric Cheng 21408275SEric Cheng /* 21418275SEric Cheng * We also allow for the mixing of local and remote address 21428275SEric Cheng * flents within one list. 21438275SEric Cheng */ 21448275SEric Cheng m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ? 21458275SEric Cheng &fd->fd_local_netmask : &fd->fd_remote_netmask; 21468275SEric Cheng plen = flow_ip_mask2plen(m); 21478275SEric Cheng 21488275SEric Cheng if (plen <= plen0) 21498275SEric Cheng break; 21508275SEric Cheng } 21518275SEric Cheng if (*p != NULL) { 21528275SEric Cheng ASSERT(flent->fe_next == NULL); 21538275SEric Cheng flent->fe_next = *p; 21548275SEric Cheng } 21558275SEric Cheng *p = flent; 21568275SEric Cheng return (0); 21578275SEric Cheng } 21588275SEric Cheng 21598275SEric Cheng /* 21608275SEric Cheng * Transport layer protocol and port matching functions. 21618275SEric Cheng */ 21628275SEric Cheng 21638275SEric Cheng /* ARGSUSED */ 21648275SEric Cheng static boolean_t 21658275SEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21668275SEric Cheng { 21678275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21688275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21698275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21708275SEric Cheng 21718275SEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 21728275SEric Cheng fd->fd_local_port == l4info->l4_hash_port); 21738275SEric Cheng } 21748275SEric Cheng 21758275SEric Cheng /* ARGSUSED */ 21768275SEric Cheng static boolean_t 21778275SEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21788275SEric Cheng { 21798275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21808275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21818275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21828275SEric Cheng 21838275SEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 21848275SEric Cheng fd->fd_remote_port == l4info->l4_hash_port); 21858275SEric Cheng } 21868275SEric Cheng 21878275SEric Cheng /* 21888275SEric Cheng * Transport hash function. 21898275SEric Cheng * Since we only support either local or remote port flows, 21908275SEric Cheng * we only need to extract one of the ports to be used for 21918275SEric Cheng * matching. 21928275SEric Cheng */ 21938275SEric Cheng static uint32_t 21948275SEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s) 21958275SEric Cheng { 21968275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21978275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21988275SEric Cheng uint8_t proto = l3info->l3_protocol; 21998275SEric Cheng boolean_t dst_or_src; 22008275SEric Cheng 22018275SEric Cheng if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) { 22028275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 22038275SEric Cheng } else { 22048275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 22058275SEric Cheng } 22068275SEric Cheng 22078275SEric Cheng l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port : 22088275SEric Cheng l4info->l4_src_port; 22098275SEric Cheng 22108275SEric Cheng return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size); 22118275SEric Cheng } 22128275SEric Cheng 22138275SEric Cheng /* 22148275SEric Cheng * Unlike other accept() functions above, we do not need to get the header 22158275SEric Cheng * size because this is our highest layer so far. If we want to do support 22168275SEric Cheng * other higher layer protocols, we would need to save the l4_hdrsize 22178275SEric Cheng * in the code below. 22188275SEric Cheng */ 22198275SEric Cheng 22208275SEric Cheng /* ARGSUSED */ 22218275SEric Cheng static int 22228275SEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s) 22238275SEric Cheng { 22248275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 22258275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 22268275SEric Cheng uint8_t proto = l3info->l3_protocol; 22278275SEric Cheng uchar_t *l4_start; 22288275SEric Cheng 22298833SVenu.Iyer@Sun.COM l4_start = l3info->l3_start + l3info->l3_hdrsize; 22308833SVenu.Iyer@Sun.COM 22318833SVenu.Iyer@Sun.COM /* 22328833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 22338833SVenu.Iyer@Sun.COM */ 22348833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l4_start); 22358833SVenu.Iyer@Sun.COM 22368833SVenu.Iyer@Sun.COM l4info->l4_start = l4_start; 22378275SEric Cheng if (!OK_32PTR(l4_start)) 22388275SEric Cheng return (EINVAL); 22398275SEric Cheng 22408275SEric Cheng if (l3info->l3_fragmented == B_TRUE) 22418275SEric Cheng return (EINVAL); 22428275SEric Cheng 22438275SEric Cheng switch (proto) { 22448275SEric Cheng case IPPROTO_TCP: { 22458275SEric Cheng struct tcphdr *tcph = (struct tcphdr *)l4_start; 22468275SEric Cheng 22478275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph))) 22488275SEric Cheng return (ENOBUFS); 22498275SEric Cheng 22508275SEric Cheng l4info->l4_src_port = tcph->th_sport; 22518275SEric Cheng l4info->l4_dst_port = tcph->th_dport; 22528275SEric Cheng break; 22538275SEric Cheng } 22548275SEric Cheng case IPPROTO_UDP: { 22558275SEric Cheng struct udphdr *udph = (struct udphdr *)l4_start; 22568275SEric Cheng 22578275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph))) 22588275SEric Cheng return (ENOBUFS); 22598275SEric Cheng 22608275SEric Cheng l4info->l4_src_port = udph->uh_sport; 22618275SEric Cheng l4info->l4_dst_port = udph->uh_dport; 22628275SEric Cheng break; 22638275SEric Cheng } 22648275SEric Cheng case IPPROTO_SCTP: { 22658275SEric Cheng sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start; 22668275SEric Cheng 22678275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph))) 22688275SEric Cheng return (ENOBUFS); 22698275SEric Cheng 22708275SEric Cheng l4info->l4_src_port = sctph->sh_sport; 22718275SEric Cheng l4info->l4_dst_port = sctph->sh_dport; 22728275SEric Cheng break; 22738275SEric Cheng } 22748275SEric Cheng default: 22758275SEric Cheng return (EINVAL); 22768275SEric Cheng } 22778275SEric Cheng 22788275SEric Cheng return (0); 22798275SEric Cheng } 22808275SEric Cheng 22818275SEric Cheng /* 22828275SEric Cheng * Validates transport flow entry. 22838275SEric Cheng * The protocol field must be present. 22848275SEric Cheng */ 22858275SEric Cheng 22868275SEric Cheng /* ARGSUSED */ 22878275SEric Cheng static int 22888275SEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 22898275SEric Cheng { 22908275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 22918275SEric Cheng flow_mask_t mask = fd->fd_mask; 22928275SEric Cheng 22938275SEric Cheng if ((mask & FLOW_IP_PROTOCOL) == 0) 22948275SEric Cheng return (EINVAL); 22958275SEric Cheng 22968275SEric Cheng switch (fd->fd_protocol) { 22978275SEric Cheng case IPPROTO_TCP: 22988275SEric Cheng case IPPROTO_UDP: 22998275SEric Cheng case IPPROTO_SCTP: 23008275SEric Cheng break; 23018275SEric Cheng default: 23028275SEric Cheng return (EINVAL); 23038275SEric Cheng } 23048275SEric Cheng 23058275SEric Cheng switch (mask & ~FLOW_IP_PROTOCOL) { 23068275SEric Cheng case FLOW_ULP_PORT_LOCAL: 23078275SEric Cheng if (fd->fd_local_port == 0) 23088275SEric Cheng return (EINVAL); 23098275SEric Cheng 23108275SEric Cheng flent->fe_match = flow_transport_lport_match; 23118275SEric Cheng break; 23128275SEric Cheng case FLOW_ULP_PORT_REMOTE: 23138275SEric Cheng if (fd->fd_remote_port == 0) 23148275SEric Cheng return (EINVAL); 23158275SEric Cheng 23168275SEric Cheng flent->fe_match = flow_transport_rport_match; 23178275SEric Cheng break; 23188275SEric Cheng case 0: 23198275SEric Cheng /* 23208275SEric Cheng * transport-only flows conflicts with our table type. 23218275SEric Cheng */ 23228275SEric Cheng return (EOPNOTSUPP); 23238275SEric Cheng default: 23248275SEric Cheng return (EINVAL); 23258275SEric Cheng } 23268275SEric Cheng 23278275SEric Cheng return (0); 23288275SEric Cheng } 23298275SEric Cheng 23308275SEric Cheng static uint32_t 23318275SEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 23328275SEric Cheng { 23338275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 23348275SEric Cheng uint16_t port = 0; 23358275SEric Cheng 23368275SEric Cheng port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ? 23378275SEric Cheng fd->fd_local_port : fd->fd_remote_port; 23388275SEric Cheng 23398275SEric Cheng return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size); 23408275SEric Cheng } 23418275SEric Cheng 23428275SEric Cheng /* ARGSUSED */ 23438275SEric Cheng static boolean_t 23448275SEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 23458275SEric Cheng { 23468275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 23478275SEric Cheng 23488275SEric Cheng if (fd1->fd_protocol != fd2->fd_protocol) 23498275SEric Cheng return (B_FALSE); 23508275SEric Cheng 23518275SEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) 23528275SEric Cheng return (fd1->fd_local_port == fd2->fd_local_port); 23538275SEric Cheng 23548275SEric Cheng return (fd1->fd_remote_port == fd2->fd_remote_port); 23558275SEric Cheng } 23568275SEric Cheng 23578275SEric Cheng static flow_ops_t flow_l2_ops = { 23588275SEric Cheng flow_l2_accept_fe, 23598275SEric Cheng flow_l2_hash_fe, 23608275SEric Cheng flow_l2_match_fe, 23618275SEric Cheng flow_generic_insert_fe, 23628275SEric Cheng flow_l2_hash, 23638275SEric Cheng {flow_l2_accept} 23648275SEric Cheng }; 23658275SEric Cheng 23668275SEric Cheng static flow_ops_t flow_ip_ops = { 23678275SEric Cheng flow_ip_accept_fe, 23688275SEric Cheng flow_ip_hash_fe, 23698275SEric Cheng flow_ip_match_fe, 23708275SEric Cheng flow_ip_insert_fe, 23718275SEric Cheng flow_ip_hash, 23728275SEric Cheng {flow_l2_accept, flow_ip_accept} 23738275SEric Cheng }; 23748275SEric Cheng 23758275SEric Cheng static flow_ops_t flow_ip_proto_ops = { 23768275SEric Cheng flow_ip_proto_accept_fe, 23778275SEric Cheng flow_ip_proto_hash_fe, 23788275SEric Cheng flow_ip_proto_match_fe, 23798275SEric Cheng flow_generic_insert_fe, 23808275SEric Cheng flow_ip_proto_hash, 23818275SEric Cheng {flow_l2_accept, flow_ip_accept} 23828275SEric Cheng }; 23838275SEric Cheng 23848275SEric Cheng static flow_ops_t flow_transport_ops = { 23858275SEric Cheng flow_transport_accept_fe, 23868275SEric Cheng flow_transport_hash_fe, 23878275SEric Cheng flow_transport_match_fe, 23888275SEric Cheng flow_generic_insert_fe, 23898275SEric Cheng flow_transport_hash, 23908275SEric Cheng {flow_l2_accept, flow_ip_accept, flow_transport_accept} 23918275SEric Cheng }; 23928275SEric Cheng 23938275SEric Cheng static flow_tab_info_t flow_tab_info_list[] = { 23948275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2}, 23958275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2}, 23968275SEric Cheng {&flow_ip_ops, FLOW_IP_DSFIELD, 1}, 23978275SEric Cheng {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256}, 23988275SEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024} 23998275SEric Cheng }; 24008275SEric Cheng 24018275SEric Cheng #define FLOW_MAX_TAB_INFO \ 24028275SEric Cheng ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t)) 24038275SEric Cheng 24048275SEric Cheng static flow_tab_info_t * 24058275SEric Cheng mac_flow_tab_info_get(flow_mask_t mask) 24068275SEric Cheng { 24078275SEric Cheng int i; 24088275SEric Cheng 24098275SEric Cheng for (i = 0; i < FLOW_MAX_TAB_INFO; i++) { 24108275SEric Cheng if (mask == flow_tab_info_list[i].fti_mask) 24118275SEric Cheng return (&flow_tab_info_list[i]); 24128275SEric Cheng } 24138275SEric Cheng return (NULL); 24148275SEric Cheng } 2415