18275SEric Cheng /* 28275SEric Cheng * CDDL HEADER START 38275SEric Cheng * 48275SEric Cheng * The contents of this file are subject to the terms of the 58275SEric Cheng * Common Development and Distribution License (the "License"). 68275SEric Cheng * You may not use this file except in compliance with the License. 78275SEric Cheng * 88275SEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 98275SEric Cheng * or http://www.opensolaris.org/os/licensing. 108275SEric Cheng * See the License for the specific language governing permissions 118275SEric Cheng * and limitations under the License. 128275SEric Cheng * 138275SEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 148275SEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 158275SEric Cheng * If applicable, add the following below this CDDL HEADER, with the 168275SEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 178275SEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 188275SEric Cheng * 198275SEric Cheng * CDDL HEADER END 208275SEric Cheng */ 218275SEric Cheng 228275SEric Cheng /* 23*11528SBaban.Kenkre@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 248275SEric Cheng * Use is subject to license terms. 258275SEric Cheng */ 268275SEric Cheng 278275SEric Cheng #include <sys/strsun.h> 288275SEric Cheng #include <sys/sdt.h> 298275SEric Cheng #include <sys/mac.h> 308275SEric Cheng #include <sys/mac_impl.h> 318275SEric Cheng #include <sys/mac_client_impl.h> 328275SEric Cheng #include <sys/dls.h> 338275SEric Cheng #include <sys/dls_impl.h> 348275SEric Cheng #include <sys/mac_soft_ring.h> 358275SEric Cheng #include <sys/ethernet.h> 368275SEric Cheng #include <sys/vlan.h> 378275SEric Cheng #include <inet/ip.h> 388275SEric Cheng #include <inet/ip6.h> 398275SEric Cheng #include <netinet/tcp.h> 408275SEric Cheng #include <netinet/udp.h> 418275SEric Cheng #include <netinet/sctp.h> 428275SEric Cheng 438275SEric Cheng /* global flow table, will be a per exclusive-zone table later */ 448275SEric Cheng static mod_hash_t *flow_hash; 458275SEric Cheng static krwlock_t flow_tab_lock; 468275SEric Cheng 478275SEric Cheng static kmem_cache_t *flow_cache; 488275SEric Cheng static kmem_cache_t *flow_tab_cache; 498275SEric Cheng static flow_ops_t flow_l2_ops; 508275SEric Cheng 518275SEric Cheng typedef struct { 528275SEric Cheng const char *fs_name; 538275SEric Cheng uint_t fs_offset; 548275SEric Cheng } flow_stats_info_t; 558275SEric Cheng 568275SEric Cheng #define FS_OFF(f) (offsetof(flow_stats_t, f)) 578275SEric Cheng static flow_stats_info_t flow_stats_list[] = { 588275SEric Cheng {"rbytes", FS_OFF(fs_rbytes)}, 598275SEric Cheng {"ipackets", FS_OFF(fs_ipackets)}, 608275SEric Cheng {"ierrors", FS_OFF(fs_ierrors)}, 618275SEric Cheng {"obytes", FS_OFF(fs_obytes)}, 628275SEric Cheng {"opackets", FS_OFF(fs_opackets)}, 638275SEric Cheng {"oerrors", FS_OFF(fs_oerrors)} 648275SEric Cheng }; 658275SEric Cheng #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t)) 668275SEric Cheng 678275SEric Cheng /* 688275SEric Cheng * Checks whether a flow mask is legal. 698275SEric Cheng */ 708275SEric Cheng static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t); 718275SEric Cheng 728275SEric Cheng static void 738275SEric Cheng flow_stat_init(kstat_named_t *knp) 748275SEric Cheng { 758275SEric Cheng int i; 768275SEric Cheng 778275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 788275SEric Cheng kstat_named_init(knp, flow_stats_list[i].fs_name, 798275SEric Cheng KSTAT_DATA_UINT64); 808275SEric Cheng } 818275SEric Cheng } 828275SEric Cheng 838275SEric Cheng static int 848275SEric Cheng flow_stat_update(kstat_t *ksp, int rw) 858275SEric Cheng { 8610616SSebastien.Roy@Sun.COM flow_entry_t *fep = ksp->ks_private; 8710616SSebastien.Roy@Sun.COM flow_stats_t *fsp = &fep->fe_flowstats; 8810616SSebastien.Roy@Sun.COM kstat_named_t *knp = ksp->ks_data; 8910616SSebastien.Roy@Sun.COM uint64_t *statp; 9010616SSebastien.Roy@Sun.COM int i; 918275SEric Cheng 928275SEric Cheng if (rw != KSTAT_READ) 938275SEric Cheng return (EACCES); 948275SEric Cheng 958275SEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 968275SEric Cheng statp = (uint64_t *) 978275SEric Cheng ((uchar_t *)fsp + flow_stats_list[i].fs_offset); 988275SEric Cheng 998275SEric Cheng knp->value.ui64 = *statp; 1008275SEric Cheng } 1018275SEric Cheng return (0); 1028275SEric Cheng } 1038275SEric Cheng 1048275SEric Cheng static void 1058275SEric Cheng flow_stat_create(flow_entry_t *fep) 1068275SEric Cheng { 1078275SEric Cheng kstat_t *ksp; 1088275SEric Cheng kstat_named_t *knp; 1098275SEric Cheng uint_t nstats = FS_SIZE; 1108275SEric Cheng 11110616SSebastien.Roy@Sun.COM /* 11210616SSebastien.Roy@Sun.COM * Fow now, flow entries are only manipulated and visible from the 11310616SSebastien.Roy@Sun.COM * global zone. 11410616SSebastien.Roy@Sun.COM */ 11510616SSebastien.Roy@Sun.COM ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow", 11610616SSebastien.Roy@Sun.COM KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID); 1178275SEric Cheng if (ksp == NULL) 1188275SEric Cheng return; 1198275SEric Cheng 1208275SEric Cheng ksp->ks_update = flow_stat_update; 1218275SEric Cheng ksp->ks_private = fep; 1228275SEric Cheng fep->fe_ksp = ksp; 1238275SEric Cheng 1248275SEric Cheng knp = (kstat_named_t *)ksp->ks_data; 1258275SEric Cheng flow_stat_init(knp); 1268275SEric Cheng kstat_install(ksp); 1278275SEric Cheng } 1288275SEric Cheng 1298275SEric Cheng void 1308275SEric Cheng flow_stat_destroy(flow_entry_t *fep) 1318275SEric Cheng { 1328275SEric Cheng if (fep->fe_ksp != NULL) { 1338275SEric Cheng kstat_delete(fep->fe_ksp); 1348275SEric Cheng fep->fe_ksp = NULL; 1358275SEric Cheng } 1368275SEric Cheng } 1378275SEric Cheng 1388275SEric Cheng /* 1398275SEric Cheng * Initialize the flow table 1408275SEric Cheng */ 1418275SEric Cheng void 1428275SEric Cheng mac_flow_init() 1438275SEric Cheng { 1448275SEric Cheng flow_cache = kmem_cache_create("flow_entry_cache", 1458275SEric Cheng sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1468275SEric Cheng flow_tab_cache = kmem_cache_create("flow_tab_cache", 1478275SEric Cheng sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1488275SEric Cheng flow_hash = mod_hash_create_extended("flow_hash", 1498275SEric Cheng 100, mod_hash_null_keydtor, mod_hash_null_valdtor, 1508275SEric Cheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 1518275SEric Cheng rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL); 1528275SEric Cheng } 1538275SEric Cheng 1548275SEric Cheng /* 1558275SEric Cheng * Cleanup and release the flow table 1568275SEric Cheng */ 1578275SEric Cheng void 1588275SEric Cheng mac_flow_fini() 1598275SEric Cheng { 1608275SEric Cheng kmem_cache_destroy(flow_cache); 1618275SEric Cheng kmem_cache_destroy(flow_tab_cache); 1628275SEric Cheng mod_hash_destroy_hash(flow_hash); 1638275SEric Cheng rw_destroy(&flow_tab_lock); 1648275SEric Cheng } 1658275SEric Cheng 1668275SEric Cheng /* 1678275SEric Cheng * mac_create_flow(): create a flow_entry_t. 1688275SEric Cheng */ 1698275SEric Cheng int 1708275SEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, 1718275SEric Cheng void *client_cookie, uint_t type, flow_entry_t **flentp) 1728275SEric Cheng { 1738275SEric Cheng flow_entry_t *flent = *flentp; 1748275SEric Cheng int err = 0; 1758275SEric Cheng 1768275SEric Cheng if (mrp != NULL) { 1778275SEric Cheng err = mac_validate_props(mrp); 1788275SEric Cheng if (err != 0) 1798275SEric Cheng return (err); 1808275SEric Cheng } 1818275SEric Cheng 1828275SEric Cheng if (flent == NULL) { 1838275SEric Cheng flent = kmem_cache_alloc(flow_cache, KM_SLEEP); 1848275SEric Cheng bzero(flent, sizeof (*flent)); 1858275SEric Cheng mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL); 1868275SEric Cheng cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL); 1878275SEric Cheng 1888275SEric Cheng /* Initialize the receiver function to a safe routine */ 1898275SEric Cheng flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; 1908275SEric Cheng flent->fe_index = -1; 1918275SEric Cheng } 1928558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 1938275SEric Cheng 1948275SEric Cheng /* This is an initial flow, will be configured later */ 1958275SEric Cheng if (fd == NULL) { 1968275SEric Cheng *flentp = flent; 1978275SEric Cheng return (0); 1988275SEric Cheng } 1998275SEric Cheng 2008275SEric Cheng flent->fe_client_cookie = client_cookie; 2018275SEric Cheng flent->fe_type = type; 2028275SEric Cheng 2038275SEric Cheng /* Save flow desc */ 2048275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 2058275SEric Cheng 2068275SEric Cheng if (mrp != NULL) { 2078275SEric Cheng /* 2088275SEric Cheng * We have already set fe_resource_props for a Link. 2098275SEric Cheng */ 2108275SEric Cheng if (type & FLOW_USER) { 2118275SEric Cheng bcopy(mrp, &flent->fe_resource_props, 2128275SEric Cheng sizeof (mac_resource_props_t)); 2138275SEric Cheng } 2148275SEric Cheng /* 2158275SEric Cheng * The effective resource list should reflect the priority 2168275SEric Cheng * that we set implicitly. 2178275SEric Cheng */ 2188275SEric Cheng if (!(mrp->mrp_mask & MRP_PRIORITY)) 2198275SEric Cheng mrp->mrp_mask |= MRP_PRIORITY; 2208275SEric Cheng if (type & FLOW_USER) 2218275SEric Cheng mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 2228275SEric Cheng else 2238275SEric Cheng mrp->mrp_priority = MPL_LINK_DEFAULT; 2248275SEric Cheng bcopy(mrp, &flent->fe_effective_props, 2258275SEric Cheng sizeof (mac_resource_props_t)); 2268275SEric Cheng } 2278275SEric Cheng flow_stat_create(flent); 2288275SEric Cheng 2298275SEric Cheng *flentp = flent; 2308275SEric Cheng return (0); 2318275SEric Cheng } 2328275SEric Cheng 2338275SEric Cheng /* 2348275SEric Cheng * Validate flow entry and add it to a flow table. 2358275SEric Cheng */ 2368275SEric Cheng int 2378275SEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent) 2388275SEric Cheng { 2398275SEric Cheng flow_entry_t **headp, **p; 2408275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 2418275SEric Cheng flow_mask_t mask; 2428275SEric Cheng uint32_t index; 2438275SEric Cheng int err; 2448275SEric Cheng 2458275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 2468275SEric Cheng 2478275SEric Cheng /* 2488275SEric Cheng * Check for invalid bits in mask. 2498275SEric Cheng */ 2508275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 2518275SEric Cheng if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0) 2528275SEric Cheng return (EOPNOTSUPP); 2538275SEric Cheng 2548275SEric Cheng /* 2558275SEric Cheng * Validate flent. 2568275SEric Cheng */ 2578275SEric Cheng if ((err = ops->fo_accept_fe(ft, flent)) != 0) { 2588275SEric Cheng DTRACE_PROBE3(accept_failed, flow_tab_t *, ft, 2598275SEric Cheng flow_entry_t *, flent, int, err); 2608275SEric Cheng return (err); 2618275SEric Cheng } 2628275SEric Cheng 2638275SEric Cheng /* 2648275SEric Cheng * Flent is valid. now calculate hash and insert it 2658275SEric Cheng * into hash table. 2668275SEric Cheng */ 2678275SEric Cheng index = ops->fo_hash_fe(ft, flent); 2688275SEric Cheng 2698275SEric Cheng /* 2708275SEric Cheng * We do not need a lock up until now because we were 2718275SEric Cheng * not accessing the flow table. 2728275SEric Cheng */ 2738275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 2748275SEric Cheng headp = &ft->ft_table[index]; 2758275SEric Cheng 2768275SEric Cheng /* 2778275SEric Cheng * Check for duplicate flow. 2788275SEric Cheng */ 2798275SEric Cheng for (p = headp; *p != NULL; p = &(*p)->fe_next) { 2808275SEric Cheng if ((*p)->fe_flow_desc.fd_mask != 2818275SEric Cheng flent->fe_flow_desc.fd_mask) 2828275SEric Cheng continue; 2838275SEric Cheng 2848275SEric Cheng if (ft->ft_ops.fo_match_fe(ft, *p, flent)) { 2858275SEric Cheng rw_exit(&ft->ft_lock); 2868275SEric Cheng DTRACE_PROBE3(dup_flow, flow_tab_t *, ft, 2878275SEric Cheng flow_entry_t *, flent, int, err); 2888275SEric Cheng return (EALREADY); 2898275SEric Cheng } 2908275SEric Cheng } 2918275SEric Cheng 2928275SEric Cheng /* 2938275SEric Cheng * Insert flow to hash list. 2948275SEric Cheng */ 2958275SEric Cheng err = ops->fo_insert_fe(ft, headp, flent); 2968275SEric Cheng if (err != 0) { 2978275SEric Cheng rw_exit(&ft->ft_lock); 2988275SEric Cheng DTRACE_PROBE3(insert_failed, flow_tab_t *, ft, 2998275SEric Cheng flow_entry_t *, flent, int, err); 3008275SEric Cheng return (err); 3018275SEric Cheng } 3028275SEric Cheng 3038275SEric Cheng /* 3048275SEric Cheng * Save the hash index so it can be used by mac_flow_remove(). 3058275SEric Cheng */ 3068275SEric Cheng flent->fe_index = (int)index; 3078275SEric Cheng 3088275SEric Cheng /* 3098275SEric Cheng * Save the flow tab back reference. 3108275SEric Cheng */ 3118275SEric Cheng flent->fe_flow_tab = ft; 3128275SEric Cheng FLOW_MARK(flent, FE_FLOW_TAB); 3138275SEric Cheng ft->ft_flow_count++; 3148275SEric Cheng rw_exit(&ft->ft_lock); 3158275SEric Cheng return (0); 3168275SEric Cheng } 3178275SEric Cheng 3188275SEric Cheng /* 3198275SEric Cheng * Remove a flow from a mac client's subflow table 3208275SEric Cheng */ 3218275SEric Cheng void 3228275SEric Cheng mac_flow_rem_subflow(flow_entry_t *flent) 3238275SEric Cheng { 3248275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 3258275SEric Cheng mac_client_impl_t *mcip = ft->ft_mcip; 3269073SCathy.Zhou@Sun.COM mac_handle_t mh = (mac_handle_t)ft->ft_mip; 3278275SEric Cheng 3289073SCathy.Zhou@Sun.COM ASSERT(MAC_PERIM_HELD(mh)); 3298275SEric Cheng 3308275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 3318275SEric Cheng if (flent->fe_mcip == NULL) { 3328275SEric Cheng /* 3338275SEric Cheng * The interface is not yet plumbed and mac_client_flow_add 3348275SEric Cheng * was not done. 3358275SEric Cheng */ 3368275SEric Cheng if (FLOW_TAB_EMPTY(ft)) { 3378275SEric Cheng mac_flow_tab_destroy(ft); 3388275SEric Cheng mcip->mci_subflow_tab = NULL; 3398275SEric Cheng } 3409073SCathy.Zhou@Sun.COM } else { 3419073SCathy.Zhou@Sun.COM mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 3429073SCathy.Zhou@Sun.COM mac_link_flow_clean((mac_client_handle_t)mcip, flent); 3438275SEric Cheng } 3449073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 3458275SEric Cheng } 3468275SEric Cheng 3478275SEric Cheng /* 3488275SEric Cheng * Add a flow to a mac client's subflow table and instantiate the flow 3498275SEric Cheng * in the mac by creating the associated SRSs etc. 3508275SEric Cheng */ 3518275SEric Cheng int 3528275SEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, 3538275SEric Cheng boolean_t instantiate_flow) 3548275SEric Cheng { 3558275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 3569073SCathy.Zhou@Sun.COM mac_handle_t mh = (mac_handle_t)mcip->mci_mip; 3578275SEric Cheng flow_tab_info_t *ftinfo; 3588275SEric Cheng flow_mask_t mask; 3598275SEric Cheng flow_tab_t *ft; 3608275SEric Cheng int err; 3618275SEric Cheng boolean_t ft_created = B_FALSE; 3628275SEric Cheng 3639073SCathy.Zhou@Sun.COM ASSERT(MAC_PERIM_HELD(mh)); 3649073SCathy.Zhou@Sun.COM 3659073SCathy.Zhou@Sun.COM if ((err = mac_fastpath_disable(mh)) != 0) 3669073SCathy.Zhou@Sun.COM return (err); 3678275SEric Cheng 3688275SEric Cheng /* 3698275SEric Cheng * If the subflow table exists already just add the new subflow 3708275SEric Cheng * to the existing table, else we create a new subflow table below. 3718275SEric Cheng */ 3728275SEric Cheng ft = mcip->mci_subflow_tab; 3738275SEric Cheng if (ft == NULL) { 3748275SEric Cheng mask = flent->fe_flow_desc.fd_mask; 3758275SEric Cheng /* 3768275SEric Cheng * Try to create a new table and then add the subflow to the 3778275SEric Cheng * newly created subflow table 3788275SEric Cheng */ 3799073SCathy.Zhou@Sun.COM if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) { 3809073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 3818275SEric Cheng return (EOPNOTSUPP); 3829073SCathy.Zhou@Sun.COM } 3838275SEric Cheng 3848275SEric Cheng mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size, 3858275SEric Cheng mcip->mci_mip, &ft); 3868275SEric Cheng ft_created = B_TRUE; 3878275SEric Cheng } 3888275SEric Cheng 3898275SEric Cheng err = mac_flow_add(ft, flent); 3908275SEric Cheng if (err != 0) { 3918275SEric Cheng if (ft_created) 3928275SEric Cheng mac_flow_tab_destroy(ft); 3939073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 3948275SEric Cheng return (err); 3958275SEric Cheng } 3968275SEric Cheng 3978275SEric Cheng if (instantiate_flow) { 3988275SEric Cheng /* Now activate the flow by creating its SRSs */ 3998275SEric Cheng ASSERT(MCIP_DATAPATH_SETUP(mcip)); 4008275SEric Cheng err = mac_link_flow_init((mac_client_handle_t)mcip, flent); 4018275SEric Cheng if (err != 0) { 4028275SEric Cheng mac_flow_remove(ft, flent, B_FALSE); 4038275SEric Cheng if (ft_created) 4048275SEric Cheng mac_flow_tab_destroy(ft); 4059073SCathy.Zhou@Sun.COM mac_fastpath_enable(mh); 4068275SEric Cheng return (err); 4078275SEric Cheng } 4088275SEric Cheng } else { 4098275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 4108275SEric Cheng } 4118275SEric Cheng if (ft_created) { 4128275SEric Cheng ASSERT(mcip->mci_subflow_tab == NULL); 4138275SEric Cheng ft->ft_mcip = mcip; 4148275SEric Cheng mcip->mci_subflow_tab = ft; 4158275SEric Cheng if (instantiate_flow) 4168275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 4178275SEric Cheng } 4188275SEric Cheng return (0); 4198275SEric Cheng } 4208275SEric Cheng 4218275SEric Cheng /* 4228275SEric Cheng * Remove flow entry from flow table. 4238275SEric Cheng */ 4248275SEric Cheng void 4258275SEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp) 4268275SEric Cheng { 4278275SEric Cheng flow_entry_t **fp; 4288275SEric Cheng 4298275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 4308275SEric Cheng if (!(flent->fe_flags & FE_FLOW_TAB)) 4318275SEric Cheng return; 4328275SEric Cheng 4338275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 4348275SEric Cheng /* 4358275SEric Cheng * If this is a permanent removal from the flow table, mark it 4368275SEric Cheng * CONDEMNED to prevent future references. If this is a temporary 4378275SEric Cheng * removal from the table, say to update the flow descriptor then 4388275SEric Cheng * we don't mark it CONDEMNED 4398275SEric Cheng */ 4408275SEric Cheng if (!temp) 4418275SEric Cheng FLOW_MARK(flent, FE_CONDEMNED); 4428275SEric Cheng /* 4438275SEric Cheng * Locate the specified flent. 4448275SEric Cheng */ 4458275SEric Cheng fp = &ft->ft_table[flent->fe_index]; 4468275SEric Cheng while (*fp != flent) 4478275SEric Cheng fp = &(*fp)->fe_next; 4488275SEric Cheng 4498275SEric Cheng /* 4508275SEric Cheng * The flent must exist. Otherwise it's a bug. 4518275SEric Cheng */ 4528275SEric Cheng ASSERT(fp != NULL); 4538275SEric Cheng *fp = flent->fe_next; 4548275SEric Cheng flent->fe_next = NULL; 4558275SEric Cheng 4568275SEric Cheng /* 4578275SEric Cheng * Reset fe_index to -1 so any attempt to call mac_flow_remove() 4588275SEric Cheng * on a flent that is supposed to be in the table (FE_FLOW_TAB) 4598275SEric Cheng * will panic. 4608275SEric Cheng */ 4618275SEric Cheng flent->fe_index = -1; 4628275SEric Cheng FLOW_UNMARK(flent, FE_FLOW_TAB); 4638275SEric Cheng ft->ft_flow_count--; 4648275SEric Cheng rw_exit(&ft->ft_lock); 4658275SEric Cheng } 4668275SEric Cheng 4678275SEric Cheng /* 4688275SEric Cheng * This is the flow lookup routine used by the mac sw classifier engine. 4698275SEric Cheng */ 4708275SEric Cheng int 4718275SEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp) 4728275SEric Cheng { 4738275SEric Cheng flow_state_t s; 4748275SEric Cheng flow_entry_t *flent; 4758275SEric Cheng flow_ops_t *ops = &ft->ft_ops; 4768275SEric Cheng boolean_t retried = B_FALSE; 4778275SEric Cheng int i, err; 4788275SEric Cheng 4798275SEric Cheng s.fs_flags = flags; 4808833SVenu.Iyer@Sun.COM retry: 4818275SEric Cheng s.fs_mp = mp; 4828275SEric Cheng 4838275SEric Cheng /* 4848275SEric Cheng * Walk the list of predeclared accept functions. 4858275SEric Cheng * Each of these would accumulate enough state to allow the next 4868275SEric Cheng * accept routine to make progress. 4878275SEric Cheng */ 4888275SEric Cheng for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) { 4898275SEric Cheng if ((err = (ops->fo_accept[i])(ft, &s)) != 0) { 4908833SVenu.Iyer@Sun.COM mblk_t *last; 4918833SVenu.Iyer@Sun.COM 4928275SEric Cheng /* 4938275SEric Cheng * ENOBUFS indicates that the mp could be too short 4948275SEric Cheng * and may need a pullup. 4958275SEric Cheng */ 4968275SEric Cheng if (err != ENOBUFS || retried) 4978275SEric Cheng return (err); 4988275SEric Cheng 4998275SEric Cheng /* 5008833SVenu.Iyer@Sun.COM * The pullup is done on the last processed mblk, not 5018833SVenu.Iyer@Sun.COM * the starting one. pullup is not done if the mblk 5028833SVenu.Iyer@Sun.COM * has references or if b_cont is NULL. 5038275SEric Cheng */ 5048833SVenu.Iyer@Sun.COM last = s.fs_mp; 5058833SVenu.Iyer@Sun.COM if (DB_REF(last) > 1 || last->b_cont == NULL || 5068833SVenu.Iyer@Sun.COM pullupmsg(last, -1) == 0) 5078275SEric Cheng return (EINVAL); 5088275SEric Cheng 5098275SEric Cheng retried = B_TRUE; 5108275SEric Cheng DTRACE_PROBE2(need_pullup, flow_tab_t *, ft, 5118275SEric Cheng flow_state_t *, &s); 5128275SEric Cheng goto retry; 5138275SEric Cheng } 5148275SEric Cheng } 5158275SEric Cheng 5168275SEric Cheng /* 5178275SEric Cheng * The packet is considered sane. We may now attempt to 5188275SEric Cheng * find the corresponding flent. 5198275SEric Cheng */ 5208275SEric Cheng rw_enter(&ft->ft_lock, RW_READER); 5218275SEric Cheng flent = ft->ft_table[ops->fo_hash(ft, &s)]; 5228275SEric Cheng for (; flent != NULL; flent = flent->fe_next) { 5238275SEric Cheng if (flent->fe_match(ft, flent, &s)) { 5248275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 5258275SEric Cheng if (err != 0) 5268275SEric Cheng continue; 5278275SEric Cheng *flentp = flent; 5288275SEric Cheng rw_exit(&ft->ft_lock); 5298275SEric Cheng return (0); 5308275SEric Cheng } 5318275SEric Cheng } 5328275SEric Cheng rw_exit(&ft->ft_lock); 5338275SEric Cheng return (ENOENT); 5348275SEric Cheng } 5358275SEric Cheng 5368275SEric Cheng /* 5378275SEric Cheng * Walk flow table. 5388275SEric Cheng * The caller is assumed to have proper perimeter protection. 5398275SEric Cheng */ 5408275SEric Cheng int 5418275SEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5428275SEric Cheng void *arg) 5438275SEric Cheng { 5448275SEric Cheng int err, i, cnt = 0; 5458275SEric Cheng flow_entry_t *flent; 5468275SEric Cheng 5478275SEric Cheng if (ft == NULL) 5488275SEric Cheng return (0); 5498275SEric Cheng 5508275SEric Cheng for (i = 0; i < ft->ft_size; i++) { 5518275SEric Cheng for (flent = ft->ft_table[i]; flent != NULL; 5528275SEric Cheng flent = flent->fe_next) { 5538275SEric Cheng cnt++; 5548275SEric Cheng err = (*fn)(flent, arg); 5558275SEric Cheng if (err != 0) 5568275SEric Cheng return (err); 5578275SEric Cheng } 5588275SEric Cheng } 5598275SEric Cheng VERIFY(cnt == ft->ft_flow_count); 5608275SEric Cheng return (0); 5618275SEric Cheng } 5628275SEric Cheng 5638275SEric Cheng /* 5648275SEric Cheng * Same as the above except a mutex is used for protection here. 5658275SEric Cheng */ 5668275SEric Cheng int 5678275SEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 5688275SEric Cheng void *arg) 5698275SEric Cheng { 5708275SEric Cheng int err; 5718275SEric Cheng 5728275SEric Cheng if (ft == NULL) 5738275SEric Cheng return (0); 5748275SEric Cheng 5758275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 5768275SEric Cheng err = mac_flow_walk_nolock(ft, fn, arg); 5778275SEric Cheng rw_exit(&ft->ft_lock); 5788275SEric Cheng return (err); 5798275SEric Cheng } 5808275SEric Cheng 5818275SEric Cheng static boolean_t mac_flow_clean(flow_entry_t *); 5828275SEric Cheng 5838275SEric Cheng /* 5848275SEric Cheng * Destroy a flow entry. Called when the last reference on a flow is released. 5858275SEric Cheng */ 5868275SEric Cheng void 5878275SEric Cheng mac_flow_destroy(flow_entry_t *flent) 5888275SEric Cheng { 5898275SEric Cheng ASSERT(flent->fe_refcnt == 0); 5908275SEric Cheng 5918275SEric Cheng if ((flent->fe_type & FLOW_USER) != 0) { 5928275SEric Cheng ASSERT(mac_flow_clean(flent)); 5938275SEric Cheng } else { 5948275SEric Cheng mac_flow_cleanup(flent); 5958275SEric Cheng } 5968275SEric Cheng 5978275SEric Cheng mutex_destroy(&flent->fe_lock); 5988275SEric Cheng cv_destroy(&flent->fe_cv); 5998275SEric Cheng flow_stat_destroy(flent); 6008275SEric Cheng kmem_cache_free(flow_cache, flent); 6018275SEric Cheng } 6028275SEric Cheng 6038275SEric Cheng /* 6048275SEric Cheng * XXX eric 6058275SEric Cheng * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and 6068275SEric Cheng * mac_link_flow_modify() should really be moved/reworked into the 6078275SEric Cheng * two functions below. This would consolidate all the mac property 6088275SEric Cheng * checking in one place. I'm leaving this alone for now since it's 6098275SEric Cheng * out of scope of the new flows work. 6108275SEric Cheng */ 6118275SEric Cheng /* ARGSUSED */ 6128275SEric Cheng uint32_t 6138275SEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) 6148275SEric Cheng { 6158275SEric Cheng uint32_t changed_mask = 0; 6168275SEric Cheng mac_resource_props_t *fmrp = &flent->fe_effective_props; 6178275SEric Cheng int i; 6188275SEric Cheng 6198275SEric Cheng if ((mrp->mrp_mask & MRP_MAXBW) != 0 && 6208275SEric Cheng (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { 6218275SEric Cheng changed_mask |= MRP_MAXBW; 6228275SEric Cheng fmrp->mrp_maxbw = mrp->mrp_maxbw; 6238275SEric Cheng if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { 6248275SEric Cheng fmrp->mrp_mask &= ~MRP_MAXBW; 6258275SEric Cheng } else { 6268275SEric Cheng fmrp->mrp_mask |= MRP_MAXBW; 6278275SEric Cheng } 6288275SEric Cheng } 6298275SEric Cheng 6308275SEric Cheng if ((mrp->mrp_mask & MRP_PRIORITY) != 0) { 6318275SEric Cheng if (fmrp->mrp_priority != mrp->mrp_priority) 6328275SEric Cheng changed_mask |= MRP_PRIORITY; 6338275SEric Cheng if (mrp->mrp_priority == MPL_RESET) { 6348275SEric Cheng fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 6358275SEric Cheng fmrp->mrp_mask &= ~MRP_PRIORITY; 6368275SEric Cheng } else { 6378275SEric Cheng fmrp->mrp_priority = mrp->mrp_priority; 6388275SEric Cheng fmrp->mrp_mask |= MRP_PRIORITY; 6398275SEric Cheng } 6408275SEric Cheng } 6418275SEric Cheng 6428275SEric Cheng /* modify fanout */ 6438275SEric Cheng if ((mrp->mrp_mask & MRP_CPUS) != 0) { 6448275SEric Cheng if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) && 6458275SEric Cheng (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) { 6468275SEric Cheng for (i = 0; i < mrp->mrp_ncpus; i++) { 6478275SEric Cheng if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i]) 6488275SEric Cheng break; 6498275SEric Cheng } 6508275SEric Cheng if (i == mrp->mrp_ncpus) { 6518275SEric Cheng /* 6528275SEric Cheng * The new set of cpus passed is exactly 6538275SEric Cheng * the same as the existing set. 6548275SEric Cheng */ 6558275SEric Cheng return (changed_mask); 6568275SEric Cheng } 6578275SEric Cheng } 6588275SEric Cheng changed_mask |= MRP_CPUS; 6598275SEric Cheng MAC_COPY_CPUS(mrp, fmrp); 6608275SEric Cheng } 6618275SEric Cheng return (changed_mask); 6628275SEric Cheng } 6638275SEric Cheng 6648275SEric Cheng void 6658275SEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) 6668275SEric Cheng { 6678275SEric Cheng uint32_t changed_mask; 6688275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 6698275SEric Cheng mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); 6708275SEric Cheng 6718275SEric Cheng ASSERT(flent != NULL); 6728275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 6738275SEric Cheng 6748275SEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 6758275SEric Cheng 6768275SEric Cheng /* Update the cached values inside the subflow entry */ 6778275SEric Cheng changed_mask = mac_flow_modify_props(flent, mrp); 6788275SEric Cheng rw_exit(&ft->ft_lock); 6798275SEric Cheng /* 6808275SEric Cheng * Push the changed parameters to the scheduling code in the 6818275SEric Cheng * SRS's, to take effect right away. 6828275SEric Cheng */ 6838275SEric Cheng if (changed_mask & MRP_MAXBW) { 6848275SEric Cheng mac_srs_update_bwlimit(flent, mrp); 6858275SEric Cheng /* 6868275SEric Cheng * If bandwidth is changed, we may have to change 6878275SEric Cheng * the number of soft ring to be used for fanout. 6888275SEric Cheng * Call mac_flow_update_fanout() if MAC_BIND_CPU 6898275SEric Cheng * is not set and there is no user supplied cpu 6908275SEric Cheng * info. This applies only to link at this time. 6918275SEric Cheng */ 6928275SEric Cheng if (!(flent->fe_type & FLOW_USER) && 6938275SEric Cheng !(changed_mask & MRP_CPUS) && 6948275SEric Cheng !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { 6958275SEric Cheng mac_fanout_setup(mcip, flent, mcip_mrp, 6968275SEric Cheng mac_rx_deliver, mcip, NULL); 6978275SEric Cheng } 6988275SEric Cheng } 6998275SEric Cheng if (mrp->mrp_mask & MRP_PRIORITY) 7008275SEric Cheng mac_flow_update_priority(mcip, flent); 7018275SEric Cheng 7028275SEric Cheng if (changed_mask & MRP_CPUS) 7038275SEric Cheng mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); 7048275SEric Cheng } 7058275SEric Cheng 7068275SEric Cheng /* 7078275SEric Cheng * This function waits for a certain condition to be met and is generally 7088275SEric Cheng * used before a destructive or quiescing operation. 7098275SEric Cheng */ 7108275SEric Cheng void 7118275SEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event) 7128275SEric Cheng { 7138275SEric Cheng mutex_enter(&flent->fe_lock); 7148275SEric Cheng flent->fe_flags |= FE_WAITER; 7158275SEric Cheng 7168275SEric Cheng switch (event) { 7178275SEric Cheng case FLOW_DRIVER_UPCALL: 7188275SEric Cheng /* 7198275SEric Cheng * We want to make sure the driver upcalls have finished before 7208275SEric Cheng * we signal the Rx SRS worker to quit. 7218275SEric Cheng */ 7228275SEric Cheng while (flent->fe_refcnt != 1) 7238275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7248275SEric Cheng break; 7258275SEric Cheng 7268275SEric Cheng case FLOW_USER_REF: 7278275SEric Cheng /* 7288275SEric Cheng * Wait for the fe_user_refcnt to drop to 0. The flow has 7298275SEric Cheng * been removed from the global flow hash. 7308275SEric Cheng */ 7318275SEric Cheng ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH)); 7328275SEric Cheng while (flent->fe_user_refcnt != 0) 7338275SEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 7348275SEric Cheng break; 7358275SEric Cheng 7368275SEric Cheng default: 7378275SEric Cheng ASSERT(0); 7388275SEric Cheng } 7398275SEric Cheng 7408275SEric Cheng flent->fe_flags &= ~FE_WAITER; 7418275SEric Cheng mutex_exit(&flent->fe_lock); 7428275SEric Cheng } 7438275SEric Cheng 7448275SEric Cheng static boolean_t 7458275SEric Cheng mac_flow_clean(flow_entry_t *flent) 7468275SEric Cheng { 7478275SEric Cheng ASSERT(flent->fe_next == NULL); 7488275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7498275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL); 7508275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7518275SEric Cheng 7528275SEric Cheng return (B_TRUE); 7538275SEric Cheng } 7548275SEric Cheng 7558275SEric Cheng void 7568275SEric Cheng mac_flow_cleanup(flow_entry_t *flent) 7578275SEric Cheng { 7588275SEric Cheng if ((flent->fe_type & FLOW_USER) == 0) { 7598275SEric Cheng ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) || 7608275SEric Cheng (flent->fe_mbg != NULL && flent->fe_mcip == NULL)); 7618275SEric Cheng ASSERT(flent->fe_refcnt == 0); 7628275SEric Cheng } else { 7638275SEric Cheng ASSERT(flent->fe_refcnt == 1); 7648275SEric Cheng } 7658275SEric Cheng 7668275SEric Cheng if (flent->fe_mbg != NULL) { 7678275SEric Cheng ASSERT(flent->fe_tx_srs == NULL); 7688275SEric Cheng /* This is a multicast or broadcast flow entry */ 7698275SEric Cheng mac_bcast_grp_free(flent->fe_mbg); 7708275SEric Cheng flent->fe_mbg = NULL; 7718275SEric Cheng } 7728275SEric Cheng 7738275SEric Cheng if (flent->fe_tx_srs != NULL) { 7748275SEric Cheng ASSERT(flent->fe_mbg == NULL); 7758275SEric Cheng mac_srs_free(flent->fe_tx_srs); 7768275SEric Cheng flent->fe_tx_srs = NULL; 7778275SEric Cheng } 7788275SEric Cheng 7798275SEric Cheng /* 7808275SEric Cheng * In the normal case fe_rx_srs_cnt is 1. However in the error case 7818275SEric Cheng * when mac_unicast_add fails we may not have set up any SRS 7828275SEric Cheng * in which case fe_rx_srs_cnt will be zero. 7838275SEric Cheng */ 7848275SEric Cheng if (flent->fe_rx_srs_cnt != 0) { 7858275SEric Cheng ASSERT(flent->fe_rx_srs_cnt == 1); 7868275SEric Cheng mac_srs_free(flent->fe_rx_srs[0]); 7878275SEric Cheng flent->fe_rx_srs[0] = NULL; 7888275SEric Cheng flent->fe_rx_srs_cnt = 0; 7898275SEric Cheng } 7908275SEric Cheng ASSERT(flent->fe_rx_srs[0] == NULL); 7918275SEric Cheng } 7928275SEric Cheng 7938275SEric Cheng void 7948275SEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd) 7958275SEric Cheng { 7968275SEric Cheng /* 7978275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 7988275SEric Cheng * Updates to the fe_flow_desc happen under the fe_lock 7998275SEric Cheng * after removing the flent from the flow table 8008275SEric Cheng */ 8018275SEric Cheng mutex_enter(&flent->fe_lock); 8028275SEric Cheng bcopy(&flent->fe_flow_desc, fd, sizeof (*fd)); 8038275SEric Cheng mutex_exit(&flent->fe_lock); 8048275SEric Cheng } 8058275SEric Cheng 8068275SEric Cheng /* 8078275SEric Cheng * Update a field of a flow entry. The mac perimeter ensures that 8088275SEric Cheng * this is the only thread doing a modify operation on this mac end point. 8098275SEric Cheng * So the flow table can't change or disappear. The ft_lock protects access 8108275SEric Cheng * to the flow entry, and holding the lock ensures that there isn't any thread 8118275SEric Cheng * accessing the flow entry or attempting a flow table lookup. However 8128275SEric Cheng * data threads that are using the flow entry based on the old descriptor 8138275SEric Cheng * will continue to use the flow entry. If strong coherence is required 8148275SEric Cheng * then the flow will have to be quiesced before the descriptor can be 8158275SEric Cheng * changed. 8168275SEric Cheng */ 8178275SEric Cheng void 8188275SEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd) 8198275SEric Cheng { 8208275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8218275SEric Cheng flow_desc_t old_desc; 8228275SEric Cheng int err; 8238275SEric Cheng 8248275SEric Cheng if (ft == NULL) { 8258275SEric Cheng /* 8268275SEric Cheng * The flow hasn't yet been inserted into the table, 8278275SEric Cheng * so only the caller knows about this flow, however for 8288275SEric Cheng * uniformity we grab the fe_lock here. 8298275SEric Cheng */ 8308275SEric Cheng mutex_enter(&flent->fe_lock); 8318275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8328275SEric Cheng mutex_exit(&flent->fe_lock); 8338275SEric Cheng } 8348275SEric Cheng 8358275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8368275SEric Cheng 8378275SEric Cheng /* 8388275SEric Cheng * Need to remove the flow entry from the table and reinsert it, 8398275SEric Cheng * into a potentially diference hash line. The hash depends on 8408275SEric Cheng * the new descriptor fields. However access to fe_desc itself 8418275SEric Cheng * is always under the fe_lock. This helps log and stat functions 8428275SEric Cheng * see a self-consistent fe_flow_desc. 8438275SEric Cheng */ 8448275SEric Cheng mac_flow_remove(ft, flent, B_TRUE); 8458275SEric Cheng old_desc = flent->fe_flow_desc; 8468275SEric Cheng 8478275SEric Cheng mutex_enter(&flent->fe_lock); 8488275SEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 8498275SEric Cheng mutex_exit(&flent->fe_lock); 8508275SEric Cheng 8518275SEric Cheng if (mac_flow_add(ft, flent) != 0) { 8528275SEric Cheng /* 8538275SEric Cheng * The add failed say due to an invalid flow descriptor. 8548275SEric Cheng * Undo the update 8558275SEric Cheng */ 8568275SEric Cheng flent->fe_flow_desc = old_desc; 8578275SEric Cheng err = mac_flow_add(ft, flent); 8588275SEric Cheng ASSERT(err == 0); 8598275SEric Cheng } 8608275SEric Cheng } 8618275SEric Cheng 8628275SEric Cheng void 8638275SEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name) 8648275SEric Cheng { 8658275SEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 8668275SEric Cheng 8678275SEric Cheng if (ft == NULL) { 8688275SEric Cheng /* 8698275SEric Cheng * The flow hasn't yet been inserted into the table, 8708275SEric Cheng * so only the caller knows about this flow 8718275SEric Cheng */ 8728558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8738275SEric Cheng } else { 8748275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 8758275SEric Cheng } 8768275SEric Cheng 8778275SEric Cheng mutex_enter(&flent->fe_lock); 8788558SGirish.Moodalbail@Sun.COM (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 8798275SEric Cheng mutex_exit(&flent->fe_lock); 8808275SEric Cheng } 8818275SEric Cheng 8828275SEric Cheng /* 8838275SEric Cheng * Return the client-private cookie that was associated with 8848275SEric Cheng * the flow when it was created. 8858275SEric Cheng */ 8868275SEric Cheng void * 8878275SEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent) 8888275SEric Cheng { 8898275SEric Cheng return (flent->fe_client_cookie); 8908275SEric Cheng } 8918275SEric Cheng 8928275SEric Cheng /* 8938275SEric Cheng * Forward declarations. 8948275SEric Cheng */ 8958275SEric Cheng static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *); 89610616SSebastien.Roy@Sun.COM static uint32_t flow_l2_hash_fe(flow_tab_t *, flow_entry_t *); 8978275SEric Cheng static int flow_l2_accept(flow_tab_t *, flow_state_t *); 8988275SEric Cheng static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *); 89910616SSebastien.Roy@Sun.COM static uint32_t flow_ether_hash_fe(flow_tab_t *, flow_entry_t *); 9008275SEric Cheng static int flow_ether_accept(flow_tab_t *, flow_state_t *); 9018275SEric Cheng 9028275SEric Cheng /* 9038275SEric Cheng * Create flow table. 9048275SEric Cheng */ 9058275SEric Cheng void 9068275SEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size, 9078275SEric Cheng mac_impl_t *mip, flow_tab_t **ftp) 9088275SEric Cheng { 9098275SEric Cheng flow_tab_t *ft; 9108275SEric Cheng flow_ops_t *new_ops; 9118275SEric Cheng 9128275SEric Cheng ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP); 9138275SEric Cheng bzero(ft, sizeof (*ft)); 9148275SEric Cheng 9158275SEric Cheng ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP); 9168275SEric Cheng 9178275SEric Cheng /* 9188275SEric Cheng * We make a copy of the ops vector instead of just pointing to it 9198275SEric Cheng * because we might want to customize the ops vector on a per table 9208275SEric Cheng * basis (e.g. for optimization). 9218275SEric Cheng */ 9228275SEric Cheng new_ops = &ft->ft_ops; 9238275SEric Cheng bcopy(ops, new_ops, sizeof (*ops)); 9248275SEric Cheng ft->ft_mask = mask; 9258275SEric Cheng ft->ft_size = size; 9268275SEric Cheng ft->ft_mip = mip; 9278275SEric Cheng 9288275SEric Cheng /* 92910616SSebastien.Roy@Sun.COM * Optimizations for DL_ETHER media. 9308275SEric Cheng */ 9318275SEric Cheng if (mip->mi_info.mi_nativemedia == DL_ETHER) { 9328275SEric Cheng if (new_ops->fo_hash == flow_l2_hash) 9338275SEric Cheng new_ops->fo_hash = flow_ether_hash; 93410616SSebastien.Roy@Sun.COM if (new_ops->fo_hash_fe == flow_l2_hash_fe) 93510616SSebastien.Roy@Sun.COM new_ops->fo_hash_fe = flow_ether_hash_fe; 9368275SEric Cheng if (new_ops->fo_accept[0] == flow_l2_accept) 9378275SEric Cheng new_ops->fo_accept[0] = flow_ether_accept; 9388275SEric Cheng } 9398275SEric Cheng *ftp = ft; 9408275SEric Cheng } 9418275SEric Cheng 9428275SEric Cheng void 9438275SEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp) 9448275SEric Cheng { 9458275SEric Cheng mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID, 9468275SEric Cheng 1024, mip, ftp); 9478275SEric Cheng } 9488275SEric Cheng 9498275SEric Cheng /* 9508275SEric Cheng * Destroy flow table. 9518275SEric Cheng */ 9528275SEric Cheng void 9538275SEric Cheng mac_flow_tab_destroy(flow_tab_t *ft) 9548275SEric Cheng { 9558275SEric Cheng if (ft == NULL) 9568275SEric Cheng return; 9578275SEric Cheng 9588275SEric Cheng ASSERT(ft->ft_flow_count == 0); 9598275SEric Cheng kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *)); 9608275SEric Cheng bzero(ft, sizeof (*ft)); 9618275SEric Cheng kmem_cache_free(flow_tab_cache, ft); 9628275SEric Cheng } 9638275SEric Cheng 9648275SEric Cheng /* 9658275SEric Cheng * Add a new flow entry to the global flow hash table 9668275SEric Cheng */ 9678275SEric Cheng int 9688275SEric Cheng mac_flow_hash_add(flow_entry_t *flent) 9698275SEric Cheng { 9708275SEric Cheng int err; 9718275SEric Cheng 9728275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 9738275SEric Cheng err = mod_hash_insert(flow_hash, 9748275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent); 9758275SEric Cheng if (err != 0) { 9768275SEric Cheng rw_exit(&flow_tab_lock); 9778275SEric Cheng return (EEXIST); 9788275SEric Cheng } 9798275SEric Cheng /* Mark as inserted into the global flow hash table */ 9808275SEric Cheng FLOW_MARK(flent, FE_G_FLOW_HASH); 9818275SEric Cheng rw_exit(&flow_tab_lock); 9828275SEric Cheng return (err); 9838275SEric Cheng } 9848275SEric Cheng 9858275SEric Cheng /* 9868275SEric Cheng * Remove a flow entry from the global flow hash table 9878275SEric Cheng */ 9888275SEric Cheng void 9898275SEric Cheng mac_flow_hash_remove(flow_entry_t *flent) 9908275SEric Cheng { 9918275SEric Cheng mod_hash_val_t val; 9928275SEric Cheng 9938275SEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 9948275SEric Cheng VERIFY(mod_hash_remove(flow_hash, 9958275SEric Cheng (mod_hash_key_t)flent->fe_flow_name, &val) == 0); 9968275SEric Cheng 9978275SEric Cheng /* Clear the mark that says inserted into the global flow hash table */ 9988275SEric Cheng FLOW_UNMARK(flent, FE_G_FLOW_HASH); 9998275SEric Cheng rw_exit(&flow_tab_lock); 10008275SEric Cheng } 10018275SEric Cheng 10028275SEric Cheng /* 10038275SEric Cheng * Retrieve a flow entry from the global flow hash table. 10048275SEric Cheng */ 10058275SEric Cheng int 10068275SEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp) 10078275SEric Cheng { 10088275SEric Cheng int err; 10098275SEric Cheng flow_entry_t *flent; 10108275SEric Cheng 10118275SEric Cheng rw_enter(&flow_tab_lock, RW_READER); 10128275SEric Cheng err = mod_hash_find(flow_hash, (mod_hash_key_t)name, 10138275SEric Cheng (mod_hash_val_t *)&flent); 10148275SEric Cheng if (err != 0) { 10158275SEric Cheng rw_exit(&flow_tab_lock); 10168275SEric Cheng return (ENOENT); 10178275SEric Cheng } 10188275SEric Cheng ASSERT(flent != NULL); 10198275SEric Cheng FLOW_USER_REFHOLD(flent); 10208275SEric Cheng rw_exit(&flow_tab_lock); 10218275SEric Cheng 10228275SEric Cheng *flentp = flent; 10238275SEric Cheng return (0); 10248275SEric Cheng } 10258275SEric Cheng 10268275SEric Cheng /* 10278275SEric Cheng * Initialize or release mac client flows by walking the subflow table. 10288275SEric Cheng * These are typically invoked during plumb/unplumb of links. 10298275SEric Cheng */ 10308275SEric Cheng 10318275SEric Cheng static int 10328275SEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg) 10338275SEric Cheng { 10348275SEric Cheng mac_client_impl_t *mcip = arg; 10358275SEric Cheng 10368275SEric Cheng if (mac_link_flow_init(arg, flent) != 0) { 10378275SEric Cheng cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'", 10388275SEric Cheng flent->fe_flow_name, mcip->mci_name); 10398275SEric Cheng } else { 10408275SEric Cheng FLOW_UNMARK(flent, FE_UF_NO_DATAPATH); 10418275SEric Cheng } 10428275SEric Cheng return (0); 10438275SEric Cheng } 10448275SEric Cheng 10458275SEric Cheng void 10468275SEric Cheng mac_link_init_flows(mac_client_handle_t mch) 10478275SEric Cheng { 10488275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10498275SEric Cheng 10508275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 10518275SEric Cheng mac_link_init_flows_cb, mcip); 10528275SEric Cheng /* 10538275SEric Cheng * If mac client had subflow(s) configured before plumb, change 10548275SEric Cheng * function to mac_rx_srs_subflow_process and in case of hardware 10558275SEric Cheng * classification, disable polling. 10568275SEric Cheng */ 10578275SEric Cheng mac_client_update_classifier(mcip, B_TRUE); 10588275SEric Cheng 10598275SEric Cheng } 10608275SEric Cheng 10618275SEric Cheng boolean_t 10628275SEric Cheng mac_link_has_flows(mac_client_handle_t mch) 10638275SEric Cheng { 10648275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10658275SEric Cheng 10668275SEric Cheng if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab)) 10678275SEric Cheng return (B_TRUE); 10688275SEric Cheng 10698275SEric Cheng return (B_FALSE); 10708275SEric Cheng } 10718275SEric Cheng 10728275SEric Cheng static int 10738275SEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg) 10748275SEric Cheng { 10758275SEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 10768275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 10778275SEric Cheng mac_link_flow_clean(arg, flent); 10788275SEric Cheng return (0); 10798275SEric Cheng } 10808275SEric Cheng 10818275SEric Cheng void 10828275SEric Cheng mac_link_release_flows(mac_client_handle_t mch) 10838275SEric Cheng { 10848275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 10858275SEric Cheng 10868275SEric Cheng /* 10878275SEric Cheng * Change the mci_flent callback back to mac_rx_srs_process() 10888275SEric Cheng * because flows are about to be deactivated. 10898275SEric Cheng */ 10908275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 10918275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 10928275SEric Cheng mac_link_release_flows_cb, mcip); 10938275SEric Cheng } 10948275SEric Cheng 10958275SEric Cheng void 10968275SEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name) 10978275SEric Cheng { 10988275SEric Cheng mac_flow_set_name(fep, new_name); 10998275SEric Cheng if (fep->fe_ksp != NULL) { 11008275SEric Cheng flow_stat_destroy(fep); 11018275SEric Cheng flow_stat_create(fep); 11028275SEric Cheng } 11038275SEric Cheng } 11048275SEric Cheng 11058275SEric Cheng /* 11068275SEric Cheng * mac_link_flow_init() 11078275SEric Cheng * Internal flow interface used for allocating SRSs and related 11088275SEric Cheng * data structures. Not meant to be used by mac clients. 11098275SEric Cheng */ 11108275SEric Cheng int 11118275SEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow) 11128275SEric Cheng { 11138275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 11148275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 11158275SEric Cheng int err; 11168275SEric Cheng 11178275SEric Cheng ASSERT(mch != NULL); 11188275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11198275SEric Cheng 11208275SEric Cheng if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0) 11218275SEric Cheng return (err); 11228275SEric Cheng 11238275SEric Cheng sub_flow->fe_mcip = mcip; 11248275SEric Cheng 11258275SEric Cheng return (0); 11268275SEric Cheng } 11278275SEric Cheng 11288275SEric Cheng /* 11298275SEric Cheng * mac_link_flow_add() 11308275SEric Cheng * Used by flowadm(1m) or kernel mac clients for creating flows. 11318275SEric Cheng */ 11328275SEric Cheng int 11338275SEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name, 11348275SEric Cheng flow_desc_t *flow_desc, mac_resource_props_t *mrp) 11358275SEric Cheng { 11368275SEric Cheng flow_entry_t *flent = NULL; 11378275SEric Cheng int err; 11388275SEric Cheng dls_dl_handle_t dlh; 11398275SEric Cheng dls_link_t *dlp; 11408275SEric Cheng boolean_t link_held = B_FALSE; 11418275SEric Cheng boolean_t hash_added = B_FALSE; 11428275SEric Cheng mac_perim_handle_t mph; 11438275SEric Cheng 11448275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 11458275SEric Cheng if (err == 0) { 11468275SEric Cheng FLOW_USER_REFRELE(flent); 11478275SEric Cheng return (EEXIST); 11488275SEric Cheng } 11498275SEric Cheng 11508275SEric Cheng /* 11518275SEric Cheng * First create a flow entry given the description provided 11528275SEric Cheng * by the caller. 11538275SEric Cheng */ 11548275SEric Cheng err = mac_flow_create(flow_desc, mrp, flow_name, NULL, 11558275SEric Cheng FLOW_USER | FLOW_OTHER, &flent); 11568275SEric Cheng 11578275SEric Cheng if (err != 0) 11588275SEric Cheng return (err); 11598275SEric Cheng 11608275SEric Cheng /* 11618275SEric Cheng * We've got a local variable referencing this flow now, so we need 11628275SEric Cheng * to hold it. We'll release this flow before returning. 11638275SEric Cheng * All failures until we return will undo any action that may internally 11648275SEric Cheng * held the flow, so the last REFRELE will assure a clean freeing 11658275SEric Cheng * of resources. 11668275SEric Cheng */ 11678275SEric Cheng FLOW_REFHOLD(flent); 11688275SEric Cheng 11698275SEric Cheng flent->fe_link_id = linkid; 11708275SEric Cheng FLOW_MARK(flent, FE_INCIPIENT); 11718275SEric Cheng 11728275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 11738275SEric Cheng if (err != 0) { 11748275SEric Cheng FLOW_FINAL_REFRELE(flent); 11758275SEric Cheng return (err); 11768275SEric Cheng } 11778275SEric Cheng 11788275SEric Cheng /* 11798275SEric Cheng * dls will eventually be merged with mac so it's ok 11808275SEric Cheng * to call dls' internal functions. 11818275SEric Cheng */ 11828275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 11838275SEric Cheng if (err != 0) 11848275SEric Cheng goto bail; 11858275SEric Cheng 11868275SEric Cheng link_held = B_TRUE; 11878275SEric Cheng 11888275SEric Cheng /* 11898275SEric Cheng * Add the flow to the global flow table, this table will be per 11908275SEric Cheng * exclusive zone so each zone can have its own flow namespace. 11918275SEric Cheng * RFE 6625651 will fix this. 11928275SEric Cheng * 11938275SEric Cheng */ 11948275SEric Cheng if ((err = mac_flow_hash_add(flent)) != 0) 11958275SEric Cheng goto bail; 11968275SEric Cheng 11978275SEric Cheng hash_added = B_TRUE; 11988275SEric Cheng 11998275SEric Cheng /* 12008275SEric Cheng * do not allow flows to be configured on an anchor VNIC 12018275SEric Cheng */ 12028275SEric Cheng if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) { 12038275SEric Cheng err = ENOTSUP; 12048275SEric Cheng goto bail; 12058275SEric Cheng } 12068275SEric Cheng 12078275SEric Cheng /* 12088275SEric Cheng * Add the subflow to the subflow table. Also instantiate the flow 12098833SVenu.Iyer@Sun.COM * in the mac if there is an active user (we check if the MAC client's 12108833SVenu.Iyer@Sun.COM * datapath has been setup). 12118275SEric Cheng */ 12128833SVenu.Iyer@Sun.COM err = mac_flow_add_subflow(dlp->dl_mch, flent, 12138833SVenu.Iyer@Sun.COM MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch)); 12148275SEric Cheng if (err != 0) 12158275SEric Cheng goto bail; 12168275SEric Cheng 12178275SEric Cheng FLOW_UNMARK(flent, FE_INCIPIENT); 12188275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12198275SEric Cheng mac_perim_exit(mph); 12208275SEric Cheng return (0); 12218275SEric Cheng 12228275SEric Cheng bail: 12238275SEric Cheng if (hash_added) 12248275SEric Cheng mac_flow_hash_remove(flent); 12258275SEric Cheng 12268275SEric Cheng if (link_held) 12278275SEric Cheng dls_devnet_rele_link(dlh, dlp); 12288275SEric Cheng 12298275SEric Cheng /* 12308275SEric Cheng * Wait for any transient global flow hash refs to clear 12318275SEric Cheng * and then release the creation reference on the flow 12328275SEric Cheng */ 12338275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 12348275SEric Cheng FLOW_FINAL_REFRELE(flent); 12358275SEric Cheng mac_perim_exit(mph); 12368275SEric Cheng return (err); 12378275SEric Cheng } 12388275SEric Cheng 12398275SEric Cheng /* 12408275SEric Cheng * mac_link_flow_clean() 12418275SEric Cheng * Internal flow interface used for freeing SRSs and related 12428275SEric Cheng * data structures. Not meant to be used by mac clients. 12438275SEric Cheng */ 12448275SEric Cheng void 12458275SEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow) 12468275SEric Cheng { 12478275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 12488275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 12498275SEric Cheng boolean_t last_subflow; 12508275SEric Cheng 12518275SEric Cheng ASSERT(mch != NULL); 12528275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 12538275SEric Cheng 12548275SEric Cheng /* 12558275SEric Cheng * This sub flow entry may fail to be fully initialized by 12568275SEric Cheng * mac_link_flow_init(). If so, simply return. 12578275SEric Cheng */ 12588275SEric Cheng if (sub_flow->fe_mcip == NULL) 12598275SEric Cheng return; 12608275SEric Cheng 12618275SEric Cheng last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab); 12628275SEric Cheng /* 12638275SEric Cheng * Tear down the data path 12648275SEric Cheng */ 12658275SEric Cheng mac_datapath_teardown(mcip, sub_flow, SRST_FLOW); 12668275SEric Cheng sub_flow->fe_mcip = NULL; 12678275SEric Cheng 12688275SEric Cheng /* 12698275SEric Cheng * Delete the SRSs associated with this subflow. If this is being 12708275SEric Cheng * driven by flowadm(1M) then the subflow will be deleted by 12718275SEric Cheng * dls_rem_flow. However if this is a result of the interface being 12728275SEric Cheng * unplumbed then the subflow itself won't be deleted. 12738275SEric Cheng */ 12748275SEric Cheng mac_flow_cleanup(sub_flow); 12758275SEric Cheng 12768275SEric Cheng /* 12778275SEric Cheng * If all the subflows are gone, renable some of the stuff 12788275SEric Cheng * we disabled when adding a subflow, polling etc. 12798275SEric Cheng */ 12808275SEric Cheng if (last_subflow) { 12818275SEric Cheng /* 12828275SEric Cheng * The subflow table itself is not protected by any locks or 12838275SEric Cheng * refcnts. Hence quiesce the client upfront before clearing 12848275SEric Cheng * mci_subflow_tab. 12858275SEric Cheng */ 12868275SEric Cheng mac_client_quiesce(mcip); 12878275SEric Cheng mac_client_update_classifier(mcip, B_FALSE); 12888275SEric Cheng mac_flow_tab_destroy(mcip->mci_subflow_tab); 12898275SEric Cheng mcip->mci_subflow_tab = NULL; 12908275SEric Cheng mac_client_restart(mcip); 12918275SEric Cheng } 12928275SEric Cheng } 12938275SEric Cheng 12948275SEric Cheng /* 12958275SEric Cheng * mac_link_flow_remove() 12968275SEric Cheng * Used by flowadm(1m) or kernel mac clients for removing flows. 12978275SEric Cheng */ 12988275SEric Cheng int 12998275SEric Cheng mac_link_flow_remove(char *flow_name) 13008275SEric Cheng { 13018275SEric Cheng flow_entry_t *flent; 13028275SEric Cheng mac_perim_handle_t mph; 13038275SEric Cheng int err; 13048275SEric Cheng datalink_id_t linkid; 13058275SEric Cheng 13068275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13078275SEric Cheng if (err != 0) 13088275SEric Cheng return (err); 13098275SEric Cheng 13108275SEric Cheng linkid = flent->fe_link_id; 13118275SEric Cheng FLOW_USER_REFRELE(flent); 13128275SEric Cheng 13138275SEric Cheng /* 13148275SEric Cheng * The perim must be acquired before acquiring any other references 13158275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 13168275SEric Cheng * FLOW_REFRELE above. 13178275SEric Cheng */ 13188275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 13198275SEric Cheng if (err != 0) 13208275SEric Cheng return (err); 13218275SEric Cheng 13228275SEric Cheng /* 13238275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 13248275SEric Cheng * may have removed it already while we were waiting to enter the 13258275SEric Cheng * link's perimeter. 13268275SEric Cheng */ 13278275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13288275SEric Cheng if (err != 0) { 13298275SEric Cheng mac_perim_exit(mph); 13308275SEric Cheng return (err); 13318275SEric Cheng } 13328275SEric Cheng FLOW_USER_REFRELE(flent); 13338275SEric Cheng 13348275SEric Cheng /* 13358275SEric Cheng * Remove the flow from the subflow table and deactivate the flow 13368275SEric Cheng * by quiescing and removings its SRSs 13378275SEric Cheng */ 13388275SEric Cheng mac_flow_rem_subflow(flent); 13398275SEric Cheng 13408275SEric Cheng /* 13418275SEric Cheng * Finally, remove the flow from the global table. 13428275SEric Cheng */ 13438275SEric Cheng mac_flow_hash_remove(flent); 13448275SEric Cheng 13458275SEric Cheng /* 13468275SEric Cheng * Wait for any transient global flow hash refs to clear 13478275SEric Cheng * and then release the creation reference on the flow 13488275SEric Cheng */ 13498275SEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 13508275SEric Cheng FLOW_FINAL_REFRELE(flent); 13518275SEric Cheng 13528275SEric Cheng mac_perim_exit(mph); 13538275SEric Cheng 13548275SEric Cheng return (0); 13558275SEric Cheng } 13568275SEric Cheng 13578275SEric Cheng /* 13588275SEric Cheng * mac_link_flow_modify() 13598275SEric Cheng * Modifies the properties of a flow identified by its name. 13608275SEric Cheng */ 13618275SEric Cheng int 13628275SEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) 13638275SEric Cheng { 13648275SEric Cheng flow_entry_t *flent; 13658275SEric Cheng mac_client_impl_t *mcip; 13668275SEric Cheng int err = 0; 13678275SEric Cheng mac_perim_handle_t mph; 13688275SEric Cheng datalink_id_t linkid; 13698275SEric Cheng flow_tab_t *flow_tab; 13708275SEric Cheng 13718275SEric Cheng err = mac_validate_props(mrp); 13728275SEric Cheng if (err != 0) 13738275SEric Cheng return (err); 13748275SEric Cheng 13758275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13768275SEric Cheng if (err != 0) 13778275SEric Cheng return (err); 13788275SEric Cheng 13798275SEric Cheng linkid = flent->fe_link_id; 13808275SEric Cheng FLOW_USER_REFRELE(flent); 13818275SEric Cheng 13828275SEric Cheng /* 13838275SEric Cheng * The perim must be acquired before acquiring any other references 13848275SEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 13858275SEric Cheng * FLOW_REFRELE above. 13868275SEric Cheng */ 13878275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 13888275SEric Cheng if (err != 0) 13898275SEric Cheng return (err); 13908275SEric Cheng 13918275SEric Cheng /* 13928275SEric Cheng * Note the second lookup of the flow, because a concurrent thread 13938275SEric Cheng * may have removed it already while we were waiting to enter the 13948275SEric Cheng * link's perimeter. 13958275SEric Cheng */ 13968275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 13978275SEric Cheng if (err != 0) { 13988275SEric Cheng mac_perim_exit(mph); 13998275SEric Cheng return (err); 14008275SEric Cheng } 14018275SEric Cheng FLOW_USER_REFRELE(flent); 14028275SEric Cheng 14038275SEric Cheng /* 14048275SEric Cheng * If this flow is attached to a MAC client, then pass the request 14058275SEric Cheng * along to the client. 14068275SEric Cheng * Otherwise, just update the cached values. 14078275SEric Cheng */ 14088275SEric Cheng mcip = flent->fe_mcip; 14098275SEric Cheng mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE); 14108275SEric Cheng if (mcip != NULL) { 14118275SEric Cheng if ((flow_tab = mcip->mci_subflow_tab) == NULL) { 14128275SEric Cheng err = ENOENT; 14138275SEric Cheng } else { 14148275SEric Cheng mac_flow_modify(flow_tab, flent, mrp); 14158275SEric Cheng } 14168275SEric Cheng } else { 14178275SEric Cheng (void) mac_flow_modify_props(flent, mrp); 14188275SEric Cheng } 14198275SEric Cheng 14208275SEric Cheng done: 14218275SEric Cheng mac_perim_exit(mph); 14228275SEric Cheng return (err); 14238275SEric Cheng } 14248275SEric Cheng 14258275SEric Cheng 14268275SEric Cheng /* 14278275SEric Cheng * State structure and misc functions used by mac_link_flow_walk(). 14288275SEric Cheng */ 14298275SEric Cheng typedef struct { 14308275SEric Cheng int (*ws_func)(mac_flowinfo_t *, void *); 14318275SEric Cheng void *ws_arg; 14328275SEric Cheng } flow_walk_state_t; 14338275SEric Cheng 14348275SEric Cheng static void 14358275SEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent) 14368275SEric Cheng { 14378558SGirish.Moodalbail@Sun.COM (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name, 14388558SGirish.Moodalbail@Sun.COM MAXFLOWNAMELEN); 14398275SEric Cheng finfop->fi_link_id = flent->fe_link_id; 14408275SEric Cheng finfop->fi_flow_desc = flent->fe_flow_desc; 14418275SEric Cheng finfop->fi_resource_props = flent->fe_resource_props; 14428275SEric Cheng } 14438275SEric Cheng 14448275SEric Cheng static int 14458275SEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) 14468275SEric Cheng { 14478275SEric Cheng flow_walk_state_t *statep = arg; 14488275SEric Cheng mac_flowinfo_t finfo; 14498275SEric Cheng 14508275SEric Cheng mac_link_flowinfo_copy(&finfo, flent); 14518275SEric Cheng return (statep->ws_func(&finfo, statep->ws_arg)); 14528275SEric Cheng } 14538275SEric Cheng 14548275SEric Cheng /* 14558275SEric Cheng * mac_link_flow_walk() 14568275SEric Cheng * Invokes callback 'func' for all flows belonging to the specified link. 14578275SEric Cheng */ 14588275SEric Cheng int 14598275SEric Cheng mac_link_flow_walk(datalink_id_t linkid, 14608275SEric Cheng int (*func)(mac_flowinfo_t *, void *), void *arg) 14618275SEric Cheng { 14628275SEric Cheng mac_client_impl_t *mcip; 14638275SEric Cheng mac_perim_handle_t mph; 14648275SEric Cheng flow_walk_state_t state; 14658275SEric Cheng dls_dl_handle_t dlh; 14668275SEric Cheng dls_link_t *dlp; 14678275SEric Cheng int err; 14688275SEric Cheng 14698275SEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 14708275SEric Cheng if (err != 0) 14718275SEric Cheng return (err); 14728275SEric Cheng 14738275SEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 14748275SEric Cheng if (err != 0) { 14758275SEric Cheng mac_perim_exit(mph); 14768275SEric Cheng return (err); 14778275SEric Cheng } 14788275SEric Cheng 14798275SEric Cheng mcip = (mac_client_impl_t *)dlp->dl_mch; 14808275SEric Cheng state.ws_func = func; 14818275SEric Cheng state.ws_arg = arg; 14828275SEric Cheng 14838275SEric Cheng err = mac_flow_walk_nolock(mcip->mci_subflow_tab, 14848275SEric Cheng mac_link_flow_walk_cb, &state); 14858275SEric Cheng 14868275SEric Cheng dls_devnet_rele_link(dlh, dlp); 14878275SEric Cheng mac_perim_exit(mph); 14888275SEric Cheng return (err); 14898275SEric Cheng } 14908275SEric Cheng 14918275SEric Cheng /* 14928275SEric Cheng * mac_link_flow_info() 14938275SEric Cheng * Retrieves information about a specific flow. 14948275SEric Cheng */ 14958275SEric Cheng int 14968275SEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo) 14978275SEric Cheng { 14988275SEric Cheng flow_entry_t *flent; 14998275SEric Cheng int err; 15008275SEric Cheng 15018275SEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 15028275SEric Cheng if (err != 0) 15038275SEric Cheng return (err); 15048275SEric Cheng 15058275SEric Cheng mac_link_flowinfo_copy(finfo, flent); 15068275SEric Cheng FLOW_USER_REFRELE(flent); 15078275SEric Cheng return (0); 15088275SEric Cheng } 15098275SEric Cheng 151010616SSebastien.Roy@Sun.COM /* 151110616SSebastien.Roy@Sun.COM * Hash function macro that takes an Ethernet address and VLAN id as input. 151210616SSebastien.Roy@Sun.COM */ 151310616SSebastien.Roy@Sun.COM #define HASH_ETHER_VID(a, v, s) \ 15148275SEric Cheng ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s)) 15158275SEric Cheng 151610616SSebastien.Roy@Sun.COM /* 151710616SSebastien.Roy@Sun.COM * Generic layer-2 address hashing function that takes an address and address 151810616SSebastien.Roy@Sun.COM * length as input. This is the DJB hash function. 151910616SSebastien.Roy@Sun.COM */ 152010616SSebastien.Roy@Sun.COM static uint32_t 152110616SSebastien.Roy@Sun.COM flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize) 152210616SSebastien.Roy@Sun.COM { 152310616SSebastien.Roy@Sun.COM uint32_t hash = 5381; 152410616SSebastien.Roy@Sun.COM size_t i; 152510616SSebastien.Roy@Sun.COM 152610616SSebastien.Roy@Sun.COM for (i = 0; i < addrlen; i++) 152710616SSebastien.Roy@Sun.COM hash = ((hash << 5) + hash) + addr[i]; 152810616SSebastien.Roy@Sun.COM return (hash % htsize); 152910616SSebastien.Roy@Sun.COM } 153010616SSebastien.Roy@Sun.COM 15318275SEric Cheng #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end)) 15328275SEric Cheng 15338833SVenu.Iyer@Sun.COM #define CHECK_AND_ADJUST_START_PTR(s, start) { \ 15348833SVenu.Iyer@Sun.COM if ((s)->fs_mp->b_wptr == (start)) { \ 15358833SVenu.Iyer@Sun.COM mblk_t *next = (s)->fs_mp->b_cont; \ 15368833SVenu.Iyer@Sun.COM if (next == NULL) \ 15378833SVenu.Iyer@Sun.COM return (EINVAL); \ 15388833SVenu.Iyer@Sun.COM \ 15398833SVenu.Iyer@Sun.COM (s)->fs_mp = next; \ 15408833SVenu.Iyer@Sun.COM (start) = next->b_rptr; \ 15418833SVenu.Iyer@Sun.COM } \ 15428833SVenu.Iyer@Sun.COM } 15438833SVenu.Iyer@Sun.COM 15448275SEric Cheng /* ARGSUSED */ 15458275SEric Cheng static boolean_t 15468275SEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 15478275SEric Cheng { 15488275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15498275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 15508275SEric Cheng 15518275SEric Cheng return (l2->l2_vid == fd->fd_vid && 15528275SEric Cheng bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0); 15538275SEric Cheng } 15548275SEric Cheng 15558275SEric Cheng /* 15568275SEric Cheng * Layer 2 hash function. 15578275SEric Cheng * Must be paired with flow_l2_accept() within a set of flow_ops 15588275SEric Cheng * because it assumes the dest address is already extracted. 15598275SEric Cheng */ 15608275SEric Cheng static uint32_t 15618275SEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s) 15628275SEric Cheng { 156310616SSebastien.Roy@Sun.COM return (flow_l2_addrhash(s->fs_l2info.l2_daddr, 156410616SSebastien.Roy@Sun.COM ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); 15658275SEric Cheng } 15668275SEric Cheng 15678275SEric Cheng /* 15688275SEric Cheng * This is the generic layer 2 accept function. 15698275SEric Cheng * It makes use of mac_header_info() to extract the header length, 15708275SEric Cheng * sap, vlan ID and destination address. 15718275SEric Cheng */ 15728275SEric Cheng static int 15738275SEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s) 15748275SEric Cheng { 15758275SEric Cheng boolean_t is_ether; 15768275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 15778275SEric Cheng mac_header_info_t mhi; 15788275SEric Cheng int err; 15798275SEric Cheng 15808275SEric Cheng is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER); 15818275SEric Cheng if ((err = mac_header_info((mac_handle_t)ft->ft_mip, 15828275SEric Cheng s->fs_mp, &mhi)) != 0) { 15838275SEric Cheng if (err == EINVAL) 15848275SEric Cheng err = ENOBUFS; 15858275SEric Cheng 15868275SEric Cheng return (err); 15878275SEric Cheng } 15888275SEric Cheng 15898275SEric Cheng l2->l2_start = s->fs_mp->b_rptr; 15908275SEric Cheng l2->l2_daddr = (uint8_t *)mhi.mhi_daddr; 15918275SEric Cheng 15928275SEric Cheng if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN && 15938275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 15948275SEric Cheng struct ether_vlan_header *evhp = 15958275SEric Cheng (struct ether_vlan_header *)l2->l2_start; 15968275SEric Cheng 15978275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 15988275SEric Cheng return (ENOBUFS); 15998275SEric Cheng 16008275SEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 16018275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 16028275SEric Cheng l2->l2_hdrsize = sizeof (*evhp); 16038275SEric Cheng } else { 16048275SEric Cheng l2->l2_sap = mhi.mhi_bindsap; 16058275SEric Cheng l2->l2_vid = 0; 16068275SEric Cheng l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize; 16078275SEric Cheng } 16088275SEric Cheng return (0); 16098275SEric Cheng } 16108275SEric Cheng 16118275SEric Cheng /* 16128275SEric Cheng * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/ 16138275SEric Cheng * accept(). The notable difference is that dest address is now extracted 16148275SEric Cheng * by hash() rather than by accept(). This saves a few memory references 16158275SEric Cheng * for flow tables that do not care about mac addresses. 16168275SEric Cheng */ 16178275SEric Cheng static uint32_t 16188275SEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s) 16198275SEric Cheng { 16208275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16218275SEric Cheng struct ether_vlan_header *evhp; 16228275SEric Cheng 16238275SEric Cheng evhp = (struct ether_vlan_header *)l2->l2_start; 16248275SEric Cheng l2->l2_daddr = evhp->ether_dhost.ether_addr_octet; 162510616SSebastien.Roy@Sun.COM return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 162610616SSebastien.Roy@Sun.COM } 162710616SSebastien.Roy@Sun.COM 162810616SSebastien.Roy@Sun.COM static uint32_t 162910616SSebastien.Roy@Sun.COM flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 163010616SSebastien.Roy@Sun.COM { 163110616SSebastien.Roy@Sun.COM flow_desc_t *fd = &flent->fe_flow_desc; 163210616SSebastien.Roy@Sun.COM 163310616SSebastien.Roy@Sun.COM ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); 163410616SSebastien.Roy@Sun.COM return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); 16358275SEric Cheng } 16368275SEric Cheng 16378275SEric Cheng /* ARGSUSED */ 16388275SEric Cheng static int 16398275SEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s) 16408275SEric Cheng { 16418275SEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 16428275SEric Cheng struct ether_vlan_header *evhp; 16438275SEric Cheng uint16_t sap; 16448275SEric Cheng 16458275SEric Cheng evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr; 16468275SEric Cheng l2->l2_start = (uchar_t *)evhp; 16478275SEric Cheng 16488275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header))) 16498275SEric Cheng return (ENOBUFS); 16508275SEric Cheng 16518275SEric Cheng if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN && 16528275SEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 16538275SEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 16548275SEric Cheng return (ENOBUFS); 16558275SEric Cheng 16568275SEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 16578275SEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 16588275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_vlan_header); 16598275SEric Cheng } else { 16608275SEric Cheng l2->l2_sap = sap; 16618275SEric Cheng l2->l2_vid = 0; 16628275SEric Cheng l2->l2_hdrsize = sizeof (struct ether_header); 16638275SEric Cheng } 16648275SEric Cheng return (0); 16658275SEric Cheng } 16668275SEric Cheng 16678275SEric Cheng /* 16688275SEric Cheng * Validates a layer 2 flow entry. 16698275SEric Cheng */ 16708275SEric Cheng static int 16718275SEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 16728275SEric Cheng { 16738275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 16748275SEric Cheng 16758275SEric Cheng /* 167610616SSebastien.Roy@Sun.COM * Dest address is mandatory, and 0 length addresses are not yet 167710616SSebastien.Roy@Sun.COM * supported. 16788275SEric Cheng */ 167910616SSebastien.Roy@Sun.COM if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0) 16808275SEric Cheng return (EINVAL); 16818275SEric Cheng 16828275SEric Cheng if ((fd->fd_mask & FLOW_LINK_VID) != 0) { 16838275SEric Cheng /* 16848275SEric Cheng * VLAN flows are only supported over ethernet macs. 16858275SEric Cheng */ 16868275SEric Cheng if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER) 16878275SEric Cheng return (EINVAL); 16888275SEric Cheng 16898275SEric Cheng if (fd->fd_vid == 0) 16908275SEric Cheng return (EINVAL); 16918275SEric Cheng 16928275SEric Cheng } 16938275SEric Cheng flent->fe_match = flow_l2_match; 16948275SEric Cheng return (0); 16958275SEric Cheng } 16968275SEric Cheng 16978275SEric Cheng /* 16988275SEric Cheng * Calculates hash index of flow entry. 16998275SEric Cheng */ 17008275SEric Cheng static uint32_t 17018275SEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 17028275SEric Cheng { 17038275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17048275SEric Cheng 170510616SSebastien.Roy@Sun.COM ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0); 170610616SSebastien.Roy@Sun.COM return (flow_l2_addrhash(fd->fd_dst_mac, 170710616SSebastien.Roy@Sun.COM ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); 17088275SEric Cheng } 17098275SEric Cheng 17108275SEric Cheng /* 17118275SEric Cheng * This is used for duplicate flow checking. 17128275SEric Cheng */ 17138275SEric Cheng /* ARGSUSED */ 17148275SEric Cheng static boolean_t 17158275SEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 17168275SEric Cheng { 17178275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 17188275SEric Cheng 17198275SEric Cheng ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0); 17208275SEric Cheng return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac, 17218275SEric Cheng fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid); 17228275SEric Cheng } 17238275SEric Cheng 17248275SEric Cheng /* 17258275SEric Cheng * Generic flow entry insertion function. 17268275SEric Cheng * Used by flow tables that do not have ordering requirements. 17278275SEric Cheng */ 17288275SEric Cheng /* ARGSUSED */ 17298275SEric Cheng static int 17308275SEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 17318275SEric Cheng flow_entry_t *flent) 17328275SEric Cheng { 17338275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 17348275SEric Cheng 17358275SEric Cheng if (*headp != NULL) { 17368275SEric Cheng ASSERT(flent->fe_next == NULL); 17378275SEric Cheng flent->fe_next = *headp; 17388275SEric Cheng } 17398275SEric Cheng *headp = flent; 17408275SEric Cheng return (0); 17418275SEric Cheng } 17428275SEric Cheng 17438275SEric Cheng /* 17448275SEric Cheng * IP version independent DSField matching function. 17458275SEric Cheng */ 17468275SEric Cheng /* ARGSUSED */ 17478275SEric Cheng static boolean_t 17488275SEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17498275SEric Cheng { 17508275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17518275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17528275SEric Cheng 17538275SEric Cheng switch (l3info->l3_version) { 17548275SEric Cheng case IPV4_VERSION: { 17558275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17568275SEric Cheng 17578275SEric Cheng return ((ipha->ipha_type_of_service & 17588275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17598275SEric Cheng } 17608275SEric Cheng case IPV6_VERSION: { 17618275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 17628275SEric Cheng 17638275SEric Cheng return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) & 17648275SEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 17658275SEric Cheng } 17668275SEric Cheng default: 17678275SEric Cheng return (B_FALSE); 17688275SEric Cheng } 17698275SEric Cheng } 17708275SEric Cheng 17718275SEric Cheng /* 17728275SEric Cheng * IP v4 and v6 address matching. 17738275SEric Cheng * The netmask only needs to be applied on the packet but not on the 17748275SEric Cheng * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets. 17758275SEric Cheng */ 17768275SEric Cheng 17778275SEric Cheng /* ARGSUSED */ 17788275SEric Cheng static boolean_t 17798275SEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17808275SEric Cheng { 17818275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 17828275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 17838275SEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 17848275SEric Cheng in_addr_t addr; 17858275SEric Cheng 17868275SEric Cheng addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src); 17878275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 17888275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) == 17898275SEric Cheng V4_PART_OF_V6(fd->fd_local_addr)); 17908275SEric Cheng } 17918275SEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) == 17928275SEric Cheng V4_PART_OF_V6(fd->fd_remote_addr)); 17938275SEric Cheng } 17948275SEric Cheng 17958275SEric Cheng /* ARGSUSED */ 17968275SEric Cheng static boolean_t 17978275SEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 17988275SEric Cheng { 17998275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18008275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 18018275SEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 18028275SEric Cheng in6_addr_t *addrp; 18038275SEric Cheng 18048275SEric Cheng addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src); 18058275SEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 18068275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_local_netmask, 18078275SEric Cheng fd->fd_local_addr)); 18088275SEric Cheng } 18098275SEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr)); 18108275SEric Cheng } 18118275SEric Cheng 18128275SEric Cheng /* ARGSUSED */ 18138275SEric Cheng static boolean_t 18148275SEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 18158275SEric Cheng { 18168275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18178275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 18188275SEric Cheng 18198275SEric Cheng return (l3info->l3_protocol == fd->fd_protocol); 18208275SEric Cheng } 18218275SEric Cheng 18228275SEric Cheng static uint32_t 18238275SEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s) 18248275SEric Cheng { 18258275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18268275SEric Cheng flow_mask_t mask = ft->ft_mask; 18278275SEric Cheng 18288275SEric Cheng if ((mask & FLOW_IP_LOCAL) != 0) { 18298275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 18308275SEric Cheng } else if ((mask & FLOW_IP_REMOTE) != 0) { 18318275SEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 18328275SEric Cheng } else if ((mask & FLOW_IP_DSFIELD) != 0) { 18338275SEric Cheng /* 18348275SEric Cheng * DSField flents are arranged as a single list. 18358275SEric Cheng */ 18368275SEric Cheng return (0); 18378275SEric Cheng } 18388275SEric Cheng /* 18398275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 18408275SEric Cheng */ 18418275SEric Cheng ASSERT(ft->ft_size >= 2); 18428275SEric Cheng return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1); 18438275SEric Cheng } 18448275SEric Cheng 18458275SEric Cheng static uint32_t 18468275SEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s) 18478275SEric Cheng { 18488275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18498275SEric Cheng 18508275SEric Cheng return (l3info->l3_protocol % ft->ft_size); 18518275SEric Cheng } 18528275SEric Cheng 18538275SEric Cheng /* ARGSUSED */ 18548275SEric Cheng static int 18558275SEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s) 18568275SEric Cheng { 18578275SEric Cheng flow_l2info_t *l2info = &s->fs_l2info; 18588275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 18598275SEric Cheng uint16_t sap = l2info->l2_sap; 18608275SEric Cheng uchar_t *l3_start; 18618275SEric Cheng 18628833SVenu.Iyer@Sun.COM l3_start = l2info->l2_start + l2info->l2_hdrsize; 18638833SVenu.Iyer@Sun.COM 18648833SVenu.Iyer@Sun.COM /* 18658833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 18668833SVenu.Iyer@Sun.COM */ 18678833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l3_start); 18688833SVenu.Iyer@Sun.COM 18698833SVenu.Iyer@Sun.COM l3info->l3_start = l3_start; 18708275SEric Cheng if (!OK_32PTR(l3_start)) 18718275SEric Cheng return (EINVAL); 18728275SEric Cheng 18738275SEric Cheng switch (sap) { 18748275SEric Cheng case ETHERTYPE_IP: { 18758275SEric Cheng ipha_t *ipha = (ipha_t *)l3_start; 18768275SEric Cheng 18778275SEric Cheng if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH)) 18788275SEric Cheng return (ENOBUFS); 18798275SEric Cheng 18808275SEric Cheng l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha); 18818275SEric Cheng l3info->l3_protocol = ipha->ipha_protocol; 18828275SEric Cheng l3info->l3_version = IPV4_VERSION; 18838275SEric Cheng l3info->l3_fragmented = 18848275SEric Cheng IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags); 18858275SEric Cheng break; 18868275SEric Cheng } 18878275SEric Cheng case ETHERTYPE_IPV6: { 18888275SEric Cheng ip6_t *ip6h = (ip6_t *)l3_start; 18898275SEric Cheng uint16_t ip6_hdrlen; 18908275SEric Cheng uint8_t nexthdr; 18918275SEric Cheng 18928275SEric Cheng if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, 1893*11528SBaban.Kenkre@Sun.COM &nexthdr, NULL, NULL)) { 18948275SEric Cheng return (ENOBUFS); 18958275SEric Cheng } 18968275SEric Cheng l3info->l3_hdrsize = ip6_hdrlen; 18978275SEric Cheng l3info->l3_protocol = nexthdr; 18988275SEric Cheng l3info->l3_version = IPV6_VERSION; 18998275SEric Cheng l3info->l3_fragmented = B_FALSE; 19008275SEric Cheng break; 19018275SEric Cheng } 19028275SEric Cheng default: 19038275SEric Cheng return (EINVAL); 19048275SEric Cheng } 19058275SEric Cheng return (0); 19068275SEric Cheng } 19078275SEric Cheng 19088275SEric Cheng /* ARGSUSED */ 19098275SEric Cheng static int 19108275SEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 19118275SEric Cheng { 19128275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19138275SEric Cheng 19148275SEric Cheng switch (fd->fd_protocol) { 19158275SEric Cheng case IPPROTO_TCP: 19168275SEric Cheng case IPPROTO_UDP: 19178275SEric Cheng case IPPROTO_SCTP: 19188275SEric Cheng case IPPROTO_ICMP: 19198275SEric Cheng case IPPROTO_ICMPV6: 19208275SEric Cheng flent->fe_match = flow_ip_proto_match; 19218275SEric Cheng return (0); 19228275SEric Cheng default: 19238275SEric Cheng return (EINVAL); 19248275SEric Cheng } 19258275SEric Cheng } 19268275SEric Cheng 19278275SEric Cheng /* ARGSUSED */ 19288275SEric Cheng static int 19298275SEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 19308275SEric Cheng { 19318275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19328275SEric Cheng flow_mask_t mask; 19338275SEric Cheng uint8_t version; 19348275SEric Cheng in6_addr_t *addr, *netmask; 19358275SEric Cheng 19368275SEric Cheng /* 19378275SEric Cheng * DSField does not require a IP version. 19388275SEric Cheng */ 19398275SEric Cheng if (fd->fd_mask == FLOW_IP_DSFIELD) { 19408275SEric Cheng if (fd->fd_dsfield_mask == 0) 19418275SEric Cheng return (EINVAL); 19428275SEric Cheng 19438275SEric Cheng flent->fe_match = flow_ip_dsfield_match; 19448275SEric Cheng return (0); 19458275SEric Cheng } 19468275SEric Cheng 19478275SEric Cheng /* 19488275SEric Cheng * IP addresses must come with a version to avoid ambiguity. 19498275SEric Cheng */ 19508275SEric Cheng if ((fd->fd_mask & FLOW_IP_VERSION) == 0) 19518275SEric Cheng return (EINVAL); 19528275SEric Cheng 19538275SEric Cheng version = fd->fd_ipversion; 19548275SEric Cheng if (version != IPV4_VERSION && version != IPV6_VERSION) 19558275SEric Cheng return (EINVAL); 19568275SEric Cheng 19578275SEric Cheng mask = fd->fd_mask & ~FLOW_IP_VERSION; 19588275SEric Cheng switch (mask) { 19598275SEric Cheng case FLOW_IP_LOCAL: 19608275SEric Cheng addr = &fd->fd_local_addr; 19618275SEric Cheng netmask = &fd->fd_local_netmask; 19628275SEric Cheng break; 19638275SEric Cheng case FLOW_IP_REMOTE: 19648275SEric Cheng addr = &fd->fd_remote_addr; 19658275SEric Cheng netmask = &fd->fd_remote_netmask; 19668275SEric Cheng break; 19678275SEric Cheng default: 19688275SEric Cheng return (EINVAL); 19698275SEric Cheng } 19708275SEric Cheng 19718275SEric Cheng /* 19728275SEric Cheng * Apply netmask onto specified address. 19738275SEric Cheng */ 19748275SEric Cheng V6_MASK_COPY(*addr, *netmask, *addr); 19758275SEric Cheng if (version == IPV4_VERSION) { 19768275SEric Cheng ipaddr_t v4addr = V4_PART_OF_V6((*addr)); 19778275SEric Cheng ipaddr_t v4mask = V4_PART_OF_V6((*netmask)); 19788275SEric Cheng 19798275SEric Cheng if (v4addr == 0 || v4mask == 0) 19808275SEric Cheng return (EINVAL); 19818275SEric Cheng flent->fe_match = flow_ip_v4_match; 19828275SEric Cheng } else { 19838275SEric Cheng if (IN6_IS_ADDR_UNSPECIFIED(addr) || 19848275SEric Cheng IN6_IS_ADDR_UNSPECIFIED(netmask)) 19858275SEric Cheng return (EINVAL); 19868275SEric Cheng flent->fe_match = flow_ip_v6_match; 19878275SEric Cheng } 19888275SEric Cheng return (0); 19898275SEric Cheng } 19908275SEric Cheng 19918275SEric Cheng static uint32_t 19928275SEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 19938275SEric Cheng { 19948275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 19958275SEric Cheng 19968275SEric Cheng return (fd->fd_protocol % ft->ft_size); 19978275SEric Cheng } 19988275SEric Cheng 19998275SEric Cheng static uint32_t 20008275SEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 20018275SEric Cheng { 20028275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 20038275SEric Cheng 20048275SEric Cheng /* 20058275SEric Cheng * DSField flents are arranged as a single list. 20068275SEric Cheng */ 20078275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 20088275SEric Cheng return (0); 20098275SEric Cheng 20108275SEric Cheng /* 20118275SEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 20128275SEric Cheng */ 20138275SEric Cheng ASSERT(ft->ft_size >= 2); 20148275SEric Cheng return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1); 20158275SEric Cheng } 20168275SEric Cheng 20178275SEric Cheng /* ARGSUSED */ 20188275SEric Cheng static boolean_t 20198275SEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20208275SEric Cheng { 20218275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20228275SEric Cheng 20238275SEric Cheng return (fd1->fd_protocol == fd2->fd_protocol); 20248275SEric Cheng } 20258275SEric Cheng 20268275SEric Cheng /* ARGSUSED */ 20278275SEric Cheng static boolean_t 20288275SEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 20298275SEric Cheng { 20308275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 20318275SEric Cheng in6_addr_t *a1, *m1, *a2, *m2; 20328275SEric Cheng 20338275SEric Cheng ASSERT(fd1->fd_mask == fd2->fd_mask); 20348275SEric Cheng if (fd1->fd_mask == FLOW_IP_DSFIELD) { 20358275SEric Cheng return (fd1->fd_dsfield == fd2->fd_dsfield && 20368275SEric Cheng fd1->fd_dsfield_mask == fd2->fd_dsfield_mask); 20378275SEric Cheng } 20388275SEric Cheng 20398275SEric Cheng /* 20408275SEric Cheng * flow_ip_accept_fe() already validated the version. 20418275SEric Cheng */ 20428275SEric Cheng ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0); 20438275SEric Cheng if (fd1->fd_ipversion != fd2->fd_ipversion) 20448275SEric Cheng return (B_FALSE); 20458275SEric Cheng 20468275SEric Cheng switch (fd1->fd_mask & ~FLOW_IP_VERSION) { 20478275SEric Cheng case FLOW_IP_LOCAL: 20488275SEric Cheng a1 = &fd1->fd_local_addr; 20498275SEric Cheng m1 = &fd1->fd_local_netmask; 20508275SEric Cheng a2 = &fd2->fd_local_addr; 20518275SEric Cheng m2 = &fd2->fd_local_netmask; 20528275SEric Cheng break; 20538275SEric Cheng case FLOW_IP_REMOTE: 20548275SEric Cheng a1 = &fd1->fd_remote_addr; 20558275SEric Cheng m1 = &fd1->fd_remote_netmask; 20568275SEric Cheng a2 = &fd2->fd_remote_addr; 20578275SEric Cheng m2 = &fd2->fd_remote_netmask; 20588275SEric Cheng break; 20598275SEric Cheng default: 20608275SEric Cheng /* 20618275SEric Cheng * This is unreachable given the checks in 20628275SEric Cheng * flow_ip_accept_fe(). 20638275SEric Cheng */ 20648275SEric Cheng return (B_FALSE); 20658275SEric Cheng } 20668275SEric Cheng 20678275SEric Cheng if (fd1->fd_ipversion == IPV4_VERSION) { 20688275SEric Cheng return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) && 20698275SEric Cheng V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2))); 20708275SEric Cheng 20718275SEric Cheng } else { 20728275SEric Cheng return (IN6_ARE_ADDR_EQUAL(a1, a2) && 20738275SEric Cheng IN6_ARE_ADDR_EQUAL(m1, m2)); 20748275SEric Cheng } 20758275SEric Cheng } 20768275SEric Cheng 20778275SEric Cheng static int 20788275SEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask) 20798275SEric Cheng { 20808275SEric Cheng int bits; 20818275SEric Cheng int plen = IPV6_ABITS; 20828275SEric Cheng int i; 20838275SEric Cheng 20848275SEric Cheng for (i = 3; i >= 0; i--) { 20858275SEric Cheng if (v6mask->s6_addr32[i] == 0) { 20868275SEric Cheng plen -= 32; 20878275SEric Cheng continue; 20888275SEric Cheng } 20898275SEric Cheng bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 20908275SEric Cheng if (bits == 0) 20918275SEric Cheng break; 20928275SEric Cheng plen -= bits; 20938275SEric Cheng } 20948275SEric Cheng return (plen); 20958275SEric Cheng } 20968275SEric Cheng 20978275SEric Cheng /* ARGSUSED */ 20988275SEric Cheng static int 20998275SEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 21008275SEric Cheng flow_entry_t *flent) 21018275SEric Cheng { 21028275SEric Cheng flow_entry_t **p = headp; 21038275SEric Cheng flow_desc_t *fd0, *fd; 21048275SEric Cheng in6_addr_t *m0, *m; 21058275SEric Cheng int plen0, plen; 21068275SEric Cheng 21078275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 21088275SEric Cheng 21098275SEric Cheng /* 21108275SEric Cheng * No special ordering needed for dsfield. 21118275SEric Cheng */ 21128275SEric Cheng fd0 = &flent->fe_flow_desc; 21138275SEric Cheng if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) { 21148275SEric Cheng if (*p != NULL) { 21158275SEric Cheng ASSERT(flent->fe_next == NULL); 21168275SEric Cheng flent->fe_next = *p; 21178275SEric Cheng } 21188275SEric Cheng *p = flent; 21198275SEric Cheng return (0); 21208275SEric Cheng } 21218275SEric Cheng 21228275SEric Cheng /* 21238275SEric Cheng * IP address flows are arranged in descending prefix length order. 21248275SEric Cheng */ 21258275SEric Cheng m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ? 21268275SEric Cheng &fd0->fd_local_netmask : &fd0->fd_remote_netmask; 21278275SEric Cheng plen0 = flow_ip_mask2plen(m0); 21288275SEric Cheng ASSERT(plen0 != 0); 21298275SEric Cheng 21308275SEric Cheng for (; *p != NULL; p = &(*p)->fe_next) { 21318275SEric Cheng fd = &(*p)->fe_flow_desc; 21328275SEric Cheng 21338275SEric Cheng /* 21348275SEric Cheng * Normally a dsfield flent shouldn't end up on the same 21358275SEric Cheng * list as an IP address because flow tables are (for now) 21368275SEric Cheng * disjoint. If we decide to support both IP and dsfield 21378275SEric Cheng * in the same table in the future, this check will allow 21388275SEric Cheng * for that. 21398275SEric Cheng */ 21408275SEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 21418275SEric Cheng continue; 21428275SEric Cheng 21438275SEric Cheng /* 21448275SEric Cheng * We also allow for the mixing of local and remote address 21458275SEric Cheng * flents within one list. 21468275SEric Cheng */ 21478275SEric Cheng m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ? 21488275SEric Cheng &fd->fd_local_netmask : &fd->fd_remote_netmask; 21498275SEric Cheng plen = flow_ip_mask2plen(m); 21508275SEric Cheng 21518275SEric Cheng if (plen <= plen0) 21528275SEric Cheng break; 21538275SEric Cheng } 21548275SEric Cheng if (*p != NULL) { 21558275SEric Cheng ASSERT(flent->fe_next == NULL); 21568275SEric Cheng flent->fe_next = *p; 21578275SEric Cheng } 21588275SEric Cheng *p = flent; 21598275SEric Cheng return (0); 21608275SEric Cheng } 21618275SEric Cheng 21628275SEric Cheng /* 21638275SEric Cheng * Transport layer protocol and port matching functions. 21648275SEric Cheng */ 21658275SEric Cheng 21668275SEric Cheng /* ARGSUSED */ 21678275SEric Cheng static boolean_t 21688275SEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21698275SEric Cheng { 21708275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21718275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21728275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21738275SEric Cheng 21748275SEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 21758275SEric Cheng fd->fd_local_port == l4info->l4_hash_port); 21768275SEric Cheng } 21778275SEric Cheng 21788275SEric Cheng /* ARGSUSED */ 21798275SEric Cheng static boolean_t 21808275SEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 21818275SEric Cheng { 21828275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 21838275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 21848275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 21858275SEric Cheng 21868275SEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 21878275SEric Cheng fd->fd_remote_port == l4info->l4_hash_port); 21888275SEric Cheng } 21898275SEric Cheng 21908275SEric Cheng /* 21918275SEric Cheng * Transport hash function. 21928275SEric Cheng * Since we only support either local or remote port flows, 21938275SEric Cheng * we only need to extract one of the ports to be used for 21948275SEric Cheng * matching. 21958275SEric Cheng */ 21968275SEric Cheng static uint32_t 21978275SEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s) 21988275SEric Cheng { 21998275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 22008275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 22018275SEric Cheng uint8_t proto = l3info->l3_protocol; 22028275SEric Cheng boolean_t dst_or_src; 22038275SEric Cheng 22048275SEric Cheng if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) { 22058275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 22068275SEric Cheng } else { 22078275SEric Cheng dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 22088275SEric Cheng } 22098275SEric Cheng 22108275SEric Cheng l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port : 22118275SEric Cheng l4info->l4_src_port; 22128275SEric Cheng 22138275SEric Cheng return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size); 22148275SEric Cheng } 22158275SEric Cheng 22168275SEric Cheng /* 22178275SEric Cheng * Unlike other accept() functions above, we do not need to get the header 22188275SEric Cheng * size because this is our highest layer so far. If we want to do support 22198275SEric Cheng * other higher layer protocols, we would need to save the l4_hdrsize 22208275SEric Cheng * in the code below. 22218275SEric Cheng */ 22228275SEric Cheng 22238275SEric Cheng /* ARGSUSED */ 22248275SEric Cheng static int 22258275SEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s) 22268275SEric Cheng { 22278275SEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 22288275SEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 22298275SEric Cheng uint8_t proto = l3info->l3_protocol; 22308275SEric Cheng uchar_t *l4_start; 22318275SEric Cheng 22328833SVenu.Iyer@Sun.COM l4_start = l3info->l3_start + l3info->l3_hdrsize; 22338833SVenu.Iyer@Sun.COM 22348833SVenu.Iyer@Sun.COM /* 22358833SVenu.Iyer@Sun.COM * Adjust start pointer if we're at the end of an mblk. 22368833SVenu.Iyer@Sun.COM */ 22378833SVenu.Iyer@Sun.COM CHECK_AND_ADJUST_START_PTR(s, l4_start); 22388833SVenu.Iyer@Sun.COM 22398833SVenu.Iyer@Sun.COM l4info->l4_start = l4_start; 22408275SEric Cheng if (!OK_32PTR(l4_start)) 22418275SEric Cheng return (EINVAL); 22428275SEric Cheng 22438275SEric Cheng if (l3info->l3_fragmented == B_TRUE) 22448275SEric Cheng return (EINVAL); 22458275SEric Cheng 22468275SEric Cheng switch (proto) { 22478275SEric Cheng case IPPROTO_TCP: { 22488275SEric Cheng struct tcphdr *tcph = (struct tcphdr *)l4_start; 22498275SEric Cheng 22508275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph))) 22518275SEric Cheng return (ENOBUFS); 22528275SEric Cheng 22538275SEric Cheng l4info->l4_src_port = tcph->th_sport; 22548275SEric Cheng l4info->l4_dst_port = tcph->th_dport; 22558275SEric Cheng break; 22568275SEric Cheng } 22578275SEric Cheng case IPPROTO_UDP: { 22588275SEric Cheng struct udphdr *udph = (struct udphdr *)l4_start; 22598275SEric Cheng 22608275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph))) 22618275SEric Cheng return (ENOBUFS); 22628275SEric Cheng 22638275SEric Cheng l4info->l4_src_port = udph->uh_sport; 22648275SEric Cheng l4info->l4_dst_port = udph->uh_dport; 22658275SEric Cheng break; 22668275SEric Cheng } 22678275SEric Cheng case IPPROTO_SCTP: { 22688275SEric Cheng sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start; 22698275SEric Cheng 22708275SEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph))) 22718275SEric Cheng return (ENOBUFS); 22728275SEric Cheng 22738275SEric Cheng l4info->l4_src_port = sctph->sh_sport; 22748275SEric Cheng l4info->l4_dst_port = sctph->sh_dport; 22758275SEric Cheng break; 22768275SEric Cheng } 22778275SEric Cheng default: 22788275SEric Cheng return (EINVAL); 22798275SEric Cheng } 22808275SEric Cheng 22818275SEric Cheng return (0); 22828275SEric Cheng } 22838275SEric Cheng 22848275SEric Cheng /* 22858275SEric Cheng * Validates transport flow entry. 22868275SEric Cheng * The protocol field must be present. 22878275SEric Cheng */ 22888275SEric Cheng 22898275SEric Cheng /* ARGSUSED */ 22908275SEric Cheng static int 22918275SEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 22928275SEric Cheng { 22938275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 22948275SEric Cheng flow_mask_t mask = fd->fd_mask; 22958275SEric Cheng 22968275SEric Cheng if ((mask & FLOW_IP_PROTOCOL) == 0) 22978275SEric Cheng return (EINVAL); 22988275SEric Cheng 22998275SEric Cheng switch (fd->fd_protocol) { 23008275SEric Cheng case IPPROTO_TCP: 23018275SEric Cheng case IPPROTO_UDP: 23028275SEric Cheng case IPPROTO_SCTP: 23038275SEric Cheng break; 23048275SEric Cheng default: 23058275SEric Cheng return (EINVAL); 23068275SEric Cheng } 23078275SEric Cheng 23088275SEric Cheng switch (mask & ~FLOW_IP_PROTOCOL) { 23098275SEric Cheng case FLOW_ULP_PORT_LOCAL: 23108275SEric Cheng if (fd->fd_local_port == 0) 23118275SEric Cheng return (EINVAL); 23128275SEric Cheng 23138275SEric Cheng flent->fe_match = flow_transport_lport_match; 23148275SEric Cheng break; 23158275SEric Cheng case FLOW_ULP_PORT_REMOTE: 23168275SEric Cheng if (fd->fd_remote_port == 0) 23178275SEric Cheng return (EINVAL); 23188275SEric Cheng 23198275SEric Cheng flent->fe_match = flow_transport_rport_match; 23208275SEric Cheng break; 23218275SEric Cheng case 0: 23228275SEric Cheng /* 23238275SEric Cheng * transport-only flows conflicts with our table type. 23248275SEric Cheng */ 23258275SEric Cheng return (EOPNOTSUPP); 23268275SEric Cheng default: 23278275SEric Cheng return (EINVAL); 23288275SEric Cheng } 23298275SEric Cheng 23308275SEric Cheng return (0); 23318275SEric Cheng } 23328275SEric Cheng 23338275SEric Cheng static uint32_t 23348275SEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 23358275SEric Cheng { 23368275SEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 23378275SEric Cheng uint16_t port = 0; 23388275SEric Cheng 23398275SEric Cheng port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ? 23408275SEric Cheng fd->fd_local_port : fd->fd_remote_port; 23418275SEric Cheng 23428275SEric Cheng return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size); 23438275SEric Cheng } 23448275SEric Cheng 23458275SEric Cheng /* ARGSUSED */ 23468275SEric Cheng static boolean_t 23478275SEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 23488275SEric Cheng { 23498275SEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 23508275SEric Cheng 23518275SEric Cheng if (fd1->fd_protocol != fd2->fd_protocol) 23528275SEric Cheng return (B_FALSE); 23538275SEric Cheng 23548275SEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) 23558275SEric Cheng return (fd1->fd_local_port == fd2->fd_local_port); 23568275SEric Cheng 235710734SEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0) 235810734SEric Cheng return (fd1->fd_remote_port == fd2->fd_remote_port); 235910734SEric Cheng 236010734SEric Cheng return (B_TRUE); 23618275SEric Cheng } 23628275SEric Cheng 23638275SEric Cheng static flow_ops_t flow_l2_ops = { 23648275SEric Cheng flow_l2_accept_fe, 23658275SEric Cheng flow_l2_hash_fe, 23668275SEric Cheng flow_l2_match_fe, 23678275SEric Cheng flow_generic_insert_fe, 23688275SEric Cheng flow_l2_hash, 23698275SEric Cheng {flow_l2_accept} 23708275SEric Cheng }; 23718275SEric Cheng 23728275SEric Cheng static flow_ops_t flow_ip_ops = { 23738275SEric Cheng flow_ip_accept_fe, 23748275SEric Cheng flow_ip_hash_fe, 23758275SEric Cheng flow_ip_match_fe, 23768275SEric Cheng flow_ip_insert_fe, 23778275SEric Cheng flow_ip_hash, 23788275SEric Cheng {flow_l2_accept, flow_ip_accept} 23798275SEric Cheng }; 23808275SEric Cheng 23818275SEric Cheng static flow_ops_t flow_ip_proto_ops = { 23828275SEric Cheng flow_ip_proto_accept_fe, 23838275SEric Cheng flow_ip_proto_hash_fe, 23848275SEric Cheng flow_ip_proto_match_fe, 23858275SEric Cheng flow_generic_insert_fe, 23868275SEric Cheng flow_ip_proto_hash, 23878275SEric Cheng {flow_l2_accept, flow_ip_accept} 23888275SEric Cheng }; 23898275SEric Cheng 23908275SEric Cheng static flow_ops_t flow_transport_ops = { 23918275SEric Cheng flow_transport_accept_fe, 23928275SEric Cheng flow_transport_hash_fe, 23938275SEric Cheng flow_transport_match_fe, 23948275SEric Cheng flow_generic_insert_fe, 23958275SEric Cheng flow_transport_hash, 23968275SEric Cheng {flow_l2_accept, flow_ip_accept, flow_transport_accept} 23978275SEric Cheng }; 23988275SEric Cheng 23998275SEric Cheng static flow_tab_info_t flow_tab_info_list[] = { 24008275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2}, 24018275SEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2}, 24028275SEric Cheng {&flow_ip_ops, FLOW_IP_DSFIELD, 1}, 24038275SEric Cheng {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256}, 240410734SEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024}, 240510734SEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024} 24068275SEric Cheng }; 24078275SEric Cheng 24088275SEric Cheng #define FLOW_MAX_TAB_INFO \ 24098275SEric Cheng ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t)) 24108275SEric Cheng 24118275SEric Cheng static flow_tab_info_t * 24128275SEric Cheng mac_flow_tab_info_get(flow_mask_t mask) 24138275SEric Cheng { 24148275SEric Cheng int i; 24158275SEric Cheng 24168275SEric Cheng for (i = 0; i < FLOW_MAX_TAB_INFO; i++) { 24178275SEric Cheng if (mask == flow_tab_info_list[i].fti_mask) 24188275SEric Cheng return (&flow_tab_info_list[i]); 24198275SEric Cheng } 24208275SEric Cheng return (NULL); 24218275SEric Cheng } 2422