10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51852Syz147064 * Common Development and Distribution License (the "License"). 61852Syz147064 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 215084Sjohnlev 220Sstevel@tonic-gate /* 235895Syz147064 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * MAC Services Module 298275SEric Cheng * 308275SEric Cheng * The GLDv3 framework locking - The MAC layer 318275SEric Cheng * -------------------------------------------- 328275SEric Cheng * 338275SEric Cheng * The MAC layer is central to the GLD framework and can provide the locking 348275SEric Cheng * framework needed for itself and for the use of MAC clients. MAC end points 358275SEric Cheng * are fairly disjoint and don't share a lot of state. 
So a coarse grained
 * multi-threading scheme is to single thread all create/modify/delete or set
 * type of control operations on a per mac end point while allowing data threads
 * concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized on
 * a per mac end point basis. We have at most 1 such thread per mac end point
 * at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * Examples are a control operation (get) like getting statistics, which may
 * not care about reading values atomically, or data threads sending or
 * receiving data. Mostly these types of operations don't modify the control
 * state. Any state these operations care about is protected using traditional
 * locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks.
The perimeter ensures that conflicting
 * operations like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point don't interfere with each other and also ensures that the
 * changes in the mac layer and the call to the underlying driver to say add a
 * multicast address are done atomically without interference from a thread
 * trying to delete the same address.
 *
 * For example, consider
 * mac_multicast_add()
 * {
 *	mac_perimeter_enter();	serialize all control operations
 *
 *	grab list lock		protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry into
 * the perimeter to facilitate use of this mechanism by both the mac client and
 * the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles e.g. aggr. and should only make the corresponding mac interface calls
 * i.e.
mac driver interface or mac client interface as appropriate for that
 * mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
1238275SEric Cheng * 1248275SEric Cheng * R4. However if a MAC client has a lot of global state across all mac end 1258275SEric Cheng * points the per mac end point serialization may not be sufficient. In this 1268275SEric Cheng * case the client may choose to use global locks or use its own serialization. 1278275SEric Cheng * To avoid deadlocks, these client layer locks held across the mac calls 1288275SEric Cheng * in the control path must never be acquired by the data path for the reason 1298275SEric Cheng * mentioned below. 1308275SEric Cheng * 1318275SEric Cheng * (Assume that a control operation that holds a client lock blocks in the 1328275SEric Cheng * mac layer waiting for upcall reference counts to drop to zero. If an upcall 1338275SEric Cheng * data thread that holds this reference count, tries to acquire the same 1348275SEric Cheng * client lock subsequently it will deadlock). 1358275SEric Cheng * 1368275SEric Cheng * A MAC client may follow either the R3 model or the R4 model, but can't 1378275SEric Cheng * mix both. In the former, the hierarchy is Perim -> client locks, but in 1388275SEric Cheng * the latter it is client locks -> Perim. 1398275SEric Cheng * 1408275SEric Cheng * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able 1418275SEric Cheng * context since they may block while trying to acquire the perimeter. 1428275SEric Cheng * In addition some calls may block waiting for upcall refcnts to come down to 1438275SEric Cheng * zero. 1448275SEric Cheng * 1458275SEric Cheng * R6. MAC clients must make sure that they are single threaded and all threads 1468275SEric Cheng * from the top (in particular data threads) have finished before calling 1478275SEric Cheng * mac_client_close. The MAC framework does not track the number of client 1488275SEric Cheng * threads using the mac client handle. Also mac clients must make sure 1498275SEric Cheng * they have undone all the control operations before calling mac_client_close. 
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts.
In simple 1778275SEric Cheng * callbacks it may be ok to hold a lock and call the callbacks, but this is 1788275SEric Cheng * harder to maintain in the general case of arbitrary callbacks. 1798275SEric Cheng * 1808275SEric Cheng * R12. The MAC layer must protect upcall notification callbacks using reference 1818275SEric Cheng * counts rather than holding locks across the callbacks. 1828275SEric Cheng * 1838275SEric Cheng * R13. Given the variety of drivers, it is preferable if the MAC layer can make 1848275SEric Cheng * sure that any pointers (such as mac ring pointers) it passes to the driver 1858275SEric Cheng * remain valid until mac unregister time. Currently the mac layer achieves 1868275SEric Cheng * this by using generation numbers for rings and freeing the mac rings only 1878275SEric Cheng * at unregister time. The MAC layer must provide a layer of indirection and 1888275SEric Cheng * must not expose underlying driver rings or driver data structures/pointers 1898275SEric Cheng * directly to MAC clients. 1908275SEric Cheng * 1918275SEric Cheng * MAC driver rules 1928275SEric Cheng * ---------------- 1938275SEric Cheng * 1948275SEric Cheng * R14. It would be preferable if MAC drivers don't hold any locks across any 1958275SEric Cheng * mac call. However at a minimum they must not hold any locks across data 1968275SEric Cheng * upcalls. They must also make sure that all references to mac data structures 1978275SEric Cheng * are cleaned up and that it is single threaded at mac_unregister time. 1988275SEric Cheng * 1998275SEric Cheng * R15. MAC driver interfaces don't block and so the action may be done 2008275SEric Cheng * asynchronously in a separate thread as for example handling notifications. 2018275SEric Cheng * The driver must not assume that the action is complete when the call 2028275SEric Cheng * returns. 2038275SEric Cheng * 2048275SEric Cheng * R16. 
Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(); They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring();
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac provided
 * perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls.
In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on underlying link and flows,
 * the flows cpulist must be a subset of the underlying link.
 * o In case of a user specified fanout mode present on link and flow, the
 * subflow fanout count has to be less than or equal to that of the
 * underlying link. The cpu-bindings for the subflows will be a subset of
 * the underlying link.
 * o In case no cpulist is specified on either the underlying link or the flow,
 * the underlying link relies on a MAC tunable to provide out of box fanout.
 * The subflow will have no cpulist (the subflow will be unbound)
 * o In case no cpulist is specified on the underlying link, a subflow can
 * carry either a user-specified cpulist or fanout count. The cpu-bindings
 * for the subflow will not adhere to the restriction that they need to be a
 * subset of the underlying link.
 * o In case the underlying link is carrying either a user specified
 * cpulist or fanout mode and the subflow is unspecified, the subflow will be
 * created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 * figure a right fanout count. In such cases the fanout count will override
 * the unbound cpu-binding behavior.
2608275SEric Cheng * o In addition to this, while cycling between flow and link properties, we 2618275SEric Cheng * impose a restriction that if a link property has a subflow with 2628275SEric Cheng * user-specified attributes, we will not allow changing the link property. 2638275SEric Cheng * The administrator needs to reset all the user specified properties for the 2648275SEric Cheng * subflows before attempting a link property change. 2658275SEric Cheng * Some of the above rules can be overridden by specifying additional command 2668275SEric Cheng * line options while creating or modifying link or subflow properties. 2670Sstevel@tonic-gate */ 2680Sstevel@tonic-gate 2690Sstevel@tonic-gate #include <sys/types.h> 2700Sstevel@tonic-gate #include <sys/conf.h> 2715895Syz147064 #include <sys/id_space.h> 2726077Syz147064 #include <sys/esunddi.h> 2730Sstevel@tonic-gate #include <sys/stat.h> 2745895Syz147064 #include <sys/mkdev.h> 2750Sstevel@tonic-gate #include <sys/stream.h> 2760Sstevel@tonic-gate #include <sys/strsun.h> 2770Sstevel@tonic-gate #include <sys/strsubr.h> 2780Sstevel@tonic-gate #include <sys/dlpi.h> 2798275SEric Cheng #include <sys/modhash.h> 2808275SEric Cheng #include <sys/mac_provider.h> 2818275SEric Cheng #include <sys/mac_client_impl.h> 2828275SEric Cheng #include <sys/mac_soft_ring.h> 2838275SEric Cheng #include <sys/mac_impl.h> 2848275SEric Cheng #include <sys/mac.h> 2855895Syz147064 #include <sys/dls.h> 286269Sericheng #include <sys/dld.h> 2872311Sseb #include <sys/modctl.h> 2883448Sdh155122 #include <sys/fs/dv_node.h> 2895009Sgd78059 #include <sys/thread.h> 2905009Sgd78059 #include <sys/proc.h> 2915009Sgd78059 #include <sys/callb.h> 2925009Sgd78059 #include <sys/cpuvar.h> 2933288Sseb #include <sys/atomic.h> 2948275SEric Cheng #include <sys/bitmap.h> 2954913Sethindra #include <sys/sdt.h> 2968275SEric Cheng #include <sys/mac_flow.h> 2978275SEric Cheng #include <sys/ddi_intr_impl.h> 2988275SEric Cheng #include <sys/disp.h> 2998275SEric Cheng #include 
<sys/sdt.h> 3008275SEric Cheng #include <sys/vnic.h> 3018275SEric Cheng #include <sys/vnic_impl.h> 3028275SEric Cheng #include <sys/vlan.h> 3038275SEric Cheng #include <inet/ip.h> 3048275SEric Cheng #include <inet/ip6.h> 3058275SEric Cheng #include <sys/exacct.h> 3068275SEric Cheng #include <sys/exacct_impl.h> 3075903Ssowmini #include <inet/nd.h> 3086512Ssowmini #include <sys/ethernet.h> 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate #define IMPL_HASHSZ 67 /* prime */ 3110Sstevel@tonic-gate 3128275SEric Cheng kmem_cache_t *i_mac_impl_cachep; 3138275SEric Cheng mod_hash_t *i_mac_impl_hash; 314269Sericheng krwlock_t i_mac_impl_lock; 315269Sericheng uint_t i_mac_impl_count; 3168275SEric Cheng static kmem_cache_t *mac_ring_cache; 3175895Syz147064 static id_space_t *minor_ids; 3185895Syz147064 static uint32_t minor_count; 3190Sstevel@tonic-gate 3208275SEric Cheng /* 3218275SEric Cheng * Logging stuff. Perhaps mac_logging_interval could be broken into 3228275SEric Cheng * mac_flow_log_interval and mac_link_log_interval if we want to be 3238275SEric Cheng * able to schedule them differently. 3248275SEric Cheng */ 3258275SEric Cheng uint_t mac_logging_interval; 3268275SEric Cheng boolean_t mac_flow_log_enable; 3278275SEric Cheng boolean_t mac_link_log_enable; 3288275SEric Cheng timeout_id_t mac_logging_timer; 3298275SEric Cheng 3308275SEric Cheng /* for debugging, see MAC_DBG_PRT() in mac_impl.h */ 3318275SEric Cheng int mac_dbg = 0; 3328275SEric Cheng 3332311Sseb #define MACTYPE_KMODDIR "mac" 3342311Sseb #define MACTYPE_HASHSZ 67 3352311Sseb static mod_hash_t *i_mactype_hash; 3363288Sseb /* 3373288Sseb * i_mactype_lock synchronizes threads that obtain references to mactype_t 3383288Sseb * structures through i_mactype_getplugin(). 3393288Sseb */ 3403288Sseb static kmutex_t i_mactype_lock; 3412311Sseb 3420Sstevel@tonic-gate /* 3438275SEric Cheng * mac_tx_percpu_cnt 3448275SEric Cheng * 3458275SEric Cheng * Number of per cpu locks per mac_client_impl_t. 
Used by the transmit side 3468275SEric Cheng * in mac_tx to reduce lock contention. This is sized at boot time in mac_init. 3478275SEric Cheng * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2. 3488275SEric Cheng * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1. 3495084Sjohnlev */ 3508275SEric Cheng int mac_tx_percpu_cnt; 3518275SEric Cheng int mac_tx_percpu_cnt_max = 128; 3528275SEric Cheng 3538275SEric Cheng static int i_mac_constructor(void *, void *, int); 3548275SEric Cheng static void i_mac_destructor(void *, void *); 3558275SEric Cheng static int i_mac_ring_ctor(void *, void *, int); 3568275SEric Cheng static void i_mac_ring_dtor(void *, void *); 3578275SEric Cheng static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *); 3588275SEric Cheng void mac_tx_client_flush(mac_client_impl_t *); 3598275SEric Cheng void mac_tx_client_block(mac_client_impl_t *); 3608275SEric Cheng static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); 3618275SEric Cheng static int mac_start_group_and_rings(mac_group_t *); 3628275SEric Cheng static void mac_stop_group_and_rings(mac_group_t *); 3632311Sseb 3640Sstevel@tonic-gate /* 3650Sstevel@tonic-gate * Module initialization functions. 3660Sstevel@tonic-gate */ 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate void 3690Sstevel@tonic-gate mac_init(void) 3700Sstevel@tonic-gate { 3718275SEric Cheng mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? 
max_ncpus : 3728275SEric Cheng boot_max_ncpus); 3738275SEric Cheng 3748275SEric Cheng /* Upper bound is mac_tx_percpu_cnt_max */ 3758275SEric Cheng if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max) 3768275SEric Cheng mac_tx_percpu_cnt = mac_tx_percpu_cnt_max; 3778275SEric Cheng 3788275SEric Cheng if (mac_tx_percpu_cnt < 1) { 3798275SEric Cheng /* Someone set max_tx_percpu_cnt_max to 0 or less */ 3808275SEric Cheng mac_tx_percpu_cnt = 1; 3818275SEric Cheng } 3828275SEric Cheng 3838275SEric Cheng ASSERT(mac_tx_percpu_cnt >= 1); 3848275SEric Cheng mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1)); 3858275SEric Cheng /* 3868275SEric Cheng * Make it of the form 2**N - 1 in the range 3878275SEric Cheng * [0 .. mac_tx_percpu_cnt_max - 1] 3888275SEric Cheng */ 3898275SEric Cheng mac_tx_percpu_cnt--; 3908275SEric Cheng 3910Sstevel@tonic-gate i_mac_impl_cachep = kmem_cache_create("mac_impl_cache", 3922311Sseb sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor, 3932311Sseb NULL, NULL, NULL, 0); 3940Sstevel@tonic-gate ASSERT(i_mac_impl_cachep != NULL); 3950Sstevel@tonic-gate 3968275SEric Cheng mac_ring_cache = kmem_cache_create("mac_ring_cache", 3978275SEric Cheng sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL, 3988275SEric Cheng NULL, NULL, 0); 3998275SEric Cheng ASSERT(mac_ring_cache != NULL); 4005084Sjohnlev 401269Sericheng i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash", 402269Sericheng IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 403269Sericheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 404269Sericheng rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL); 4058275SEric Cheng 4068275SEric Cheng mac_flow_init(); 4078275SEric Cheng mac_soft_ring_init(); 4088275SEric Cheng mac_bcast_init(); 4098275SEric Cheng mac_client_init(); 4108275SEric Cheng 411269Sericheng i_mac_impl_count = 0; 4122311Sseb 4132311Sseb i_mactype_hash = mod_hash_create_extended("mactype_hash", 4142311Sseb MACTYPE_HASHSZ, 4152311Sseb 
mod_hash_null_keydtor, mod_hash_null_valdtor, 4162311Sseb mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 4175895Syz147064 4185895Syz147064 /* 4195895Syz147064 * Allocate an id space to manage minor numbers. The range of the 4205895Syz147064 * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal 4215895Syz147064 * minor number is MAXMIN, but id_t is type of integer and does not 4225895Syz147064 * allow MAXMIN). 4235895Syz147064 */ 4245895Syz147064 minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32); 4255895Syz147064 ASSERT(minor_ids != NULL); 4265895Syz147064 minor_count = 0; 4278275SEric Cheng 4288275SEric Cheng /* Let's default to 20 seconds */ 4298275SEric Cheng mac_logging_interval = 20; 4308275SEric Cheng mac_flow_log_enable = B_FALSE; 4318275SEric Cheng mac_link_log_enable = B_FALSE; 4328275SEric Cheng mac_logging_timer = 0; 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate 4350Sstevel@tonic-gate int 4360Sstevel@tonic-gate mac_fini(void) 4370Sstevel@tonic-gate { 4385895Syz147064 if (i_mac_impl_count > 0 || minor_count > 0) 439269Sericheng return (EBUSY); 4400Sstevel@tonic-gate 4415895Syz147064 id_space_destroy(minor_ids); 4428275SEric Cheng mac_flow_fini(); 4435895Syz147064 444269Sericheng mod_hash_destroy_hash(i_mac_impl_hash); 445269Sericheng rw_destroy(&i_mac_impl_lock); 4460Sstevel@tonic-gate 4478275SEric Cheng mac_client_fini(); 4488275SEric Cheng kmem_cache_destroy(mac_ring_cache); 4492311Sseb 4502311Sseb mod_hash_destroy_hash(i_mactype_hash); 4518275SEric Cheng mac_soft_ring_finish(); 4520Sstevel@tonic-gate return (0); 4530Sstevel@tonic-gate } 4540Sstevel@tonic-gate 4558275SEric Cheng void 4568275SEric Cheng mac_init_ops(struct dev_ops *ops, const char *name) 4578275SEric Cheng { 4588275SEric Cheng dld_init_ops(ops, name); 4598275SEric Cheng } 4608275SEric Cheng 4618275SEric Cheng void 4628275SEric Cheng mac_fini_ops(struct dev_ops *ops) 4638275SEric Cheng { 4648275SEric Cheng dld_fini_ops(ops); 4658275SEric Cheng } 
4668275SEric Cheng 4678275SEric Cheng /*ARGSUSED*/ 4688275SEric Cheng static int 4698275SEric Cheng i_mac_constructor(void *buf, void *arg, int kmflag) 4708275SEric Cheng { 4718275SEric Cheng mac_impl_t *mip = buf; 4728275SEric Cheng 4738275SEric Cheng bzero(buf, sizeof (mac_impl_t)); 4748275SEric Cheng 4758275SEric Cheng mip->mi_linkstate = LINK_STATE_UNKNOWN; 4768275SEric Cheng mip->mi_nclients = 0; 4778275SEric Cheng 4788275SEric Cheng mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); 4798275SEric Cheng rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); 4808275SEric Cheng mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); 4818275SEric Cheng mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); 4828275SEric Cheng mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL); 4838275SEric Cheng 4848275SEric Cheng mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock; 4858275SEric Cheng cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4868275SEric Cheng mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock; 4878275SEric Cheng cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4888275SEric Cheng return (0); 4898275SEric Cheng } 4908275SEric Cheng 4918275SEric Cheng /*ARGSUSED*/ 4928275SEric Cheng static void 4938275SEric Cheng i_mac_destructor(void *buf, void *arg) 4948275SEric Cheng { 4958275SEric Cheng mac_impl_t *mip = buf; 4968275SEric Cheng mac_cb_info_t *mcbi; 4978275SEric Cheng 4988275SEric Cheng ASSERT(mip->mi_ref == 0); 4998275SEric Cheng ASSERT(mip->mi_active == 0); 5008275SEric Cheng ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN); 5018275SEric Cheng ASSERT(mip->mi_devpromisc == 0); 5028275SEric Cheng ASSERT(mip->mi_promisc == 0); 5038275SEric Cheng ASSERT(mip->mi_ksp == NULL); 5048275SEric Cheng ASSERT(mip->mi_kstat_count == 0); 5058275SEric Cheng ASSERT(mip->mi_nclients == 0); 5068275SEric Cheng ASSERT(mip->mi_nactiveclients == 0); 5078275SEric Cheng ASSERT(mip->mi_state_flags == 0); 5088275SEric 
Cheng ASSERT(mip->mi_factory_addr == NULL); 5098275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 5108275SEric Cheng ASSERT(mip->mi_default_tx_ring == NULL); 5118275SEric Cheng 5128275SEric Cheng mcbi = &mip->mi_notify_cb_info; 5138275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0); 5148275SEric Cheng ASSERT(mip->mi_notify_bits == 0); 5158275SEric Cheng ASSERT(mip->mi_notify_thread == NULL); 5168275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock); 5178275SEric Cheng mcbi->mcbi_lockp = NULL; 5188275SEric Cheng 5198275SEric Cheng mcbi = &mip->mi_promisc_cb_info; 5208275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL); 5218275SEric Cheng ASSERT(mip->mi_promisc_list == NULL); 5228275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock); 5238275SEric Cheng mcbi->mcbi_lockp = NULL; 5248275SEric Cheng 5258275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); 5268275SEric Cheng ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); 5278275SEric Cheng 5288275SEric Cheng mutex_destroy(&mip->mi_lock); 5298275SEric Cheng rw_destroy(&mip->mi_rw_lock); 5308275SEric Cheng 5318275SEric Cheng mutex_destroy(&mip->mi_promisc_lock); 5328275SEric Cheng cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv); 5338275SEric Cheng mutex_destroy(&mip->mi_notify_lock); 5348275SEric Cheng cv_destroy(&mip->mi_notify_cb_info.mcbi_cv); 5358275SEric Cheng mutex_destroy(&mip->mi_ring_lock); 5368275SEric Cheng } 5378275SEric Cheng 5388275SEric Cheng /* ARGSUSED */ 5398275SEric Cheng static int 5408275SEric Cheng i_mac_ring_ctor(void *buf, void *arg, int kmflag) 5418275SEric Cheng { 5428275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5438275SEric Cheng 5448275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 5458275SEric Cheng cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL); 5468275SEric Cheng mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL); 5478275SEric Cheng ring->mr_state = MR_FREE; 
5488275SEric Cheng return (0); 5498275SEric Cheng } 5508275SEric Cheng 5518275SEric Cheng /* ARGSUSED */ 5528275SEric Cheng static void 5538275SEric Cheng i_mac_ring_dtor(void *buf, void *arg) 5548275SEric Cheng { 5558275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5568275SEric Cheng 5578275SEric Cheng cv_destroy(&ring->mr_cv); 5588275SEric Cheng mutex_destroy(&ring->mr_lock); 5598275SEric Cheng } 5608275SEric Cheng 5618275SEric Cheng /* 5628275SEric Cheng * Common functions to do mac callback addition and deletion. Currently this is 5638275SEric Cheng * used by promisc callbacks and notify callbacks. List addition and deletion 5648275SEric Cheng * need to take care of list walkers. List walkers in general, can't hold list 5658275SEric Cheng * locks and make upcall callbacks due to potential lock order and recursive 5668275SEric Cheng * reentry issues. Instead list walkers increment the list walker count to mark 5678275SEric Cheng * the presence of a walker thread. Addition can be carefully done to ensure 5688275SEric Cheng * that the list walker always sees either the old list or the new list. 5698275SEric Cheng * However the deletion can't be done while the walker is active, instead the 5708275SEric Cheng * deleting thread simply marks the entry as logically deleted. The last walker 5718275SEric Cheng * physically deletes and frees up the logically deleted entries when the walk 5728275SEric Cheng * is complete. 
5738275SEric Cheng */ 5748275SEric Cheng void 5758275SEric Cheng mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 5768275SEric Cheng mac_cb_t *mcb_elem) 5778275SEric Cheng { 5788275SEric Cheng mac_cb_t *p; 5798275SEric Cheng mac_cb_t **pp; 5808275SEric Cheng 5818275SEric Cheng /* Verify it is not already in the list */ 5828275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 5838275SEric Cheng if (p == mcb_elem) 5848275SEric Cheng break; 5858275SEric Cheng } 5868275SEric Cheng VERIFY(p == NULL); 5878275SEric Cheng 5888275SEric Cheng /* 5898275SEric Cheng * Add it to the head of the callback list. The membar ensures that 5908275SEric Cheng * the following list pointer manipulations reach global visibility 5918275SEric Cheng * in exactly the program order below. 5928275SEric Cheng */ 5938275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 5948275SEric Cheng 5958275SEric Cheng mcb_elem->mcb_nextp = *mcb_head; 5968275SEric Cheng membar_producer(); 5978275SEric Cheng *mcb_head = mcb_elem; 5988275SEric Cheng } 5998275SEric Cheng 6008275SEric Cheng /* 6018275SEric Cheng * Mark the entry as logically deleted. If there aren't any walkers unlink 6028275SEric Cheng * from the list. In either case return the corresponding status. 
6038275SEric Cheng */ 6048275SEric Cheng boolean_t 6058275SEric Cheng mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 6068275SEric Cheng mac_cb_t *mcb_elem) 6078275SEric Cheng { 6088275SEric Cheng mac_cb_t *p; 6098275SEric Cheng mac_cb_t **pp; 6108275SEric Cheng 6118275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6128275SEric Cheng /* 6138275SEric Cheng * Search the callback list for the entry to be removed 6148275SEric Cheng */ 6158275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 6168275SEric Cheng if (p == mcb_elem) 6178275SEric Cheng break; 6188275SEric Cheng } 6198275SEric Cheng VERIFY(p != NULL); 6208275SEric Cheng 6218275SEric Cheng /* 6228275SEric Cheng * If there are walkers just mark it as deleted and the last walker 6238275SEric Cheng * will remove from the list and free it. 6248275SEric Cheng */ 6258275SEric Cheng if (mcbi->mcbi_walker_cnt != 0) { 6268275SEric Cheng p->mcb_flags |= MCB_CONDEMNED; 6278275SEric Cheng mcbi->mcbi_del_cnt++; 6288275SEric Cheng return (B_FALSE); 6298275SEric Cheng } 6308275SEric Cheng 6318275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0); 6328275SEric Cheng *pp = p->mcb_nextp; 6338275SEric Cheng p->mcb_nextp = NULL; 6348275SEric Cheng return (B_TRUE); 6358275SEric Cheng } 6368275SEric Cheng 6378275SEric Cheng /* 6388275SEric Cheng * Wait for all pending callback removals to be completed 6398275SEric Cheng */ 6408275SEric Cheng void 6418275SEric Cheng mac_callback_remove_wait(mac_cb_info_t *mcbi) 6428275SEric Cheng { 6438275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6448275SEric Cheng while (mcbi->mcbi_del_cnt != 0) { 6458275SEric Cheng DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi); 6468275SEric Cheng cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 6478275SEric Cheng } 6488275SEric Cheng } 6498275SEric Cheng 6500Sstevel@tonic-gate /* 6518275SEric Cheng * The last mac callback walker does the cleanup. 
Walk the list and unlik 6528275SEric Cheng * all the logically deleted entries and construct a temporary list of 6538275SEric Cheng * removed entries. Return the list of removed entries to the caller. 6548275SEric Cheng */ 6558275SEric Cheng mac_cb_t * 6568275SEric Cheng mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head) 6578275SEric Cheng { 6588275SEric Cheng mac_cb_t *p; 6598275SEric Cheng mac_cb_t **pp; 6608275SEric Cheng mac_cb_t *rmlist = NULL; /* List of removed elements */ 6618275SEric Cheng int cnt = 0; 6628275SEric Cheng 6638275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6648275SEric Cheng ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0); 6658275SEric Cheng 6668275SEric Cheng pp = mcb_head; 6678275SEric Cheng while (*pp != NULL) { 6688275SEric Cheng if ((*pp)->mcb_flags & MCB_CONDEMNED) { 6698275SEric Cheng p = *pp; 6708275SEric Cheng *pp = p->mcb_nextp; 6718275SEric Cheng p->mcb_nextp = rmlist; 6728275SEric Cheng rmlist = p; 6738275SEric Cheng cnt++; 6748275SEric Cheng continue; 6758275SEric Cheng } 6768275SEric Cheng pp = &(*pp)->mcb_nextp; 6778275SEric Cheng } 6788275SEric Cheng 6798275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == cnt); 6808275SEric Cheng mcbi->mcbi_del_cnt = 0; 6818275SEric Cheng return (rmlist); 6828275SEric Cheng } 6838275SEric Cheng 6848275SEric Cheng boolean_t 6858275SEric Cheng mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) 6868275SEric Cheng { 6878275SEric Cheng mac_cb_t *mcb; 6888275SEric Cheng 6898275SEric Cheng /* Verify it is not already in the list */ 6908275SEric Cheng for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) { 6918275SEric Cheng if (mcb == mcb_elem) 6928275SEric Cheng return (B_TRUE); 6938275SEric Cheng } 6948275SEric Cheng 6958275SEric Cheng return (B_FALSE); 6968275SEric Cheng } 6978275SEric Cheng 6988275SEric Cheng boolean_t 6998275SEric Cheng mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) 7008275SEric Cheng { 7018275SEric 
Cheng boolean_t found; 7028275SEric Cheng 7038275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 7048275SEric Cheng found = mac_callback_lookup(mcb_headp, mcb_elem); 7058275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 7068275SEric Cheng 7078275SEric Cheng return (found); 7088275SEric Cheng } 7098275SEric Cheng 7108275SEric Cheng /* Free the list of removed callbacks */ 7118275SEric Cheng void 7128275SEric Cheng mac_callback_free(mac_cb_t *rmlist) 7138275SEric Cheng { 7148275SEric Cheng mac_cb_t *mcb; 7158275SEric Cheng mac_cb_t *mcb_next; 7168275SEric Cheng 7178275SEric Cheng for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { 7188275SEric Cheng mcb_next = mcb->mcb_nextp; 7198275SEric Cheng kmem_free(mcb->mcb_objp, mcb->mcb_objsize); 7208275SEric Cheng } 7218275SEric Cheng } 7228275SEric Cheng 7238275SEric Cheng /* 7248275SEric Cheng * The promisc callbacks are in 2 lists, one off the 'mip' and another off the 7258275SEric Cheng * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there 7268275SEric Cheng * is only a single shared total walker count, and an entry can't be physically 7278275SEric Cheng * unlinked if a walker is active on either list. The last walker does this 7288275SEric Cheng * cleanup of logically deleted entries. 7298275SEric Cheng */ 7308275SEric Cheng void 7318275SEric Cheng i_mac_promisc_walker_cleanup(mac_impl_t *mip) 7328275SEric Cheng { 7338275SEric Cheng mac_cb_t *rmlist; 7348275SEric Cheng mac_cb_t *mcb; 7358275SEric Cheng mac_cb_t *mcb_next; 7368275SEric Cheng mac_promisc_impl_t *mpip; 7378275SEric Cheng 7388275SEric Cheng /* 7398275SEric Cheng * Construct a temporary list of deleted callbacks by walking the 7408275SEric Cheng * the mi_promisc_list. Then for each entry in the temporary list, 7418275SEric Cheng * remove it from the mci_promisc_list and free the entry. 
7428275SEric Cheng */ 7438275SEric Cheng rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info, 7448275SEric Cheng &mip->mi_promisc_list); 7458275SEric Cheng 7468275SEric Cheng for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { 7478275SEric Cheng mcb_next = mcb->mcb_nextp; 7488275SEric Cheng mpip = (mac_promisc_impl_t *)mcb->mcb_objp; 7498275SEric Cheng VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info, 7508275SEric Cheng &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link)); 7518275SEric Cheng mcb->mcb_flags = 0; 7528275SEric Cheng mcb->mcb_nextp = NULL; 7538275SEric Cheng kmem_cache_free(mac_promisc_impl_cache, mpip); 7548275SEric Cheng } 7558275SEric Cheng } 7568275SEric Cheng 7578275SEric Cheng void 7588275SEric Cheng i_mac_notify(mac_impl_t *mip, mac_notify_type_t type) 7598275SEric Cheng { 7608275SEric Cheng mac_cb_info_t *mcbi; 7618275SEric Cheng 7628275SEric Cheng /* 7638275SEric Cheng * Signal the notify thread even after mi_ref has become zero and 7648275SEric Cheng * mi_disabled is set. The synchronization with the notify thread 7658275SEric Cheng * happens in mac_unregister and that implies the driver must make 7668275SEric Cheng * sure it is single-threaded (with respect to mac calls) and that 7678275SEric Cheng * all pending mac calls have returned before it calls mac_unregister 7688275SEric Cheng */ 7698275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 7708275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) 7718275SEric Cheng goto exit; 7728275SEric Cheng 7738275SEric Cheng /* 7748275SEric Cheng * Guard against incorrect notifications. (Running a newer 7758275SEric Cheng * mac client against an older implementation?) 
7768275SEric Cheng */ 7778275SEric Cheng if (type >= MAC_NNOTE) 7788275SEric Cheng goto exit; 7798275SEric Cheng 7808275SEric Cheng mcbi = &mip->mi_notify_cb_info; 7818275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 7828275SEric Cheng mip->mi_notify_bits |= (1 << type); 7838275SEric Cheng cv_broadcast(&mcbi->mcbi_cv); 7848275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 7858275SEric Cheng 7868275SEric Cheng exit: 7878275SEric Cheng rw_exit(&i_mac_impl_lock); 7888275SEric Cheng } 7898275SEric Cheng 7908275SEric Cheng /* 7918275SEric Cheng * Mac serialization primitives. Please see the block comment at the 7928275SEric Cheng * top of the file. 7930Sstevel@tonic-gate */ 7948275SEric Cheng void 7958275SEric Cheng i_mac_perim_enter(mac_impl_t *mip) 7968275SEric Cheng { 7978275SEric Cheng mac_client_impl_t *mcip; 7988275SEric Cheng 7998275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8008275SEric Cheng /* 8018275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8028275SEric Cheng * we want to serialize on. 
8038275SEric Cheng */ 8048275SEric Cheng mcip = mac_vnic_lower(mip); 8058275SEric Cheng mip = mcip->mci_mip; 8068275SEric Cheng } 8078275SEric Cheng 8088275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8098275SEric Cheng if (mip->mi_perim_owner == curthread) { 8108275SEric Cheng mip->mi_perim_ocnt++; 8118275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8128275SEric Cheng return; 8138275SEric Cheng } 8148275SEric Cheng 8158275SEric Cheng while (mip->mi_perim_owner != NULL) 8168275SEric Cheng cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock); 8178275SEric Cheng 8188275SEric Cheng mip->mi_perim_owner = curthread; 8198275SEric Cheng ASSERT(mip->mi_perim_ocnt == 0); 8208275SEric Cheng mip->mi_perim_ocnt++; 8218275SEric Cheng #ifdef DEBUG 8228275SEric Cheng mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack, 8238275SEric Cheng MAC_PERIM_STACK_DEPTH); 8248275SEric Cheng #endif 8258275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8268275SEric Cheng } 8278275SEric Cheng 8288275SEric Cheng int 8298275SEric Cheng i_mac_perim_enter_nowait(mac_impl_t *mip) 8308275SEric Cheng { 8318275SEric Cheng /* 8328275SEric Cheng * The vnic is a special case, since the serialization is done based 8338275SEric Cheng * on the lower mac. If the lower mac is busy, it does not imply the 8348275SEric Cheng * vnic can't be unregistered. But in the case of other drivers, 8358275SEric Cheng * a busy perimeter or open mac handles implies that the mac is busy 8368275SEric Cheng * and can't be unregistered. 
8378275SEric Cheng */ 8388275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8398275SEric Cheng i_mac_perim_enter(mip); 8408275SEric Cheng return (0); 8418275SEric Cheng } 8428275SEric Cheng 8438275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8448275SEric Cheng if (mip->mi_perim_owner != NULL) { 8458275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8468275SEric Cheng return (EBUSY); 8478275SEric Cheng } 8488275SEric Cheng ASSERT(mip->mi_perim_ocnt == 0); 8498275SEric Cheng mip->mi_perim_owner = curthread; 8508275SEric Cheng mip->mi_perim_ocnt++; 8518275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8528275SEric Cheng 8538275SEric Cheng return (0); 8548275SEric Cheng } 8558275SEric Cheng 8568275SEric Cheng void 8578275SEric Cheng i_mac_perim_exit(mac_impl_t *mip) 8588275SEric Cheng { 8598275SEric Cheng mac_client_impl_t *mcip; 8608275SEric Cheng 8618275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8628275SEric Cheng /* 8638275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8648275SEric Cheng * we want to serialize on. 8658275SEric Cheng */ 8668275SEric Cheng mcip = mac_vnic_lower(mip); 8678275SEric Cheng mip = mcip->mci_mip; 8688275SEric Cheng } 8698275SEric Cheng 8708275SEric Cheng ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0); 8718275SEric Cheng 8728275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8738275SEric Cheng if (--mip->mi_perim_ocnt == 0) { 8748275SEric Cheng mip->mi_perim_owner = NULL; 8758275SEric Cheng cv_signal(&mip->mi_perim_cv); 8768275SEric Cheng } 8778275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8788275SEric Cheng } 8798275SEric Cheng 8808275SEric Cheng /* 8818275SEric Cheng * Returns whether the current thread holds the mac perimeter. Used in making 8828275SEric Cheng * assertions. 
8838275SEric Cheng */ 8848275SEric Cheng boolean_t 8858275SEric Cheng mac_perim_held(mac_handle_t mh) 8868275SEric Cheng { 8878275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 8888275SEric Cheng mac_client_impl_t *mcip; 8898275SEric Cheng 8908275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8918275SEric Cheng /* 8928275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8938275SEric Cheng * we want to serialize on. 8948275SEric Cheng */ 8958275SEric Cheng mcip = mac_vnic_lower(mip); 8968275SEric Cheng mip = mcip->mci_mip; 8978275SEric Cheng } 8988275SEric Cheng return (mip->mi_perim_owner == curthread); 8998275SEric Cheng } 9008275SEric Cheng 9018275SEric Cheng /* 9028275SEric Cheng * mac client interfaces to enter the mac perimeter of a mac end point, given 9038275SEric Cheng * its mac handle, or macname or linkid. 9048275SEric Cheng */ 9058275SEric Cheng void 9068275SEric Cheng mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp) 9078275SEric Cheng { 9088275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 9098275SEric Cheng 9108275SEric Cheng i_mac_perim_enter(mip); 9118275SEric Cheng /* 9128275SEric Cheng * The mac_perim_handle_t returned encodes the 'mip' and whether a 9138275SEric Cheng * mac_open has been done internally while entering the perimeter. 
9148275SEric Cheng * This information is used in mac_perim_exit 9158275SEric Cheng */ 9168275SEric Cheng MAC_ENCODE_MPH(*mphp, mip, 0); 9178275SEric Cheng } 9188275SEric Cheng 9198275SEric Cheng int 9208275SEric Cheng mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp) 9218275SEric Cheng { 9228275SEric Cheng int err; 9238275SEric Cheng mac_handle_t mh; 9248275SEric Cheng 9258275SEric Cheng if ((err = mac_open(name, &mh)) != 0) 9268275SEric Cheng return (err); 9278275SEric Cheng 9288275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9298275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9308275SEric Cheng return (0); 9318275SEric Cheng } 9328275SEric Cheng 9338275SEric Cheng int 9348275SEric Cheng mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp) 9358275SEric Cheng { 9368275SEric Cheng int err; 9378275SEric Cheng mac_handle_t mh; 9388275SEric Cheng 9398275SEric Cheng if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 9408275SEric Cheng return (err); 9418275SEric Cheng 9428275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9438275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9448275SEric Cheng return (0); 9458275SEric Cheng } 9468275SEric Cheng 9478275SEric Cheng void 9488275SEric Cheng mac_perim_exit(mac_perim_handle_t mph) 9498275SEric Cheng { 9508275SEric Cheng mac_impl_t *mip; 9518275SEric Cheng boolean_t need_close; 9528275SEric Cheng 9538275SEric Cheng MAC_DECODE_MPH(mph, mip, need_close); 9548275SEric Cheng i_mac_perim_exit(mip); 9558275SEric Cheng if (need_close) 9568275SEric Cheng mac_close((mac_handle_t)mip); 9578275SEric Cheng } 9588275SEric Cheng 9598275SEric Cheng int 9605895Syz147064 mac_hold(const char *macname, mac_impl_t **pmip) 9610Sstevel@tonic-gate { 9620Sstevel@tonic-gate mac_impl_t *mip; 9630Sstevel@tonic-gate int err; 9640Sstevel@tonic-gate 9650Sstevel@tonic-gate /* 9660Sstevel@tonic-gate * Check the device name length to make sure it won't overflow our 9670Sstevel@tonic-gate * buffer. 
9680Sstevel@tonic-gate */ 9692311Sseb if (strlen(macname) >= MAXNAMELEN) 9700Sstevel@tonic-gate return (EINVAL); 9710Sstevel@tonic-gate 9720Sstevel@tonic-gate /* 9735895Syz147064 * Look up its entry in the global hash table. 9740Sstevel@tonic-gate */ 9755895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 9765895Syz147064 err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, 9775895Syz147064 (mod_hash_val_t *)&mip); 9785895Syz147064 9795895Syz147064 if (err != 0) { 9805895Syz147064 rw_exit(&i_mac_impl_lock); 9815895Syz147064 return (ENOENT); 9825895Syz147064 } 9835895Syz147064 9848275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 9855895Syz147064 rw_exit(&i_mac_impl_lock); 9865895Syz147064 return (ENOENT); 9875895Syz147064 } 9885895Syz147064 9898275SEric Cheng if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) { 9905895Syz147064 rw_exit(&i_mac_impl_lock); 9915895Syz147064 return (EBUSY); 9925895Syz147064 } 9935895Syz147064 9945895Syz147064 mip->mi_ref++; 9955895Syz147064 rw_exit(&i_mac_impl_lock); 9965895Syz147064 9975895Syz147064 *pmip = mip; 9985895Syz147064 return (0); 9995895Syz147064 } 10005895Syz147064 10018275SEric Cheng void 10025895Syz147064 mac_rele(mac_impl_t *mip) 10035895Syz147064 { 10045895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 10055895Syz147064 ASSERT(mip->mi_ref != 0); 10068275SEric Cheng if (--mip->mi_ref == 0) { 10078275SEric Cheng ASSERT(mip->mi_nactiveclients == 0 && 10088275SEric Cheng !(mip->mi_state_flags & MIS_EXCLUSIVE)); 10095895Syz147064 } 10105895Syz147064 rw_exit(&i_mac_impl_lock); 10115895Syz147064 } 10125895Syz147064 10138275SEric Cheng /* 10148275SEric Cheng * This function is called only by mac_client_open. 
10158275SEric Cheng */ 10165895Syz147064 int 10178275SEric Cheng mac_start(mac_impl_t *mip) 10180Sstevel@tonic-gate { 10198275SEric Cheng int err = 0; 10208275SEric Cheng 10218275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10222311Sseb ASSERT(mip->mi_start != NULL); 10230Sstevel@tonic-gate 10240Sstevel@tonic-gate /* 10250Sstevel@tonic-gate * Check whether the device is already started. 10260Sstevel@tonic-gate */ 10278275SEric Cheng if (mip->mi_active++ == 0) { 10288275SEric Cheng mac_ring_t *ring = NULL; 10298275SEric Cheng 10308275SEric Cheng /* 10318275SEric Cheng * Start the device. 10328275SEric Cheng */ 10338275SEric Cheng err = mip->mi_start(mip->mi_driver); 10348275SEric Cheng if (err != 0) { 10358275SEric Cheng mip->mi_active--; 10368275SEric Cheng return (err); 10378275SEric Cheng } 10388275SEric Cheng 10390Sstevel@tonic-gate /* 10408275SEric Cheng * Start the default tx ring. 10410Sstevel@tonic-gate */ 10428275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 10438275SEric Cheng 10448275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 10458275SEric Cheng err = mac_start_ring(ring); 10468275SEric Cheng if (err != 0) { 10478275SEric Cheng mip->mi_active--; 10488275SEric Cheng return (err); 10498275SEric Cheng } 10508275SEric Cheng ring->mr_state = MR_INUSE; 10518275SEric Cheng } 10528275SEric Cheng 10538275SEric Cheng if (mip->mi_rx_groups != NULL) { 10548275SEric Cheng /* 10558275SEric Cheng * Start the default ring, since it will be needed 10568275SEric Cheng * to receive broadcast and multicast traffic for 10578275SEric Cheng * both primary and non-primary MAC clients. 
10588275SEric Cheng */ 10598275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 10608275SEric Cheng 10618275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 10628275SEric Cheng err = mac_start_group_and_rings(grp); 10638275SEric Cheng if (err != 0) { 10648275SEric Cheng mip->mi_active--; 10658275SEric Cheng if (ring != NULL) { 10668275SEric Cheng mac_stop_ring(ring); 10678275SEric Cheng ring->mr_state = MR_FREE; 10688275SEric Cheng } 10698275SEric Cheng return (err); 10708275SEric Cheng } 10718275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); 10728275SEric Cheng } 10730Sstevel@tonic-gate } 10740Sstevel@tonic-gate 10750Sstevel@tonic-gate return (err); 10760Sstevel@tonic-gate } 10770Sstevel@tonic-gate 10788275SEric Cheng /* 10798275SEric Cheng * This function is called only by mac_client_close. 10808275SEric Cheng */ 10810Sstevel@tonic-gate void 10828275SEric Cheng mac_stop(mac_impl_t *mip) 10830Sstevel@tonic-gate { 10842311Sseb ASSERT(mip->mi_stop != NULL); 10858275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10860Sstevel@tonic-gate 10870Sstevel@tonic-gate /* 10880Sstevel@tonic-gate * Check whether the device is still needed. 10890Sstevel@tonic-gate */ 10900Sstevel@tonic-gate ASSERT(mip->mi_active != 0); 10918275SEric Cheng if (--mip->mi_active == 0) { 10928275SEric Cheng if (mip->mi_rx_groups != NULL) { 10930Sstevel@tonic-gate /* 10948275SEric Cheng * There should be no more active clients since the 10958275SEric Cheng * MAC is being stopped. Stop the default RX group 10968275SEric Cheng * and transition it back to registered state. 10970Sstevel@tonic-gate */ 10988275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 10990Sstevel@tonic-gate 11000Sstevel@tonic-gate /* 11018275SEric Cheng * When clients are torn down, the groups 11028275SEric Cheng * are release via mac_release_rx_group which 11038275SEric Cheng * knows the the default group is always in 11048275SEric Cheng * started mode since broadcast uses it. 
So 11058275SEric Cheng * we can assert that their are no clients 11068275SEric Cheng * (since mac_bcast_add doesn't register itself 11078275SEric Cheng * as a client) and group is in SHARED state. 11080Sstevel@tonic-gate */ 11098275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); 11108275SEric Cheng ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && 11118275SEric Cheng mip->mi_nactiveclients == 0); 11128275SEric Cheng mac_stop_group_and_rings(grp); 11138275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 11140Sstevel@tonic-gate } 11158275SEric Cheng 11168275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 11178275SEric Cheng mac_ring_t *ring; 11188275SEric Cheng 11198275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 11208275SEric Cheng mac_stop_ring(ring); 11218275SEric Cheng ring->mr_state = MR_FREE; 11228275SEric Cheng } 11238275SEric Cheng 11248275SEric Cheng /* 11258275SEric Cheng * Stop the device. 11268275SEric Cheng */ 11278275SEric Cheng mip->mi_stop(mip->mi_driver); 11282331Skrgopi } 11292331Skrgopi } 11302331Skrgopi 11310Sstevel@tonic-gate int 11328275SEric Cheng i_mac_promisc_set(mac_impl_t *mip, boolean_t on, mac_promisc_type_t ptype) 11330Sstevel@tonic-gate { 11340Sstevel@tonic-gate int err = 0; 11350Sstevel@tonic-gate 11368275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11372311Sseb ASSERT(mip->mi_setpromisc != NULL); 11380Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 11390Sstevel@tonic-gate 11400Sstevel@tonic-gate /* 11410Sstevel@tonic-gate * Determine whether we should enable or disable promiscuous mode. 11420Sstevel@tonic-gate * For details on the distinction between "device promiscuous mode" 11430Sstevel@tonic-gate * and "MAC promiscuous mode", see PSARC/2005/289. 11440Sstevel@tonic-gate */ 11450Sstevel@tonic-gate if (on) { 11460Sstevel@tonic-gate /* 11470Sstevel@tonic-gate * Enable promiscuous mode on the device if not yet enabled. 
11480Sstevel@tonic-gate */ 11490Sstevel@tonic-gate if (mip->mi_devpromisc++ == 0) { 11502311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_TRUE); 11512311Sseb if (err != 0) { 11520Sstevel@tonic-gate mip->mi_devpromisc--; 11538275SEric Cheng return (err); 11540Sstevel@tonic-gate } 11550Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11560Sstevel@tonic-gate } 11570Sstevel@tonic-gate 11580Sstevel@tonic-gate /* 11590Sstevel@tonic-gate * Enable promiscuous mode on the MAC if not yet enabled. 11600Sstevel@tonic-gate */ 11610Sstevel@tonic-gate if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0) 11620Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11630Sstevel@tonic-gate } else { 11648275SEric Cheng if (mip->mi_devpromisc == 0) 11658275SEric Cheng return (EPROTO); 11668275SEric Cheng 11670Sstevel@tonic-gate /* 11680Sstevel@tonic-gate * Disable promiscuous mode on the device if this is the last 11690Sstevel@tonic-gate * enabling. 11700Sstevel@tonic-gate */ 11710Sstevel@tonic-gate if (--mip->mi_devpromisc == 0) { 11722311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_FALSE); 11732311Sseb if (err != 0) { 11740Sstevel@tonic-gate mip->mi_devpromisc++; 11758275SEric Cheng return (err); 11760Sstevel@tonic-gate } 11770Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11780Sstevel@tonic-gate } 11790Sstevel@tonic-gate 11800Sstevel@tonic-gate /* 11810Sstevel@tonic-gate * Disable promiscuous mode on the MAC if this is the last 11820Sstevel@tonic-gate * enabling. 
11830Sstevel@tonic-gate */ 11840Sstevel@tonic-gate if (ptype == MAC_PROMISC && --mip->mi_promisc == 0) 11850Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11860Sstevel@tonic-gate } 11870Sstevel@tonic-gate 11888275SEric Cheng return (0); 11890Sstevel@tonic-gate } 11900Sstevel@tonic-gate 11918275SEric Cheng int 11928275SEric Cheng mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype) 11938275SEric Cheng { 11948275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 11958275SEric Cheng int rv; 11968275SEric Cheng 11978275SEric Cheng i_mac_perim_enter(mip); 11988275SEric Cheng rv = i_mac_promisc_set(mip, on, ptype); 11998275SEric Cheng i_mac_perim_exit(mip); 12008275SEric Cheng 12018275SEric Cheng return (rv); 12028275SEric Cheng } 12038275SEric Cheng 12048275SEric Cheng /* 12058275SEric Cheng * The promiscuity state can change any time. If the caller needs to take 12068275SEric Cheng * actions that are atomic with the promiscuity state, then the caller needs 12078275SEric Cheng * to bracket the entire sequence with mac_perim_enter/exit 12088275SEric Cheng */ 12090Sstevel@tonic-gate boolean_t 12100Sstevel@tonic-gate mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype) 12110Sstevel@tonic-gate { 12120Sstevel@tonic-gate mac_impl_t *mip = (mac_impl_t *)mh; 12130Sstevel@tonic-gate 12140Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 12150Sstevel@tonic-gate 12160Sstevel@tonic-gate /* 12170Sstevel@tonic-gate * Return the current promiscuity. 12180Sstevel@tonic-gate */ 12190Sstevel@tonic-gate if (ptype == MAC_DEVPROMISC) 12200Sstevel@tonic-gate return (mip->mi_devpromisc != 0); 12210Sstevel@tonic-gate else 12220Sstevel@tonic-gate return (mip->mi_promisc != 0); 12230Sstevel@tonic-gate } 12240Sstevel@tonic-gate 12258275SEric Cheng /* 12268275SEric Cheng * Invoked at MAC instance attach time to initialize the list 12278275SEric Cheng * of factory MAC addresses supported by a MAC instance. 
This function 12288275SEric Cheng * builds a local cache in the mac_impl_t for the MAC addresses 12298275SEric Cheng * supported by the underlying hardware. The MAC clients themselves 12308275SEric Cheng * use the mac_addr_factory*() functions to query and reserve 12318275SEric Cheng * factory MAC addresses. 12328275SEric Cheng */ 12330Sstevel@tonic-gate void 12348275SEric Cheng mac_addr_factory_init(mac_impl_t *mip) 12355903Ssowmini { 12368275SEric Cheng mac_capab_multifactaddr_t capab; 12378275SEric Cheng uint8_t *addr; 12388275SEric Cheng int i; 12390Sstevel@tonic-gate 12400Sstevel@tonic-gate /* 12418275SEric Cheng * First round to see how many factory MAC addresses are available. 12420Sstevel@tonic-gate */ 12438275SEric Cheng bzero(&capab, sizeof (capab)); 12448275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR, 12458275SEric Cheng &capab) || (capab.mcm_naddr == 0)) { 12466512Ssowmini /* 12478275SEric Cheng * The MAC instance doesn't support multiple factory 12488275SEric Cheng * MAC addresses, we're done here. 12496512Ssowmini */ 12506512Ssowmini return; 12515903Ssowmini } 12526512Ssowmini 12530Sstevel@tonic-gate /* 12548275SEric Cheng * Allocate the space and get all the factory addresses. 
125556Smeem */ 12568275SEric Cheng addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP); 12578275SEric Cheng capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr); 12588275SEric Cheng 12598275SEric Cheng mip->mi_factory_addr_num = capab.mcm_naddr; 12608275SEric Cheng mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num * 12618275SEric Cheng sizeof (mac_factory_addr_t), KM_SLEEP); 12628275SEric Cheng 12638275SEric Cheng for (i = 0; i < capab.mcm_naddr; i++) { 12648275SEric Cheng bcopy(addr + i * MAXMACADDRLEN, 12658275SEric Cheng mip->mi_factory_addr[i].mfa_addr, 12668275SEric Cheng mip->mi_type->mt_addr_length); 12678275SEric Cheng mip->mi_factory_addr[i].mfa_in_use = B_FALSE; 126856Smeem } 126956Smeem 12708275SEric Cheng kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN); 12718275SEric Cheng } 12728275SEric Cheng 12738275SEric Cheng void 12748275SEric Cheng mac_addr_factory_fini(mac_impl_t *mip) 12758275SEric Cheng { 12768275SEric Cheng if (mip->mi_factory_addr == NULL) { 12778275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 12788275SEric Cheng return; 12798275SEric Cheng } 12808275SEric Cheng 12818275SEric Cheng kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num * 12828275SEric Cheng sizeof (mac_factory_addr_t)); 12838275SEric Cheng 12848275SEric Cheng mip->mi_factory_addr = NULL; 12858275SEric Cheng mip->mi_factory_addr_num = 0; 12860Sstevel@tonic-gate } 12870Sstevel@tonic-gate 12885084Sjohnlev /* 12898275SEric Cheng * Reserve a factory MAC address. If *slot is set to -1, the function 12908275SEric Cheng * attempts to reserve any of the available factory MAC addresses and 12918275SEric Cheng * returns the reserved slot id. If no slots are available, the function 12928275SEric Cheng * returns ENOSPC. If *slot is not set to -1, the function reserves 12938275SEric Cheng * the specified slot if it is available, or returns EBUSY is the slot 12948275SEric Cheng * is already used. 
Returns ENOTSUP if the underlying MAC does not 12958275SEric Cheng * support multiple factory addresses. If the slot number is not -1 but 12968275SEric Cheng * is invalid, returns EINVAL. 12978275SEric Cheng */ 12988275SEric Cheng int 12998275SEric Cheng mac_addr_factory_reserve(mac_client_handle_t mch, int *slot) 13008275SEric Cheng { 13018275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13028275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13038275SEric Cheng int i, ret = 0; 13048275SEric Cheng 13058275SEric Cheng i_mac_perim_enter(mip); 13068275SEric Cheng /* 13078275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13088275SEric Cheng * view of the factory addresses 13098275SEric Cheng */ 13108275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13118275SEric Cheng 13128275SEric Cheng if (mip->mi_factory_addr_num == 0) { 13138275SEric Cheng ret = ENOTSUP; 13148275SEric Cheng goto bail; 13158275SEric Cheng } 13168275SEric Cheng 13178275SEric Cheng if (*slot != -1) { 13188275SEric Cheng /* check the specified slot */ 13198275SEric Cheng if (*slot < 1 || *slot > mip->mi_factory_addr_num) { 13208275SEric Cheng ret = EINVAL; 13218275SEric Cheng goto bail; 13228275SEric Cheng } 13238275SEric Cheng if (mip->mi_factory_addr[*slot-1].mfa_in_use) { 13248275SEric Cheng ret = EBUSY; 13258275SEric Cheng goto bail; 13268275SEric Cheng } 13278275SEric Cheng } else { 13288275SEric Cheng /* pick the next available slot */ 13298275SEric Cheng for (i = 0; i < mip->mi_factory_addr_num; i++) { 13308275SEric Cheng if (!mip->mi_factory_addr[i].mfa_in_use) 13318275SEric Cheng break; 13328275SEric Cheng } 13338275SEric Cheng 13348275SEric Cheng if (i == mip->mi_factory_addr_num) { 13358275SEric Cheng ret = ENOSPC; 13368275SEric Cheng goto bail; 13378275SEric Cheng } 13388275SEric Cheng *slot = i+1; 13398275SEric Cheng } 13408275SEric Cheng 13418275SEric Cheng mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE; 13428275SEric Cheng 
mip->mi_factory_addr[*slot-1].mfa_client = mcip; 13438275SEric Cheng 13448275SEric Cheng bail: 13458275SEric Cheng rw_exit(&mip->mi_rw_lock); 13468275SEric Cheng i_mac_perim_exit(mip); 13478275SEric Cheng return (ret); 13488275SEric Cheng } 13498275SEric Cheng 13508275SEric Cheng /* 13518275SEric Cheng * Release the specified factory MAC address slot. 13525084Sjohnlev */ 13538275SEric Cheng void 13548275SEric Cheng mac_addr_factory_release(mac_client_handle_t mch, uint_t slot) 13558275SEric Cheng { 13568275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13578275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13588275SEric Cheng 13598275SEric Cheng i_mac_perim_enter(mip); 13608275SEric Cheng /* 13618275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13628275SEric Cheng * view of the factory addresses 13638275SEric Cheng */ 13648275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13658275SEric Cheng 13668275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13678275SEric Cheng ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use); 13688275SEric Cheng 13698275SEric Cheng mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE; 13708275SEric Cheng 13718275SEric Cheng rw_exit(&mip->mi_rw_lock); 13728275SEric Cheng i_mac_perim_exit(mip); 13738275SEric Cheng } 13748275SEric Cheng 13758275SEric Cheng /* 13768275SEric Cheng * Stores in mac_addr the value of the specified MAC address. Returns 13778275SEric Cheng * 0 on success, or EINVAL if the slot number is not valid for the MAC. 13788275SEric Cheng * The caller must provide a string of at least MAXNAMELEN bytes. 
13798275SEric Cheng */ 13808275SEric Cheng void 13818275SEric Cheng mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr, 13828275SEric Cheng uint_t *addr_len, char *client_name, boolean_t *in_use_arg) 13835084Sjohnlev { 13848275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 13858275SEric Cheng boolean_t in_use; 13868275SEric Cheng 13878275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13888275SEric Cheng 13898275SEric Cheng /* 13908275SEric Cheng * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter 13918275SEric Cheng * and mi_rw_lock 13928275SEric Cheng */ 13938275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 13948275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN); 13958275SEric Cheng *addr_len = mip->mi_type->mt_addr_length; 13968275SEric Cheng in_use = mip->mi_factory_addr[slot-1].mfa_in_use; 13978275SEric Cheng if (in_use && client_name != NULL) { 13988275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name, 13998275SEric Cheng client_name, MAXNAMELEN); 14008275SEric Cheng } 14018275SEric Cheng if (in_use_arg != NULL) 14028275SEric Cheng *in_use_arg = in_use; 14038275SEric Cheng rw_exit(&mip->mi_rw_lock); 14048275SEric Cheng } 14058275SEric Cheng 14068275SEric Cheng /* 14078275SEric Cheng * Returns the number of factory MAC addresses (in addition to the 14088275SEric Cheng * primary MAC address), 0 if the underlying MAC doesn't support 14098275SEric Cheng * that feature. 
14108275SEric Cheng */ 14118275SEric Cheng uint_t 14128275SEric Cheng mac_addr_factory_num(mac_handle_t mh) 14138275SEric Cheng { 14148275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 14158275SEric Cheng 14168275SEric Cheng return (mip->mi_factory_addr_num); 14178275SEric Cheng } 14188275SEric Cheng 14198275SEric Cheng 14208275SEric Cheng void 14218275SEric Cheng mac_rx_group_unmark(mac_group_t *grp, uint_t flag) 14228275SEric Cheng { 14238275SEric Cheng mac_ring_t *ring; 14248275SEric Cheng 14258275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) 14268275SEric Cheng ring->mr_flag &= ~flag; 14275084Sjohnlev } 14285084Sjohnlev 14295084Sjohnlev /* 14308275SEric Cheng * The following mac_hwrings_xxx() functions are private mac client functions 14318275SEric Cheng * used by the aggr driver to access and control the underlying HW Rx group 14328275SEric Cheng * and rings. In this case, the aggr driver has exclusive control of the 14338275SEric Cheng * underlying HW Rx group/rings, it calls the following functions to 14348275SEric Cheng * start/stop the HW Rx rings, disable/enable polling, add/remove mac' 14358275SEric Cheng * addresses, or set up the Rx callback. 
14365084Sjohnlev */ 14378275SEric Cheng /* ARGSUSED */ 14388275SEric Cheng static void 14398275SEric Cheng mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs, 14408275SEric Cheng mblk_t *mp_chain, boolean_t loopback) 14410Sstevel@tonic-gate { 14428275SEric Cheng mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)srs; 14438275SEric Cheng mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; 14448275SEric Cheng mac_direct_rx_t proc; 14458275SEric Cheng void *arg1; 14468275SEric Cheng mac_resource_handle_t arg2; 14478275SEric Cheng 14488275SEric Cheng proc = srs_rx->sr_func; 14498275SEric Cheng arg1 = srs_rx->sr_arg1; 14508275SEric Cheng arg2 = mac_srs->srs_mrh; 14518275SEric Cheng 14528275SEric Cheng proc(arg1, arg2, mp_chain, NULL); 14530Sstevel@tonic-gate } 14540Sstevel@tonic-gate 14558275SEric Cheng /* 14568275SEric Cheng * This function is called to get the list of HW rings that are reserved by 14578275SEric Cheng * an exclusive mac client. 14588275SEric Cheng * 14598275SEric Cheng * Return value: the number of HW rings. 14608275SEric Cheng */ 14618275SEric Cheng int 14628275SEric Cheng mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, 14638275SEric Cheng mac_ring_handle_t *hwrh) 14640Sstevel@tonic-gate { 14658275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 14668275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 14678275SEric Cheng mac_group_t *grp = flent->fe_rx_ring_group; 14688275SEric Cheng mac_ring_t *ring; 14698275SEric Cheng int cnt = 0; 14700Sstevel@tonic-gate 14710Sstevel@tonic-gate /* 14728275SEric Cheng * The mac client did not reserve any RX group, return directly. 14738275SEric Cheng * This is probably because the underlying MAC does not support 14748275SEric Cheng * any RX groups. 
14758275SEric Cheng */ 14768275SEric Cheng *hwgh = NULL; 14778275SEric Cheng if (grp == NULL) 14788275SEric Cheng return (0); 14798275SEric Cheng 14808275SEric Cheng /* 14818275SEric Cheng * This RX group must be reserved by this mac client. 14820Sstevel@tonic-gate */ 14838275SEric Cheng ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && 14848275SEric Cheng (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp)))); 14858275SEric Cheng 14868275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) { 14878275SEric Cheng ASSERT(cnt < MAX_RINGS_PER_GROUP); 14888275SEric Cheng hwrh[cnt++] = (mac_ring_handle_t)ring; 14898275SEric Cheng } 14908275SEric Cheng *hwgh = (mac_group_handle_t)grp; 14918275SEric Cheng return (cnt); 14928275SEric Cheng } 14938275SEric Cheng 14948275SEric Cheng /* 14958275SEric Cheng * Setup the RX callback of the mac client which exclusively controls HW ring. 14968275SEric Cheng */ 14978275SEric Cheng void 14988275SEric Cheng mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) 14998275SEric Cheng { 15008275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15018275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15028275SEric Cheng 15038275SEric Cheng mac_srs->srs_mrh = prh; 15048275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; 15050Sstevel@tonic-gate } 15060Sstevel@tonic-gate 15070Sstevel@tonic-gate void 15088275SEric Cheng mac_hwring_teardown(mac_ring_handle_t hwrh) 15098275SEric Cheng { 15108275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15118275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15128275SEric Cheng 15138275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; 15148275SEric Cheng mac_srs->srs_mrh = NULL; 15158275SEric Cheng } 15168275SEric Cheng 15178275SEric Cheng int 15188275SEric Cheng mac_hwring_disable_intr(mac_ring_handle_t rh) 15190Sstevel@tonic-gate { 15208275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 
15218275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15228275SEric Cheng 15238275SEric Cheng return (intr->mi_disable(intr->mi_handle)); 15248275SEric Cheng } 15258275SEric Cheng 15268275SEric Cheng int 15278275SEric Cheng mac_hwring_enable_intr(mac_ring_handle_t rh) 15288275SEric Cheng { 15298275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15308275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15318275SEric Cheng 15328275SEric Cheng return (intr->mi_enable(intr->mi_handle)); 15338275SEric Cheng } 15348275SEric Cheng 15358275SEric Cheng int 15368275SEric Cheng mac_hwring_start(mac_ring_handle_t rh) 15378275SEric Cheng { 15388275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15398275SEric Cheng 15408275SEric Cheng MAC_RING_UNMARK(rr_ring, MR_QUIESCE); 15418275SEric Cheng return (0); 15420Sstevel@tonic-gate } 15430Sstevel@tonic-gate 15440Sstevel@tonic-gate void 15458275SEric Cheng mac_hwring_stop(mac_ring_handle_t rh) 15468275SEric Cheng { 15478275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15488275SEric Cheng 15498275SEric Cheng mac_rx_ring_quiesce(rr_ring, MR_QUIESCE); 15508275SEric Cheng } 15518275SEric Cheng 15528275SEric Cheng mblk_t * 15538275SEric Cheng mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) 15548275SEric Cheng { 15558275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15568275SEric Cheng mac_ring_info_t *info = &rr_ring->mr_info; 15578275SEric Cheng 15588275SEric Cheng return (info->mri_poll(info->mri_driver, bytes_to_pickup)); 15598275SEric Cheng } 15608275SEric Cheng 15618275SEric Cheng int 15628275SEric Cheng mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) 15638275SEric Cheng { 15648275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15658275SEric Cheng 15668275SEric Cheng return (mac_group_addmac(group, addr)); 15678275SEric Cheng } 15688275SEric Cheng 15698275SEric Cheng int 15708275SEric Cheng mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) 
15718275SEric Cheng { 15728275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15738275SEric Cheng 15748275SEric Cheng return (mac_group_remmac(group, addr)); 15758275SEric Cheng } 15768275SEric Cheng 15778275SEric Cheng /* 15788275SEric Cheng * Set the RX group to be shared/reserved. Note that the group must be 15798275SEric Cheng * started/stopped outside of this function. 15808275SEric Cheng */ 15818275SEric Cheng void 15828275SEric Cheng mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) 15830Sstevel@tonic-gate { 15848275SEric Cheng /* 15858275SEric Cheng * If there is no change in the group state, just return. 15868275SEric Cheng */ 15878275SEric Cheng if (grp->mrg_state == state) 15888275SEric Cheng return; 15898275SEric Cheng 15908275SEric Cheng switch (state) { 15918275SEric Cheng case MAC_GROUP_STATE_RESERVED: 15928275SEric Cheng /* 15938275SEric Cheng * Successfully reserved the group. 15948275SEric Cheng * 15958275SEric Cheng * Given that there is an exclusive client controlling this 15968275SEric Cheng * group, we enable the group level polling when available, 15978275SEric Cheng * so that SRSs get to turn on/off individual rings they's 15988275SEric Cheng * assigned to. 15998275SEric Cheng */ 16008275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16018275SEric Cheng 16028275SEric Cheng if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) 16038275SEric Cheng GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16048275SEric Cheng 16058275SEric Cheng break; 16068275SEric Cheng 16078275SEric Cheng case MAC_GROUP_STATE_SHARED: 16088275SEric Cheng /* 16098275SEric Cheng * Set all rings of this group to software classified. 16108275SEric Cheng * If the group has an overriding interrupt, then re-enable it. 
16118275SEric Cheng */ 16128275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16138275SEric Cheng 16148275SEric Cheng if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) 16158275SEric Cheng GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16168275SEric Cheng 16178275SEric Cheng /* The ring is not available for reservations any more */ 16188275SEric Cheng break; 16198275SEric Cheng 16208275SEric Cheng case MAC_GROUP_STATE_REGISTERED: 16218275SEric Cheng /* Also callable from mac_register, perim is not held */ 16228275SEric Cheng break; 16238275SEric Cheng 16248275SEric Cheng default: 16258275SEric Cheng ASSERT(B_FALSE); 16268275SEric Cheng break; 16278275SEric Cheng } 16288275SEric Cheng 16298275SEric Cheng grp->mrg_state = state; 16308275SEric Cheng } 16318275SEric Cheng 16328275SEric Cheng /* 16338275SEric Cheng * Quiesce future hardware classified packets for the specified Rx ring 16348275SEric Cheng */ 16358275SEric Cheng static void 16368275SEric Cheng mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag) 16378275SEric Cheng { 16388275SEric Cheng ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER); 16398275SEric Cheng ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE); 16408275SEric Cheng 16418275SEric Cheng mutex_enter(&rx_ring->mr_lock); 16428275SEric Cheng rx_ring->mr_flag |= ring_flag; 16438275SEric Cheng while (rx_ring->mr_refcnt != 0) 16448275SEric Cheng cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock); 16458275SEric Cheng mutex_exit(&rx_ring->mr_lock); 16460Sstevel@tonic-gate } 16470Sstevel@tonic-gate 16484913Sethindra /* 16498275SEric Cheng * Please see mac_tx for details about the per cpu locking scheme 16504913Sethindra */ 16518275SEric Cheng static void 16528275SEric Cheng mac_tx_lock_all(mac_client_impl_t *mcip) 16538275SEric Cheng { 16548275SEric Cheng int i; 16558275SEric Cheng 16568275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16578275SEric Cheng mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16588275SEric Cheng } 
16598275SEric Cheng 16608275SEric Cheng static void 16618275SEric Cheng mac_tx_unlock_all(mac_client_impl_t *mcip) 16628275SEric Cheng { 16638275SEric Cheng int i; 16648275SEric Cheng 16658275SEric Cheng for (i = mac_tx_percpu_cnt; i >= 0; i--) 16668275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16678275SEric Cheng } 16688275SEric Cheng 16698275SEric Cheng static void 16708275SEric Cheng mac_tx_unlock_allbutzero(mac_client_impl_t *mcip) 16718275SEric Cheng { 16728275SEric Cheng int i; 16738275SEric Cheng 16748275SEric Cheng for (i = mac_tx_percpu_cnt; i > 0; i--) 16758275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16768275SEric Cheng } 16778275SEric Cheng 16788275SEric Cheng static int 16798275SEric Cheng mac_tx_sum_refcnt(mac_client_impl_t *mcip) 16800Sstevel@tonic-gate { 16818275SEric Cheng int i; 16828275SEric Cheng int refcnt = 0; 16838275SEric Cheng 16848275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16858275SEric Cheng refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt; 16868275SEric Cheng 16878275SEric Cheng return (refcnt); 16880Sstevel@tonic-gate } 16890Sstevel@tonic-gate 16908275SEric Cheng /* 16918275SEric Cheng * Stop future Tx packets coming down from the client in preparation for 16928275SEric Cheng * quiescing the Tx side. 
This is needed for dynamic reclaim and reassignment 16938275SEric Cheng * of rings between clients 16948275SEric Cheng */ 16958275SEric Cheng void 16968275SEric Cheng mac_tx_client_block(mac_client_impl_t *mcip) 16975084Sjohnlev { 16988275SEric Cheng mac_tx_lock_all(mcip); 16998275SEric Cheng mcip->mci_tx_flag |= MCI_TX_QUIESCE; 17008275SEric Cheng while (mac_tx_sum_refcnt(mcip) != 0) { 17018275SEric Cheng mac_tx_unlock_allbutzero(mcip); 17028275SEric Cheng cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17038275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17048275SEric Cheng mac_tx_lock_all(mcip); 17058275SEric Cheng } 17068275SEric Cheng mac_tx_unlock_all(mcip); 17075084Sjohnlev } 17085084Sjohnlev 17098275SEric Cheng void 17108275SEric Cheng mac_tx_client_unblock(mac_client_impl_t *mcip) 17115084Sjohnlev { 17128275SEric Cheng mac_tx_lock_all(mcip); 17138275SEric Cheng mcip->mci_tx_flag &= ~MCI_TX_QUIESCE; 17148275SEric Cheng mac_tx_unlock_all(mcip); 17155084Sjohnlev } 17165084Sjohnlev 17170Sstevel@tonic-gate /* 17188275SEric Cheng * Wait for an SRS to quiesce. The SRS worker will signal us when the 17198275SEric Cheng * quiesce is done. 17208275SEric Cheng */ 17218275SEric Cheng static void 17228275SEric Cheng mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag) 17238275SEric Cheng { 17248275SEric Cheng mutex_enter(&srs->srs_lock); 17258275SEric Cheng while (!(srs->srs_state & srs_flag)) 17268275SEric Cheng cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock); 17278275SEric Cheng mutex_exit(&srs->srs_lock); 17288275SEric Cheng } 17298275SEric Cheng 17308275SEric Cheng /* 17318275SEric Cheng * Quiescing an Rx SRS is achieved by the following sequence. The protocol 17328275SEric Cheng * works bottom up by cutting off packet flow from the bottommost point in the 17338275SEric Cheng * mac, then the SRS, and then the soft rings. There are 2 use cases of this 17348275SEric Cheng * mechanism. 
One is a temporary quiesce of the SRS, such as say while changing 17358275SEric Cheng * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 17368275SEric Cheng * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used 17378275SEric Cheng * for the SRS and MR flags. In the former case the threads pause waiting for 17388275SEric Cheng * a restart, while in the latter case the threads exit. The Tx SRS teardown 17398275SEric Cheng * is also mostly similar to the above. 17408275SEric Cheng * 17418275SEric Cheng * 1. Stop future hardware classified packets at the lowest level in the mac. 17428275SEric Cheng * Remove any hardware classification rule (CONDEMNED case) and mark the 17438275SEric Cheng * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 17448275SEric Cheng * from increasing. Upcalls from the driver that come through hardware 17458275SEric Cheng * classification will be dropped in mac_rx from now on. Then we wait for 17468275SEric Cheng * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 17478275SEric Cheng * sure there aren't any upcall threads from the driver through hardware 17488275SEric Cheng * classification. In the case of SRS teardown we also remove the 17498275SEric Cheng * classification rule in the driver. 17508275SEric Cheng * 17518275SEric Cheng * 2. Stop future software classified packets by marking the flow entry with 17528275SEric Cheng * FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from 17538275SEric Cheng * increasing. We also remove the flow entry from the table in the latter 17548275SEric Cheng * case. Then wait for the fe_refcnt to reach an appropriate quiescent value 17558275SEric Cheng * that indicates there aren't any active threads using that flow entry. 17568275SEric Cheng * 17578275SEric Cheng * 3. Quiesce the SRS and softrings by signaling the SRS. 
The SRS poll thread, 17588275SEric Cheng * SRS worker thread, and the soft ring threads are quiesced in sequence 17598275SEric Cheng * with the SRS worker thread serving as a master controller. This 17608275SEric Cheng * mechansim is explained in mac_srs_worker_quiesce(). 17618275SEric Cheng * 17628275SEric Cheng * The restart mechanism to reactivate the SRS and softrings is explained 17638275SEric Cheng * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the 17648275SEric Cheng * restart sequence. 17650Sstevel@tonic-gate */ 17660Sstevel@tonic-gate void 17678275SEric Cheng mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 17680Sstevel@tonic-gate { 17698275SEric Cheng flow_entry_t *flent = srs->srs_flent; 17708275SEric Cheng uint_t mr_flag, srs_done_flag; 17718275SEric Cheng 17728275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 17738275SEric Cheng ASSERT(!(srs->srs_type & SRST_TX)); 17748275SEric Cheng 17758275SEric Cheng if (srs_quiesce_flag == SRS_CONDEMNED) { 17768275SEric Cheng mr_flag = MR_CONDEMNED; 17778275SEric Cheng srs_done_flag = SRS_CONDEMNED_DONE; 17788275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17798275SEric Cheng mac_srs_client_poll_disable(srs->srs_mcip, srs); 17808275SEric Cheng } else { 17818275SEric Cheng ASSERT(srs_quiesce_flag == SRS_QUIESCE); 17828275SEric Cheng mr_flag = MR_QUIESCE; 17838275SEric Cheng srs_done_flag = SRS_QUIESCE_DONE; 17848275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17858275SEric Cheng mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 17868275SEric Cheng } 17878275SEric Cheng 17888275SEric Cheng if (srs->srs_ring != NULL) { 17898275SEric Cheng mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 17908275SEric Cheng } else { 17918275SEric Cheng /* 17928275SEric Cheng * SRS is driven by software classification. In case 17938275SEric Cheng * of CONDEMNED, the top level teardown functions will 17948275SEric Cheng * deal with flow removal. 
17958275SEric Cheng */ 17968275SEric Cheng if (srs_quiesce_flag != SRS_CONDEMNED) { 17978275SEric Cheng FLOW_MARK(flent, FE_QUIESCE); 17988275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 17998275SEric Cheng } 18008275SEric Cheng } 18010Sstevel@tonic-gate 18020Sstevel@tonic-gate /* 18038275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 18048275SEric Cheng * SRS quiesce to complete. The SRS worker thread will wake us 18058275SEric Cheng * up when the quiesce is complete 18064913Sethindra */ 18078275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 18088275SEric Cheng mac_srs_quiesce_wait(srs, srs_done_flag); 18094913Sethindra } 18104913Sethindra 18114913Sethindra /* 18128275SEric Cheng * Remove an SRS. 18134913Sethindra */ 18144913Sethindra void 18158275SEric Cheng mac_rx_srs_remove(mac_soft_ring_set_t *srs) 18164913Sethindra { 18178275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18188275SEric Cheng int i; 18198275SEric Cheng 18208275SEric Cheng mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 18218275SEric Cheng /* 18228275SEric Cheng * Locate and remove our entry in the fe_rx_srs[] array, and 18238275SEric Cheng * adjust the fe_rx_srs array entries and array count by 18248275SEric Cheng * moving the last entry into the vacated spot. 
18258275SEric Cheng */ 18268275SEric Cheng mutex_enter(&flent->fe_lock); 18278275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 18288275SEric Cheng if (flent->fe_rx_srs[i] == srs) 18298275SEric Cheng break; 18304913Sethindra } 18318275SEric Cheng 18328275SEric Cheng ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 18338275SEric Cheng if (i != flent->fe_rx_srs_cnt - 1) { 18348275SEric Cheng flent->fe_rx_srs[i] = 18358275SEric Cheng flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 18368275SEric Cheng i = flent->fe_rx_srs_cnt - 1; 18378275SEric Cheng } 18388275SEric Cheng 18398275SEric Cheng flent->fe_rx_srs[i] = NULL; 18408275SEric Cheng flent->fe_rx_srs_cnt--; 18418275SEric Cheng mutex_exit(&flent->fe_lock); 18428275SEric Cheng 18438275SEric Cheng mac_srs_free(srs); 18440Sstevel@tonic-gate } 18450Sstevel@tonic-gate 18468275SEric Cheng static void 18478275SEric Cheng mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 18480Sstevel@tonic-gate { 18498275SEric Cheng mutex_enter(&srs->srs_lock); 18508275SEric Cheng srs->srs_state &= ~flag; 18518275SEric Cheng mutex_exit(&srs->srs_lock); 18528275SEric Cheng } 18538275SEric Cheng 18548275SEric Cheng void 18558275SEric Cheng mac_rx_srs_restart(mac_soft_ring_set_t *srs) 18568275SEric Cheng { 18578275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18588275SEric Cheng mac_ring_t *mr; 18598275SEric Cheng 18608275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 18618275SEric Cheng ASSERT((srs->srs_type & SRST_TX) == 0); 18620Sstevel@tonic-gate 18630Sstevel@tonic-gate /* 18648275SEric Cheng * This handles a change in the number of SRSs between the quiesce and 18658275SEric Cheng * and restart operation of a flow. 18668275SEric Cheng */ 18678275SEric Cheng if (!SRS_QUIESCED(srs)) 18688275SEric Cheng return; 18698275SEric Cheng 18708275SEric Cheng /* 18718275SEric Cheng * Signal the SRS to restart itself. 
Wait for the restart to complete 18728275SEric Cheng * Note that we only restart the SRS if it is not marked as 18738275SEric Cheng * permanently quiesced. 18740Sstevel@tonic-gate */ 18758275SEric Cheng if (!SRS_QUIESCED_PERMANENT(srs)) { 18768275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 18778275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 18788275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 18798275SEric Cheng 18808275SEric Cheng mac_srs_client_poll_restart(srs->srs_mcip, srs); 18818275SEric Cheng } 18828275SEric Cheng 18838275SEric Cheng /* Finally clear the flags to let the packets in */ 18848275SEric Cheng mr = srs->srs_ring; 18858275SEric Cheng if (mr != NULL) { 18868275SEric Cheng MAC_RING_UNMARK(mr, MR_QUIESCE); 18878275SEric Cheng /* In case the ring was stopped, safely restart it */ 18888275SEric Cheng (void) mac_start_ring(mr); 18898275SEric Cheng } else { 18908275SEric Cheng FLOW_UNMARK(flent, FE_QUIESCE); 18918275SEric Cheng } 18928275SEric Cheng } 18938275SEric Cheng 18948275SEric Cheng /* 18958275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 18968275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem. 
18978275SEric Cheng */ 18988275SEric Cheng /* ARGSUSED */ 18998275SEric Cheng int 19008275SEric Cheng mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 19018275SEric Cheng { 19028275SEric Cheng int i; 19038275SEric Cheng 19048275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19058275SEric Cheng mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 19068275SEric Cheng SRS_QUIESCE); 19078275SEric Cheng } 19088275SEric Cheng return (0); 19090Sstevel@tonic-gate } 19100Sstevel@tonic-gate 19110Sstevel@tonic-gate /* 19128275SEric Cheng * Restart a flow and associated Rx SRS that has been quiesced temporarily 19138275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem 19140Sstevel@tonic-gate */ 19158275SEric Cheng /* ARGSUSED */ 19168275SEric Cheng int 19178275SEric Cheng mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 19188275SEric Cheng { 19198275SEric Cheng int i; 19208275SEric Cheng 19218275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) 19228275SEric Cheng mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 19238275SEric Cheng 19248275SEric Cheng return (0); 19258275SEric Cheng } 19268275SEric Cheng 19270Sstevel@tonic-gate void 19288275SEric Cheng mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 19290Sstevel@tonic-gate { 19308275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19318275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 19328275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19338275SEric Cheng mac_soft_ring_set_t *mac_srs; 19348275SEric Cheng int i; 19358275SEric Cheng 19368275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19378275SEric Cheng 19388275SEric Cheng if (flent == NULL) 19398275SEric Cheng return; 19408275SEric Cheng 19418275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19428275SEric Cheng mac_srs = flent->fe_rx_srs[i]; 19438275SEric Cheng mutex_enter(&mac_srs->srs_lock); 19448275SEric Cheng if (on) 19458275SEric Cheng 
mac_srs->srs_state |= SRS_QUIESCE_PERM; 19468275SEric Cheng else 19478275SEric Cheng mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 19488275SEric Cheng mutex_exit(&mac_srs->srs_lock); 19490Sstevel@tonic-gate } 19508275SEric Cheng } 19518275SEric Cheng 19528275SEric Cheng void 19538275SEric Cheng mac_rx_client_quiesce(mac_client_handle_t mch) 19548275SEric Cheng { 19558275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19568275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19578275SEric Cheng 19588275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19598275SEric Cheng 19608275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19618275SEric Cheng (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 19628275SEric Cheng NULL); 19638275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19648275SEric Cheng mac_rx_classify_flow_quiesce, NULL); 19658275SEric Cheng } 19660Sstevel@tonic-gate } 19670Sstevel@tonic-gate 19680Sstevel@tonic-gate void 19698275SEric Cheng mac_rx_client_restart(mac_client_handle_t mch) 19700Sstevel@tonic-gate { 19718275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19728275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19738275SEric Cheng 19748275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19758275SEric Cheng 19768275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19778275SEric Cheng (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 19788275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19798275SEric Cheng mac_rx_classify_flow_restart, NULL); 19808275SEric Cheng } 19818275SEric Cheng } 19828275SEric Cheng 19838275SEric Cheng /* 19848275SEric Cheng * This function only quiesces the Tx SRS and softring worker threads. Callers 19858275SEric Cheng * need to make sure that there aren't any mac client threads doing current or 19868275SEric Cheng * future transmits in the mac before calling this function. 
19878275SEric Cheng */ 19888275SEric Cheng void 19898275SEric Cheng mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 19908275SEric Cheng { 19918275SEric Cheng mac_client_impl_t *mcip = srs->srs_mcip; 19928275SEric Cheng 19938275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 19948275SEric Cheng 19958275SEric Cheng ASSERT(srs->srs_type & SRST_TX); 19968275SEric Cheng ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 19978275SEric Cheng srs_quiesce_flag == SRS_QUIESCE); 19980Sstevel@tonic-gate 19990Sstevel@tonic-gate /* 20008275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 20018275SEric Cheng * SRS quiesce to complete. The SRS worker thread will wake us 20028275SEric Cheng * up when the quiesce is complete 20030Sstevel@tonic-gate */ 20048275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 20058275SEric Cheng mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 20068275SEric Cheng SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 20078275SEric Cheng } 20088275SEric Cheng 20098275SEric Cheng void 20108275SEric Cheng mac_tx_srs_restart(mac_soft_ring_set_t *srs) 20118275SEric Cheng { 20128275SEric Cheng /* 20138275SEric Cheng * Resizing the fanout could result in creation of new SRSs. 20148275SEric Cheng * They may not necessarily be in the quiesced state in which 20158275SEric Cheng * case it need be restarted 20168275SEric Cheng */ 20178275SEric Cheng if (!SRS_QUIESCED(srs)) 20188275SEric Cheng return; 20198275SEric Cheng 20208275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 20218275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 20228275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 20230Sstevel@tonic-gate } 20240Sstevel@tonic-gate 20250Sstevel@tonic-gate /* 20268275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 
20278275SEric Cheng * Please see block comment above mac_rx_srs_quiesce 20280Sstevel@tonic-gate */ 20298275SEric Cheng /* ARGSUSED */ 20308275SEric Cheng int 20318275SEric Cheng mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 20320Sstevel@tonic-gate { 20332311Sseb /* 20348275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20358275SEric Cheng * not plumbed 20362311Sseb */ 20378275SEric Cheng if (flent->fe_tx_srs != NULL) 20388275SEric Cheng mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 20398275SEric Cheng return (0); 20408275SEric Cheng } 20418275SEric Cheng 20428275SEric Cheng /* ARGSUSED */ 20438275SEric Cheng int 20448275SEric Cheng mac_tx_flow_restart(flow_entry_t *flent, void *arg) 20458275SEric Cheng { 20468275SEric Cheng /* 20478275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20488275SEric Cheng * not plumbed 20498275SEric Cheng */ 20508275SEric Cheng if (flent->fe_tx_srs != NULL) 20518275SEric Cheng mac_tx_srs_restart(flent->fe_tx_srs); 20528275SEric Cheng return (0); 20532311Sseb } 20542311Sseb 20552311Sseb void 20568275SEric Cheng mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) 20578275SEric Cheng { 20588275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20598275SEric Cheng 20608275SEric Cheng mac_tx_client_block(mcip); 20618275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20628275SEric Cheng mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 20638275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20648275SEric Cheng mac_tx_flow_quiesce, NULL); 20658275SEric Cheng } 20668275SEric Cheng } 20678275SEric Cheng 20688275SEric Cheng void 20698275SEric Cheng mac_tx_client_restart(mac_client_impl_t *mcip) 20702311Sseb { 20718275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20728275SEric Cheng 20738275SEric Cheng mac_tx_client_unblock(mcip); 20748275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20758275SEric Cheng 
mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 20768275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20778275SEric Cheng mac_tx_flow_restart, NULL); 20788275SEric Cheng } 20798275SEric Cheng } 20808275SEric Cheng 20818275SEric Cheng void 20828275SEric Cheng mac_tx_client_flush(mac_client_impl_t *mcip) 20838275SEric Cheng { 20848275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20858275SEric Cheng 20868275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 20878275SEric Cheng mac_tx_client_restart(mcip); 20888275SEric Cheng } 20898275SEric Cheng 20908275SEric Cheng void 20918275SEric Cheng mac_client_quiesce(mac_client_impl_t *mcip) 20928275SEric Cheng { 20938275SEric Cheng mac_rx_client_quiesce((mac_client_handle_t)mcip); 20948275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 20958275SEric Cheng } 20968275SEric Cheng 20978275SEric Cheng void 20988275SEric Cheng mac_client_restart(mac_client_impl_t *mcip) 20998275SEric Cheng { 21008275SEric Cheng mac_rx_client_restart((mac_client_handle_t)mcip); 21018275SEric Cheng mac_tx_client_restart(mcip); 21022311Sseb } 21032311Sseb 21042311Sseb /* 21055895Syz147064 * Allocate a minor number. 21065895Syz147064 */ 21075895Syz147064 minor_t 21085895Syz147064 mac_minor_hold(boolean_t sleep) 21095895Syz147064 { 21105895Syz147064 minor_t minor; 21115895Syz147064 21125895Syz147064 /* 21135895Syz147064 * Grab a value from the arena. 
21145895Syz147064 */ 21155895Syz147064 atomic_add_32(&minor_count, 1); 21165895Syz147064 21175895Syz147064 if (sleep) 21185895Syz147064 minor = (uint_t)id_alloc(minor_ids); 21195895Syz147064 else 21205895Syz147064 minor = (uint_t)id_alloc_nosleep(minor_ids); 21215895Syz147064 21225895Syz147064 if (minor == 0) { 21235895Syz147064 atomic_add_32(&minor_count, -1); 21245895Syz147064 return (0); 21255895Syz147064 } 21265895Syz147064 21275895Syz147064 return (minor); 21285895Syz147064 } 21295895Syz147064 21305895Syz147064 /* 21315895Syz147064 * Release a previously allocated minor number. 21325895Syz147064 */ 21335895Syz147064 void 21345895Syz147064 mac_minor_rele(minor_t minor) 21355895Syz147064 { 21365895Syz147064 /* 21375895Syz147064 * Return the value to the arena. 21385895Syz147064 */ 21395895Syz147064 id_free(minor_ids, minor); 21405895Syz147064 atomic_add_32(&minor_count, -1); 21415895Syz147064 } 21425895Syz147064 21435895Syz147064 uint32_t 21445895Syz147064 mac_no_notification(mac_handle_t mh) 21455895Syz147064 { 21465895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 21475895Syz147064 return (mip->mi_unsup_note); 21485895Syz147064 } 21495895Syz147064 21505895Syz147064 /* 21518275SEric Cheng * Prevent any new opens of this mac in preparation for unregister 21522311Sseb */ 21532311Sseb int 21548275SEric Cheng i_mac_disable(mac_impl_t *mip) 21552311Sseb { 21568275SEric Cheng mac_client_impl_t *mcip; 21578275SEric Cheng 21588275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 21598275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 21608275SEric Cheng /* Already disabled, return success */ 21618275SEric Cheng rw_exit(&i_mac_impl_lock); 21628275SEric Cheng return (0); 21635895Syz147064 } 21642311Sseb /* 21658275SEric Cheng * See if there are any other references to this mac_t (e.g., VLAN's). 21668275SEric Cheng * If so return failure. 
If all the other checks below pass, then 21678275SEric Cheng * set mi_disabled atomically under the i_mac_impl_lock to prevent 21688275SEric Cheng * any new VLAN's from being created or new mac client opens of this 21698275SEric Cheng * mac end point. 21702311Sseb */ 21718275SEric Cheng if (mip->mi_ref > 0) { 21728275SEric Cheng rw_exit(&i_mac_impl_lock); 21738275SEric Cheng return (EBUSY); 21742311Sseb } 21752311Sseb 21762311Sseb /* 21778275SEric Cheng * mac clients must delete all multicast groups they join before 21788275SEric Cheng * closing. bcast groups are reference counted, the last client 21798275SEric Cheng * to delete the group will wait till the group is physically 21808275SEric Cheng * deleted. Since all clients have closed this mac end point 21818275SEric Cheng * mi_bcast_ngrps must be zero at this point 21822311Sseb */ 21838275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0); 21845009Sgd78059 21855009Sgd78059 /* 21868275SEric Cheng * Don't let go of this if it has some flows. 21878275SEric Cheng * All other code guarantees no flows are added to a disabled 21888275SEric Cheng * mac, therefore it is sufficient to check for the flow table 21898275SEric Cheng * only here. 
21902311Sseb */ 21918275SEric Cheng mcip = mac_primary_client_handle(mip); 21928275SEric Cheng if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 21938275SEric Cheng rw_exit(&i_mac_impl_lock); 21948275SEric Cheng return (ENOTEMPTY); 21955895Syz147064 } 21965895Syz147064 21978275SEric Cheng mip->mi_state_flags |= MIS_DISABLED; 21981852Syz147064 rw_exit(&i_mac_impl_lock); 2199269Sericheng return (0); 22008275SEric Cheng } 22018275SEric Cheng 22028275SEric Cheng int 22038275SEric Cheng mac_disable_nowait(mac_handle_t mh) 22048275SEric Cheng { 22058275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22068275SEric Cheng int err; 22078275SEric Cheng 22088275SEric Cheng if ((err = i_mac_perim_enter_nowait(mip)) != 0) 22098275SEric Cheng return (err); 22108275SEric Cheng err = i_mac_disable(mip); 22118275SEric Cheng i_mac_perim_exit(mip); 2212269Sericheng return (err); 22130Sstevel@tonic-gate } 22140Sstevel@tonic-gate 22150Sstevel@tonic-gate int 22165084Sjohnlev mac_disable(mac_handle_t mh) 22170Sstevel@tonic-gate { 22188275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22198275SEric Cheng int err; 22208275SEric Cheng 22218275SEric Cheng i_mac_perim_enter(mip); 22228275SEric Cheng err = i_mac_disable(mip); 22238275SEric Cheng i_mac_perim_exit(mip); 22245084Sjohnlev 22250Sstevel@tonic-gate /* 22268275SEric Cheng * Clean up notification thread and wait for it to exit. 22275009Sgd78059 */ 22288275SEric Cheng if (err == 0) 22298275SEric Cheng i_mac_notify_exit(mip); 22308275SEric Cheng 22318275SEric Cheng return (err); 22320Sstevel@tonic-gate } 22330Sstevel@tonic-gate 22344913Sethindra /* 22358275SEric Cheng * Called when the MAC instance has a non empty flow table, to de-multiplex 22368275SEric Cheng * incoming packets to the right flow. 22378275SEric Cheng * The MAC's rw lock is assumed held as a READER. 
22384913Sethindra */ 22398275SEric Cheng /* ARGSUSED */ 22408275SEric Cheng static mblk_t * 22418275SEric Cheng mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 22420Sstevel@tonic-gate { 22438275SEric Cheng flow_entry_t *flent = NULL; 22448275SEric Cheng uint_t flags = FLOW_INBOUND; 22458275SEric Cheng int err; 22464913Sethindra 22474913Sethindra /* 22488275SEric Cheng * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 22498275SEric Cheng * to mac_flow_lookup() so that the VLAN packets can be successfully 22508275SEric Cheng * passed to the non-VLAN aggregation flows. 22518275SEric Cheng * 22528275SEric Cheng * Note that there is possibly a race between this and 22538275SEric Cheng * mac_unicast_remove/add() and VLAN packets could be incorrectly 22548275SEric Cheng * classified to non-VLAN flows of non-aggregation mac clients. These 22558275SEric Cheng * VLAN packets will be then filtered out by the mac module. 22564913Sethindra */ 22578275SEric Cheng if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 22588275SEric Cheng flags |= FLOW_IGNORE_VLAN; 22598275SEric Cheng 22608275SEric Cheng err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 22618275SEric Cheng if (err != 0) { 22628275SEric Cheng /* no registered receive function */ 22638275SEric Cheng return (mp); 22648275SEric Cheng } else { 22658275SEric Cheng mac_client_impl_t *mcip; 22664913Sethindra 22674913Sethindra /* 22688275SEric Cheng * This flent might just be an additional one on the MAC client, 22698275SEric Cheng * i.e. for classification purposes (different fdesc), however 22708275SEric Cheng * the resources, SRS et. al., are in the mci_flent, so if 22718275SEric Cheng * this isn't the mci_flent, we need to get it. 
22724913Sethindra */ 22738275SEric Cheng if ((mcip = flent->fe_mcip) != NULL && 22748275SEric Cheng mcip->mci_flent != flent) { 22758275SEric Cheng FLOW_REFRELE(flent); 22768275SEric Cheng flent = mcip->mci_flent; 22778275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 22788275SEric Cheng if (err != 0) 22798275SEric Cheng return (mp); 22808275SEric Cheng } 22818275SEric Cheng (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 22828275SEric Cheng B_FALSE); 22838275SEric Cheng FLOW_REFRELE(flent); 22845084Sjohnlev } 22855084Sjohnlev return (NULL); 22865084Sjohnlev } 22875084Sjohnlev 22885084Sjohnlev mblk_t * 22898275SEric Cheng mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 22900Sstevel@tonic-gate { 22912311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 22928275SEric Cheng mblk_t *bp, *bp1, **bpp, *list = NULL; 22930Sstevel@tonic-gate 22940Sstevel@tonic-gate /* 22958275SEric Cheng * We walk the chain and attempt to classify each packet. 22968275SEric Cheng * The packets that couldn't be classified will be returned 22978275SEric Cheng * back to the caller. 
22980Sstevel@tonic-gate */ 22998275SEric Cheng bp = mp_chain; 23008275SEric Cheng bpp = &list; 23018275SEric Cheng while (bp != NULL) { 23028275SEric Cheng bp1 = bp; 23038275SEric Cheng bp = bp->b_next; 23048275SEric Cheng bp1->b_next = NULL; 23058275SEric Cheng 23068275SEric Cheng if (mac_rx_classify(mip, mrh, bp1) != NULL) { 23078275SEric Cheng *bpp = bp1; 23088275SEric Cheng bpp = &bp1->b_next; 23098275SEric Cheng } 23108275SEric Cheng } 23118275SEric Cheng return (list); 23120Sstevel@tonic-gate } 23130Sstevel@tonic-gate 23148275SEric Cheng static int 23158275SEric Cheng mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 23160Sstevel@tonic-gate { 23178275SEric Cheng mac_ring_handle_t ring = arg; 23188275SEric Cheng 23198275SEric Cheng if (flent->fe_tx_srs) 23208275SEric Cheng mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 23212311Sseb return (0); 23222311Sseb } 23232311Sseb 23240Sstevel@tonic-gate void 23258275SEric Cheng i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 23268275SEric Cheng { 23278275SEric Cheng mac_client_impl_t *cclient; 23288275SEric Cheng mac_soft_ring_set_t *mac_srs; 23298275SEric Cheng 23308275SEric Cheng /* 23318275SEric Cheng * After grabbing the mi_rw_lock, the list of clients can't change. 23328275SEric Cheng * If there are any clients mi_disabled must be B_FALSE and can't 23338275SEric Cheng * get set since there are clients. If there aren't any clients we 23348275SEric Cheng * don't do anything. In any case the mip has to be valid. The driver 23358275SEric Cheng * must make sure that it goes single threaded (with respect to mac 23368275SEric Cheng * calls) and wait for all pending mac calls to finish before calling 23378275SEric Cheng * mac_unregister. 
23388275SEric Cheng */ 23398275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 23408275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 23418275SEric Cheng rw_exit(&i_mac_impl_lock); 23428275SEric Cheng return; 23438275SEric Cheng } 23448275SEric Cheng 23458275SEric Cheng /* 23468275SEric Cheng * Get MAC tx srs from walking mac_client_handle list. 23478275SEric Cheng */ 23488275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 23498275SEric Cheng for (cclient = mip->mi_clients_list; cclient != NULL; 23508275SEric Cheng cclient = cclient->mci_client_next) { 23518275SEric Cheng if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) 23528275SEric Cheng mac_tx_srs_wakeup(mac_srs, ring); 23538275SEric Cheng if (!FLOW_TAB_EMPTY(cclient->mci_subflow_tab)) { 23548275SEric Cheng (void) mac_flow_walk_nolock(cclient->mci_subflow_tab, 23558275SEric Cheng mac_tx_flow_srs_wakeup, ring); 23568275SEric Cheng } 23578275SEric Cheng } 23588275SEric Cheng rw_exit(&mip->mi_rw_lock); 23598275SEric Cheng rw_exit(&i_mac_impl_lock); 23608275SEric Cheng } 23618275SEric Cheng 23628275SEric Cheng /* ARGSUSED */ 23638275SEric Cheng void 23648275SEric Cheng mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 23650Sstevel@tonic-gate boolean_t add) 23660Sstevel@tonic-gate { 23678275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 23688275SEric Cheng 23698275SEric Cheng i_mac_perim_enter((mac_impl_t *)mh); 23700Sstevel@tonic-gate /* 23710Sstevel@tonic-gate * If no specific refresh function was given then default to the 23720Sstevel@tonic-gate * driver's m_multicst entry point. 
23730Sstevel@tonic-gate */ 23740Sstevel@tonic-gate if (refresh == NULL) { 23752311Sseb refresh = mip->mi_multicst; 23762311Sseb arg = mip->mi_driver; 23770Sstevel@tonic-gate } 23788275SEric Cheng 23798275SEric Cheng mac_bcast_refresh(mip, refresh, arg, add); 23808275SEric Cheng i_mac_perim_exit((mac_impl_t *)mh); 23810Sstevel@tonic-gate } 23820Sstevel@tonic-gate 23830Sstevel@tonic-gate void 23842311Sseb mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 23850Sstevel@tonic-gate { 23862311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 23870Sstevel@tonic-gate 23880Sstevel@tonic-gate /* 23890Sstevel@tonic-gate * If no specific refresh function was given then default to the 23900Sstevel@tonic-gate * driver's m_promisc entry point. 23910Sstevel@tonic-gate */ 23920Sstevel@tonic-gate if (refresh == NULL) { 23932311Sseb refresh = mip->mi_setpromisc; 23942311Sseb arg = mip->mi_driver; 23950Sstevel@tonic-gate } 23960Sstevel@tonic-gate ASSERT(refresh != NULL); 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate /* 23990Sstevel@tonic-gate * Call the refresh function with the current promiscuity. 24000Sstevel@tonic-gate */ 24010Sstevel@tonic-gate refresh(arg, (mip->mi_devpromisc != 0)); 24020Sstevel@tonic-gate } 24030Sstevel@tonic-gate 24045895Syz147064 /* 24055895Syz147064 * The mac client requests that the mac not to change its margin size to 24065895Syz147064 * be less than the specified value. If "current" is B_TRUE, then the client 24075895Syz147064 * requests the mac not to change its margin size to be smaller than the 24085895Syz147064 * current size. Further, return the current margin size value in this case. 24095895Syz147064 * 24105895Syz147064 * We keep every requested size in an ordered list from largest to smallest. 
24115895Syz147064 */ 24125895Syz147064 int 24135895Syz147064 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 24145895Syz147064 { 24155895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24165895Syz147064 mac_margin_req_t **pp, *p; 24175895Syz147064 int err = 0; 24185895Syz147064 24198275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24205895Syz147064 if (current) 24215895Syz147064 *marginp = mip->mi_margin; 24225895Syz147064 24235895Syz147064 /* 24245895Syz147064 * If the current margin value cannot satisfy the margin requested, 24255895Syz147064 * return ENOTSUP directly. 24265895Syz147064 */ 24275895Syz147064 if (*marginp > mip->mi_margin) { 24285895Syz147064 err = ENOTSUP; 24295895Syz147064 goto done; 24305895Syz147064 } 24315895Syz147064 24325895Syz147064 /* 24335895Syz147064 * Check whether the given margin is already in the list. If so, 24345895Syz147064 * bump the reference count. 24355895Syz147064 */ 24368275SEric Cheng for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 24375895Syz147064 if (p->mmr_margin == *marginp) { 24385895Syz147064 /* 24395895Syz147064 * The margin requested is already in the list, 24405895Syz147064 * so just bump the reference count. 24415895Syz147064 */ 24425895Syz147064 p->mmr_ref++; 24435895Syz147064 goto done; 24445895Syz147064 } 24455895Syz147064 if (p->mmr_margin < *marginp) 24465895Syz147064 break; 24475895Syz147064 } 24485895Syz147064 24495895Syz147064 24508275SEric Cheng p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 24515895Syz147064 p->mmr_margin = *marginp; 24525895Syz147064 p->mmr_ref++; 24535895Syz147064 p->mmr_nextp = *pp; 24545895Syz147064 *pp = p; 24555895Syz147064 24565895Syz147064 done: 24578275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 24585895Syz147064 return (err); 24595895Syz147064 } 24605895Syz147064 24615895Syz147064 /* 24625895Syz147064 * The mac client requests to cancel its previous mac_margin_add() request. 
24635895Syz147064 * We remove the requested margin size from the list. 24645895Syz147064 */ 24655895Syz147064 int 24665895Syz147064 mac_margin_remove(mac_handle_t mh, uint32_t margin) 24675895Syz147064 { 24685895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24695895Syz147064 mac_margin_req_t **pp, *p; 24705895Syz147064 int err = 0; 24715895Syz147064 24728275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24735895Syz147064 /* 24745895Syz147064 * Find the entry in the list for the given margin. 24755895Syz147064 */ 24765895Syz147064 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 24775895Syz147064 if (p->mmr_margin == margin) { 24785895Syz147064 if (--p->mmr_ref == 0) 24795895Syz147064 break; 24805895Syz147064 24815895Syz147064 /* 24825895Syz147064 * There is still a reference to this address so 24835895Syz147064 * there's nothing more to do. 24845895Syz147064 */ 24855895Syz147064 goto done; 24865895Syz147064 } 24875895Syz147064 } 24885895Syz147064 24895895Syz147064 /* 24905895Syz147064 * We did not find an entry for the given margin. 24915895Syz147064 */ 24925895Syz147064 if (p == NULL) { 24935895Syz147064 err = ENOENT; 24945895Syz147064 goto done; 24955895Syz147064 } 24965895Syz147064 24975895Syz147064 ASSERT(p->mmr_ref == 0); 24985895Syz147064 24995895Syz147064 /* 25005895Syz147064 * Remove it from the list. 
25015895Syz147064 */ 25025895Syz147064 *pp = p->mmr_nextp; 25035895Syz147064 kmem_free(p, sizeof (mac_margin_req_t)); 25045895Syz147064 done: 25058275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25065895Syz147064 return (err); 25075895Syz147064 } 25085895Syz147064 25095895Syz147064 boolean_t 25105895Syz147064 mac_margin_update(mac_handle_t mh, uint32_t margin) 25115895Syz147064 { 25125895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 25135895Syz147064 uint32_t margin_needed = 0; 25145895Syz147064 25158275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 25165895Syz147064 25175895Syz147064 if (mip->mi_mmrp != NULL) 25185895Syz147064 margin_needed = mip->mi_mmrp->mmr_margin; 25195895Syz147064 25205895Syz147064 if (margin_needed <= margin) 25215895Syz147064 mip->mi_margin = margin; 25225895Syz147064 25238275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25245895Syz147064 25255895Syz147064 if (margin_needed <= margin) 25265895Syz147064 i_mac_notify(mip, MAC_NOTE_MARGIN); 25275895Syz147064 25285895Syz147064 return (margin_needed <= margin); 25295895Syz147064 } 25305895Syz147064 25312311Sseb /* 25322311Sseb * MAC Type Plugin functions. 25332311Sseb */ 25342311Sseb 25358275SEric Cheng mactype_t * 25368275SEric Cheng mactype_getplugin(const char *pname) 25378275SEric Cheng { 25388275SEric Cheng mactype_t *mtype = NULL; 25398275SEric Cheng boolean_t tried_modload = B_FALSE; 25408275SEric Cheng 25418275SEric Cheng mutex_enter(&i_mactype_lock); 25428275SEric Cheng 25438275SEric Cheng find_registered_mactype: 25448275SEric Cheng if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 25458275SEric Cheng (mod_hash_val_t *)&mtype) != 0) { 25468275SEric Cheng if (!tried_modload) { 25478275SEric Cheng /* 25488275SEric Cheng * If the plugin has not yet been loaded, then 25498275SEric Cheng * attempt to load it now. 
If modload() succeeds, 25508275SEric Cheng * the plugin should have registered using 25518275SEric Cheng * mactype_register(), in which case we can go back 25528275SEric Cheng * and attempt to find it again. 25538275SEric Cheng */ 25548275SEric Cheng if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 25558275SEric Cheng tried_modload = B_TRUE; 25568275SEric Cheng goto find_registered_mactype; 25578275SEric Cheng } 25588275SEric Cheng } 25598275SEric Cheng } else { 25608275SEric Cheng /* 25618275SEric Cheng * Note that there's no danger that the plugin we've loaded 25628275SEric Cheng * could be unloaded between the modload() step and the 25638275SEric Cheng * reference count bump here, as we're holding 25648275SEric Cheng * i_mactype_lock, which mactype_unregister() also holds. 25658275SEric Cheng */ 25668275SEric Cheng atomic_inc_32(&mtype->mt_ref); 25678275SEric Cheng } 25688275SEric Cheng 25698275SEric Cheng mutex_exit(&i_mactype_lock); 25708275SEric Cheng return (mtype); 25718275SEric Cheng } 25728275SEric Cheng 25732311Sseb mactype_register_t * 25742311Sseb mactype_alloc(uint_t mactype_version) 25752311Sseb { 25762311Sseb mactype_register_t *mtrp; 25772311Sseb 25782311Sseb /* 25792311Sseb * Make sure there isn't a version mismatch between the plugin and 25802311Sseb * the framework. In the future, if multiple versions are 25812311Sseb * supported, this check could become more sophisticated. 
25822311Sseb */ 25832311Sseb if (mactype_version != MACTYPE_VERSION) 25842311Sseb return (NULL); 25852311Sseb 25862311Sseb mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 25872311Sseb mtrp->mtr_version = mactype_version; 25882311Sseb return (mtrp); 25892311Sseb } 25902311Sseb 25912311Sseb void 25922311Sseb mactype_free(mactype_register_t *mtrp) 25932311Sseb { 25942311Sseb kmem_free(mtrp, sizeof (mactype_register_t)); 25952311Sseb } 25962311Sseb 25972311Sseb int 25982311Sseb mactype_register(mactype_register_t *mtrp) 25992311Sseb { 26002311Sseb mactype_t *mtp; 26012311Sseb mactype_ops_t *ops = mtrp->mtr_ops; 26022311Sseb 26032311Sseb /* Do some sanity checking before we register this MAC type. */ 26046353Sdr146992 if (mtrp->mtr_ident == NULL || ops == NULL) 26052311Sseb return (EINVAL); 26062311Sseb 26072311Sseb /* 26082311Sseb * Verify that all mandatory callbacks are set in the ops 26092311Sseb * vector. 26102311Sseb */ 26112311Sseb if (ops->mtops_unicst_verify == NULL || 26122311Sseb ops->mtops_multicst_verify == NULL || 26132311Sseb ops->mtops_sap_verify == NULL || 26142311Sseb ops->mtops_header == NULL || 26152311Sseb ops->mtops_header_info == NULL) { 26162311Sseb return (EINVAL); 26172311Sseb } 26182311Sseb 26192311Sseb mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 26202311Sseb mtp->mt_ident = mtrp->mtr_ident; 26212311Sseb mtp->mt_ops = *ops; 26222311Sseb mtp->mt_type = mtrp->mtr_mactype; 26233147Sxc151355 mtp->mt_nativetype = mtrp->mtr_nativetype; 26242311Sseb mtp->mt_addr_length = mtrp->mtr_addrlen; 26252311Sseb if (mtrp->mtr_brdcst_addr != NULL) { 26262311Sseb mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 26272311Sseb bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 26282311Sseb mtrp->mtr_addrlen); 26292311Sseb } 26302311Sseb 26312311Sseb mtp->mt_stats = mtrp->mtr_stats; 26322311Sseb mtp->mt_statcount = mtrp->mtr_statcount; 26332311Sseb 26346512Ssowmini mtp->mt_mapping = mtrp->mtr_mapping; 26356512Ssowmini mtp->mt_mappingcount = 
mtrp->mtr_mappingcount; 26366512Ssowmini 26372311Sseb if (mod_hash_insert(i_mactype_hash, 26382311Sseb (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 26392311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26402311Sseb kmem_free(mtp, sizeof (*mtp)); 26412311Sseb return (EEXIST); 26422311Sseb } 26432311Sseb return (0); 26442311Sseb } 26452311Sseb 26462311Sseb int 26472311Sseb mactype_unregister(const char *ident) 26482311Sseb { 26492311Sseb mactype_t *mtp; 26502311Sseb mod_hash_val_t val; 26512311Sseb int err; 26522311Sseb 26532311Sseb /* 26542311Sseb * Let's not allow MAC drivers to use this plugin while we're 26553288Sseb * trying to unregister it. Holding i_mactype_lock also prevents a 26563288Sseb * plugin from unregistering while a MAC driver is attempting to 26573288Sseb * hold a reference to it in i_mactype_getplugin(). 26582311Sseb */ 26593288Sseb mutex_enter(&i_mactype_lock); 26602311Sseb 26612311Sseb if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 26622311Sseb (mod_hash_val_t *)&mtp)) != 0) { 26632311Sseb /* A plugin is trying to unregister, but it never registered. */ 26643288Sseb err = ENXIO; 26653288Sseb goto done; 26662311Sseb } 26672311Sseb 26683288Sseb if (mtp->mt_ref != 0) { 26693288Sseb err = EBUSY; 26703288Sseb goto done; 26712311Sseb } 26722311Sseb 26732311Sseb err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 26742311Sseb ASSERT(err == 0); 26752311Sseb if (err != 0) { 26762311Sseb /* This should never happen, thus the ASSERT() above. 
*/ 26773288Sseb err = EINVAL; 26783288Sseb goto done; 26792311Sseb } 26802311Sseb ASSERT(mtp == (mactype_t *)val); 26812311Sseb 26822311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26832311Sseb kmem_free(mtp, sizeof (mactype_t)); 26843288Sseb done: 26853288Sseb mutex_exit(&i_mactype_lock); 26863288Sseb return (err); 26872311Sseb } 26885903Ssowmini 26898275SEric Cheng /* 26908275SEric Cheng * Returns TRUE when the specified property is intended for the MAC framework, 26918275SEric Cheng * as opposed to driver defined properties. 26928275SEric Cheng */ 26938275SEric Cheng static boolean_t 26948275SEric Cheng mac_is_macprop(mac_prop_t *macprop) 26958275SEric Cheng { 26968275SEric Cheng switch (macprop->mp_id) { 26978275SEric Cheng case MAC_PROP_MAXBW: 26988275SEric Cheng case MAC_PROP_PRIO: 26998275SEric Cheng case MAC_PROP_BIND_CPU: 27008275SEric Cheng return (B_TRUE); 27018275SEric Cheng default: 27028275SEric Cheng return (B_FALSE); 27038275SEric Cheng } 27048275SEric Cheng } 27058275SEric Cheng 27068275SEric Cheng /* 27078275SEric Cheng * mac_set_prop() sets mac or hardware driver properties: 27088275SEric Cheng * mac properties include maxbw, priority, and cpu binding list. Driver 27098275SEric Cheng * properties are private properties to the hardware, such as mtu, speed 27108275SEric Cheng * etc. 27118275SEric Cheng * If the property is a driver property, mac_set_prop() calls driver's callback 27128275SEric Cheng * function to set it. 27138275SEric Cheng * If the property is a mac property, mac_set_prop() invokes mac_set_resources() 27148275SEric Cheng * which will cache the property value in mac_impl_t and may call 27158275SEric Cheng * mac_client_set_resource() to update property value of the primary mac client, 27168275SEric Cheng * if it exists. 
27178275SEric Cheng */ 27185903Ssowmini int 27195903Ssowmini mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) 27205903Ssowmini { 27215903Ssowmini int err = ENOTSUP; 27225903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27235903Ssowmini 27248275SEric Cheng ASSERT(MAC_PERIM_HELD(mh)); 27258275SEric Cheng 27268275SEric Cheng /* If it is mac property, call mac_set_resources() */ 27278275SEric Cheng if (mac_is_macprop(macprop)) { 27288275SEric Cheng mac_resource_props_t mrp; 27298275SEric Cheng 27308275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27318275SEric Cheng return (EINVAL); 27328275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27338275SEric Cheng bcopy(val, &mrp, sizeof (mrp)); 27348275SEric Cheng return (mac_set_resources(mh, &mrp)); 27358275SEric Cheng } 27368275SEric Cheng /* For driver properties, call driver's callback */ 27375903Ssowmini if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 27385903Ssowmini err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 27395903Ssowmini macprop->mp_name, macprop->mp_id, valsize, val); 27405903Ssowmini } 27418275SEric Cheng 27425903Ssowmini return (err); 27435903Ssowmini } 27445903Ssowmini 27458275SEric Cheng /* 27468275SEric Cheng * mac_get_prop() gets mac or hardware driver properties. 27478275SEric Cheng * 27488275SEric Cheng * If the property is a driver property, mac_get_prop() calls driver's callback 27498275SEric Cheng * function to get it. 27508275SEric Cheng * If the property is a mac property, mac_get_prop() invokes mac_get_resources() 27518275SEric Cheng * which returns the cached value in mac_impl_t. 
27528275SEric Cheng */ 27535903Ssowmini int 27548118SVasumathi.Sundaram@Sun.COM mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, 27558118SVasumathi.Sundaram@Sun.COM uint_t *perm) 27565903Ssowmini { 27575903Ssowmini int err = ENOTSUP; 27585903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27596512Ssowmini uint32_t sdu; 27606512Ssowmini link_state_t link_state; 27616512Ssowmini 27628275SEric Cheng /* If mac property, read from cache */ 27638275SEric Cheng if (mac_is_macprop(macprop)) { 27648275SEric Cheng mac_resource_props_t mrp; 27658275SEric Cheng 27668275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27678275SEric Cheng return (EINVAL); 27688275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27698275SEric Cheng mac_get_resources(mh, &mrp); 27708275SEric Cheng bcopy(&mrp, val, sizeof (mac_resource_props_t)); 27718275SEric Cheng return (0); 27728275SEric Cheng } 27738275SEric Cheng 27746512Ssowmini switch (macprop->mp_id) { 27756789Sam223141 case MAC_PROP_MTU: 27766512Ssowmini if (valsize < sizeof (sdu)) 27776512Ssowmini return (EINVAL); 27786789Sam223141 if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { 27796512Ssowmini mac_sdu_get(mh, NULL, &sdu); 27806512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 27818118SVasumathi.Sundaram@Sun.COM if (mac_set_prop(mh, macprop, val, sizeof (sdu)) != 0) 27828118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 27838118SVasumathi.Sundaram@Sun.COM else 27848118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_RW; 27856512Ssowmini return (0); 27866512Ssowmini } else { 27876512Ssowmini if (mip->mi_info.mi_media == DL_ETHER) { 27886512Ssowmini sdu = ETHERMTU; 27896512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 27906512Ssowmini return (0); 27916512Ssowmini } 27926512Ssowmini /* 27936512Ssowmini * ask driver for its default. 
27946512Ssowmini */ 27956512Ssowmini break; 27966512Ssowmini } 27976789Sam223141 case MAC_PROP_STATUS: 27986512Ssowmini if (valsize < sizeof (link_state)) 27996512Ssowmini return (EINVAL); 28008118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 28016512Ssowmini link_state = mac_link_get(mh); 28026512Ssowmini bcopy(&link_state, val, sizeof (link_state)); 28036512Ssowmini return (0); 28046512Ssowmini default: 28056512Ssowmini break; 28068275SEric Cheng 28076512Ssowmini } 28088275SEric Cheng /* If driver property, request from driver */ 28095903Ssowmini if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 28105903Ssowmini err = mip->mi_callbacks->mc_getprop(mip->mi_driver, 28116512Ssowmini macprop->mp_name, macprop->mp_id, macprop->mp_flags, 28128118SVasumathi.Sundaram@Sun.COM valsize, val, perm); 28135903Ssowmini } 28145903Ssowmini return (err); 28155903Ssowmini } 28165903Ssowmini 28178275SEric Cheng void 28186512Ssowmini mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) 28196512Ssowmini { 28206512Ssowmini mac_priv_prop_t *mpriv; 28216512Ssowmini 28226512Ssowmini if (mpp == NULL) 28236512Ssowmini return; 28246512Ssowmini 28256512Ssowmini mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); 28266512Ssowmini (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); 28276512Ssowmini mip->mi_priv_prop = mpriv; 28286512Ssowmini mip->mi_priv_prop_count = nprop; 28296512Ssowmini } 28307406SSowmini.Varadhan@Sun.COM 28318275SEric Cheng void 28327406SSowmini.Varadhan@Sun.COM mac_unregister_priv_prop(mac_impl_t *mip) 28337406SSowmini.Varadhan@Sun.COM { 28347406SSowmini.Varadhan@Sun.COM mac_priv_prop_t *mpriv; 28357406SSowmini.Varadhan@Sun.COM 28367406SSowmini.Varadhan@Sun.COM mpriv = mip->mi_priv_prop; 28377406SSowmini.Varadhan@Sun.COM if (mpriv != NULL) { 28387406SSowmini.Varadhan@Sun.COM kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv)); 28397406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop = NULL; 28407406SSowmini.Varadhan@Sun.COM } 
28417406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop_count = 0; 28427406SSowmini.Varadhan@Sun.COM } 28438275SEric Cheng 28448275SEric Cheng /* 28458275SEric Cheng * mac_ring_t 'mr' macros. Some rogue drivers may access ring structure 28468275SEric Cheng * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 28478275SEric Cheng * cases if MAC free's the ring structure after mac_stop_ring(), any 28488275SEric Cheng * illegal access to the ring structure coming from the driver will panic 28498275SEric Cheng * the system. In order to protect the system from such inadverent access, 28508275SEric Cheng * we maintain a cache of rings in the mac_impl_t after they get free'd up. 28518275SEric Cheng * When packets are received on free'd up rings, MAC (through the generation 28528275SEric Cheng * count mechanism) will drop such packets. 28538275SEric Cheng */ 28548275SEric Cheng static mac_ring_t * 28558275SEric Cheng mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) 28568275SEric Cheng { 28578275SEric Cheng mac_ring_t *ring; 28588275SEric Cheng 28598275SEric Cheng if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 28608275SEric Cheng mutex_enter(&mip->mi_ring_lock); 28618275SEric Cheng if (mip->mi_ring_freelist != NULL) { 28628275SEric Cheng ring = mip->mi_ring_freelist; 28638275SEric Cheng mip->mi_ring_freelist = ring->mr_next; 28648275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 28658275SEric Cheng } else { 28668275SEric Cheng ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 28678275SEric Cheng } 28688275SEric Cheng mutex_exit(&mip->mi_ring_lock); 28698275SEric Cheng } else { 28708275SEric Cheng ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); 28718275SEric Cheng } 28728275SEric Cheng ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 28738275SEric Cheng return (ring); 28748275SEric Cheng } 28758275SEric Cheng 28768275SEric Cheng static void 28778275SEric Cheng mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 28788275SEric Cheng { 
28798275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 28808275SEric Cheng mutex_enter(&mip->mi_ring_lock); 28818275SEric Cheng ring->mr_state = MR_FREE; 28828275SEric Cheng ring->mr_flag = 0; 28838275SEric Cheng ring->mr_next = mip->mi_ring_freelist; 28848275SEric Cheng mip->mi_ring_freelist = ring; 28858275SEric Cheng mutex_exit(&mip->mi_ring_lock); 28868275SEric Cheng } else { 28878275SEric Cheng kmem_free(ring, sizeof (mac_ring_t)); 28888275SEric Cheng } 28898275SEric Cheng } 28908275SEric Cheng 28918275SEric Cheng static void 28928275SEric Cheng mac_ring_freeall(mac_impl_t *mip) 28938275SEric Cheng { 28948275SEric Cheng mac_ring_t *ring_next; 28958275SEric Cheng mutex_enter(&mip->mi_ring_lock); 28968275SEric Cheng mac_ring_t *ring = mip->mi_ring_freelist; 28978275SEric Cheng while (ring != NULL) { 28988275SEric Cheng ring_next = ring->mr_next; 28998275SEric Cheng kmem_cache_free(mac_ring_cache, ring); 29008275SEric Cheng ring = ring_next; 29018275SEric Cheng } 29028275SEric Cheng mip->mi_ring_freelist = NULL; 29038275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29048275SEric Cheng } 29058275SEric Cheng 29068275SEric Cheng int 29078275SEric Cheng mac_start_ring(mac_ring_t *ring) 29088275SEric Cheng { 29098275SEric Cheng int rv = 0; 29108275SEric Cheng 29118275SEric Cheng if (ring->mr_start != NULL) 29128275SEric Cheng rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 29138275SEric Cheng 29148275SEric Cheng return (rv); 29158275SEric Cheng } 29168275SEric Cheng 29178275SEric Cheng void 29188275SEric Cheng mac_stop_ring(mac_ring_t *ring) 29198275SEric Cheng { 29208275SEric Cheng if (ring->mr_stop != NULL) 29218275SEric Cheng ring->mr_stop(ring->mr_driver); 29228275SEric Cheng 29238275SEric Cheng /* 29248275SEric Cheng * Increment the ring generation number for this ring. 
29258275SEric Cheng */ 29268275SEric Cheng ring->mr_gen_num++; 29278275SEric Cheng } 29288275SEric Cheng 29298275SEric Cheng int 29308275SEric Cheng mac_start_group(mac_group_t *group) 29318275SEric Cheng { 29328275SEric Cheng int rv = 0; 29338275SEric Cheng 29348275SEric Cheng if (group->mrg_start != NULL) 29358275SEric Cheng rv = group->mrg_start(group->mrg_driver); 29368275SEric Cheng 29378275SEric Cheng return (rv); 29388275SEric Cheng } 29398275SEric Cheng 29408275SEric Cheng void 29418275SEric Cheng mac_stop_group(mac_group_t *group) 29428275SEric Cheng { 29438275SEric Cheng if (group->mrg_stop != NULL) 29448275SEric Cheng group->mrg_stop(group->mrg_driver); 29458275SEric Cheng } 29468275SEric Cheng 29478275SEric Cheng /* 29488275SEric Cheng * Called from mac_start() on the default Rx group. Broadcast and multicast 29498275SEric Cheng * packets are received only on the default group. Hence the default group 29508275SEric Cheng * needs to be up even if the primary client is not up, for the other groups 29518275SEric Cheng * to be functional. We do this by calling this function at mac_start time 29528275SEric Cheng * itself. However the broadcast packets that are received can't make their 29538275SEric Cheng * way beyond mac_rx until a mac client creates a broadcast flow. 
29548275SEric Cheng */ 29558275SEric Cheng static int 29568275SEric Cheng mac_start_group_and_rings(mac_group_t *group) 29578275SEric Cheng { 29588275SEric Cheng mac_ring_t *ring; 29598275SEric Cheng int rv = 0; 29608275SEric Cheng 29618275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 29628275SEric Cheng if ((rv = mac_start_group(group)) != 0) 29638275SEric Cheng return (rv); 29648275SEric Cheng 29658275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 29668275SEric Cheng ASSERT(ring->mr_state == MR_FREE); 29678275SEric Cheng if ((rv = mac_start_ring(ring)) != 0) 29688275SEric Cheng goto error; 29698275SEric Cheng ring->mr_state = MR_INUSE; 29708275SEric Cheng ring->mr_classify_type = MAC_SW_CLASSIFIER; 29718275SEric Cheng } 29728275SEric Cheng return (0); 29738275SEric Cheng 29748275SEric Cheng error: 29758275SEric Cheng mac_stop_group_and_rings(group); 29768275SEric Cheng return (rv); 29778275SEric Cheng } 29788275SEric Cheng 29798275SEric Cheng /* Called from mac_stop on the default Rx group */ 29808275SEric Cheng static void 29818275SEric Cheng mac_stop_group_and_rings(mac_group_t *group) 29828275SEric Cheng { 29838275SEric Cheng mac_ring_t *ring; 29848275SEric Cheng 29858275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 29868275SEric Cheng if (ring->mr_state != MR_FREE) { 29878275SEric Cheng mac_stop_ring(ring); 29888275SEric Cheng ring->mr_state = MR_FREE; 29898275SEric Cheng ring->mr_flag = 0; 29908275SEric Cheng ring->mr_classify_type = MAC_NO_CLASSIFIER; 29918275SEric Cheng } 29928275SEric Cheng } 29938275SEric Cheng mac_stop_group(group); 29948275SEric Cheng } 29958275SEric Cheng 29968275SEric Cheng 29978275SEric Cheng static mac_ring_t * 29988275SEric Cheng mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 29998275SEric Cheng mac_capab_rings_t *cap_rings) 30008275SEric Cheng { 30018275SEric Cheng mac_ring_t *ring; 30028275SEric Cheng mac_ring_info_t 
ring_info; 30038275SEric Cheng 30048275SEric Cheng ring = mac_ring_alloc(mip, cap_rings); 30058275SEric Cheng 30068275SEric Cheng /* Prepare basic information of ring */ 30078275SEric Cheng ring->mr_index = index; 30088275SEric Cheng ring->mr_type = group->mrg_type; 30098275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 30108275SEric Cheng 30118275SEric Cheng /* Insert the new ring to the list. */ 30128275SEric Cheng ring->mr_next = group->mrg_rings; 30138275SEric Cheng group->mrg_rings = ring; 30148275SEric Cheng 30158275SEric Cheng /* Zero to reuse the info data structure */ 30168275SEric Cheng bzero(&ring_info, sizeof (ring_info)); 30178275SEric Cheng 30188275SEric Cheng /* Query ring information from driver */ 30198275SEric Cheng cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 30208275SEric Cheng index, &ring_info, (mac_ring_handle_t)ring); 30218275SEric Cheng 30228275SEric Cheng ring->mr_info = ring_info; 30238275SEric Cheng 30248275SEric Cheng /* Update ring's status */ 30258275SEric Cheng ring->mr_state = MR_FREE; 30268275SEric Cheng ring->mr_flag = 0; 30278275SEric Cheng 30288275SEric Cheng /* Update the ring count of the group */ 30298275SEric Cheng group->mrg_cur_count++; 30308275SEric Cheng return (ring); 30318275SEric Cheng } 30328275SEric Cheng 30338275SEric Cheng /* 30348275SEric Cheng * Rings are chained together for easy regrouping. 30358275SEric Cheng */ 30368275SEric Cheng static void 30378275SEric Cheng mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 30388275SEric Cheng mac_capab_rings_t *cap_rings) 30398275SEric Cheng { 30408275SEric Cheng int index; 30418275SEric Cheng 30428275SEric Cheng /* 30438275SEric Cheng * Initialize all ring members of this group. Size of zero will not 30448275SEric Cheng * enter the loop, so it's safe for initializing an empty group. 
30458275SEric Cheng */ 30468275SEric Cheng for (index = size - 1; index >= 0; index--) 30478275SEric Cheng (void) mac_init_ring(mip, group, index, cap_rings); 30488275SEric Cheng } 30498275SEric Cheng 30508275SEric Cheng int 30518275SEric Cheng mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 30528275SEric Cheng { 30538275SEric Cheng mac_capab_rings_t *cap_rings; 30548275SEric Cheng mac_group_t *group, *groups; 30558275SEric Cheng mac_group_info_t group_info; 30568275SEric Cheng uint_t group_free = 0; 30578275SEric Cheng uint_t ring_left; 30588275SEric Cheng mac_ring_t *ring; 30598275SEric Cheng int g, err = 0; 30608275SEric Cheng 30618275SEric Cheng switch (rtype) { 30628275SEric Cheng case MAC_RING_TYPE_RX: 30638275SEric Cheng ASSERT(mip->mi_rx_groups == NULL); 30648275SEric Cheng 30658275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 30668275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_RX; 30678275SEric Cheng break; 30688275SEric Cheng case MAC_RING_TYPE_TX: 30698275SEric Cheng ASSERT(mip->mi_tx_groups == NULL); 30708275SEric Cheng 30718275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 30728275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_TX; 30738275SEric Cheng break; 30748275SEric Cheng default: 30758275SEric Cheng ASSERT(B_FALSE); 30768275SEric Cheng } 30778275SEric Cheng 30788275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, 30798275SEric Cheng cap_rings)) 30808275SEric Cheng return (0); 30818275SEric Cheng 30828275SEric Cheng /* 30838275SEric Cheng * Allocate a contiguous buffer for all groups. 30848275SEric Cheng */ 30858275SEric Cheng groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), 30868275SEric Cheng KM_SLEEP); 30878275SEric Cheng 30888275SEric Cheng ring_left = cap_rings->mr_rnum; 30898275SEric Cheng 30908275SEric Cheng /* 30918275SEric Cheng * Get all ring groups if any, and get their ring members 30928275SEric Cheng * if any. 
30938275SEric Cheng */ 30948275SEric Cheng for (g = 0; g < cap_rings->mr_gnum; g++) { 30958275SEric Cheng group = groups + g; 30968275SEric Cheng 30978275SEric Cheng /* Prepare basic information of the group */ 30988275SEric Cheng group->mrg_index = g; 30998275SEric Cheng group->mrg_type = rtype; 31008275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 31018275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 31028275SEric Cheng group->mrg_next = group + 1; 31038275SEric Cheng 31048275SEric Cheng /* Zero to reuse the info data structure */ 31058275SEric Cheng bzero(&group_info, sizeof (group_info)); 31068275SEric Cheng 31078275SEric Cheng /* Query group information from driver */ 31088275SEric Cheng cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, 31098275SEric Cheng (mac_group_handle_t)group); 31108275SEric Cheng 31118275SEric Cheng switch (cap_rings->mr_group_type) { 31128275SEric Cheng case MAC_GROUP_TYPE_DYNAMIC: 31138275SEric Cheng if (cap_rings->mr_gaddring == NULL || 31148275SEric Cheng cap_rings->mr_gremring == NULL) { 31158275SEric Cheng DTRACE_PROBE3( 31168275SEric Cheng mac__init__rings_no_addremring, 31178275SEric Cheng char *, mip->mi_name, 31188275SEric Cheng mac_group_add_ring_t, 31198275SEric Cheng cap_rings->mr_gaddring, 31208275SEric Cheng mac_group_add_ring_t, 31218275SEric Cheng cap_rings->mr_gremring); 31228275SEric Cheng err = EINVAL; 31238275SEric Cheng goto bail; 31248275SEric Cheng } 31258275SEric Cheng 31268275SEric Cheng switch (rtype) { 31278275SEric Cheng case MAC_RING_TYPE_RX: 31288275SEric Cheng /* 31298275SEric Cheng * The first RX group must have non-zero 31308275SEric Cheng * rings, and the following groups must 31318275SEric Cheng * have zero rings. 
31328275SEric Cheng */ 31338275SEric Cheng if (g == 0 && group_info.mgi_count == 0) { 31348275SEric Cheng DTRACE_PROBE1( 31358275SEric Cheng mac__init__rings__rx__def__zero, 31368275SEric Cheng char *, mip->mi_name); 31378275SEric Cheng err = EINVAL; 31388275SEric Cheng goto bail; 31398275SEric Cheng } 31408275SEric Cheng if (g > 0 && group_info.mgi_count != 0) { 31418275SEric Cheng DTRACE_PROBE3( 31428275SEric Cheng mac__init__rings__rx__nonzero, 31438275SEric Cheng char *, mip->mi_name, 31448275SEric Cheng int, g, int, group_info.mgi_count); 31458275SEric Cheng err = EINVAL; 31468275SEric Cheng goto bail; 31478275SEric Cheng } 31488275SEric Cheng break; 31498275SEric Cheng case MAC_RING_TYPE_TX: 31508275SEric Cheng /* 31518275SEric Cheng * All TX ring groups must have zero rings. 31528275SEric Cheng */ 31538275SEric Cheng if (group_info.mgi_count != 0) { 31548275SEric Cheng DTRACE_PROBE3( 31558275SEric Cheng mac__init__rings__tx__nonzero, 31568275SEric Cheng char *, mip->mi_name, 31578275SEric Cheng int, g, int, group_info.mgi_count); 31588275SEric Cheng err = EINVAL; 31598275SEric Cheng goto bail; 31608275SEric Cheng } 31618275SEric Cheng break; 31628275SEric Cheng } 31638275SEric Cheng break; 31648275SEric Cheng case MAC_GROUP_TYPE_STATIC: 31658275SEric Cheng /* 31668275SEric Cheng * Note that an empty group is allowed, e.g., an aggr 31678275SEric Cheng * would start with an empty group. 31688275SEric Cheng */ 31698275SEric Cheng break; 31708275SEric Cheng default: 31718275SEric Cheng /* unknown group type */ 31728275SEric Cheng DTRACE_PROBE2(mac__init__rings__unknown__type, 31738275SEric Cheng char *, mip->mi_name, 31748275SEric Cheng int, cap_rings->mr_group_type); 31758275SEric Cheng err = EINVAL; 31768275SEric Cheng goto bail; 31778275SEric Cheng } 31788275SEric Cheng 31798275SEric Cheng 31808275SEric Cheng /* 31818275SEric Cheng * Driver must register group->mgi_addmac/remmac() for rx groups 31828275SEric Cheng * to support multiple MAC addresses. 
31838275SEric Cheng */ 31848275SEric Cheng if (rtype == MAC_RING_TYPE_RX) { 31858275SEric Cheng if ((group_info.mgi_addmac == NULL) || 31868275SEric Cheng (group_info.mgi_addmac == NULL)) 31878275SEric Cheng goto bail; 31888275SEric Cheng } 31898275SEric Cheng 31908275SEric Cheng /* Cache driver-supplied information */ 31918275SEric Cheng group->mrg_info = group_info; 31928275SEric Cheng 31938275SEric Cheng /* Update the group's status and group count. */ 31948275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 31958275SEric Cheng group_free++; 31968275SEric Cheng 31978275SEric Cheng group->mrg_rings = NULL; 31988275SEric Cheng group->mrg_cur_count = 0; 31998275SEric Cheng mac_init_group(mip, group, group_info.mgi_count, cap_rings); 32008275SEric Cheng ring_left -= group_info.mgi_count; 32018275SEric Cheng 32028275SEric Cheng /* The current group size should be equal to default value */ 32038275SEric Cheng ASSERT(group->mrg_cur_count == group_info.mgi_count); 32048275SEric Cheng } 32058275SEric Cheng 32068275SEric Cheng /* Build up a dummy group for free resources as a pool */ 32078275SEric Cheng group = groups + cap_rings->mr_gnum; 32088275SEric Cheng 32098275SEric Cheng /* Prepare basic information of the group */ 32108275SEric Cheng group->mrg_index = -1; 32118275SEric Cheng group->mrg_type = rtype; 32128275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 32138275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 32148275SEric Cheng group->mrg_next = NULL; 32158275SEric Cheng 32168275SEric Cheng /* 32178275SEric Cheng * If there are ungrouped rings, allocate a continuous buffer for 32188275SEric Cheng * remaining resources. 
32198275SEric Cheng */ 32208275SEric Cheng if (ring_left != 0) { 32218275SEric Cheng group->mrg_rings = NULL; 32228275SEric Cheng group->mrg_cur_count = 0; 32238275SEric Cheng mac_init_group(mip, group, ring_left, cap_rings); 32248275SEric Cheng 32258275SEric Cheng /* The current group size should be equal to ring_left */ 32268275SEric Cheng ASSERT(group->mrg_cur_count == ring_left); 32278275SEric Cheng 32288275SEric Cheng ring_left = 0; 32298275SEric Cheng 32308275SEric Cheng /* Update this group's status */ 32318275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 32328275SEric Cheng } else 32338275SEric Cheng group->mrg_rings = NULL; 32348275SEric Cheng 32358275SEric Cheng ASSERT(ring_left == 0); 32368275SEric Cheng 32378275SEric Cheng bail: 32388275SEric Cheng /* Cache other important information to finalize the initialization */ 32398275SEric Cheng switch (rtype) { 32408275SEric Cheng case MAC_RING_TYPE_RX: 32418275SEric Cheng mip->mi_rx_group_type = cap_rings->mr_group_type; 32428275SEric Cheng mip->mi_rx_group_count = cap_rings->mr_gnum; 32438275SEric Cheng mip->mi_rx_groups = groups; 32448275SEric Cheng break; 32458275SEric Cheng case MAC_RING_TYPE_TX: 32468275SEric Cheng mip->mi_tx_group_type = cap_rings->mr_group_type; 32478275SEric Cheng mip->mi_tx_group_count = cap_rings->mr_gnum; 32488275SEric Cheng mip->mi_tx_group_free = group_free; 32498275SEric Cheng mip->mi_tx_groups = groups; 32508275SEric Cheng 32518275SEric Cheng /* 32528275SEric Cheng * Ring 0 is used as the default one and it could be assigned 32538275SEric Cheng * to a client as well. 
32548275SEric Cheng */ 32558275SEric Cheng group = groups + cap_rings->mr_gnum; 32568275SEric Cheng ring = group->mrg_rings; 32578275SEric Cheng while ((ring->mr_index != 0) && (ring->mr_next != NULL)) 32588275SEric Cheng ring = ring->mr_next; 32598275SEric Cheng ASSERT(ring->mr_index == 0); 32608275SEric Cheng mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 32618275SEric Cheng break; 32628275SEric Cheng default: 32638275SEric Cheng ASSERT(B_FALSE); 32648275SEric Cheng } 32658275SEric Cheng 32668275SEric Cheng if (err != 0) 32678275SEric Cheng mac_free_rings(mip, rtype); 32688275SEric Cheng 32698275SEric Cheng return (err); 32708275SEric Cheng } 32718275SEric Cheng 32728275SEric Cheng /* 32738275SEric Cheng * Called to free all ring groups with particular type. It's supposed all groups 32748275SEric Cheng * have been released by clinet. 32758275SEric Cheng */ 32768275SEric Cheng void 32778275SEric Cheng mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) 32788275SEric Cheng { 32798275SEric Cheng mac_group_t *group, *groups; 32808275SEric Cheng uint_t group_count; 32818275SEric Cheng 32828275SEric Cheng switch (rtype) { 32838275SEric Cheng case MAC_RING_TYPE_RX: 32848275SEric Cheng if (mip->mi_rx_groups == NULL) 32858275SEric Cheng return; 32868275SEric Cheng 32878275SEric Cheng groups = mip->mi_rx_groups; 32888275SEric Cheng group_count = mip->mi_rx_group_count; 32898275SEric Cheng 32908275SEric Cheng mip->mi_rx_groups = NULL; 32918275SEric Cheng mip->mi_rx_group_count = 0; 32928275SEric Cheng break; 32938275SEric Cheng case MAC_RING_TYPE_TX: 32948275SEric Cheng ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free); 32958275SEric Cheng 32968275SEric Cheng if (mip->mi_tx_groups == NULL) 32978275SEric Cheng return; 32988275SEric Cheng 32998275SEric Cheng groups = mip->mi_tx_groups; 33008275SEric Cheng group_count = mip->mi_tx_group_count; 33018275SEric Cheng 33028275SEric Cheng mip->mi_tx_groups = NULL; 33038275SEric Cheng mip->mi_tx_group_count = 0; 
33048275SEric Cheng mip->mi_tx_group_free = 0; 33058275SEric Cheng mip->mi_default_tx_ring = NULL; 33068275SEric Cheng break; 33078275SEric Cheng default: 33088275SEric Cheng ASSERT(B_FALSE); 33098275SEric Cheng } 33108275SEric Cheng 33118275SEric Cheng for (group = groups; group != NULL; group = group->mrg_next) { 33128275SEric Cheng mac_ring_t *ring; 33138275SEric Cheng 33148275SEric Cheng if (group->mrg_cur_count == 0) 33158275SEric Cheng continue; 33168275SEric Cheng 33178275SEric Cheng ASSERT(group->mrg_rings != NULL); 33188275SEric Cheng 33198275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 33208275SEric Cheng group->mrg_rings = ring->mr_next; 33218275SEric Cheng mac_ring_free(mip, ring); 33228275SEric Cheng } 33238275SEric Cheng } 33248275SEric Cheng 33258275SEric Cheng /* Free all the cached rings */ 33268275SEric Cheng mac_ring_freeall(mip); 33278275SEric Cheng /* Free the block of group data strutures */ 33288275SEric Cheng kmem_free(groups, sizeof (mac_group_t) * (group_count + 1)); 33298275SEric Cheng } 33308275SEric Cheng 33318275SEric Cheng /* 33328275SEric Cheng * Associate a MAC address with a receive group. 33338275SEric Cheng * 33348275SEric Cheng * The return value of this function should always be checked properly, because 33358275SEric Cheng * any type of failure could cause unexpected results. A group can be added 33368275SEric Cheng * or removed with a MAC address only after it has been reserved. Ideally, 33378275SEric Cheng * a successful reservation always leads to calling mac_group_addmac() to 33388275SEric Cheng * steer desired traffic. Failure of adding an unicast MAC address doesn't 33398275SEric Cheng * always imply that the group is functioning abnormally. 33408275SEric Cheng * 33418275SEric Cheng * Currently this function is called everywhere, and it reflects assumptions 33428275SEric Cheng * about MAC addresses in the implementation. CR 6735196. 
33438275SEric Cheng */ 33448275SEric Cheng int 33458275SEric Cheng mac_group_addmac(mac_group_t *group, const uint8_t *addr) 33468275SEric Cheng { 33478275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 33488275SEric Cheng ASSERT(group->mrg_info.mgi_addmac != NULL); 33498275SEric Cheng 33508275SEric Cheng return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); 33518275SEric Cheng } 33528275SEric Cheng 33538275SEric Cheng /* 33548275SEric Cheng * Remove the association between MAC address and receive group. 33558275SEric Cheng */ 33568275SEric Cheng int 33578275SEric Cheng mac_group_remmac(mac_group_t *group, const uint8_t *addr) 33588275SEric Cheng { 33598275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 33608275SEric Cheng ASSERT(group->mrg_info.mgi_remmac != NULL); 33618275SEric Cheng 33628275SEric Cheng return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); 33638275SEric Cheng } 33648275SEric Cheng 33658275SEric Cheng /* 33668275SEric Cheng * Release a ring in use by marking it MR_FREE. 33678275SEric Cheng * Any other client may reserve it for its use. 33688275SEric Cheng */ 33698275SEric Cheng void 33708275SEric Cheng mac_release_tx_ring(mac_ring_handle_t rh) 33718275SEric Cheng { 33728275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 33738275SEric Cheng mac_group_t *group = (mac_group_t *)ring->mr_gh; 33748275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 33758275SEric Cheng 33768275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 33778275SEric Cheng ASSERT(ring->mr_state != MR_FREE); 33788275SEric Cheng 33798275SEric Cheng /* 33808275SEric Cheng * Default tx ring will be released by mac_stop(). 
33818275SEric Cheng */ 33828275SEric Cheng if (rh == mip->mi_default_tx_ring) 33838275SEric Cheng return; 33848275SEric Cheng 33858275SEric Cheng mac_stop_ring(ring); 33868275SEric Cheng 33878275SEric Cheng ring->mr_state = MR_FREE; 33888275SEric Cheng ring->mr_flag = 0; 33898275SEric Cheng } 33908275SEric Cheng 33918275SEric Cheng /* 33928275SEric Cheng * Send packets through a selected tx ring. 33938275SEric Cheng */ 33948275SEric Cheng mblk_t * 33958275SEric Cheng mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp) 33968275SEric Cheng { 33978275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 33988275SEric Cheng mac_ring_info_t *info = &ring->mr_info; 33998275SEric Cheng 34008275SEric Cheng ASSERT(ring->mr_type == MAC_RING_TYPE_TX); 34018275SEric Cheng ASSERT(ring->mr_state >= MR_INUSE); 34028275SEric Cheng ASSERT(info->mri_tx != NULL); 34038275SEric Cheng 34048275SEric Cheng return (info->mri_tx(info->mri_driver, mp)); 34058275SEric Cheng } 34068275SEric Cheng 34078275SEric Cheng /* 34088275SEric Cheng * Find a ring from its index. 34098275SEric Cheng */ 34108275SEric Cheng mac_ring_t * 34118275SEric Cheng mac_find_ring(mac_group_t *group, int index) 34128275SEric Cheng { 34138275SEric Cheng mac_ring_t *ring = group->mrg_rings; 34148275SEric Cheng 34158275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) 34168275SEric Cheng if (ring->mr_index == index) 34178275SEric Cheng break; 34188275SEric Cheng 34198275SEric Cheng return (ring); 34208275SEric Cheng } 34218275SEric Cheng /* 34228275SEric Cheng * Add a ring to an existing group. 34238275SEric Cheng * 34248275SEric Cheng * The ring must be either passed directly (for example if the ring 34258275SEric Cheng * movement is initiated by the framework), or specified through a driver 34268275SEric Cheng * index (for example when the ring is added by the driver. 34278275SEric Cheng * 34288275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
34298275SEric Cheng */ 34308275SEric Cheng int 34318275SEric Cheng i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) 34328275SEric Cheng { 34338275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 34348275SEric Cheng mac_capab_rings_t *cap_rings; 34358275SEric Cheng boolean_t driver_call = (ring == NULL); 34368275SEric Cheng mac_group_type_t group_type; 34378275SEric Cheng int ret = 0; 34388275SEric Cheng 34398275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 34408275SEric Cheng 34418275SEric Cheng switch (group->mrg_type) { 34428275SEric Cheng case MAC_RING_TYPE_RX: 34438275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 34448275SEric Cheng group_type = mip->mi_rx_group_type; 34458275SEric Cheng break; 34468275SEric Cheng case MAC_RING_TYPE_TX: 34478275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 34488275SEric Cheng group_type = mip->mi_tx_group_type; 34498275SEric Cheng break; 34508275SEric Cheng default: 34518275SEric Cheng ASSERT(B_FALSE); 34528275SEric Cheng } 34538275SEric Cheng 34548275SEric Cheng /* 34558275SEric Cheng * There should be no ring with the same ring index in the target 34568275SEric Cheng * group. 34578275SEric Cheng */ 34588275SEric Cheng ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == 34598275SEric Cheng NULL); 34608275SEric Cheng 34618275SEric Cheng if (driver_call) { 34628275SEric Cheng /* 34638275SEric Cheng * The function is called as a result of a request from 34648275SEric Cheng * a driver to add a ring to an existing group, for example 34658275SEric Cheng * from the aggregation driver. Allocate a new mac_ring_t 34668275SEric Cheng * for that ring. 
34678275SEric Cheng */ 34688275SEric Cheng ring = mac_init_ring(mip, group, index, cap_rings); 34698275SEric Cheng ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT); 34708275SEric Cheng } else { 34718275SEric Cheng /* 34728275SEric Cheng * The function is called as a result of a MAC layer request 34738275SEric Cheng * to add a ring to an existing group. In this case the 34748275SEric Cheng * ring is being moved between groups, which requires 34758275SEric Cheng * the underlying driver to support dynamic grouping, 34768275SEric Cheng * and the mac_ring_t already exists. 34778275SEric Cheng */ 34788275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 34798275SEric Cheng ASSERT(cap_rings->mr_gaddring != NULL); 34808275SEric Cheng ASSERT(ring->mr_gh == NULL); 34818275SEric Cheng } 34828275SEric Cheng 34838275SEric Cheng /* 34848275SEric Cheng * At this point the ring should not be in use, and it should be 34858275SEric Cheng * of the right for the target group. 34868275SEric Cheng */ 34878275SEric Cheng ASSERT(ring->mr_state < MR_INUSE); 34888275SEric Cheng ASSERT(ring->mr_srs == NULL); 34898275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 34908275SEric Cheng 34918275SEric Cheng if (!driver_call) { 34928275SEric Cheng /* 34938275SEric Cheng * Add the driver level hardware ring if the process was not 34948275SEric Cheng * initiated by the driver, and the target group is not the 34958275SEric Cheng * group. 34968275SEric Cheng */ 34978275SEric Cheng if (group->mrg_driver != NULL) { 34988275SEric Cheng cap_rings->mr_gaddring(group->mrg_driver, 34998275SEric Cheng ring->mr_driver, ring->mr_type); 35008275SEric Cheng } 35018275SEric Cheng 35028275SEric Cheng /* 35038275SEric Cheng * Insert the ring ahead existing rings. 
35048275SEric Cheng */ 35058275SEric Cheng ring->mr_next = group->mrg_rings; 35068275SEric Cheng group->mrg_rings = ring; 35078275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 35088275SEric Cheng group->mrg_cur_count++; 35098275SEric Cheng } 35108275SEric Cheng 35118275SEric Cheng /* 35128275SEric Cheng * If the group has not been actively used, we're done. 35138275SEric Cheng */ 35148275SEric Cheng if (group->mrg_index != -1 && 35158275SEric Cheng group->mrg_state < MAC_GROUP_STATE_RESERVED) 35168275SEric Cheng return (0); 35178275SEric Cheng 35188275SEric Cheng /* 35198275SEric Cheng * Set up SRS/SR according to the ring type. 35208275SEric Cheng */ 35218275SEric Cheng switch (ring->mr_type) { 35228275SEric Cheng case MAC_RING_TYPE_RX: 35238275SEric Cheng /* 35248275SEric Cheng * Setup SRS on top of the new ring if the group is 35258275SEric Cheng * reserved for someones exclusive use. 35268275SEric Cheng */ 35278275SEric Cheng if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { 35288275SEric Cheng flow_entry_t *flent; 35298275SEric Cheng mac_client_impl_t *mcip; 35308275SEric Cheng 35318275SEric Cheng mcip = MAC_RX_GROUP_ONLY_CLIENT(group); 35328275SEric Cheng ASSERT(mcip != NULL); 35338275SEric Cheng flent = mcip->mci_flent; 35348275SEric Cheng ASSERT(flent->fe_rx_srs_cnt > 0); 35358275SEric Cheng mac_srs_group_setup(mcip, flent, group, SRST_LINK); 35368275SEric Cheng } 35378275SEric Cheng break; 35388275SEric Cheng case MAC_RING_TYPE_TX: 35398275SEric Cheng /* 35408275SEric Cheng * For TX this function is only invoked during the 35418275SEric Cheng * initial creation of a group when a share is 35428275SEric Cheng * associated with a MAC client. So the datapath is not 35438275SEric Cheng * yet setup, and will be setup later after the 35448275SEric Cheng * group has been reserved and populated. 
35458275SEric Cheng */ 35468275SEric Cheng break; 35478275SEric Cheng default: 35488275SEric Cheng ASSERT(B_FALSE); 35498275SEric Cheng } 35508275SEric Cheng 35518275SEric Cheng /* 35528275SEric Cheng * Start the ring if needed. Failure causes to undo the grouping action. 35538275SEric Cheng */ 35548275SEric Cheng if ((ret = mac_start_ring(ring)) != 0) { 35558275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 35568275SEric Cheng if (ring->mr_srs != NULL) { 35578275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 35588275SEric Cheng ring->mr_srs = NULL; 35598275SEric Cheng } 35608275SEric Cheng } 35618275SEric Cheng if (!driver_call) { 35628275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 35638275SEric Cheng ring->mr_driver, ring->mr_type); 35648275SEric Cheng } 35658275SEric Cheng group->mrg_cur_count--; 35668275SEric Cheng group->mrg_rings = ring->mr_next; 35678275SEric Cheng 35688275SEric Cheng ring->mr_gh = NULL; 35698275SEric Cheng 35708275SEric Cheng if (driver_call) 35718275SEric Cheng mac_ring_free(mip, ring); 35728275SEric Cheng 35738275SEric Cheng return (ret); 35748275SEric Cheng } 35758275SEric Cheng 35768275SEric Cheng /* 35778275SEric Cheng * Update the ring's state. 35788275SEric Cheng */ 35798275SEric Cheng ring->mr_state = MR_INUSE; 35808275SEric Cheng MAC_RING_UNMARK(ring, MR_INCIPIENT); 35818275SEric Cheng return (0); 35828275SEric Cheng } 35838275SEric Cheng 35848275SEric Cheng /* 35858275SEric Cheng * Remove a ring from it's current group. MAC internal function for dynamic 35868275SEric Cheng * grouping. 35878275SEric Cheng * 35888275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
35898275SEric Cheng */ 35908275SEric Cheng void 35918275SEric Cheng i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, 35928275SEric Cheng boolean_t driver_call) 35938275SEric Cheng { 35948275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 35958275SEric Cheng mac_capab_rings_t *cap_rings = NULL; 35968275SEric Cheng mac_group_type_t group_type; 35978275SEric Cheng 35988275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 35998275SEric Cheng 36008275SEric Cheng ASSERT(mac_find_ring(group, ring->mr_index) == ring); 36018275SEric Cheng ASSERT((mac_group_t *)ring->mr_gh == group); 36028275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 36038275SEric Cheng 36048275SEric Cheng switch (ring->mr_type) { 36058275SEric Cheng case MAC_RING_TYPE_RX: 36068275SEric Cheng group_type = mip->mi_rx_group_type; 36078275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 36088275SEric Cheng 36098275SEric Cheng if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) 36108275SEric Cheng mac_stop_ring(ring); 36118275SEric Cheng 36128275SEric Cheng /* 36138275SEric Cheng * Only hardware classified packets hold a reference to the 36148275SEric Cheng * ring all the way up the Rx path. mac_rx_srs_remove() 36158275SEric Cheng * will take care of quiescing the Rx path and removing the 36168275SEric Cheng * SRS. The software classified path neither holds a reference 36178275SEric Cheng * nor any association with the ring in mac_rx. 
36188275SEric Cheng */ 36198275SEric Cheng if (ring->mr_srs != NULL) { 36208275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 36218275SEric Cheng ring->mr_srs = NULL; 36228275SEric Cheng } 36238275SEric Cheng ring->mr_state = MR_FREE; 36248275SEric Cheng ring->mr_flag = 0; 36258275SEric Cheng 36268275SEric Cheng break; 36278275SEric Cheng case MAC_RING_TYPE_TX: 36288275SEric Cheng /* 36298275SEric Cheng * For TX this function is only invoked in two 36308275SEric Cheng * cases: 36318275SEric Cheng * 36328275SEric Cheng * 1) In the case of a failure during the 36338275SEric Cheng * initial creation of a group when a share is 36348275SEric Cheng * associated with a MAC client. So the SRS is not 36358275SEric Cheng * yet setup, and will be setup later after the 36368275SEric Cheng * group has been reserved and populated. 36378275SEric Cheng * 36388275SEric Cheng * 2) From mac_release_tx_group() when freeing 36398275SEric Cheng * a TX SRS. 36408275SEric Cheng * 36418275SEric Cheng * In both cases the SRS and its soft rings are 36428275SEric Cheng * already quiesced. 36438275SEric Cheng */ 36448275SEric Cheng ASSERT(!driver_call); 36458275SEric Cheng group_type = mip->mi_tx_group_type; 36468275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 36478275SEric Cheng break; 36488275SEric Cheng default: 36498275SEric Cheng ASSERT(B_FALSE); 36508275SEric Cheng } 36518275SEric Cheng 36528275SEric Cheng /* 36538275SEric Cheng * Remove the ring from the group. 
36548275SEric Cheng */ 36558275SEric Cheng if (ring == group->mrg_rings) 36568275SEric Cheng group->mrg_rings = ring->mr_next; 36578275SEric Cheng else { 36588275SEric Cheng mac_ring_t *pre; 36598275SEric Cheng 36608275SEric Cheng pre = group->mrg_rings; 36618275SEric Cheng while (pre->mr_next != ring) 36628275SEric Cheng pre = pre->mr_next; 36638275SEric Cheng pre->mr_next = ring->mr_next; 36648275SEric Cheng } 36658275SEric Cheng group->mrg_cur_count--; 36668275SEric Cheng 36678275SEric Cheng if (!driver_call) { 36688275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 36698275SEric Cheng ASSERT(cap_rings->mr_gremring != NULL); 36708275SEric Cheng 36718275SEric Cheng /* 36728275SEric Cheng * Remove the driver level hardware ring. 36738275SEric Cheng */ 36748275SEric Cheng if (group->mrg_driver != NULL) { 36758275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 36768275SEric Cheng ring->mr_driver, ring->mr_type); 36778275SEric Cheng } 36788275SEric Cheng } 36798275SEric Cheng 36808275SEric Cheng ring->mr_gh = NULL; 36818275SEric Cheng if (driver_call) { 36828275SEric Cheng mac_ring_free(mip, ring); 36838275SEric Cheng } else { 36848275SEric Cheng ring->mr_state = MR_FREE; 36858275SEric Cheng ring->mr_flag = 0; 36868275SEric Cheng } 36878275SEric Cheng } 36888275SEric Cheng 36898275SEric Cheng /* 36908275SEric Cheng * Move a ring to the target group. If needed, remove the ring from the group 36918275SEric Cheng * that it currently belongs to. 36928275SEric Cheng * 36938275SEric Cheng * The caller need to enter MAC's perimeter by calling mac_perim_enter(). 
36948275SEric Cheng */ 36958275SEric Cheng static int 36968275SEric Cheng mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring) 36978275SEric Cheng { 36988275SEric Cheng mac_group_t *s_group = (mac_group_t *)ring->mr_gh; 36998275SEric Cheng int rv; 37008275SEric Cheng 37018275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37028275SEric Cheng ASSERT(d_group != NULL); 37038275SEric Cheng ASSERT(s_group->mrg_mh == d_group->mrg_mh); 37048275SEric Cheng 37058275SEric Cheng if (s_group == d_group) 37068275SEric Cheng return (0); 37078275SEric Cheng 37088275SEric Cheng /* 37098275SEric Cheng * Remove it from current group first. 37108275SEric Cheng */ 37118275SEric Cheng if (s_group != NULL) 37128275SEric Cheng i_mac_group_rem_ring(s_group, ring, B_FALSE); 37138275SEric Cheng 37148275SEric Cheng /* 37158275SEric Cheng * Add it to the new group. 37168275SEric Cheng */ 37178275SEric Cheng rv = i_mac_group_add_ring(d_group, ring, 0); 37188275SEric Cheng if (rv != 0) { 37198275SEric Cheng /* 37208275SEric Cheng * Failed to add ring back to source group. If 37218275SEric Cheng * that fails, the ring is stuck in limbo, log message. 37228275SEric Cheng */ 37238275SEric Cheng if (i_mac_group_add_ring(s_group, ring, 0)) { 37248275SEric Cheng cmn_err(CE_WARN, "%s: failed to move ring %p\n", 37258275SEric Cheng mip->mi_name, (void *)ring); 37268275SEric Cheng } 37278275SEric Cheng } 37288275SEric Cheng 37298275SEric Cheng return (rv); 37308275SEric Cheng } 37318275SEric Cheng 37328275SEric Cheng /* 37338275SEric Cheng * Find a MAC address according to its value. 
37348275SEric Cheng */ 37358275SEric Cheng mac_address_t * 37368275SEric Cheng mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr) 37378275SEric Cheng { 37388275SEric Cheng mac_address_t *map; 37398275SEric Cheng 37408275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37418275SEric Cheng 37428275SEric Cheng for (map = mip->mi_addresses; map != NULL; map = map->ma_next) { 37438275SEric Cheng if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) 37448275SEric Cheng break; 37458275SEric Cheng } 37468275SEric Cheng 37478275SEric Cheng return (map); 37488275SEric Cheng } 37498275SEric Cheng 37508275SEric Cheng /* 37518275SEric Cheng * Check whether the MAC address is shared by multiple clients. 37528275SEric Cheng */ 37538275SEric Cheng boolean_t 37548275SEric Cheng mac_check_macaddr_shared(mac_address_t *map) 37558275SEric Cheng { 37568275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip)); 37578275SEric Cheng 37588275SEric Cheng return (map->ma_nusers > 1); 37598275SEric Cheng } 37608275SEric Cheng 37618275SEric Cheng /* 37628275SEric Cheng * Remove the specified MAC address from the MAC address list and free it. 
37638275SEric Cheng */ 37648275SEric Cheng static void 37658275SEric Cheng mac_free_macaddr(mac_address_t *map) 37668275SEric Cheng { 37678275SEric Cheng mac_impl_t *mip = map->ma_mip; 37688275SEric Cheng 37698275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37708275SEric Cheng ASSERT(mip->mi_addresses != NULL); 37718275SEric Cheng 37728275SEric Cheng map = mac_find_macaddr(mip, map->ma_addr); 37738275SEric Cheng 37748275SEric Cheng ASSERT(map != NULL); 37758275SEric Cheng ASSERT(map->ma_nusers == 0); 37768275SEric Cheng 37778275SEric Cheng if (map == mip->mi_addresses) { 37788275SEric Cheng mip->mi_addresses = map->ma_next; 37798275SEric Cheng } else { 37808275SEric Cheng mac_address_t *pre; 37818275SEric Cheng 37828275SEric Cheng pre = mip->mi_addresses; 37838275SEric Cheng while (pre->ma_next != map) 37848275SEric Cheng pre = pre->ma_next; 37858275SEric Cheng pre->ma_next = map->ma_next; 37868275SEric Cheng } 37878275SEric Cheng 37888275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 37898275SEric Cheng } 37908275SEric Cheng 37918275SEric Cheng /* 37928275SEric Cheng * Add a MAC address reference for a client. If the desired MAC address 37938275SEric Cheng * exists, add a reference to it. Otherwise, add the new address by adding 37948275SEric Cheng * it to a reserved group or setting promiscuous mode. Won't try different 37958275SEric Cheng * group is the group is non-NULL, so the caller must explictly share 37968275SEric Cheng * default group when needed. 37978275SEric Cheng * 37988275SEric Cheng * Note, the primary MAC address is initialized at registration time, so 37998275SEric Cheng * to add it to default group only need to activate it if its reference 38008275SEric Cheng * count is still zero. Also, some drivers may not have advertised RINGS 38018275SEric Cheng * capability. 
38028275SEric Cheng */ 38038275SEric Cheng int 3804*8400SNicolas.Droux@Sun.COM mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, 3805*8400SNicolas.Droux@Sun.COM boolean_t use_hw) 38068275SEric Cheng { 38078275SEric Cheng mac_address_t *map; 38088275SEric Cheng int err = 0; 38098275SEric Cheng boolean_t allocated_map = B_FALSE; 38108275SEric Cheng 38118275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 38128275SEric Cheng 38138275SEric Cheng map = mac_find_macaddr(mip, mac_addr); 38148275SEric Cheng 38158275SEric Cheng /* 38168275SEric Cheng * If the new MAC address has not been added. Allocate a new one 38178275SEric Cheng * and set it up. 38188275SEric Cheng */ 38198275SEric Cheng if (map == NULL) { 38208275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 38218275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 38228275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 38238275SEric Cheng map->ma_nusers = 0; 38248275SEric Cheng map->ma_group = group; 38258275SEric Cheng map->ma_mip = mip; 38268275SEric Cheng 38278275SEric Cheng /* add the new MAC address to the head of the address list */ 38288275SEric Cheng map->ma_next = mip->mi_addresses; 38298275SEric Cheng mip->mi_addresses = map; 38308275SEric Cheng 38318275SEric Cheng allocated_map = B_TRUE; 38328275SEric Cheng } 38338275SEric Cheng 38348275SEric Cheng ASSERT(map->ma_group == group); 38358275SEric Cheng 38368275SEric Cheng /* 38378275SEric Cheng * If the MAC address is already in use, simply account for the 38388275SEric Cheng * new client. 38398275SEric Cheng */ 38408275SEric Cheng if (map->ma_nusers++ > 0) 38418275SEric Cheng return (0); 38428275SEric Cheng 38438275SEric Cheng /* 38448275SEric Cheng * Activate this MAC address by adding it to the reserved group. 
38458275SEric Cheng */ 38468275SEric Cheng if (group != NULL) { 38478275SEric Cheng err = mac_group_addmac(group, (const uint8_t *)mac_addr); 38488275SEric Cheng if (err == 0) { 38498275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 38508275SEric Cheng return (0); 38518275SEric Cheng } 38528275SEric Cheng } 38538275SEric Cheng 38548275SEric Cheng /* 3855*8400SNicolas.Droux@Sun.COM * The MAC address addition failed. If the client requires a 3856*8400SNicolas.Droux@Sun.COM * hardware classified MAC address, fail the operation. 3857*8400SNicolas.Droux@Sun.COM */ 3858*8400SNicolas.Droux@Sun.COM if (use_hw) { 3859*8400SNicolas.Droux@Sun.COM err = ENOSPC; 3860*8400SNicolas.Droux@Sun.COM goto bail; 3861*8400SNicolas.Droux@Sun.COM } 3862*8400SNicolas.Droux@Sun.COM 3863*8400SNicolas.Droux@Sun.COM /* 3864*8400SNicolas.Droux@Sun.COM * Try promiscuous mode. 3865*8400SNicolas.Droux@Sun.COM * 3866*8400SNicolas.Droux@Sun.COM * For drivers that don't advertise RINGS capability, do 3867*8400SNicolas.Droux@Sun.COM * nothing for the primary address. 38688275SEric Cheng */ 3869*8400SNicolas.Droux@Sun.COM if ((group == NULL) && 3870*8400SNicolas.Droux@Sun.COM (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { 3871*8400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 3872*8400SNicolas.Droux@Sun.COM return (0); 3873*8400SNicolas.Droux@Sun.COM } 3874*8400SNicolas.Droux@Sun.COM 3875*8400SNicolas.Droux@Sun.COM /* 3876*8400SNicolas.Droux@Sun.COM * Enable promiscuous mode in order to receive traffic 3877*8400SNicolas.Droux@Sun.COM * to the new MAC address. 3878*8400SNicolas.Droux@Sun.COM */ 3879*8400SNicolas.Droux@Sun.COM if ((err = i_mac_promisc_set(mip, B_TRUE, MAC_DEVPROMISC)) == 0) { 3880*8400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; 3881*8400SNicolas.Droux@Sun.COM return (0); 38828275SEric Cheng } 38838275SEric Cheng 38848275SEric Cheng /* 38858275SEric Cheng * Free the MAC address that could not be added. 
Don't free 38868275SEric Cheng * a pre-existing address, it could have been the entry 38878275SEric Cheng * for the primary MAC address which was pre-allocated by 38888275SEric Cheng * mac_init_macaddr(), and which must remain on the list. 38898275SEric Cheng */ 3890*8400SNicolas.Droux@Sun.COM bail: 38918275SEric Cheng map->ma_nusers--; 38928275SEric Cheng if (allocated_map) 38938275SEric Cheng mac_free_macaddr(map); 38948275SEric Cheng return (err); 38958275SEric Cheng } 38968275SEric Cheng 38978275SEric Cheng /* 38988275SEric Cheng * Remove a reference to a MAC address. This may cause to remove the MAC 38998275SEric Cheng * address from an associated group or to turn off promiscuous mode. 39008275SEric Cheng * The caller needs to handle the failure properly. 39018275SEric Cheng */ 39028275SEric Cheng int 39038275SEric Cheng mac_remove_macaddr(mac_address_t *map) 39048275SEric Cheng { 39058275SEric Cheng mac_impl_t *mip = map->ma_mip; 39068275SEric Cheng int err = 0; 39078275SEric Cheng 39088275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 39098275SEric Cheng 39108275SEric Cheng ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); 39118275SEric Cheng 39128275SEric Cheng /* 39138275SEric Cheng * If it's not the last client using this MAC address, only update 39148275SEric Cheng * the MAC clients count. 39158275SEric Cheng */ 39168275SEric Cheng if (--map->ma_nusers > 0) 39178275SEric Cheng return (0); 39188275SEric Cheng 39198275SEric Cheng /* 39208275SEric Cheng * The MAC address is no longer used by any MAC client, so remove 39218275SEric Cheng * it from its associated group, or turn off promiscuous mode 39228275SEric Cheng * if it was enabled for the MAC address. 39238275SEric Cheng */ 39248275SEric Cheng switch (map->ma_type) { 39258275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 39268275SEric Cheng /* 39278275SEric Cheng * Don't free the preset primary address for drivers that 39288275SEric Cheng * don't advertise RINGS capability. 
39298275SEric Cheng */ 39308275SEric Cheng if (map->ma_group == NULL) 39318275SEric Cheng return (0); 39328275SEric Cheng 39338275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 39348275SEric Cheng break; 39358275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 3936*8400SNicolas.Droux@Sun.COM err = i_mac_promisc_set(mip, B_FALSE, MAC_DEVPROMISC); 39378275SEric Cheng break; 39388275SEric Cheng default: 39398275SEric Cheng ASSERT(B_FALSE); 39408275SEric Cheng } 39418275SEric Cheng 39428275SEric Cheng if (err != 0) 39438275SEric Cheng return (err); 39448275SEric Cheng 39458275SEric Cheng /* 39468275SEric Cheng * We created MAC address for the primary one at registration, so we 39478275SEric Cheng * won't free it here. mac_fini_macaddr() will take care of it. 39488275SEric Cheng */ 39498275SEric Cheng if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0) 39508275SEric Cheng mac_free_macaddr(map); 39518275SEric Cheng 39528275SEric Cheng return (0); 39538275SEric Cheng } 39548275SEric Cheng 39558275SEric Cheng /* 39568275SEric Cheng * Update an existing MAC address. The caller need to make sure that the new 39578275SEric Cheng * value has not been used. 39588275SEric Cheng */ 39598275SEric Cheng int 39608275SEric Cheng mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) 39618275SEric Cheng { 39628275SEric Cheng mac_impl_t *mip = map->ma_mip; 39638275SEric Cheng int err = 0; 39648275SEric Cheng 39658275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 39668275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 39678275SEric Cheng 39688275SEric Cheng switch (map->ma_type) { 39698275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 39708275SEric Cheng /* 39718275SEric Cheng * Update the primary address for drivers that are not 39728275SEric Cheng * RINGS capable. 
39738275SEric Cheng */ 39748275SEric Cheng if (map->ma_group == NULL) { 39758275SEric Cheng err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) 39768275SEric Cheng mac_addr); 39778275SEric Cheng if (err != 0) 39788275SEric Cheng return (err); 39798275SEric Cheng break; 39808275SEric Cheng } 39818275SEric Cheng 39828275SEric Cheng /* 39838275SEric Cheng * If this MAC address is not currently in use, 39848275SEric Cheng * simply break out and update the value. 39858275SEric Cheng */ 39868275SEric Cheng if (map->ma_nusers == 0) 39878275SEric Cheng break; 39888275SEric Cheng 39898275SEric Cheng /* 39908275SEric Cheng * Need to replace the MAC address associated with a group. 39918275SEric Cheng */ 39928275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 39938275SEric Cheng if (err != 0) 39948275SEric Cheng return (err); 39958275SEric Cheng 39968275SEric Cheng err = mac_group_addmac(map->ma_group, mac_addr); 39978275SEric Cheng 39988275SEric Cheng /* 39998275SEric Cheng * Failure hints hardware error. The MAC layer needs to 40008275SEric Cheng * have error notification facility to handle this. 40018275SEric Cheng * Now, simply try to restore the value. 40028275SEric Cheng */ 40038275SEric Cheng if (err != 0) 40048275SEric Cheng (void) mac_group_addmac(map->ma_group, map->ma_addr); 40058275SEric Cheng 40068275SEric Cheng break; 40078275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 40088275SEric Cheng /* 40098275SEric Cheng * Need to do nothing more if in promiscuous mode. 40108275SEric Cheng */ 40118275SEric Cheng break; 40128275SEric Cheng default: 40138275SEric Cheng ASSERT(B_FALSE); 40148275SEric Cheng } 40158275SEric Cheng 40168275SEric Cheng /* 40178275SEric Cheng * Successfully replaced the MAC address. 
40188275SEric Cheng */ 40198275SEric Cheng if (err == 0) 40208275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 40218275SEric Cheng 40228275SEric Cheng return (err); 40238275SEric Cheng } 40248275SEric Cheng 40258275SEric Cheng /* 40268275SEric Cheng * Freshen the MAC address with new value. Its caller must have updated the 40278275SEric Cheng * hardware MAC address before calling this function. 40288275SEric Cheng * This funcitons is supposed to be used to handle the MAC address change 40298275SEric Cheng * notification from underlying drivers. 40308275SEric Cheng */ 40318275SEric Cheng void 40328275SEric Cheng mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr) 40338275SEric Cheng { 40348275SEric Cheng mac_impl_t *mip = map->ma_mip; 40358275SEric Cheng 40368275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 40378275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 40388275SEric Cheng 40398275SEric Cheng /* 40408275SEric Cheng * Freshen the MAC address with new value. 40418275SEric Cheng */ 40428275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 40438275SEric Cheng bcopy(mac_addr, mip->mi_addr, map->ma_len); 40448275SEric Cheng 40458275SEric Cheng /* 40468275SEric Cheng * Update all MAC clients that share this MAC address. 40478275SEric Cheng */ 40488275SEric Cheng mac_unicast_update_clients(mip, map); 40498275SEric Cheng } 40508275SEric Cheng 40518275SEric Cheng /* 40528275SEric Cheng * Set up the primary MAC address. 40538275SEric Cheng */ 40548275SEric Cheng void 40558275SEric Cheng mac_init_macaddr(mac_impl_t *mip) 40568275SEric Cheng { 40578275SEric Cheng mac_address_t *map; 40588275SEric Cheng 40598275SEric Cheng /* 40608275SEric Cheng * The reference count is initialized to zero, until it's really 40618275SEric Cheng * activated. 
40628275SEric Cheng */ 40638275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 40648275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 40658275SEric Cheng bcopy(mip->mi_addr, map->ma_addr, map->ma_len); 40668275SEric Cheng 40678275SEric Cheng /* 40688275SEric Cheng * If driver advertises RINGS capability, it shouldn't have initialized 40698275SEric Cheng * its primary MAC address. For other drivers, including VNIC, the 40708275SEric Cheng * primary address must work after registration. 40718275SEric Cheng */ 40728275SEric Cheng if (mip->mi_rx_groups == NULL) 40738275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 40748275SEric Cheng 40758275SEric Cheng /* 40768275SEric Cheng * The primary MAC address is reserved for default group according 40778275SEric Cheng * to current design. 40788275SEric Cheng */ 40798275SEric Cheng map->ma_group = mip->mi_rx_groups; 40808275SEric Cheng map->ma_mip = mip; 40818275SEric Cheng 40828275SEric Cheng mip->mi_addresses = map; 40838275SEric Cheng } 40848275SEric Cheng 40858275SEric Cheng /* 40868275SEric Cheng * Clean up the primary MAC address. Note, only one primary MAC address 40878275SEric Cheng * is allowed. All other MAC addresses must have been freed appropriately. 40888275SEric Cheng */ 40898275SEric Cheng void 40908275SEric Cheng mac_fini_macaddr(mac_impl_t *mip) 40918275SEric Cheng { 40928275SEric Cheng mac_address_t *map = mip->mi_addresses; 40938275SEric Cheng 40948275SEric Cheng /* there should be exactly one entry left on the list */ 40958275SEric Cheng ASSERT(map != NULL); 40968275SEric Cheng ASSERT(map->ma_nusers == 0); 40978275SEric Cheng ASSERT(map->ma_next == NULL); 40988275SEric Cheng 40998275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 41008275SEric Cheng mip->mi_addresses = NULL; 41018275SEric Cheng } 41028275SEric Cheng 41038275SEric Cheng /* 41048275SEric Cheng * Logging related functions. 
41058275SEric Cheng */ 41068275SEric Cheng 41078275SEric Cheng /* Write the Flow description to the log file */ 41088275SEric Cheng int 41098275SEric Cheng mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 41108275SEric Cheng { 41118275SEric Cheng flow_desc_t *fdesc; 41128275SEric Cheng mac_resource_props_t *mrp; 41138275SEric Cheng net_desc_t ndesc; 41148275SEric Cheng 41158275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 41168275SEric Cheng 41178275SEric Cheng /* 41188275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 41198275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 41208275SEric Cheng */ 41218275SEric Cheng mutex_enter(&flent->fe_lock); 41228275SEric Cheng fdesc = &flent->fe_flow_desc; 41238275SEric Cheng mrp = &flent->fe_resource_props; 41248275SEric Cheng 41258275SEric Cheng ndesc.nd_name = flent->fe_flow_name; 41268275SEric Cheng ndesc.nd_devname = mcip->mci_name; 41278275SEric Cheng bcopy(fdesc->fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 41288275SEric Cheng bcopy(fdesc->fd_dst_mac, ndesc.nd_edest, ETHERADDRL); 41298275SEric Cheng ndesc.nd_sap = htonl(fdesc->fd_sap); 41308275SEric Cheng ndesc.nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 41318275SEric Cheng ndesc.nd_bw_limit = mrp->mrp_maxbw; 41328275SEric Cheng if (ndesc.nd_isv4) { 41338275SEric Cheng ndesc.nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 41348275SEric Cheng ndesc.nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 41358275SEric Cheng } else { 41368275SEric Cheng bcopy(&fdesc->fd_local_addr, ndesc.nd_saddr, IPV6_ADDR_LEN); 41378275SEric Cheng bcopy(&fdesc->fd_remote_addr, ndesc.nd_daddr, IPV6_ADDR_LEN); 41388275SEric Cheng } 41398275SEric Cheng ndesc.nd_sport = htons(fdesc->fd_local_port); 41408275SEric Cheng ndesc.nd_dport = htons(fdesc->fd_remote_port); 41418275SEric Cheng ndesc.nd_protocol = (uint8_t)fdesc->fd_protocol; 41428275SEric Cheng mutex_exit(&flent->fe_lock); 41438275SEric Cheng 41448275SEric 
Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_FLDESC_REC)); 41458275SEric Cheng } 41468275SEric Cheng 41478275SEric Cheng /* Write the Flow statistics to the log file */ 41488275SEric Cheng int 41498275SEric Cheng mac_write_flow_stats(flow_entry_t *flent) 41508275SEric Cheng { 41518275SEric Cheng flow_stats_t *fl_stats; 41528275SEric Cheng net_stat_t nstat; 41538275SEric Cheng 41548275SEric Cheng fl_stats = &flent->fe_flowstats; 41558275SEric Cheng nstat.ns_name = flent->fe_flow_name; 41568275SEric Cheng nstat.ns_ibytes = fl_stats->fs_rbytes; 41578275SEric Cheng nstat.ns_obytes = fl_stats->fs_obytes; 41588275SEric Cheng nstat.ns_ipackets = fl_stats->fs_ipackets; 41598275SEric Cheng nstat.ns_opackets = fl_stats->fs_opackets; 41608275SEric Cheng nstat.ns_ierrors = fl_stats->fs_ierrors; 41618275SEric Cheng nstat.ns_oerrors = fl_stats->fs_oerrors; 41628275SEric Cheng 41638275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); 41648275SEric Cheng } 41658275SEric Cheng 41668275SEric Cheng /* Write the Link Description to the log file */ 41678275SEric Cheng int 41688275SEric Cheng mac_write_link_desc(mac_client_impl_t *mcip) 41698275SEric Cheng { 41708275SEric Cheng net_desc_t ndesc; 41718275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 41728275SEric Cheng 41738275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 41748275SEric Cheng 41758275SEric Cheng ndesc.nd_name = mcip->mci_name; 41768275SEric Cheng ndesc.nd_devname = mcip->mci_name; 41778275SEric Cheng ndesc.nd_isv4 = B_TRUE; 41788275SEric Cheng /* 41798275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 41808275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 41818275SEric Cheng * after removing the flent from the flow table. 
41828275SEric Cheng */ 41838275SEric Cheng mutex_enter(&flent->fe_lock); 41848275SEric Cheng bcopy(flent->fe_flow_desc.fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 41858275SEric Cheng mutex_exit(&flent->fe_lock); 41868275SEric Cheng 41878275SEric Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_LNDESC_REC)); 41888275SEric Cheng } 41898275SEric Cheng 41908275SEric Cheng /* Write the Link statistics to the log file */ 41918275SEric Cheng int 41928275SEric Cheng mac_write_link_stats(mac_client_impl_t *mcip) 41938275SEric Cheng { 41948275SEric Cheng net_stat_t nstat; 41958275SEric Cheng 41968275SEric Cheng nstat.ns_name = mcip->mci_name; 41978275SEric Cheng nstat.ns_ibytes = mcip->mci_stat_ibytes; 41988275SEric Cheng nstat.ns_obytes = mcip->mci_stat_obytes; 41998275SEric Cheng nstat.ns_ipackets = mcip->mci_stat_ipackets; 42008275SEric Cheng nstat.ns_opackets = mcip->mci_stat_opackets; 42018275SEric Cheng nstat.ns_ierrors = mcip->mci_stat_ierrors; 42028275SEric Cheng nstat.ns_oerrors = mcip->mci_stat_oerrors; 42038275SEric Cheng 42048275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); 42058275SEric Cheng } 42068275SEric Cheng 42078275SEric Cheng /* 42088275SEric Cheng * For a given flow, if the descrition has not been logged before, do it now. 42098275SEric Cheng * If it is a VNIC, then we have collected information about it from the MAC 42108275SEric Cheng * table, so skip it. 
42118275SEric Cheng */ 42128275SEric Cheng /*ARGSUSED*/ 42138275SEric Cheng static int 42148275SEric Cheng mac_log_flowinfo(flow_entry_t *flent, void *args) 42158275SEric Cheng { 42168275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 42178275SEric Cheng 42188275SEric Cheng if (mcip == NULL) 42198275SEric Cheng return (0); 42208275SEric Cheng 42218275SEric Cheng /* 42228275SEric Cheng * If the name starts with "vnic", and fe_user_generated is true (to 42238275SEric Cheng * exclude the mcast and active flow entries created implicitly for 42248275SEric Cheng * a vnic, it is a VNIC flow. i.e. vnic1 is a vnic flow, 42258275SEric Cheng * vnic/bge1/mcast1 is not and neither is vnic/bge1/active. 42268275SEric Cheng */ 42278275SEric Cheng if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 && 42288275SEric Cheng (flent->fe_type & FLOW_USER) != 0) { 42298275SEric Cheng return (0); 42308275SEric Cheng } 42318275SEric Cheng 42328275SEric Cheng if (!flent->fe_desc_logged) { 42338275SEric Cheng /* 42348275SEric Cheng * We don't return error because we want to continu the 42358275SEric Cheng * walk in case this is the last walk which means we 42368275SEric Cheng * need to reset fe_desc_logged in all the flows. 42378275SEric Cheng */ 42388275SEric Cheng if (mac_write_flow_desc(flent, mcip) != 0) 42398275SEric Cheng return (0); 42408275SEric Cheng flent->fe_desc_logged = B_TRUE; 42418275SEric Cheng } 42428275SEric Cheng 42438275SEric Cheng /* 42448275SEric Cheng * Regardless of the error, we want to proceed in case we have to 42458275SEric Cheng * reset fe_desc_logged. 
42468275SEric Cheng */ 42478275SEric Cheng (void) mac_write_flow_stats(flent); 42488275SEric Cheng 42498275SEric Cheng if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED)) 42508275SEric Cheng flent->fe_desc_logged = B_FALSE; 42518275SEric Cheng 42528275SEric Cheng return (0); 42538275SEric Cheng } 42548275SEric Cheng 42558275SEric Cheng typedef struct i_mac_log_state_s { 42568275SEric Cheng boolean_t mi_last; 42578275SEric Cheng int mi_fenable; 42588275SEric Cheng int mi_lenable; 42598275SEric Cheng } i_mac_log_state_t; 42608275SEric Cheng 42618275SEric Cheng /* 42628275SEric Cheng * Walk the mac_impl_ts and log the description for each mac client of this mac, 42638275SEric Cheng * if it hasn't already been done. Additionally, log statistics for the link as 42648275SEric Cheng * well. Walk the flow table and log information for each flow as well. 42658275SEric Cheng * If it is the last walk (mci_last), then we turn off mci_desc_logged (and 42668275SEric Cheng * also fe_desc_logged, if flow logging is on) since we want to log the 42678275SEric Cheng * description if and when logging is restarted. 
42688275SEric Cheng */ 42698275SEric Cheng /*ARGSUSED*/ 42708275SEric Cheng static uint_t 42718275SEric Cheng i_mac_log_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 42728275SEric Cheng { 42738275SEric Cheng mac_impl_t *mip = (mac_impl_t *)val; 42748275SEric Cheng i_mac_log_state_t *lstate = (i_mac_log_state_t *)arg; 42758275SEric Cheng int ret; 42768275SEric Cheng mac_client_impl_t *mcip; 42778275SEric Cheng 42788275SEric Cheng /* 42798275SEric Cheng * Only walk the client list for NIC and etherstub 42808275SEric Cheng */ 42818275SEric Cheng if ((mip->mi_state_flags & MIS_DISABLED) || 42828275SEric Cheng ((mip->mi_state_flags & MIS_IS_VNIC) && 42838275SEric Cheng (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) 42848275SEric Cheng return (MH_WALK_CONTINUE); 42858275SEric Cheng 42868275SEric Cheng for (mcip = mip->mi_clients_list; mcip != NULL; 42878275SEric Cheng mcip = mcip->mci_client_next) { 42888275SEric Cheng if (!MCIP_DATAPATH_SETUP(mcip)) 42898275SEric Cheng continue; 42908275SEric Cheng if (lstate->mi_lenable) { 42918275SEric Cheng if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) { 42928275SEric Cheng ret = mac_write_link_desc(mcip); 42938275SEric Cheng if (ret != 0) { 42948275SEric Cheng /* 42958275SEric Cheng * We can't terminate it if this is the last 42968275SEric Cheng * walk, else there might be some links with 42978275SEric Cheng * mi_desc_logged set to true, which means 42988275SEric Cheng * their description won't be logged the next 42998275SEric Cheng * time logging is started (similarly for the 43008275SEric Cheng * flows within such links). We can continue 43018275SEric Cheng * without walking the flow table (i.e. to 43028275SEric Cheng * set fe_desc_logged to false) because we 43038275SEric Cheng * won't have written any flow stuff for this 43048275SEric Cheng * link as we haven't logged the link itself. 
43058275SEric Cheng */ 43068275SEric Cheng if (lstate->mi_last) 43078275SEric Cheng return (MH_WALK_CONTINUE); 43088275SEric Cheng else 43098275SEric Cheng return (MH_WALK_TERMINATE); 43108275SEric Cheng } 43118275SEric Cheng mcip->mci_state_flags |= MCIS_DESC_LOGGED; 43128275SEric Cheng } 43138275SEric Cheng } 43148275SEric Cheng 43158275SEric Cheng if (mac_write_link_stats(mcip) != 0 && !lstate->mi_last) 43168275SEric Cheng return (MH_WALK_TERMINATE); 43178275SEric Cheng 43188275SEric Cheng if (lstate->mi_last) 43198275SEric Cheng mcip->mci_state_flags &= ~MCIS_DESC_LOGGED; 43208275SEric Cheng 43218275SEric Cheng if (lstate->mi_fenable) { 43228275SEric Cheng if (mcip->mci_subflow_tab != NULL) { 43238275SEric Cheng (void) mac_flow_walk(mcip->mci_subflow_tab, 43248275SEric Cheng mac_log_flowinfo, mip); 43258275SEric Cheng } 43268275SEric Cheng } 43278275SEric Cheng } 43288275SEric Cheng return (MH_WALK_CONTINUE); 43298275SEric Cheng } 43308275SEric Cheng 43318275SEric Cheng /* 43328275SEric Cheng * The timer thread that runs every mac_logging_interval seconds and logs 43338275SEric Cheng * link and/or flow information. 
43348275SEric Cheng */ 43358275SEric Cheng /* ARGSUSED */ 43368275SEric Cheng void 43378275SEric Cheng mac_log_linkinfo(void *arg) 43388275SEric Cheng { 43398275SEric Cheng i_mac_log_state_t lstate; 43408275SEric Cheng 43418275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 43428275SEric Cheng if (!mac_flow_log_enable && !mac_link_log_enable) { 43438275SEric Cheng rw_exit(&i_mac_impl_lock); 43448275SEric Cheng return; 43458275SEric Cheng } 43468275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 43478275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 43488275SEric Cheng lstate.mi_last = B_FALSE; 43498275SEric Cheng rw_exit(&i_mac_impl_lock); 43508275SEric Cheng 43518275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 43528275SEric Cheng 43538275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 43548275SEric Cheng if (mac_flow_log_enable || mac_link_log_enable) { 43558275SEric Cheng mac_logging_timer = timeout(mac_log_linkinfo, NULL, 43568275SEric Cheng SEC_TO_TICK(mac_logging_interval)); 43578275SEric Cheng } 43588275SEric Cheng rw_exit(&i_mac_impl_lock); 43598275SEric Cheng } 43608275SEric Cheng 43618275SEric Cheng /* 43628275SEric Cheng * Start the logging timer. 
43638275SEric Cheng */ 43648275SEric Cheng void 43658275SEric Cheng mac_start_logusage(mac_logtype_t type, uint_t interval) 43668275SEric Cheng { 43678275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 43688275SEric Cheng switch (type) { 43698275SEric Cheng case MAC_LOGTYPE_FLOW: 43708275SEric Cheng if (mac_flow_log_enable) { 43718275SEric Cheng rw_exit(&i_mac_impl_lock); 43728275SEric Cheng return; 43738275SEric Cheng } 43748275SEric Cheng mac_flow_log_enable = B_TRUE; 43758275SEric Cheng /* FALLTHRU */ 43768275SEric Cheng case MAC_LOGTYPE_LINK: 43778275SEric Cheng if (mac_link_log_enable) { 43788275SEric Cheng rw_exit(&i_mac_impl_lock); 43798275SEric Cheng return; 43808275SEric Cheng } 43818275SEric Cheng mac_link_log_enable = B_TRUE; 43828275SEric Cheng break; 43838275SEric Cheng default: 43848275SEric Cheng ASSERT(0); 43858275SEric Cheng } 43868275SEric Cheng mac_logging_interval = interval; 43878275SEric Cheng rw_exit(&i_mac_impl_lock); 43888275SEric Cheng mac_log_linkinfo(NULL); 43898275SEric Cheng } 43908275SEric Cheng 43918275SEric Cheng /* 43928275SEric Cheng * Stop the logging timer if both Link and Flow logging are turned off. 
43938275SEric Cheng */ 43948275SEric Cheng void 43958275SEric Cheng mac_stop_logusage(mac_logtype_t type) 43968275SEric Cheng { 43978275SEric Cheng i_mac_log_state_t lstate; 43988275SEric Cheng 43998275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 44008275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 44018275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 44028275SEric Cheng 44038275SEric Cheng /* Last walk */ 44048275SEric Cheng lstate.mi_last = B_TRUE; 44058275SEric Cheng 44068275SEric Cheng switch (type) { 44078275SEric Cheng case MAC_LOGTYPE_FLOW: 44088275SEric Cheng if (lstate.mi_fenable) { 44098275SEric Cheng ASSERT(mac_link_log_enable); 44108275SEric Cheng mac_flow_log_enable = B_FALSE; 44118275SEric Cheng mac_link_log_enable = B_FALSE; 44128275SEric Cheng break; 44138275SEric Cheng } 44148275SEric Cheng /* FALLTHRU */ 44158275SEric Cheng case MAC_LOGTYPE_LINK: 44168275SEric Cheng if (!lstate.mi_lenable || mac_flow_log_enable) { 44178275SEric Cheng rw_exit(&i_mac_impl_lock); 44188275SEric Cheng return; 44198275SEric Cheng } 44208275SEric Cheng mac_link_log_enable = B_FALSE; 44218275SEric Cheng break; 44228275SEric Cheng default: 44238275SEric Cheng ASSERT(0); 44248275SEric Cheng } 44258275SEric Cheng rw_exit(&i_mac_impl_lock); 44268275SEric Cheng (void) untimeout(mac_logging_timer); 44278275SEric Cheng mac_logging_timer = 0; 44288275SEric Cheng 44298275SEric Cheng /* Last walk */ 44308275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 44318275SEric Cheng } 44328275SEric Cheng 44338275SEric Cheng /* 44348275SEric Cheng * Walk the rx and tx SRS/SRs for a flow and update the priority value. 
44358275SEric Cheng */ 44368275SEric Cheng void 44378275SEric Cheng mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) 44388275SEric Cheng { 44398275SEric Cheng pri_t pri; 44408275SEric Cheng int count; 44418275SEric Cheng mac_soft_ring_set_t *mac_srs; 44428275SEric Cheng 44438275SEric Cheng if (flent->fe_rx_srs_cnt <= 0) 44448275SEric Cheng return; 44458275SEric Cheng 44468275SEric Cheng if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type == 44478275SEric Cheng SRST_FLOW) { 44488275SEric Cheng pri = FLOW_PRIORITY(mcip->mci_min_pri, 44498275SEric Cheng mcip->mci_max_pri, 44508275SEric Cheng flent->fe_resource_props.mrp_priority); 44518275SEric Cheng } else { 44528275SEric Cheng pri = mcip->mci_max_pri; 44538275SEric Cheng } 44548275SEric Cheng 44558275SEric Cheng for (count = 0; count < flent->fe_rx_srs_cnt; count++) { 44568275SEric Cheng mac_srs = flent->fe_rx_srs[count]; 44578275SEric Cheng mac_update_srs_priority(mac_srs, pri); 44588275SEric Cheng } 44598275SEric Cheng /* 44608275SEric Cheng * If we have a Tx SRS, we need to modify all the threads associated 44618275SEric Cheng * with it. 44628275SEric Cheng */ 44638275SEric Cheng if (flent->fe_tx_srs != NULL) 44648275SEric Cheng mac_update_srs_priority(flent->fe_tx_srs, pri); 44658275SEric Cheng } 44668275SEric Cheng 44678275SEric Cheng /* 44688275SEric Cheng * RX and TX rings are reserved according to different semantics depending 44698275SEric Cheng * on the requests from the MAC clients and type of rings: 44708275SEric Cheng * 44718275SEric Cheng * On the Tx side, by default we reserve individual rings, independently from 44728275SEric Cheng * the groups. 44738275SEric Cheng * 44748275SEric Cheng * On the Rx side, the reservation is at the granularity of the group 44758275SEric Cheng * of rings, and used for v12n level 1 only. It has a special case for the 44768275SEric Cheng * primary client. 
 *
 * If a share is allocated to a MAC client, we allocate a TX group and an
 * RX group to the client, and assign TX rings and RX rings to these
 * groups according to information gathered from the driver through
 * the share capability.
 *
 * The foreseeable evolution of Rx rings will handle v12n level 2 and higher
 * to allocate individual rings out of a group and program the hw classifier
 * based on IP address or higher level criteria.
 */

/*
 * mac_reserve_tx_ring()
 * Reserve an unused ring by marking it with MR_INUSE state.
 * As reserved, the ring is ready to function.
 *
 * Notes for Hybrid I/O:
 *
 * If a specific ring is needed, it is specified through the desired_ring
 * argument. Otherwise that argument is set to NULL.
 * If the desired ring was previously allocated to another client, this
 * function swaps it with a new ring from the group of unassigned rings.
 *
 * The caller must hold the MAC perimeter of mip.
 *
 * Returns the reserved ring, started and marked MR_INUSE. Returns NULL if
 * the MAC has no TX groups, if no suitable ring was found in the group of
 * unassigned rings, or if the ring could not be started.
 */
mac_ring_t *
mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
{
	mac_group_t		*group;
	mac_ring_t		*ring;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	if (mip->mi_tx_groups == NULL)
		return (NULL);

	/*
	 * Find an available ring and start it before changing its status.
	 * The unassigned rings are at the end of the mi_tx_groups
	 * array.
	 */
	group = mip->mi_tx_groups + mip->mi_tx_group_count;

	for (ring = group->mrg_rings; ring != NULL;
	    ring = ring->mr_next) {
		if (desired_ring == NULL) {
			if (ring->mr_state == MR_FREE)
				/* wanted any free ring and found one */
				break;
		} else {
			mac_ring_t *sring;
			mac_client_impl_t *client;
			mac_soft_ring_set_t *srs;

			if (ring != desired_ring)
				/* wants a desired ring but this one ain't it */
				continue;

			if (ring->mr_state == MR_FREE)
				break;

			/*
			 * Found the desired ring but it's already in use.
			 * Swap it with a new ring.
			 */

			/* find the client which owns that ring */
			for (client = mip->mi_clients_list; client != NULL;
			    client = client->mci_client_next) {
				srs = MCIP_TX_SRS(client);
				if (srs != NULL && mac_tx_srs_ring_present(srs,
				    desired_ring)) {
					/* found our ring */
					break;
				}
			}
			if (client == NULL) {
				/*
				 * The TX ring is in use, but it's not
				 * associated with any clients, so it
				 * has to be the default ring. In that
				 * case we can simply assign a new ring
				 * as the default ring, and we're done.
				 */
				ASSERT(mip->mi_default_tx_ring ==
				    (mac_ring_handle_t)desired_ring);

				/*
				 * Quiesce all clients on top of
				 * the NIC to make sure there are no
				 * pending threads still relying on
				 * that default ring, for example
				 * the multicast path.
				 */
				for (client = mip->mi_clients_list;
				    client != NULL;
				    client = client->mci_client_next) {
					mac_tx_client_quiesce(client,
					    SRS_QUIESCE);
				}

				/*
				 * NOTE(review): the recursive call returns
				 * NULL when no free ring is left, in which
				 * case the default ring becomes NULL here --
				 * confirm that users of mi_default_tx_ring
				 * cope with that.
				 */
				mip->mi_default_tx_ring = (mac_ring_handle_t)
				    mac_reserve_tx_ring(mip, NULL);

				/* resume the clients */
				for (client = mip->mi_clients_list;
				    client != NULL;
				    client = client->mci_client_next)
					mac_tx_client_restart(client);

				break;
			}

			/*
			 * Note that we cannot simply invoke the group
			 * add/rem routines since the client doesn't have a
			 * TX group. So we need to instead add/remove
			 * the rings from the SRS.
			 */
			ASSERT(client->mci_share == NULL);

			/* first quiesce the client */
			mac_tx_client_quiesce(client, SRS_QUIESCE);

			/* give a new ring to the client... */
			sring = mac_reserve_tx_ring(mip, NULL);
			if (sring != NULL) {
				/*
				 * A replacement ring was found. When
				 * there is no other available ring on
				 * that MAC instance, sring is NULL,
				 * nothing is added here, and the client
				 * will fall back to the shared TX ring.
				 */
				mac_tx_srs_add_ring(srs, sring);
			}

			/* ... in exchange for our desired ring */
			mac_tx_srs_del_ring(srs, desired_ring);

			/* restart the client */
			mac_tx_client_restart(client);

			if (mip->mi_default_tx_ring ==
			    (mac_ring_handle_t)desired_ring) {
				/*
				 * The desired ring is the default ring,
				 * and there are one or more clients
				 * using that default ring directly.
				 */
				mip->mi_default_tx_ring =
				    (mac_ring_handle_t)sring;
				/*
				 * Find clients using default ring and
				 * swap it with the new default ring.
				 */
				for (client = mip->mi_clients_list;
				    client != NULL;
				    client = client->mci_client_next) {
					srs = MCIP_TX_SRS(client);
					if (srs != NULL &&
					    mac_tx_srs_ring_present(srs,
					    desired_ring)) {
						/* first quiesce the client */
						mac_tx_client_quiesce(client,
						    SRS_QUIESCE);

						/*
						 * Give it the new default
						 * ring, and remove the old
						 * one.
						 */
						if (sring != NULL) {
							mac_tx_srs_add_ring(srs,
							    sring);
						}
						mac_tx_srs_del_ring(srs,
						    desired_ring);

						/* restart the client */
						mac_tx_client_restart(client);
					}
				}
			}
			/* ring == desired_ring here; it is handed out below */
			break;
		}
	}

	/*
	 * ring is NULL when the walk ran off the end of the list without
	 * finding a match; otherwise start the ring and mark it reserved.
	 */
	if (ring != NULL) {
		if (mac_start_ring(ring) != 0)
			return (NULL);
		ring->mr_state = MR_INUSE;
	}

	return (ring);
}

/*
 * Minimum number of rings to leave in the default RX group when allocating
 * rings to new clients.
46748275SEric Cheng */ 46758275SEric Cheng static uint_t mac_min_rx_default_rings = 1; 46768275SEric Cheng 46778275SEric Cheng /* 46788275SEric Cheng * Populate a zero-ring group with rings. If the share is non-NULL, 46798275SEric Cheng * the rings are chosen according to that share. 46808275SEric Cheng * Invoked after allocating a new RX or TX group through 46818275SEric Cheng * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 46828275SEric Cheng * Returns zero on success, an errno otherwise. 46838275SEric Cheng */ 46848275SEric Cheng int 46858275SEric Cheng i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 46868275SEric Cheng mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) 46878275SEric Cheng { 46888275SEric Cheng mac_ring_t **rings, *tmp_ring[1], *ring; 46898275SEric Cheng uint_t nrings; 46908275SEric Cheng int rv, i, j; 46918275SEric Cheng 46928275SEric Cheng ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && 46938275SEric Cheng mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 46948275SEric Cheng ASSERT(new_group->mrg_cur_count == 0); 46958275SEric Cheng 46968275SEric Cheng /* 46978275SEric Cheng * First find the rings to allocate to the group. 46988275SEric Cheng */ 46998275SEric Cheng if (share != NULL) { 47008275SEric Cheng /* get rings through ms_squery() */ 47018275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 47028275SEric Cheng ASSERT(nrings != 0); 47038275SEric Cheng rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 47048275SEric Cheng KM_SLEEP); 47058275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, 47068275SEric Cheng (mac_ring_handle_t *)rings, &nrings); 47078275SEric Cheng } else { 47088275SEric Cheng /* this function is called for TX only with a share */ 47098275SEric Cheng ASSERT(ring_type == MAC_RING_TYPE_RX); 47108275SEric Cheng /* 47118275SEric Cheng * Pick one ring from default group. 
47128275SEric Cheng * 47138275SEric Cheng * for now pick the second ring which requires the first ring 47148275SEric Cheng * at index 0 to stay in the default group, since it is the 47158275SEric Cheng * ring which carries the multicast traffic. 47168275SEric Cheng * We need a better way for a driver to indicate this, 47178275SEric Cheng * for example a per-ring flag. 47188275SEric Cheng */ 47198275SEric Cheng for (ring = src_group->mrg_rings; ring != NULL; 47208275SEric Cheng ring = ring->mr_next) { 47218275SEric Cheng if (ring->mr_index != 0) 47228275SEric Cheng break; 47238275SEric Cheng } 47248275SEric Cheng ASSERT(ring != NULL); 47258275SEric Cheng nrings = 1; 47268275SEric Cheng tmp_ring[0] = ring; 47278275SEric Cheng rings = tmp_ring; 47288275SEric Cheng } 47298275SEric Cheng 47308275SEric Cheng switch (ring_type) { 47318275SEric Cheng case MAC_RING_TYPE_RX: 47328275SEric Cheng if (src_group->mrg_cur_count - nrings < 47338275SEric Cheng mac_min_rx_default_rings) { 47348275SEric Cheng /* we ran out of rings */ 47358275SEric Cheng return (ENOSPC); 47368275SEric Cheng } 47378275SEric Cheng 47388275SEric Cheng /* move receive rings to new group */ 47398275SEric Cheng for (i = 0; i < nrings; i++) { 47408275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 47418275SEric Cheng if (rv != 0) { 47428275SEric Cheng /* move rings back on failure */ 47438275SEric Cheng for (j = 0; j < i; j++) { 47448275SEric Cheng (void) mac_group_mov_ring(mip, 47458275SEric Cheng src_group, rings[j]); 47468275SEric Cheng } 47478275SEric Cheng return (rv); 47488275SEric Cheng } 47498275SEric Cheng } 47508275SEric Cheng break; 47518275SEric Cheng 47528275SEric Cheng case MAC_RING_TYPE_TX: { 47538275SEric Cheng mac_ring_t *tmp_ring; 47548275SEric Cheng 47558275SEric Cheng /* move the TX rings to the new group */ 47568275SEric Cheng ASSERT(src_group == NULL); 47578275SEric Cheng for (i = 0; i < nrings; i++) { 47588275SEric Cheng /* get the desired ring */ 47598275SEric Cheng 
tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 47608275SEric Cheng ASSERT(tmp_ring == rings[i]); 47618275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 47628275SEric Cheng if (rv != 0) { 47638275SEric Cheng /* cleanup on failure */ 47648275SEric Cheng for (j = 0; j < i; j++) { 47658275SEric Cheng (void) mac_group_mov_ring(mip, 47668275SEric Cheng mip->mi_tx_groups + 47678275SEric Cheng mip->mi_tx_group_count, rings[j]); 47688275SEric Cheng } 47698275SEric Cheng } 47708275SEric Cheng } 47718275SEric Cheng break; 47728275SEric Cheng } 47738275SEric Cheng } 47748275SEric Cheng 47758275SEric Cheng if (share != NULL) { 47768275SEric Cheng /* add group to share */ 47778275SEric Cheng mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 47788275SEric Cheng /* free temporary array of rings */ 47798275SEric Cheng kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 47808275SEric Cheng } 47818275SEric Cheng 47828275SEric Cheng return (0); 47838275SEric Cheng } 47848275SEric Cheng 47858275SEric Cheng void 47868275SEric Cheng mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 47878275SEric Cheng { 47888275SEric Cheng mac_grp_client_t *mgcp; 47898275SEric Cheng 47908275SEric Cheng for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 47918275SEric Cheng if (mgcp->mgc_client == mcip) 47928275SEric Cheng break; 47938275SEric Cheng } 47948275SEric Cheng 47958275SEric Cheng VERIFY(mgcp == NULL); 47968275SEric Cheng 47978275SEric Cheng mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 47988275SEric Cheng mgcp->mgc_client = mcip; 47998275SEric Cheng mgcp->mgc_next = grp->mrg_clients; 48008275SEric Cheng grp->mrg_clients = mgcp; 48018275SEric Cheng 48028275SEric Cheng } 48038275SEric Cheng 48048275SEric Cheng void 48058275SEric Cheng mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 48068275SEric Cheng { 48078275SEric Cheng mac_grp_client_t *mgcp, **pprev; 48088275SEric Cheng 48098275SEric Cheng for 
(pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 48108275SEric Cheng pprev = &mgcp->mgc_next, mgcp = *pprev) { 48118275SEric Cheng if (mgcp->mgc_client == mcip) 48128275SEric Cheng break; 48138275SEric Cheng } 48148275SEric Cheng 48158275SEric Cheng ASSERT(mgcp != NULL); 48168275SEric Cheng 48178275SEric Cheng *pprev = mgcp->mgc_next; 48188275SEric Cheng kmem_free(mgcp, sizeof (mac_grp_client_t)); 48198275SEric Cheng } 48208275SEric Cheng 48218275SEric Cheng /* 48228275SEric Cheng * mac_reserve_rx_group() 48238275SEric Cheng * 48248275SEric Cheng * Finds an available group and exclusively reserves it for a client. 48258275SEric Cheng * The group is chosen to suit the flow's resource controls (bandwidth and 48268275SEric Cheng * fanout requirements) and the address type. 48278275SEric Cheng * If the requestor is the pimary MAC then return the group with the 48288275SEric Cheng * largest number of rings, otherwise the default ring when available. 48298275SEric Cheng */ 48308275SEric Cheng mac_group_t * 48318275SEric Cheng mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, 48328275SEric Cheng mac_rx_group_reserve_type_t rtype) 48338275SEric Cheng { 48348275SEric Cheng mac_share_handle_t share = mcip->mci_share; 48358275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 48368275SEric Cheng mac_group_t *grp = NULL; 48378275SEric Cheng int i, start, loopcount; 48388275SEric Cheng int err; 48398275SEric Cheng mac_address_t *map; 48408275SEric Cheng 48418275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 48428275SEric Cheng 48438275SEric Cheng /* Check if a group already has this mac address (case of VLANs) */ 48448275SEric Cheng if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) 48458275SEric Cheng return (map->ma_group); 48468275SEric Cheng 48478275SEric Cheng if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || 48488275SEric Cheng rtype == MAC_RX_NO_RESERVE) 48498275SEric Cheng return (NULL); 48508275SEric Cheng 48518275SEric Cheng /* 
48528275SEric Cheng * Try to exclusively reserve a RX group. 48538275SEric Cheng * 48548275SEric Cheng * For flows requires SW_RING it always goes to the default group 48558275SEric Cheng * (Until we can explicitely call out default groups (CR 6695600), 48568275SEric Cheng * we assume that the default group is always at position zero); 48578275SEric Cheng * 48588275SEric Cheng * For flows requires HW_DEFAULT_RING (unicast flow of the primary 48598275SEric Cheng * client), try to reserve the default RX group only. 48608275SEric Cheng * 48618275SEric Cheng * For flows requires HW_RING (unicast flow of other clients), try 48628275SEric Cheng * to reserve non-default RX group then the default group. 48638275SEric Cheng */ 48648275SEric Cheng switch (rtype) { 48658275SEric Cheng case MAC_RX_RESERVE_DEFAULT: 48668275SEric Cheng start = 0; 48678275SEric Cheng loopcount = 1; 48688275SEric Cheng break; 48698275SEric Cheng case MAC_RX_RESERVE_NONDEFAULT: 48708275SEric Cheng start = 1; 48718275SEric Cheng loopcount = mip->mi_rx_group_count; 48728275SEric Cheng } 48738275SEric Cheng 48748275SEric Cheng for (i = start; i < start + loopcount; i++) { 48758275SEric Cheng grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; 48768275SEric Cheng 48778275SEric Cheng DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 48788275SEric Cheng int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 48798275SEric Cheng 48808275SEric Cheng /* 48818275SEric Cheng * Check to see whether this mac client is the only client 48828275SEric Cheng * on this RX group. If not, we cannot exclusively reserve 48838275SEric Cheng * this RX group. 48848275SEric Cheng */ 48858275SEric Cheng if (!MAC_RX_GROUP_NO_CLIENT(grp) && 48868275SEric Cheng (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { 48878275SEric Cheng continue; 48888275SEric Cheng } 48898275SEric Cheng 48908275SEric Cheng /* 48918275SEric Cheng * This group could already be SHARED by other multicast 48928275SEric Cheng * flows on this client. 
In that case, the group would 48938275SEric Cheng * be shared and has already been started. 48948275SEric Cheng */ 48958275SEric Cheng ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 48968275SEric Cheng 48978275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 48988275SEric Cheng (mac_start_group(grp) != 0)) { 48998275SEric Cheng continue; 49008275SEric Cheng } 49018275SEric Cheng 49028275SEric Cheng if ((i % mip->mi_rx_group_count) == 0 || 49038275SEric Cheng mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 49048275SEric Cheng break; 49058275SEric Cheng } 49068275SEric Cheng 49078275SEric Cheng ASSERT(grp->mrg_cur_count == 0); 49088275SEric Cheng 49098275SEric Cheng /* 49108275SEric Cheng * Populate the group. Rings should be taken 49118275SEric Cheng * from the default group at position 0 for now. 49128275SEric Cheng */ 49138275SEric Cheng 49148275SEric Cheng err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 49158275SEric Cheng &mip->mi_rx_groups[0], grp, share); 49168275SEric Cheng if (err == 0) 49178275SEric Cheng break; 49188275SEric Cheng 49198275SEric Cheng DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 49208275SEric Cheng mip->mi_name, int, grp->mrg_index, int, err); 49218275SEric Cheng 49228275SEric Cheng /* 49238275SEric Cheng * It's a dynamic group but the grouping operation failed. 49248275SEric Cheng */ 49258275SEric Cheng mac_stop_group(grp); 49268275SEric Cheng } 49278275SEric Cheng 49288275SEric Cheng if (i == start + loopcount) 49298275SEric Cheng return (NULL); 49308275SEric Cheng 49318275SEric Cheng ASSERT(grp != NULL); 49328275SEric Cheng 49338275SEric Cheng DTRACE_PROBE2(rx__group__reserved, 49348275SEric Cheng char *, mip->mi_name, int, grp->mrg_index); 49358275SEric Cheng return (grp); 49368275SEric Cheng } 49378275SEric Cheng 49388275SEric Cheng /* 49398275SEric Cheng * mac_rx_release_group() 49408275SEric Cheng * 49418275SEric Cheng * This is called when there are no clients left for the group. 
49428275SEric Cheng * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 49438275SEric Cheng * and if it is a non default group, the shares are removed and 49448275SEric Cheng * all rings are assigned back to default group. 49458275SEric Cheng */ 49468275SEric Cheng void 49478275SEric Cheng mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 49488275SEric Cheng { 49498275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 49508275SEric Cheng mac_ring_t *ring; 49518275SEric Cheng 49528275SEric Cheng ASSERT(group != &mip->mi_rx_groups[0]); 49538275SEric Cheng 49548275SEric Cheng /* 49558275SEric Cheng * This is the case where there are no clients left. Any 49568275SEric Cheng * SRS etc on this group have also be quiesced. 49578275SEric Cheng */ 49588275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 49598275SEric Cheng if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 49608275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 49618275SEric Cheng /* 49628275SEric Cheng * Remove the SRS associated with the HW ring. 49638275SEric Cheng * As a result, polling will be disabled. 49648275SEric Cheng */ 49658275SEric Cheng ring->mr_srs = NULL; 49668275SEric Cheng } 49678275SEric Cheng ASSERT(ring->mr_state == MR_INUSE); 49688275SEric Cheng mac_stop_ring(ring); 49698275SEric Cheng ring->mr_state = MR_FREE; 49708275SEric Cheng ring->mr_flag = 0; 49718275SEric Cheng } 49728275SEric Cheng 49738275SEric Cheng /* remove group from share */ 49748275SEric Cheng if (mcip->mci_share != NULL) { 49758275SEric Cheng mip->mi_share_capab.ms_sremove(mcip->mci_share, 49768275SEric Cheng group->mrg_driver); 49778275SEric Cheng } 49788275SEric Cheng 49798275SEric Cheng if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 49808275SEric Cheng mac_ring_t *ring; 49818275SEric Cheng 49828275SEric Cheng /* 49838275SEric Cheng * Rings were dynamically allocated to group. 49848275SEric Cheng * Move rings back to default group. 
49858275SEric Cheng */ 49868275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 49878275SEric Cheng (void) mac_group_mov_ring(mip, 49888275SEric Cheng &mip->mi_rx_groups[0], ring); 49898275SEric Cheng } 49908275SEric Cheng } 49918275SEric Cheng mac_stop_group(group); 49928275SEric Cheng /* 49938275SEric Cheng * Possible improvement: See if we can assign the group just released 49948275SEric Cheng * to a another client of the mip 49958275SEric Cheng */ 49968275SEric Cheng } 49978275SEric Cheng 49988275SEric Cheng /* 49998275SEric Cheng * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 50008275SEric Cheng * when a share was allocated to the client. 50018275SEric Cheng */ 50028275SEric Cheng mac_group_t * 50038275SEric Cheng mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) 50048275SEric Cheng { 50058275SEric Cheng mac_group_t *grp; 50068275SEric Cheng int rv, i; 50078275SEric Cheng 50088275SEric Cheng /* 50098275SEric Cheng * TX groups are currently allocated only to MAC clients 50108275SEric Cheng * which are associated with a share. Since we have a fixed 50118275SEric Cheng * number of share and groups, and we already successfully 50128275SEric Cheng * allocated a share, find an available TX group. 
50138275SEric Cheng */ 50148275SEric Cheng ASSERT(share != NULL); 50158275SEric Cheng ASSERT(mip->mi_tx_group_free > 0); 50168275SEric Cheng 50178275SEric Cheng for (i = 0; i < mip->mi_tx_group_count; i++) { 50188275SEric Cheng grp = &mip->mi_tx_groups[i]; 50198275SEric Cheng 50208275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 50218275SEric Cheng (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) 50228275SEric Cheng continue; 50238275SEric Cheng 50248275SEric Cheng rv = mac_start_group(grp); 50258275SEric Cheng ASSERT(rv == 0); 50268275SEric Cheng 50278275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_RESERVED; 50288275SEric Cheng break; 50298275SEric Cheng } 50308275SEric Cheng 50318275SEric Cheng ASSERT(grp != NULL); 50328275SEric Cheng 50338275SEric Cheng /* 50348275SEric Cheng * Populate the group. Rings should be taken from the group 50358275SEric Cheng * of unassigned rings, which is past the array of TX 50368275SEric Cheng * groups adversized by the driver. 50378275SEric Cheng */ 50388275SEric Cheng rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, 50398275SEric Cheng grp, share); 50408275SEric Cheng if (rv != 0) { 50418275SEric Cheng DTRACE_PROBE3(tx__group__reserve__alloc__rings, 50428275SEric Cheng char *, mip->mi_name, int, grp->mrg_index, int, rv); 50438275SEric Cheng 50448275SEric Cheng mac_stop_group(grp); 50458275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_UNINIT; 50468275SEric Cheng 50478275SEric Cheng return (NULL); 50488275SEric Cheng } 50498275SEric Cheng 50508275SEric Cheng mip->mi_tx_group_free--; 50518275SEric Cheng 50528275SEric Cheng return (grp); 50538275SEric Cheng } 50548275SEric Cheng 50558275SEric Cheng void 50568275SEric Cheng mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) 50578275SEric Cheng { 50588275SEric Cheng mac_client_impl_t *mcip = grp->mrg_tx_client; 50598275SEric Cheng mac_share_handle_t share = mcip->mci_share; 50608275SEric Cheng mac_ring_t *ring; 50618275SEric Cheng 50628275SEric Cheng 
ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 50638275SEric Cheng ASSERT(share != NULL); 50648275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); 50658275SEric Cheng 50668275SEric Cheng mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 50678275SEric Cheng while ((ring = grp->mrg_rings) != NULL) { 50688275SEric Cheng /* move the ring back to the pool */ 50698275SEric Cheng (void) mac_group_mov_ring(mip, mip->mi_tx_groups + 50708275SEric Cheng mip->mi_tx_group_count, ring); 50718275SEric Cheng } 50728275SEric Cheng mac_stop_group(grp); 50738275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 50748275SEric Cheng grp->mrg_tx_client = NULL; 50758275SEric Cheng mip->mi_tx_group_free++; 50768275SEric Cheng } 50778275SEric Cheng 50788275SEric Cheng /* 50798275SEric Cheng * This is a 1-time control path activity initiated by the client (IP). 50808275SEric Cheng * The mac perimeter protects against other simultaneous control activities, 50818275SEric Cheng * for example an ioctl that attempts to change the degree of fanout and 50828275SEric Cheng * increase or decrease the number of softrings associated with this Tx SRS. 
50838275SEric Cheng */ 50848275SEric Cheng static mac_tx_notify_cb_t * 50858275SEric Cheng mac_client_tx_notify_add(mac_client_impl_t *mcip, 50868275SEric Cheng mac_tx_notify_t notify, void *arg) 50878275SEric Cheng { 50888275SEric Cheng mac_cb_info_t *mcbi; 50898275SEric Cheng mac_tx_notify_cb_t *mtnfp; 50908275SEric Cheng 50918275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 50928275SEric Cheng 50938275SEric Cheng mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 50948275SEric Cheng mtnfp->mtnf_fn = notify; 50958275SEric Cheng mtnfp->mtnf_arg = arg; 50968275SEric Cheng mtnfp->mtnf_link.mcb_objp = mtnfp; 50978275SEric Cheng mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 50988275SEric Cheng mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 50998275SEric Cheng 51008275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 51018275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 51028275SEric Cheng mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 51038275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 51048275SEric Cheng return (mtnfp); 51058275SEric Cheng } 51068275SEric Cheng 51078275SEric Cheng static void 51088275SEric Cheng mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 51098275SEric Cheng { 51108275SEric Cheng mac_cb_info_t *mcbi; 51118275SEric Cheng mac_cb_t **cblist; 51128275SEric Cheng 51138275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 51148275SEric Cheng 51158275SEric Cheng if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 51168275SEric Cheng &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 51178275SEric Cheng cmn_err(CE_WARN, 51188275SEric Cheng "mac_client_tx_notify_remove: callback not " 51198275SEric Cheng "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 51208275SEric Cheng return; 51218275SEric Cheng } 51228275SEric Cheng 51238275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 51248275SEric Cheng cblist = 
&mcip->mci_tx_notify_cb_list; 51258275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 51268275SEric Cheng if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 51278275SEric Cheng kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 51288275SEric Cheng else 51298275SEric Cheng mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 51308275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 51318275SEric Cheng } 51328275SEric Cheng 51338275SEric Cheng /* 51348275SEric Cheng * mac_client_tx_notify(): 51358275SEric Cheng * call to add and remove flow control callback routine. 51368275SEric Cheng */ 51378275SEric Cheng mac_tx_notify_handle_t 51388275SEric Cheng mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 51398275SEric Cheng void *ptr) 51408275SEric Cheng { 51418275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 51428275SEric Cheng mac_tx_notify_cb_t *mtnfp = NULL; 51438275SEric Cheng 51448275SEric Cheng i_mac_perim_enter(mcip->mci_mip); 51458275SEric Cheng 51468275SEric Cheng if (callb_func != NULL) { 51478275SEric Cheng /* Add a notify callback */ 51488275SEric Cheng mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 51498275SEric Cheng } else { 51508275SEric Cheng mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 51518275SEric Cheng } 51528275SEric Cheng i_mac_perim_exit(mcip->mci_mip); 51538275SEric Cheng 51548275SEric Cheng return ((mac_tx_notify_handle_t)mtnfp); 51558275SEric Cheng } 5156