10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51852Syz147064 * Common Development and Distribution License (the "License"). 61852Syz147064 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 215084Sjohnlev 220Sstevel@tonic-gate /* 238603SGirish.Moodalbail@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * MAC Services Module 298275SEric Cheng * 308275SEric Cheng * The GLDv3 framework locking - The MAC layer 318275SEric Cheng * -------------------------------------------- 328275SEric Cheng * 338275SEric Cheng * The MAC layer is central to the GLD framework and can provide the locking 348275SEric Cheng * framework needed for itself and for the use of MAC clients. MAC end points 358275SEric Cheng * are fairly disjoint and don't share a lot of state. 
So a coarse grained 368275SEric Cheng * multi-threading scheme is to single thread all create/modify/delete or set 378275SEric Cheng * type of control operations on a per mac end point while allowing data threads 388275SEric Cheng * concurrently. 398275SEric Cheng * 408275SEric Cheng * Control operations (set) that modify a mac end point are always serialized on 418275SEric Cheng * a per mac end point basis, We have at most 1 such thread per mac end point 428275SEric Cheng * at a time. 438275SEric Cheng * 448275SEric Cheng * All other operations that are not serialized are essentially multi-threaded. 458275SEric Cheng * For example a control operation (get) like getting statistics which may not 468275SEric Cheng * care about reading values atomically or data threads sending or receiving 478275SEric Cheng * data. Mostly these type of operations don't modify the control state. Any 488275SEric Cheng * state these operations care about are protected using traditional locks. 498275SEric Cheng * 508275SEric Cheng * The perimeter only serializes serial operations. It does not imply there 518275SEric Cheng * aren't any other concurrent operations. However a serialized operation may 528275SEric Cheng * sometimes need to make sure it is the only thread. In this case it needs 538275SEric Cheng * to use reference counting mechanisms to cv_wait until any current data 548275SEric Cheng * threads are done. 558275SEric Cheng * 568275SEric Cheng * The mac layer itself does not hold any locks across a call to another layer. 578275SEric Cheng * The perimeter is however held across a down call to the driver to make the 588275SEric Cheng * whole control operation atomic with respect to other control operations. 598275SEric Cheng * Also the data path and get type control operations may proceed concurrently. 608275SEric Cheng * These operations synchronize with the single serial operation on a given mac 618275SEric Cheng * end point using regular locks. 
The perimeter ensures that conflicting 628275SEric Cheng * operations like say a mac_multicast_add and a mac_multicast_remove on the 638275SEric Cheng * same mac end point don't interfere with each other and also ensures that the 648275SEric Cheng * changes in the mac layer and the call to the underlying driver to say add a 658275SEric Cheng * multicast address are done atomically without interference from a thread 668275SEric Cheng * trying to delete the same address. 678275SEric Cheng * 688275SEric Cheng * For example, consider 698275SEric Cheng * mac_multicst_add() 708275SEric Cheng * { 718275SEric Cheng * mac_perimeter_enter(); serialize all control operations 728275SEric Cheng * 738275SEric Cheng * grab list lock protect against access by data threads 748275SEric Cheng * add to list 758275SEric Cheng * drop list lock 768275SEric Cheng * 778275SEric Cheng * call driver's mi_multicst 788275SEric Cheng * 798275SEric Cheng * mac_perimeter_exit(); 808275SEric Cheng * } 818275SEric Cheng * 828275SEric Cheng * To lessen the number of serialization locks and simplify the lock hierarchy, 838275SEric Cheng * we serialize all the control operations on a per mac end point by using a 848275SEric Cheng * single serialization lock called the perimeter. We allow recursive entry into 858275SEric Cheng * the perimeter to facilitate use of this mechanism by both the mac client and 868275SEric Cheng * the MAC layer itself. 878275SEric Cheng * 888275SEric Cheng * MAC client means an entity that does an operation on a mac handle 898275SEric Cheng * obtained from a mac_open/mac_client_open. Similarly MAC driver means 908275SEric Cheng * an entity that does an operation on a mac handle obtained from a 918275SEric Cheng * mac_register. An entity could be both client and driver but on different 928275SEric Cheng * handles eg. aggr. and should only make the corresponding mac interface calls 938275SEric Cheng * i.e. 
mac driver interface or mac client interface as appropriate for that
 * mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
1238275SEric Cheng * 1248275SEric Cheng * R4. However if a MAC client has a lot of global state across all mac end 1258275SEric Cheng * points the per mac end point serialization may not be sufficient. In this 1268275SEric Cheng * case the client may choose to use global locks or use its own serialization. 1278275SEric Cheng * To avoid deadlocks, these client layer locks held across the mac calls 1288275SEric Cheng * in the control path must never be acquired by the data path for the reason 1298275SEric Cheng * mentioned below. 1308275SEric Cheng * 1318275SEric Cheng * (Assume that a control operation that holds a client lock blocks in the 1328275SEric Cheng * mac layer waiting for upcall reference counts to drop to zero. If an upcall 1338275SEric Cheng * data thread that holds this reference count, tries to acquire the same 1348275SEric Cheng * client lock subsequently it will deadlock). 1358275SEric Cheng * 1368275SEric Cheng * A MAC client may follow either the R3 model or the R4 model, but can't 1378275SEric Cheng * mix both. In the former, the hierarchy is Perim -> client locks, but in 1388275SEric Cheng * the latter it is client locks -> Perim. 1398275SEric Cheng * 1408275SEric Cheng * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able 1418275SEric Cheng * context since they may block while trying to acquire the perimeter. 1428275SEric Cheng * In addition some calls may block waiting for upcall refcnts to come down to 1438275SEric Cheng * zero. 1448275SEric Cheng * 1458275SEric Cheng * R6. MAC clients must make sure that they are single threaded and all threads 1468275SEric Cheng * from the top (in particular data threads) have finished before calling 1478275SEric Cheng * mac_client_close. The MAC framework does not track the number of client 1488275SEric Cheng * threads using the mac client handle. Also mac clients must make sure 1498275SEric Cheng * they have undone all the control operations before calling mac_client_close. 
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts.
In simple 1778275SEric Cheng * callbacks it may be ok to hold a lock and call the callbacks, but this is 1788275SEric Cheng * harder to maintain in the general case of arbitrary callbacks. 1798275SEric Cheng * 1808275SEric Cheng * R12. The MAC layer must protect upcall notification callbacks using reference 1818275SEric Cheng * counts rather than holding locks across the callbacks. 1828275SEric Cheng * 1838275SEric Cheng * R13. Given the variety of drivers, it is preferable if the MAC layer can make 1848275SEric Cheng * sure that any pointers (such as mac ring pointers) it passes to the driver 1858275SEric Cheng * remain valid until mac unregister time. Currently the mac layer achieves 1868275SEric Cheng * this by using generation numbers for rings and freeing the mac rings only 1878275SEric Cheng * at unregister time. The MAC layer must provide a layer of indirection and 1888275SEric Cheng * must not expose underlying driver rings or driver data structures/pointers 1898275SEric Cheng * directly to MAC clients. 1908275SEric Cheng * 1918275SEric Cheng * MAC driver rules 1928275SEric Cheng * ---------------- 1938275SEric Cheng * 1948275SEric Cheng * R14. It would be preferable if MAC drivers don't hold any locks across any 1958275SEric Cheng * mac call. However at a minimum they must not hold any locks across data 1968275SEric Cheng * upcalls. They must also make sure that all references to mac data structures 1978275SEric Cheng * are cleaned up and that it is single threaded at mac_unregister time. 1988275SEric Cheng * 1998275SEric Cheng * R15. MAC driver interfaces don't block and so the action may be done 2008275SEric Cheng * asynchronously in a separate thread as for example handling notifications. 2018275SEric Cheng * The driver must not assume that the action is complete when the call 2028275SEric Cheng * returns. 2038275SEric Cheng * 2048275SEric Cheng * R16. 
Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring().
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac provided
 * perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls.
In the case of clients that use its own locks the hierarchy 2368275SEric Cheng * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly 2378275SEric Cheng * calls mac_perim_enter/exit in this case. 2388275SEric Cheng * 2398275SEric Cheng * Subflow creation rules 2408275SEric Cheng * --------------------------- 2418275SEric Cheng * o In case of a user specified cpulist present on underlying link and flows, 2428275SEric Cheng * the flows cpulist must be a subset of the underlying link. 2438275SEric Cheng * o In case of a user specified fanout mode present on link and flow, the 2448275SEric Cheng * subflow fanout count has to be less than or equal to that of the 2458275SEric Cheng * underlying link. The cpu-bindings for the subflows will be a subset of 2468275SEric Cheng * the underlying link. 2478275SEric Cheng * o In case if no cpulist specified on both underlying link and flow, the 2488275SEric Cheng * underlying link relies on a MAC tunable to provide out of box fanout. 2498275SEric Cheng * The subflow will have no cpulist (the subflow will be unbound) 2508275SEric Cheng * o In case if no cpulist is specified on the underlying link, a subflow can 2518275SEric Cheng * carry either a user-specified cpulist or fanout count. The cpu-bindings 2528275SEric Cheng * for the subflow will not adhere to restriction that they need to be subset 2538275SEric Cheng * of the underlying link. 2548275SEric Cheng * o In case where the underlying link is carrying either a user specified 2558275SEric Cheng * cpulist or fanout mode and for a unspecified subflow, the subflow will be 2568275SEric Cheng * created unbound. 2578275SEric Cheng * o While creating unbound subflows, bandwidth mode changes attempt to 2588275SEric Cheng * figure a right fanout count. In such cases the fanout count will override 2598275SEric Cheng * the unbound cpu-binding behavior. 
2608275SEric Cheng * o In addition to this, while cycling between flow and link properties, we 2618275SEric Cheng * impose a restriction that if a link property has a subflow with 2628275SEric Cheng * user-specified attributes, we will not allow changing the link property. 2638275SEric Cheng * The administrator needs to reset all the user specified properties for the 2648275SEric Cheng * subflows before attempting a link property change. 2658275SEric Cheng * Some of the above rules can be overridden by specifying additional command 2668275SEric Cheng * line options while creating or modifying link or subflow properties. 2670Sstevel@tonic-gate */ 2680Sstevel@tonic-gate 2690Sstevel@tonic-gate #include <sys/types.h> 2700Sstevel@tonic-gate #include <sys/conf.h> 2715895Syz147064 #include <sys/id_space.h> 2726077Syz147064 #include <sys/esunddi.h> 2730Sstevel@tonic-gate #include <sys/stat.h> 2745895Syz147064 #include <sys/mkdev.h> 2750Sstevel@tonic-gate #include <sys/stream.h> 2760Sstevel@tonic-gate #include <sys/strsun.h> 2770Sstevel@tonic-gate #include <sys/strsubr.h> 2780Sstevel@tonic-gate #include <sys/dlpi.h> 2798275SEric Cheng #include <sys/modhash.h> 2808275SEric Cheng #include <sys/mac_provider.h> 2818275SEric Cheng #include <sys/mac_client_impl.h> 2828275SEric Cheng #include <sys/mac_soft_ring.h> 2838275SEric Cheng #include <sys/mac_impl.h> 2848275SEric Cheng #include <sys/mac.h> 2855895Syz147064 #include <sys/dls.h> 286269Sericheng #include <sys/dld.h> 2872311Sseb #include <sys/modctl.h> 2883448Sdh155122 #include <sys/fs/dv_node.h> 2895009Sgd78059 #include <sys/thread.h> 2905009Sgd78059 #include <sys/proc.h> 2915009Sgd78059 #include <sys/callb.h> 2925009Sgd78059 #include <sys/cpuvar.h> 2933288Sseb #include <sys/atomic.h> 2948275SEric Cheng #include <sys/bitmap.h> 2954913Sethindra #include <sys/sdt.h> 2968275SEric Cheng #include <sys/mac_flow.h> 2978275SEric Cheng #include <sys/ddi_intr_impl.h> 2988275SEric Cheng #include <sys/disp.h> 2998275SEric Cheng #include 
<sys/sdt.h> 3008275SEric Cheng #include <sys/vnic.h> 3018275SEric Cheng #include <sys/vnic_impl.h> 3028275SEric Cheng #include <sys/vlan.h> 3038275SEric Cheng #include <inet/ip.h> 3048275SEric Cheng #include <inet/ip6.h> 3058275SEric Cheng #include <sys/exacct.h> 3068275SEric Cheng #include <sys/exacct_impl.h> 3075903Ssowmini #include <inet/nd.h> 3086512Ssowmini #include <sys/ethernet.h> 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate #define IMPL_HASHSZ 67 /* prime */ 3110Sstevel@tonic-gate 3128275SEric Cheng kmem_cache_t *i_mac_impl_cachep; 3138275SEric Cheng mod_hash_t *i_mac_impl_hash; 314269Sericheng krwlock_t i_mac_impl_lock; 315269Sericheng uint_t i_mac_impl_count; 3168275SEric Cheng static kmem_cache_t *mac_ring_cache; 3175895Syz147064 static id_space_t *minor_ids; 3185895Syz147064 static uint32_t minor_count; 3190Sstevel@tonic-gate 3208275SEric Cheng /* 3218275SEric Cheng * Logging stuff. Perhaps mac_logging_interval could be broken into 3228275SEric Cheng * mac_flow_log_interval and mac_link_log_interval if we want to be 3238275SEric Cheng * able to schedule them differently. 3248275SEric Cheng */ 3258275SEric Cheng uint_t mac_logging_interval; 3268275SEric Cheng boolean_t mac_flow_log_enable; 3278275SEric Cheng boolean_t mac_link_log_enable; 3288275SEric Cheng timeout_id_t mac_logging_timer; 3298275SEric Cheng 3308275SEric Cheng /* for debugging, see MAC_DBG_PRT() in mac_impl.h */ 3318275SEric Cheng int mac_dbg = 0; 3328275SEric Cheng 3332311Sseb #define MACTYPE_KMODDIR "mac" 3342311Sseb #define MACTYPE_HASHSZ 67 3352311Sseb static mod_hash_t *i_mactype_hash; 3363288Sseb /* 3373288Sseb * i_mactype_lock synchronizes threads that obtain references to mactype_t 3383288Sseb * structures through i_mactype_getplugin(). 3393288Sseb */ 3403288Sseb static kmutex_t i_mactype_lock; 3412311Sseb 3420Sstevel@tonic-gate /* 3438275SEric Cheng * mac_tx_percpu_cnt 3448275SEric Cheng * 3458275SEric Cheng * Number of per cpu locks per mac_client_impl_t. 
Used by the transmit side 3468275SEric Cheng * in mac_tx to reduce lock contention. This is sized at boot time in mac_init. 3478275SEric Cheng * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2. 3488275SEric Cheng * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1. 3495084Sjohnlev */ 3508275SEric Cheng int mac_tx_percpu_cnt; 3518275SEric Cheng int mac_tx_percpu_cnt_max = 128; 3528275SEric Cheng 3538275SEric Cheng static int i_mac_constructor(void *, void *, int); 3548275SEric Cheng static void i_mac_destructor(void *, void *); 3558275SEric Cheng static int i_mac_ring_ctor(void *, void *, int); 3568275SEric Cheng static void i_mac_ring_dtor(void *, void *); 3578275SEric Cheng static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *); 3588275SEric Cheng void mac_tx_client_flush(mac_client_impl_t *); 3598275SEric Cheng void mac_tx_client_block(mac_client_impl_t *); 3608275SEric Cheng static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); 3618275SEric Cheng static int mac_start_group_and_rings(mac_group_t *); 3628275SEric Cheng static void mac_stop_group_and_rings(mac_group_t *); 3632311Sseb 3640Sstevel@tonic-gate /* 3650Sstevel@tonic-gate * Module initialization functions. 3660Sstevel@tonic-gate */ 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate void 3690Sstevel@tonic-gate mac_init(void) 3700Sstevel@tonic-gate { 3718275SEric Cheng mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? 
max_ncpus : 3728275SEric Cheng boot_max_ncpus); 3738275SEric Cheng 3748275SEric Cheng /* Upper bound is mac_tx_percpu_cnt_max */ 3758275SEric Cheng if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max) 3768275SEric Cheng mac_tx_percpu_cnt = mac_tx_percpu_cnt_max; 3778275SEric Cheng 3788275SEric Cheng if (mac_tx_percpu_cnt < 1) { 3798275SEric Cheng /* Someone set max_tx_percpu_cnt_max to 0 or less */ 3808275SEric Cheng mac_tx_percpu_cnt = 1; 3818275SEric Cheng } 3828275SEric Cheng 3838275SEric Cheng ASSERT(mac_tx_percpu_cnt >= 1); 3848275SEric Cheng mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1)); 3858275SEric Cheng /* 3868275SEric Cheng * Make it of the form 2**N - 1 in the range 3878275SEric Cheng * [0 .. mac_tx_percpu_cnt_max - 1] 3888275SEric Cheng */ 3898275SEric Cheng mac_tx_percpu_cnt--; 3908275SEric Cheng 3910Sstevel@tonic-gate i_mac_impl_cachep = kmem_cache_create("mac_impl_cache", 3922311Sseb sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor, 3932311Sseb NULL, NULL, NULL, 0); 3940Sstevel@tonic-gate ASSERT(i_mac_impl_cachep != NULL); 3950Sstevel@tonic-gate 3968275SEric Cheng mac_ring_cache = kmem_cache_create("mac_ring_cache", 3978275SEric Cheng sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL, 3988275SEric Cheng NULL, NULL, 0); 3998275SEric Cheng ASSERT(mac_ring_cache != NULL); 4005084Sjohnlev 401269Sericheng i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash", 402269Sericheng IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 403269Sericheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 404269Sericheng rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL); 4058275SEric Cheng 4068275SEric Cheng mac_flow_init(); 4078275SEric Cheng mac_soft_ring_init(); 4088275SEric Cheng mac_bcast_init(); 4098275SEric Cheng mac_client_init(); 4108275SEric Cheng 411269Sericheng i_mac_impl_count = 0; 4122311Sseb 4132311Sseb i_mactype_hash = mod_hash_create_extended("mactype_hash", 4142311Sseb MACTYPE_HASHSZ, 4152311Sseb 
mod_hash_null_keydtor, mod_hash_null_valdtor, 4162311Sseb mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 4175895Syz147064 4185895Syz147064 /* 4195895Syz147064 * Allocate an id space to manage minor numbers. The range of the 4205895Syz147064 * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal 4215895Syz147064 * minor number is MAXMIN, but id_t is type of integer and does not 4225895Syz147064 * allow MAXMIN). 4235895Syz147064 */ 4245895Syz147064 minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32); 4255895Syz147064 ASSERT(minor_ids != NULL); 4265895Syz147064 minor_count = 0; 4278275SEric Cheng 4288275SEric Cheng /* Let's default to 20 seconds */ 4298275SEric Cheng mac_logging_interval = 20; 4308275SEric Cheng mac_flow_log_enable = B_FALSE; 4318275SEric Cheng mac_link_log_enable = B_FALSE; 4328275SEric Cheng mac_logging_timer = 0; 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate 4350Sstevel@tonic-gate int 4360Sstevel@tonic-gate mac_fini(void) 4370Sstevel@tonic-gate { 4385895Syz147064 if (i_mac_impl_count > 0 || minor_count > 0) 439269Sericheng return (EBUSY); 4400Sstevel@tonic-gate 4415895Syz147064 id_space_destroy(minor_ids); 4428275SEric Cheng mac_flow_fini(); 4435895Syz147064 444269Sericheng mod_hash_destroy_hash(i_mac_impl_hash); 445269Sericheng rw_destroy(&i_mac_impl_lock); 4460Sstevel@tonic-gate 4478275SEric Cheng mac_client_fini(); 4488275SEric Cheng kmem_cache_destroy(mac_ring_cache); 4492311Sseb 4502311Sseb mod_hash_destroy_hash(i_mactype_hash); 4518275SEric Cheng mac_soft_ring_finish(); 4520Sstevel@tonic-gate return (0); 4530Sstevel@tonic-gate } 4540Sstevel@tonic-gate 4558275SEric Cheng void 4568275SEric Cheng mac_init_ops(struct dev_ops *ops, const char *name) 4578275SEric Cheng { 4588275SEric Cheng dld_init_ops(ops, name); 4598275SEric Cheng } 4608275SEric Cheng 4618275SEric Cheng void 4628275SEric Cheng mac_fini_ops(struct dev_ops *ops) 4638275SEric Cheng { 4648275SEric Cheng dld_fini_ops(ops); 4658275SEric Cheng } 
4668275SEric Cheng 4678275SEric Cheng /*ARGSUSED*/ 4688275SEric Cheng static int 4698275SEric Cheng i_mac_constructor(void *buf, void *arg, int kmflag) 4708275SEric Cheng { 4718275SEric Cheng mac_impl_t *mip = buf; 4728275SEric Cheng 4738275SEric Cheng bzero(buf, sizeof (mac_impl_t)); 4748275SEric Cheng 4758275SEric Cheng mip->mi_linkstate = LINK_STATE_UNKNOWN; 4768275SEric Cheng mip->mi_nclients = 0; 4778275SEric Cheng 4788275SEric Cheng mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); 4798275SEric Cheng rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); 4808275SEric Cheng mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); 4818275SEric Cheng mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); 4828275SEric Cheng mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL); 4838275SEric Cheng 4848275SEric Cheng mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock; 4858275SEric Cheng cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4868275SEric Cheng mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock; 4878275SEric Cheng cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4888275SEric Cheng return (0); 4898275SEric Cheng } 4908275SEric Cheng 4918275SEric Cheng /*ARGSUSED*/ 4928275SEric Cheng static void 4938275SEric Cheng i_mac_destructor(void *buf, void *arg) 4948275SEric Cheng { 4958275SEric Cheng mac_impl_t *mip = buf; 4968275SEric Cheng mac_cb_info_t *mcbi; 4978275SEric Cheng 4988275SEric Cheng ASSERT(mip->mi_ref == 0); 4998275SEric Cheng ASSERT(mip->mi_active == 0); 5008275SEric Cheng ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN); 5018275SEric Cheng ASSERT(mip->mi_devpromisc == 0); 5028275SEric Cheng ASSERT(mip->mi_promisc == 0); 5038275SEric Cheng ASSERT(mip->mi_ksp == NULL); 5048275SEric Cheng ASSERT(mip->mi_kstat_count == 0); 5058275SEric Cheng ASSERT(mip->mi_nclients == 0); 5068275SEric Cheng ASSERT(mip->mi_nactiveclients == 0); 5078833SVenu.Iyer@Sun.COM ASSERT(mip->mi_single_active_client == 
NULL); 5088275SEric Cheng ASSERT(mip->mi_state_flags == 0); 5098275SEric Cheng ASSERT(mip->mi_factory_addr == NULL); 5108275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 5118275SEric Cheng ASSERT(mip->mi_default_tx_ring == NULL); 5128275SEric Cheng 5138275SEric Cheng mcbi = &mip->mi_notify_cb_info; 5148275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0); 5158275SEric Cheng ASSERT(mip->mi_notify_bits == 0); 5168275SEric Cheng ASSERT(mip->mi_notify_thread == NULL); 5178275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock); 5188275SEric Cheng mcbi->mcbi_lockp = NULL; 5198275SEric Cheng 5208275SEric Cheng mcbi = &mip->mi_promisc_cb_info; 5218275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL); 5228275SEric Cheng ASSERT(mip->mi_promisc_list == NULL); 5238275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock); 5248275SEric Cheng mcbi->mcbi_lockp = NULL; 5258275SEric Cheng 5268275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); 5278275SEric Cheng ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); 5288275SEric Cheng 5298275SEric Cheng mutex_destroy(&mip->mi_lock); 5308275SEric Cheng rw_destroy(&mip->mi_rw_lock); 5318275SEric Cheng 5328275SEric Cheng mutex_destroy(&mip->mi_promisc_lock); 5338275SEric Cheng cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv); 5348275SEric Cheng mutex_destroy(&mip->mi_notify_lock); 5358275SEric Cheng cv_destroy(&mip->mi_notify_cb_info.mcbi_cv); 5368275SEric Cheng mutex_destroy(&mip->mi_ring_lock); 5378275SEric Cheng } 5388275SEric Cheng 5398275SEric Cheng /* ARGSUSED */ 5408275SEric Cheng static int 5418275SEric Cheng i_mac_ring_ctor(void *buf, void *arg, int kmflag) 5428275SEric Cheng { 5438275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5448275SEric Cheng 5458275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 5468275SEric Cheng cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL); 5478275SEric Cheng mutex_init(&ring->mr_lock, NULL, 
MUTEX_DEFAULT, NULL); 5488275SEric Cheng ring->mr_state = MR_FREE; 5498275SEric Cheng return (0); 5508275SEric Cheng } 5518275SEric Cheng 5528275SEric Cheng /* ARGSUSED */ 5538275SEric Cheng static void 5548275SEric Cheng i_mac_ring_dtor(void *buf, void *arg) 5558275SEric Cheng { 5568275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5578275SEric Cheng 5588275SEric Cheng cv_destroy(&ring->mr_cv); 5598275SEric Cheng mutex_destroy(&ring->mr_lock); 5608275SEric Cheng } 5618275SEric Cheng 5628275SEric Cheng /* 5638275SEric Cheng * Common functions to do mac callback addition and deletion. Currently this is 5648275SEric Cheng * used by promisc callbacks and notify callbacks. List addition and deletion 5658275SEric Cheng * need to take care of list walkers. List walkers in general, can't hold list 5668275SEric Cheng * locks and make upcall callbacks due to potential lock order and recursive 5678275SEric Cheng * reentry issues. Instead list walkers increment the list walker count to mark 5688275SEric Cheng * the presence of a walker thread. Addition can be carefully done to ensure 5698275SEric Cheng * that the list walker always sees either the old list or the new list. 5708275SEric Cheng * However the deletion can't be done while the walker is active, instead the 5718275SEric Cheng * deleting thread simply marks the entry as logically deleted. The last walker 5728275SEric Cheng * physically deletes and frees up the logically deleted entries when the walk 5738275SEric Cheng * is complete. 
5748275SEric Cheng */ 5758275SEric Cheng void 5768275SEric Cheng mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 5778275SEric Cheng mac_cb_t *mcb_elem) 5788275SEric Cheng { 5798275SEric Cheng mac_cb_t *p; 5808275SEric Cheng mac_cb_t **pp; 5818275SEric Cheng 5828275SEric Cheng /* Verify it is not already in the list */ 5838275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 5848275SEric Cheng if (p == mcb_elem) 5858275SEric Cheng break; 5868275SEric Cheng } 5878275SEric Cheng VERIFY(p == NULL); 5888275SEric Cheng 5898275SEric Cheng /* 5908275SEric Cheng * Add it to the head of the callback list. The membar ensures that 5918275SEric Cheng * the following list pointer manipulations reach global visibility 5928275SEric Cheng * in exactly the program order below. 5938275SEric Cheng */ 5948275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 5958275SEric Cheng 5968275SEric Cheng mcb_elem->mcb_nextp = *mcb_head; 5978275SEric Cheng membar_producer(); 5988275SEric Cheng *mcb_head = mcb_elem; 5998275SEric Cheng } 6008275SEric Cheng 6018275SEric Cheng /* 6028275SEric Cheng * Mark the entry as logically deleted. If there aren't any walkers unlink 6038275SEric Cheng * from the list. In either case return the corresponding status. 
6048275SEric Cheng */ 6058275SEric Cheng boolean_t 6068275SEric Cheng mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 6078275SEric Cheng mac_cb_t *mcb_elem) 6088275SEric Cheng { 6098275SEric Cheng mac_cb_t *p; 6108275SEric Cheng mac_cb_t **pp; 6118275SEric Cheng 6128275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6138275SEric Cheng /* 6148275SEric Cheng * Search the callback list for the entry to be removed 6158275SEric Cheng */ 6168275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 6178275SEric Cheng if (p == mcb_elem) 6188275SEric Cheng break; 6198275SEric Cheng } 6208275SEric Cheng VERIFY(p != NULL); 6218275SEric Cheng 6228275SEric Cheng /* 6238275SEric Cheng * If there are walkers just mark it as deleted and the last walker 6248275SEric Cheng * will remove from the list and free it. 6258275SEric Cheng */ 6268275SEric Cheng if (mcbi->mcbi_walker_cnt != 0) { 6278275SEric Cheng p->mcb_flags |= MCB_CONDEMNED; 6288275SEric Cheng mcbi->mcbi_del_cnt++; 6298275SEric Cheng return (B_FALSE); 6308275SEric Cheng } 6318275SEric Cheng 6328275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0); 6338275SEric Cheng *pp = p->mcb_nextp; 6348275SEric Cheng p->mcb_nextp = NULL; 6358275SEric Cheng return (B_TRUE); 6368275SEric Cheng } 6378275SEric Cheng 6388275SEric Cheng /* 6398275SEric Cheng * Wait for all pending callback removals to be completed 6408275SEric Cheng */ 6418275SEric Cheng void 6428275SEric Cheng mac_callback_remove_wait(mac_cb_info_t *mcbi) 6438275SEric Cheng { 6448275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6458275SEric Cheng while (mcbi->mcbi_del_cnt != 0) { 6468275SEric Cheng DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi); 6478275SEric Cheng cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 6488275SEric Cheng } 6498275SEric Cheng } 6508275SEric Cheng 6510Sstevel@tonic-gate /* 6528275SEric Cheng * The last mac callback walker does the cleanup. 
Walk the list and unlik 6538275SEric Cheng * all the logically deleted entries and construct a temporary list of 6548275SEric Cheng * removed entries. Return the list of removed entries to the caller. 6558275SEric Cheng */ 6568275SEric Cheng mac_cb_t * 6578275SEric Cheng mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head) 6588275SEric Cheng { 6598275SEric Cheng mac_cb_t *p; 6608275SEric Cheng mac_cb_t **pp; 6618275SEric Cheng mac_cb_t *rmlist = NULL; /* List of removed elements */ 6628275SEric Cheng int cnt = 0; 6638275SEric Cheng 6648275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6658275SEric Cheng ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0); 6668275SEric Cheng 6678275SEric Cheng pp = mcb_head; 6688275SEric Cheng while (*pp != NULL) { 6698275SEric Cheng if ((*pp)->mcb_flags & MCB_CONDEMNED) { 6708275SEric Cheng p = *pp; 6718275SEric Cheng *pp = p->mcb_nextp; 6728275SEric Cheng p->mcb_nextp = rmlist; 6738275SEric Cheng rmlist = p; 6748275SEric Cheng cnt++; 6758275SEric Cheng continue; 6768275SEric Cheng } 6778275SEric Cheng pp = &(*pp)->mcb_nextp; 6788275SEric Cheng } 6798275SEric Cheng 6808275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == cnt); 6818275SEric Cheng mcbi->mcbi_del_cnt = 0; 6828275SEric Cheng return (rmlist); 6838275SEric Cheng } 6848275SEric Cheng 6858275SEric Cheng boolean_t 6868275SEric Cheng mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) 6878275SEric Cheng { 6888275SEric Cheng mac_cb_t *mcb; 6898275SEric Cheng 6908275SEric Cheng /* Verify it is not already in the list */ 6918275SEric Cheng for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) { 6928275SEric Cheng if (mcb == mcb_elem) 6938275SEric Cheng return (B_TRUE); 6948275SEric Cheng } 6958275SEric Cheng 6968275SEric Cheng return (B_FALSE); 6978275SEric Cheng } 6988275SEric Cheng 6998275SEric Cheng boolean_t 7008275SEric Cheng mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) 7018275SEric Cheng { 7028275SEric 
Cheng boolean_t found; 7038275SEric Cheng 7048275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 7058275SEric Cheng found = mac_callback_lookup(mcb_headp, mcb_elem); 7068275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 7078275SEric Cheng 7088275SEric Cheng return (found); 7098275SEric Cheng } 7108275SEric Cheng 7118275SEric Cheng /* Free the list of removed callbacks */ 7128275SEric Cheng void 7138275SEric Cheng mac_callback_free(mac_cb_t *rmlist) 7148275SEric Cheng { 7158275SEric Cheng mac_cb_t *mcb; 7168275SEric Cheng mac_cb_t *mcb_next; 7178275SEric Cheng 7188275SEric Cheng for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { 7198275SEric Cheng mcb_next = mcb->mcb_nextp; 7208275SEric Cheng kmem_free(mcb->mcb_objp, mcb->mcb_objsize); 7218275SEric Cheng } 7228275SEric Cheng } 7238275SEric Cheng 7248275SEric Cheng /* 7258275SEric Cheng * The promisc callbacks are in 2 lists, one off the 'mip' and another off the 7268275SEric Cheng * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there 7278275SEric Cheng * is only a single shared total walker count, and an entry can't be physically 7288275SEric Cheng * unlinked if a walker is active on either list. The last walker does this 7298275SEric Cheng * cleanup of logically deleted entries. 7308275SEric Cheng */ 7318275SEric Cheng void 7328275SEric Cheng i_mac_promisc_walker_cleanup(mac_impl_t *mip) 7338275SEric Cheng { 7348275SEric Cheng mac_cb_t *rmlist; 7358275SEric Cheng mac_cb_t *mcb; 7368275SEric Cheng mac_cb_t *mcb_next; 7378275SEric Cheng mac_promisc_impl_t *mpip; 7388275SEric Cheng 7398275SEric Cheng /* 7408275SEric Cheng * Construct a temporary list of deleted callbacks by walking the 7418275SEric Cheng * the mi_promisc_list. Then for each entry in the temporary list, 7428275SEric Cheng * remove it from the mci_promisc_list and free the entry. 
7438275SEric Cheng */ 7448275SEric Cheng rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info, 7458275SEric Cheng &mip->mi_promisc_list); 7468275SEric Cheng 7478275SEric Cheng for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { 7488275SEric Cheng mcb_next = mcb->mcb_nextp; 7498275SEric Cheng mpip = (mac_promisc_impl_t *)mcb->mcb_objp; 7508275SEric Cheng VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info, 7518275SEric Cheng &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link)); 7528275SEric Cheng mcb->mcb_flags = 0; 7538275SEric Cheng mcb->mcb_nextp = NULL; 7548275SEric Cheng kmem_cache_free(mac_promisc_impl_cache, mpip); 7558275SEric Cheng } 7568275SEric Cheng } 7578275SEric Cheng 7588275SEric Cheng void 7598275SEric Cheng i_mac_notify(mac_impl_t *mip, mac_notify_type_t type) 7608275SEric Cheng { 7618275SEric Cheng mac_cb_info_t *mcbi; 7628275SEric Cheng 7638275SEric Cheng /* 7648275SEric Cheng * Signal the notify thread even after mi_ref has become zero and 7658275SEric Cheng * mi_disabled is set. The synchronization with the notify thread 7668275SEric Cheng * happens in mac_unregister and that implies the driver must make 7678275SEric Cheng * sure it is single-threaded (with respect to mac calls) and that 7688275SEric Cheng * all pending mac calls have returned before it calls mac_unregister 7698275SEric Cheng */ 7708275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 7718275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) 7728275SEric Cheng goto exit; 7738275SEric Cheng 7748275SEric Cheng /* 7758275SEric Cheng * Guard against incorrect notifications. (Running a newer 7768275SEric Cheng * mac client against an older implementation?) 
7778275SEric Cheng */ 7788275SEric Cheng if (type >= MAC_NNOTE) 7798275SEric Cheng goto exit; 7808275SEric Cheng 7818275SEric Cheng mcbi = &mip->mi_notify_cb_info; 7828275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 7838275SEric Cheng mip->mi_notify_bits |= (1 << type); 7848275SEric Cheng cv_broadcast(&mcbi->mcbi_cv); 7858275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 7868275SEric Cheng 7878275SEric Cheng exit: 7888275SEric Cheng rw_exit(&i_mac_impl_lock); 7898275SEric Cheng } 7908275SEric Cheng 7918275SEric Cheng /* 7928275SEric Cheng * Mac serialization primitives. Please see the block comment at the 7938275SEric Cheng * top of the file. 7940Sstevel@tonic-gate */ 7958275SEric Cheng void 7968275SEric Cheng i_mac_perim_enter(mac_impl_t *mip) 7978275SEric Cheng { 7988275SEric Cheng mac_client_impl_t *mcip; 7998275SEric Cheng 8008275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8018275SEric Cheng /* 8028275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8038275SEric Cheng * we want to serialize on. 
8048275SEric Cheng */ 8058275SEric Cheng mcip = mac_vnic_lower(mip); 8068275SEric Cheng mip = mcip->mci_mip; 8078275SEric Cheng } 8088275SEric Cheng 8098275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8108275SEric Cheng if (mip->mi_perim_owner == curthread) { 8118275SEric Cheng mip->mi_perim_ocnt++; 8128275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8138275SEric Cheng return; 8148275SEric Cheng } 8158275SEric Cheng 8168275SEric Cheng while (mip->mi_perim_owner != NULL) 8178275SEric Cheng cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock); 8188275SEric Cheng 8198275SEric Cheng mip->mi_perim_owner = curthread; 8208275SEric Cheng ASSERT(mip->mi_perim_ocnt == 0); 8218275SEric Cheng mip->mi_perim_ocnt++; 8228275SEric Cheng #ifdef DEBUG 8238275SEric Cheng mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack, 8248275SEric Cheng MAC_PERIM_STACK_DEPTH); 8258275SEric Cheng #endif 8268275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8278275SEric Cheng } 8288275SEric Cheng 8298275SEric Cheng int 8308275SEric Cheng i_mac_perim_enter_nowait(mac_impl_t *mip) 8318275SEric Cheng { 8328275SEric Cheng /* 8338275SEric Cheng * The vnic is a special case, since the serialization is done based 8348275SEric Cheng * on the lower mac. If the lower mac is busy, it does not imply the 8358275SEric Cheng * vnic can't be unregistered. But in the case of other drivers, 8368275SEric Cheng * a busy perimeter or open mac handles implies that the mac is busy 8378275SEric Cheng * and can't be unregistered. 
8388275SEric Cheng */ 8398275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8408275SEric Cheng i_mac_perim_enter(mip); 8418275SEric Cheng return (0); 8428275SEric Cheng } 8438275SEric Cheng 8448275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8458275SEric Cheng if (mip->mi_perim_owner != NULL) { 8468275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8478275SEric Cheng return (EBUSY); 8488275SEric Cheng } 8498275SEric Cheng ASSERT(mip->mi_perim_ocnt == 0); 8508275SEric Cheng mip->mi_perim_owner = curthread; 8518275SEric Cheng mip->mi_perim_ocnt++; 8528275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8538275SEric Cheng 8548275SEric Cheng return (0); 8558275SEric Cheng } 8568275SEric Cheng 8578275SEric Cheng void 8588275SEric Cheng i_mac_perim_exit(mac_impl_t *mip) 8598275SEric Cheng { 8608275SEric Cheng mac_client_impl_t *mcip; 8618275SEric Cheng 8628275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8638275SEric Cheng /* 8648275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8658275SEric Cheng * we want to serialize on. 8668275SEric Cheng */ 8678275SEric Cheng mcip = mac_vnic_lower(mip); 8688275SEric Cheng mip = mcip->mci_mip; 8698275SEric Cheng } 8708275SEric Cheng 8718275SEric Cheng ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0); 8728275SEric Cheng 8738275SEric Cheng mutex_enter(&mip->mi_perim_lock); 8748275SEric Cheng if (--mip->mi_perim_ocnt == 0) { 8758275SEric Cheng mip->mi_perim_owner = NULL; 8768275SEric Cheng cv_signal(&mip->mi_perim_cv); 8778275SEric Cheng } 8788275SEric Cheng mutex_exit(&mip->mi_perim_lock); 8798275SEric Cheng } 8808275SEric Cheng 8818275SEric Cheng /* 8828275SEric Cheng * Returns whether the current thread holds the mac perimeter. Used in making 8838275SEric Cheng * assertions. 
8848275SEric Cheng */ 8858275SEric Cheng boolean_t 8868275SEric Cheng mac_perim_held(mac_handle_t mh) 8878275SEric Cheng { 8888275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 8898275SEric Cheng mac_client_impl_t *mcip; 8908275SEric Cheng 8918275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8928275SEric Cheng /* 8938275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8948275SEric Cheng * we want to serialize on. 8958275SEric Cheng */ 8968275SEric Cheng mcip = mac_vnic_lower(mip); 8978275SEric Cheng mip = mcip->mci_mip; 8988275SEric Cheng } 8998275SEric Cheng return (mip->mi_perim_owner == curthread); 9008275SEric Cheng } 9018275SEric Cheng 9028275SEric Cheng /* 9038275SEric Cheng * mac client interfaces to enter the mac perimeter of a mac end point, given 9048275SEric Cheng * its mac handle, or macname or linkid. 9058275SEric Cheng */ 9068275SEric Cheng void 9078275SEric Cheng mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp) 9088275SEric Cheng { 9098275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 9108275SEric Cheng 9118275SEric Cheng i_mac_perim_enter(mip); 9128275SEric Cheng /* 9138275SEric Cheng * The mac_perim_handle_t returned encodes the 'mip' and whether a 9148275SEric Cheng * mac_open has been done internally while entering the perimeter. 
9158275SEric Cheng * This information is used in mac_perim_exit 9168275SEric Cheng */ 9178275SEric Cheng MAC_ENCODE_MPH(*mphp, mip, 0); 9188275SEric Cheng } 9198275SEric Cheng 9208275SEric Cheng int 9218275SEric Cheng mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp) 9228275SEric Cheng { 9238275SEric Cheng int err; 9248275SEric Cheng mac_handle_t mh; 9258275SEric Cheng 9268275SEric Cheng if ((err = mac_open(name, &mh)) != 0) 9278275SEric Cheng return (err); 9288275SEric Cheng 9298275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9308275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9318275SEric Cheng return (0); 9328275SEric Cheng } 9338275SEric Cheng 9348275SEric Cheng int 9358275SEric Cheng mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp) 9368275SEric Cheng { 9378275SEric Cheng int err; 9388275SEric Cheng mac_handle_t mh; 9398275SEric Cheng 9408275SEric Cheng if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 9418275SEric Cheng return (err); 9428275SEric Cheng 9438275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9448275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9458275SEric Cheng return (0); 9468275SEric Cheng } 9478275SEric Cheng 9488275SEric Cheng void 9498275SEric Cheng mac_perim_exit(mac_perim_handle_t mph) 9508275SEric Cheng { 9518275SEric Cheng mac_impl_t *mip; 9528275SEric Cheng boolean_t need_close; 9538275SEric Cheng 9548275SEric Cheng MAC_DECODE_MPH(mph, mip, need_close); 9558275SEric Cheng i_mac_perim_exit(mip); 9568275SEric Cheng if (need_close) 9578275SEric Cheng mac_close((mac_handle_t)mip); 9588275SEric Cheng } 9598275SEric Cheng 9608275SEric Cheng int 9615895Syz147064 mac_hold(const char *macname, mac_impl_t **pmip) 9620Sstevel@tonic-gate { 9630Sstevel@tonic-gate mac_impl_t *mip; 9640Sstevel@tonic-gate int err; 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate /* 9670Sstevel@tonic-gate * Check the device name length to make sure it won't overflow our 9680Sstevel@tonic-gate * buffer. 
9690Sstevel@tonic-gate */ 9702311Sseb if (strlen(macname) >= MAXNAMELEN) 9710Sstevel@tonic-gate return (EINVAL); 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate /* 9745895Syz147064 * Look up its entry in the global hash table. 9750Sstevel@tonic-gate */ 9765895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 9775895Syz147064 err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, 9785895Syz147064 (mod_hash_val_t *)&mip); 9795895Syz147064 9805895Syz147064 if (err != 0) { 9815895Syz147064 rw_exit(&i_mac_impl_lock); 9825895Syz147064 return (ENOENT); 9835895Syz147064 } 9845895Syz147064 9858275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 9865895Syz147064 rw_exit(&i_mac_impl_lock); 9875895Syz147064 return (ENOENT); 9885895Syz147064 } 9895895Syz147064 9908275SEric Cheng if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) { 9915895Syz147064 rw_exit(&i_mac_impl_lock); 9925895Syz147064 return (EBUSY); 9935895Syz147064 } 9945895Syz147064 9955895Syz147064 mip->mi_ref++; 9965895Syz147064 rw_exit(&i_mac_impl_lock); 9975895Syz147064 9985895Syz147064 *pmip = mip; 9995895Syz147064 return (0); 10005895Syz147064 } 10015895Syz147064 10028275SEric Cheng void 10035895Syz147064 mac_rele(mac_impl_t *mip) 10045895Syz147064 { 10055895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 10065895Syz147064 ASSERT(mip->mi_ref != 0); 10078275SEric Cheng if (--mip->mi_ref == 0) { 10088275SEric Cheng ASSERT(mip->mi_nactiveclients == 0 && 10098275SEric Cheng !(mip->mi_state_flags & MIS_EXCLUSIVE)); 10105895Syz147064 } 10115895Syz147064 rw_exit(&i_mac_impl_lock); 10125895Syz147064 } 10135895Syz147064 10148275SEric Cheng /* 10158893SMichael.Lim@Sun.COM * Private GLDv3 function to start a MAC instance. 
10168275SEric Cheng */ 10175895Syz147064 int 10188893SMichael.Lim@Sun.COM mac_start(mac_handle_t mh) 10190Sstevel@tonic-gate { 10208893SMichael.Lim@Sun.COM mac_impl_t *mip = (mac_impl_t *)mh; 10218275SEric Cheng int err = 0; 10228275SEric Cheng 10238275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10242311Sseb ASSERT(mip->mi_start != NULL); 10250Sstevel@tonic-gate 10260Sstevel@tonic-gate /* 10270Sstevel@tonic-gate * Check whether the device is already started. 10280Sstevel@tonic-gate */ 10298275SEric Cheng if (mip->mi_active++ == 0) { 10308275SEric Cheng mac_ring_t *ring = NULL; 10318275SEric Cheng 10328275SEric Cheng /* 10338275SEric Cheng * Start the device. 10348275SEric Cheng */ 10358275SEric Cheng err = mip->mi_start(mip->mi_driver); 10368275SEric Cheng if (err != 0) { 10378275SEric Cheng mip->mi_active--; 10388275SEric Cheng return (err); 10398275SEric Cheng } 10408275SEric Cheng 10410Sstevel@tonic-gate /* 10428275SEric Cheng * Start the default tx ring. 10430Sstevel@tonic-gate */ 10448275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 10458275SEric Cheng 10468275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 10478275SEric Cheng err = mac_start_ring(ring); 10488275SEric Cheng if (err != 0) { 10498275SEric Cheng mip->mi_active--; 10508275SEric Cheng return (err); 10518275SEric Cheng } 10528275SEric Cheng ring->mr_state = MR_INUSE; 10538275SEric Cheng } 10548275SEric Cheng 10558275SEric Cheng if (mip->mi_rx_groups != NULL) { 10568275SEric Cheng /* 10578275SEric Cheng * Start the default ring, since it will be needed 10588275SEric Cheng * to receive broadcast and multicast traffic for 10598275SEric Cheng * both primary and non-primary MAC clients. 
10608275SEric Cheng */ 10618275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 10628275SEric Cheng 10638275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 10648275SEric Cheng err = mac_start_group_and_rings(grp); 10658275SEric Cheng if (err != 0) { 10668275SEric Cheng mip->mi_active--; 10678275SEric Cheng if (ring != NULL) { 10688275SEric Cheng mac_stop_ring(ring); 10698275SEric Cheng ring->mr_state = MR_FREE; 10708275SEric Cheng } 10718275SEric Cheng return (err); 10728275SEric Cheng } 10738275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); 10748275SEric Cheng } 10750Sstevel@tonic-gate } 10760Sstevel@tonic-gate 10770Sstevel@tonic-gate return (err); 10780Sstevel@tonic-gate } 10790Sstevel@tonic-gate 10808275SEric Cheng /* 10818893SMichael.Lim@Sun.COM * Private GLDv3 function to stop a MAC instance. 10828275SEric Cheng */ 10830Sstevel@tonic-gate void 10848893SMichael.Lim@Sun.COM mac_stop(mac_handle_t mh) 10850Sstevel@tonic-gate { 10868893SMichael.Lim@Sun.COM mac_impl_t *mip = (mac_impl_t *)mh; 10878893SMichael.Lim@Sun.COM 10882311Sseb ASSERT(mip->mi_stop != NULL); 10898275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10900Sstevel@tonic-gate 10910Sstevel@tonic-gate /* 10920Sstevel@tonic-gate * Check whether the device is still needed. 10930Sstevel@tonic-gate */ 10940Sstevel@tonic-gate ASSERT(mip->mi_active != 0); 10958275SEric Cheng if (--mip->mi_active == 0) { 10968275SEric Cheng if (mip->mi_rx_groups != NULL) { 10970Sstevel@tonic-gate /* 10988275SEric Cheng * There should be no more active clients since the 10998275SEric Cheng * MAC is being stopped. Stop the default RX group 11008275SEric Cheng * and transition it back to registered state. 
11010Sstevel@tonic-gate */ 11028275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 11030Sstevel@tonic-gate 11040Sstevel@tonic-gate /* 11058275SEric Cheng * When clients are torn down, the groups 11068275SEric Cheng * are release via mac_release_rx_group which 11078275SEric Cheng * knows the the default group is always in 11088275SEric Cheng * started mode since broadcast uses it. So 11098275SEric Cheng * we can assert that their are no clients 11108275SEric Cheng * (since mac_bcast_add doesn't register itself 11118275SEric Cheng * as a client) and group is in SHARED state. 11120Sstevel@tonic-gate */ 11138275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); 11148275SEric Cheng ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && 11158275SEric Cheng mip->mi_nactiveclients == 0); 11168275SEric Cheng mac_stop_group_and_rings(grp); 11178275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 11180Sstevel@tonic-gate } 11198275SEric Cheng 11208275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 11218275SEric Cheng mac_ring_t *ring; 11228275SEric Cheng 11238275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 11248275SEric Cheng mac_stop_ring(ring); 11258275SEric Cheng ring->mr_state = MR_FREE; 11268275SEric Cheng } 11278275SEric Cheng 11288275SEric Cheng /* 11298275SEric Cheng * Stop the device. 
11308275SEric Cheng */ 11318275SEric Cheng mip->mi_stop(mip->mi_driver); 11322331Skrgopi } 11332331Skrgopi } 11342331Skrgopi 11350Sstevel@tonic-gate int 11368275SEric Cheng i_mac_promisc_set(mac_impl_t *mip, boolean_t on, mac_promisc_type_t ptype) 11370Sstevel@tonic-gate { 11380Sstevel@tonic-gate int err = 0; 11390Sstevel@tonic-gate 11408275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11412311Sseb ASSERT(mip->mi_setpromisc != NULL); 11420Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 11430Sstevel@tonic-gate 11440Sstevel@tonic-gate /* 11450Sstevel@tonic-gate * Determine whether we should enable or disable promiscuous mode. 11460Sstevel@tonic-gate * For details on the distinction between "device promiscuous mode" 11470Sstevel@tonic-gate * and "MAC promiscuous mode", see PSARC/2005/289. 11480Sstevel@tonic-gate */ 11490Sstevel@tonic-gate if (on) { 11500Sstevel@tonic-gate /* 11510Sstevel@tonic-gate * Enable promiscuous mode on the device if not yet enabled. 11520Sstevel@tonic-gate */ 11530Sstevel@tonic-gate if (mip->mi_devpromisc++ == 0) { 11542311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_TRUE); 11552311Sseb if (err != 0) { 11560Sstevel@tonic-gate mip->mi_devpromisc--; 11578275SEric Cheng return (err); 11580Sstevel@tonic-gate } 11590Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11600Sstevel@tonic-gate } 11610Sstevel@tonic-gate 11620Sstevel@tonic-gate /* 11630Sstevel@tonic-gate * Enable promiscuous mode on the MAC if not yet enabled. 11640Sstevel@tonic-gate */ 11650Sstevel@tonic-gate if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0) 11660Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11670Sstevel@tonic-gate } else { 11688275SEric Cheng if (mip->mi_devpromisc == 0) 11698275SEric Cheng return (EPROTO); 11708275SEric Cheng 11710Sstevel@tonic-gate /* 11720Sstevel@tonic-gate * Disable promiscuous mode on the device if this is the last 11730Sstevel@tonic-gate * enabling. 
11740Sstevel@tonic-gate */ 11750Sstevel@tonic-gate if (--mip->mi_devpromisc == 0) { 11762311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_FALSE); 11772311Sseb if (err != 0) { 11780Sstevel@tonic-gate mip->mi_devpromisc++; 11798275SEric Cheng return (err); 11800Sstevel@tonic-gate } 11810Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11820Sstevel@tonic-gate } 11830Sstevel@tonic-gate 11840Sstevel@tonic-gate /* 11850Sstevel@tonic-gate * Disable promiscuous mode on the MAC if this is the last 11860Sstevel@tonic-gate * enabling. 11870Sstevel@tonic-gate */ 11880Sstevel@tonic-gate if (ptype == MAC_PROMISC && --mip->mi_promisc == 0) 11890Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11928275SEric Cheng return (0); 11930Sstevel@tonic-gate } 11940Sstevel@tonic-gate 11958275SEric Cheng int 11968275SEric Cheng mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype) 11978275SEric Cheng { 11988275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 11998275SEric Cheng int rv; 12008275SEric Cheng 12018275SEric Cheng i_mac_perim_enter(mip); 12028275SEric Cheng rv = i_mac_promisc_set(mip, on, ptype); 12039044SGirish.Moodalbail@Sun.COM if (rv != 0 && !on) { 12049044SGirish.Moodalbail@Sun.COM cmn_err(CE_WARN, "%s: failed to switch OFF promiscuous mode " 12059044SGirish.Moodalbail@Sun.COM "because of error 0x%x", mip->mi_name, rv); 12069044SGirish.Moodalbail@Sun.COM rv = 0; 12079044SGirish.Moodalbail@Sun.COM } 12088275SEric Cheng i_mac_perim_exit(mip); 12098275SEric Cheng return (rv); 12108275SEric Cheng } 12118275SEric Cheng 12128275SEric Cheng /* 12138275SEric Cheng * The promiscuity state can change any time. 
If the caller needs to take 12148275SEric Cheng * actions that are atomic with the promiscuity state, then the caller needs 12158275SEric Cheng * to bracket the entire sequence with mac_perim_enter/exit 12168275SEric Cheng */ 12170Sstevel@tonic-gate boolean_t 12180Sstevel@tonic-gate mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype) 12190Sstevel@tonic-gate { 12200Sstevel@tonic-gate mac_impl_t *mip = (mac_impl_t *)mh; 12210Sstevel@tonic-gate 12220Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 12230Sstevel@tonic-gate 12240Sstevel@tonic-gate /* 12250Sstevel@tonic-gate * Return the current promiscuity. 12260Sstevel@tonic-gate */ 12270Sstevel@tonic-gate if (ptype == MAC_DEVPROMISC) 12280Sstevel@tonic-gate return (mip->mi_devpromisc != 0); 12290Sstevel@tonic-gate else 12300Sstevel@tonic-gate return (mip->mi_promisc != 0); 12310Sstevel@tonic-gate } 12320Sstevel@tonic-gate 12338275SEric Cheng /* 12348275SEric Cheng * Invoked at MAC instance attach time to initialize the list 12358275SEric Cheng * of factory MAC addresses supported by a MAC instance. This function 12368275SEric Cheng * builds a local cache in the mac_impl_t for the MAC addresses 12378275SEric Cheng * supported by the underlying hardware. The MAC clients themselves 12388275SEric Cheng * use the mac_addr_factory*() functions to query and reserve 12398275SEric Cheng * factory MAC addresses. 12408275SEric Cheng */ 12410Sstevel@tonic-gate void 12428275SEric Cheng mac_addr_factory_init(mac_impl_t *mip) 12435903Ssowmini { 12448275SEric Cheng mac_capab_multifactaddr_t capab; 12458275SEric Cheng uint8_t *addr; 12468275SEric Cheng int i; 12470Sstevel@tonic-gate 12480Sstevel@tonic-gate /* 12498275SEric Cheng * First round to see how many factory MAC addresses are available. 
12500Sstevel@tonic-gate */ 12518275SEric Cheng bzero(&capab, sizeof (capab)); 12528275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR, 12538275SEric Cheng &capab) || (capab.mcm_naddr == 0)) { 12546512Ssowmini /* 12558275SEric Cheng * The MAC instance doesn't support multiple factory 12568275SEric Cheng * MAC addresses, we're done here. 12576512Ssowmini */ 12586512Ssowmini return; 12595903Ssowmini } 12606512Ssowmini 12610Sstevel@tonic-gate /* 12628275SEric Cheng * Allocate the space and get all the factory addresses. 126356Smeem */ 12648275SEric Cheng addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP); 12658275SEric Cheng capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr); 12668275SEric Cheng 12678275SEric Cheng mip->mi_factory_addr_num = capab.mcm_naddr; 12688275SEric Cheng mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num * 12698275SEric Cheng sizeof (mac_factory_addr_t), KM_SLEEP); 12708275SEric Cheng 12718275SEric Cheng for (i = 0; i < capab.mcm_naddr; i++) { 12728275SEric Cheng bcopy(addr + i * MAXMACADDRLEN, 12738275SEric Cheng mip->mi_factory_addr[i].mfa_addr, 12748275SEric Cheng mip->mi_type->mt_addr_length); 12758275SEric Cheng mip->mi_factory_addr[i].mfa_in_use = B_FALSE; 127656Smeem } 127756Smeem 12788275SEric Cheng kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN); 12798275SEric Cheng } 12808275SEric Cheng 12818275SEric Cheng void 12828275SEric Cheng mac_addr_factory_fini(mac_impl_t *mip) 12838275SEric Cheng { 12848275SEric Cheng if (mip->mi_factory_addr == NULL) { 12858275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 12868275SEric Cheng return; 12878275SEric Cheng } 12888275SEric Cheng 12898275SEric Cheng kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num * 12908275SEric Cheng sizeof (mac_factory_addr_t)); 12918275SEric Cheng 12928275SEric Cheng mip->mi_factory_addr = NULL; 12938275SEric Cheng mip->mi_factory_addr_num = 0; 12940Sstevel@tonic-gate } 12950Sstevel@tonic-gate 12965084Sjohnlev 
/* 12978275SEric Cheng * Reserve a factory MAC address. If *slot is set to -1, the function 12988275SEric Cheng * attempts to reserve any of the available factory MAC addresses and 12998275SEric Cheng * returns the reserved slot id. If no slots are available, the function 13008275SEric Cheng * returns ENOSPC. If *slot is not set to -1, the function reserves 13018275SEric Cheng * the specified slot if it is available, or returns EBUSY is the slot 13028275SEric Cheng * is already used. Returns ENOTSUP if the underlying MAC does not 13038275SEric Cheng * support multiple factory addresses. If the slot number is not -1 but 13048275SEric Cheng * is invalid, returns EINVAL. 13058275SEric Cheng */ 13068275SEric Cheng int 13078275SEric Cheng mac_addr_factory_reserve(mac_client_handle_t mch, int *slot) 13088275SEric Cheng { 13098275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13108275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13118275SEric Cheng int i, ret = 0; 13128275SEric Cheng 13138275SEric Cheng i_mac_perim_enter(mip); 13148275SEric Cheng /* 13158275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13168275SEric Cheng * view of the factory addresses 13178275SEric Cheng */ 13188275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13198275SEric Cheng 13208275SEric Cheng if (mip->mi_factory_addr_num == 0) { 13218275SEric Cheng ret = ENOTSUP; 13228275SEric Cheng goto bail; 13238275SEric Cheng } 13248275SEric Cheng 13258275SEric Cheng if (*slot != -1) { 13268275SEric Cheng /* check the specified slot */ 13278275SEric Cheng if (*slot < 1 || *slot > mip->mi_factory_addr_num) { 13288275SEric Cheng ret = EINVAL; 13298275SEric Cheng goto bail; 13308275SEric Cheng } 13318275SEric Cheng if (mip->mi_factory_addr[*slot-1].mfa_in_use) { 13328275SEric Cheng ret = EBUSY; 13338275SEric Cheng goto bail; 13348275SEric Cheng } 13358275SEric Cheng } else { 13368275SEric Cheng /* pick the next available slot */ 13378275SEric Cheng for (i = 
0; i < mip->mi_factory_addr_num; i++) { 13388275SEric Cheng if (!mip->mi_factory_addr[i].mfa_in_use) 13398275SEric Cheng break; 13408275SEric Cheng } 13418275SEric Cheng 13428275SEric Cheng if (i == mip->mi_factory_addr_num) { 13438275SEric Cheng ret = ENOSPC; 13448275SEric Cheng goto bail; 13458275SEric Cheng } 13468275SEric Cheng *slot = i+1; 13478275SEric Cheng } 13488275SEric Cheng 13498275SEric Cheng mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE; 13508275SEric Cheng mip->mi_factory_addr[*slot-1].mfa_client = mcip; 13518275SEric Cheng 13528275SEric Cheng bail: 13538275SEric Cheng rw_exit(&mip->mi_rw_lock); 13548275SEric Cheng i_mac_perim_exit(mip); 13558275SEric Cheng return (ret); 13568275SEric Cheng } 13578275SEric Cheng 13588275SEric Cheng /* 13598275SEric Cheng * Release the specified factory MAC address slot. 13605084Sjohnlev */ 13618275SEric Cheng void 13628275SEric Cheng mac_addr_factory_release(mac_client_handle_t mch, uint_t slot) 13638275SEric Cheng { 13648275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13658275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13668275SEric Cheng 13678275SEric Cheng i_mac_perim_enter(mip); 13688275SEric Cheng /* 13698275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13708275SEric Cheng * view of the factory addresses 13718275SEric Cheng */ 13728275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13738275SEric Cheng 13748275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13758275SEric Cheng ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use); 13768275SEric Cheng 13778275SEric Cheng mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE; 13788275SEric Cheng 13798275SEric Cheng rw_exit(&mip->mi_rw_lock); 13808275SEric Cheng i_mac_perim_exit(mip); 13818275SEric Cheng } 13828275SEric Cheng 13838275SEric Cheng /* 13848275SEric Cheng * Stores in mac_addr the value of the specified MAC address. 
Returns 13858275SEric Cheng * 0 on success, or EINVAL if the slot number is not valid for the MAC. 13868275SEric Cheng * The caller must provide a string of at least MAXNAMELEN bytes. 13878275SEric Cheng */ 13888275SEric Cheng void 13898275SEric Cheng mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr, 13908275SEric Cheng uint_t *addr_len, char *client_name, boolean_t *in_use_arg) 13915084Sjohnlev { 13928275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 13938275SEric Cheng boolean_t in_use; 13948275SEric Cheng 13958275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13968275SEric Cheng 13978275SEric Cheng /* 13988275SEric Cheng * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter 13998275SEric Cheng * and mi_rw_lock 14008275SEric Cheng */ 14018275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 14028275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN); 14038275SEric Cheng *addr_len = mip->mi_type->mt_addr_length; 14048275SEric Cheng in_use = mip->mi_factory_addr[slot-1].mfa_in_use; 14058275SEric Cheng if (in_use && client_name != NULL) { 14068275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name, 14078275SEric Cheng client_name, MAXNAMELEN); 14088275SEric Cheng } 14098275SEric Cheng if (in_use_arg != NULL) 14108275SEric Cheng *in_use_arg = in_use; 14118275SEric Cheng rw_exit(&mip->mi_rw_lock); 14128275SEric Cheng } 14138275SEric Cheng 14148275SEric Cheng /* 14158275SEric Cheng * Returns the number of factory MAC addresses (in addition to the 14168275SEric Cheng * primary MAC address), 0 if the underlying MAC doesn't support 14178275SEric Cheng * that feature. 
14188275SEric Cheng */ 14198275SEric Cheng uint_t 14208275SEric Cheng mac_addr_factory_num(mac_handle_t mh) 14218275SEric Cheng { 14228275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 14238275SEric Cheng 14248275SEric Cheng return (mip->mi_factory_addr_num); 14258275SEric Cheng } 14268275SEric Cheng 14278275SEric Cheng 14288275SEric Cheng void 14298275SEric Cheng mac_rx_group_unmark(mac_group_t *grp, uint_t flag) 14308275SEric Cheng { 14318275SEric Cheng mac_ring_t *ring; 14328275SEric Cheng 14338275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) 14348275SEric Cheng ring->mr_flag &= ~flag; 14355084Sjohnlev } 14365084Sjohnlev 14375084Sjohnlev /* 14388275SEric Cheng * The following mac_hwrings_xxx() functions are private mac client functions 14398275SEric Cheng * used by the aggr driver to access and control the underlying HW Rx group 14408275SEric Cheng * and rings. In this case, the aggr driver has exclusive control of the 14418275SEric Cheng * underlying HW Rx group/rings, it calls the following functions to 14428275SEric Cheng * start/stop the HW Rx rings, disable/enable polling, add/remove mac' 14438275SEric Cheng * addresses, or set up the Rx callback. 
14445084Sjohnlev */ 14458275SEric Cheng /* ARGSUSED */ 14468275SEric Cheng static void 14478275SEric Cheng mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs, 14488275SEric Cheng mblk_t *mp_chain, boolean_t loopback) 14490Sstevel@tonic-gate { 14508275SEric Cheng mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)srs; 14518275SEric Cheng mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; 14528275SEric Cheng mac_direct_rx_t proc; 14538275SEric Cheng void *arg1; 14548275SEric Cheng mac_resource_handle_t arg2; 14558275SEric Cheng 14568275SEric Cheng proc = srs_rx->sr_func; 14578275SEric Cheng arg1 = srs_rx->sr_arg1; 14588275SEric Cheng arg2 = mac_srs->srs_mrh; 14598275SEric Cheng 14608275SEric Cheng proc(arg1, arg2, mp_chain, NULL); 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate 14638275SEric Cheng /* 14648275SEric Cheng * This function is called to get the list of HW rings that are reserved by 14658275SEric Cheng * an exclusive mac client. 14668275SEric Cheng * 14678275SEric Cheng * Return value: the number of HW rings. 14688275SEric Cheng */ 14698275SEric Cheng int 14708275SEric Cheng mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, 14718275SEric Cheng mac_ring_handle_t *hwrh) 14720Sstevel@tonic-gate { 14738275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 14748275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 14758275SEric Cheng mac_group_t *grp = flent->fe_rx_ring_group; 14768275SEric Cheng mac_ring_t *ring; 14778275SEric Cheng int cnt = 0; 14780Sstevel@tonic-gate 14790Sstevel@tonic-gate /* 14808275SEric Cheng * The mac client did not reserve any RX group, return directly. 14818275SEric Cheng * This is probably because the underlying MAC does not support 14828275SEric Cheng * any RX groups. 
14838275SEric Cheng */ 14848275SEric Cheng *hwgh = NULL; 14858275SEric Cheng if (grp == NULL) 14868275SEric Cheng return (0); 14878275SEric Cheng 14888275SEric Cheng /* 14898275SEric Cheng * This RX group must be reserved by this mac client. 14900Sstevel@tonic-gate */ 14918275SEric Cheng ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && 14928275SEric Cheng (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp)))); 14938275SEric Cheng 14948275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) { 14958275SEric Cheng ASSERT(cnt < MAX_RINGS_PER_GROUP); 14968275SEric Cheng hwrh[cnt++] = (mac_ring_handle_t)ring; 14978275SEric Cheng } 14988275SEric Cheng *hwgh = (mac_group_handle_t)grp; 14998275SEric Cheng return (cnt); 15008275SEric Cheng } 15018275SEric Cheng 15028275SEric Cheng /* 15038275SEric Cheng * Setup the RX callback of the mac client which exclusively controls HW ring. 15048275SEric Cheng */ 15058275SEric Cheng void 15068275SEric Cheng mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) 15078275SEric Cheng { 15088275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15098275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15108275SEric Cheng 15118275SEric Cheng mac_srs->srs_mrh = prh; 15128275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; 15130Sstevel@tonic-gate } 15140Sstevel@tonic-gate 15150Sstevel@tonic-gate void 15168275SEric Cheng mac_hwring_teardown(mac_ring_handle_t hwrh) 15178275SEric Cheng { 15188275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15198275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15208275SEric Cheng 15218275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; 15228275SEric Cheng mac_srs->srs_mrh = NULL; 15238275SEric Cheng } 15248275SEric Cheng 15258275SEric Cheng int 15268275SEric Cheng mac_hwring_disable_intr(mac_ring_handle_t rh) 15270Sstevel@tonic-gate { 15288275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 
15298275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15308275SEric Cheng 15318275SEric Cheng return (intr->mi_disable(intr->mi_handle)); 15328275SEric Cheng } 15338275SEric Cheng 15348275SEric Cheng int 15358275SEric Cheng mac_hwring_enable_intr(mac_ring_handle_t rh) 15368275SEric Cheng { 15378275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15388275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15398275SEric Cheng 15408275SEric Cheng return (intr->mi_enable(intr->mi_handle)); 15418275SEric Cheng } 15428275SEric Cheng 15438275SEric Cheng int 15448275SEric Cheng mac_hwring_start(mac_ring_handle_t rh) 15458275SEric Cheng { 15468275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15478275SEric Cheng 15488275SEric Cheng MAC_RING_UNMARK(rr_ring, MR_QUIESCE); 15498275SEric Cheng return (0); 15500Sstevel@tonic-gate } 15510Sstevel@tonic-gate 15520Sstevel@tonic-gate void 15538275SEric Cheng mac_hwring_stop(mac_ring_handle_t rh) 15548275SEric Cheng { 15558275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15568275SEric Cheng 15578275SEric Cheng mac_rx_ring_quiesce(rr_ring, MR_QUIESCE); 15588275SEric Cheng } 15598275SEric Cheng 15608275SEric Cheng mblk_t * 15618275SEric Cheng mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) 15628275SEric Cheng { 15638275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15648275SEric Cheng mac_ring_info_t *info = &rr_ring->mr_info; 15658275SEric Cheng 15668275SEric Cheng return (info->mri_poll(info->mri_driver, bytes_to_pickup)); 15678275SEric Cheng } 15688275SEric Cheng 15698275SEric Cheng int 15708275SEric Cheng mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) 15718275SEric Cheng { 15728275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15738275SEric Cheng 15748275SEric Cheng return (mac_group_addmac(group, addr)); 15758275SEric Cheng } 15768275SEric Cheng 15778275SEric Cheng int 15788275SEric Cheng mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) 
15798275SEric Cheng { 15808275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15818275SEric Cheng 15828275SEric Cheng return (mac_group_remmac(group, addr)); 15838275SEric Cheng } 15848275SEric Cheng 15858275SEric Cheng /* 15868275SEric Cheng * Set the RX group to be shared/reserved. Note that the group must be 15878275SEric Cheng * started/stopped outside of this function. 15888275SEric Cheng */ 15898275SEric Cheng void 15908275SEric Cheng mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) 15910Sstevel@tonic-gate { 15928275SEric Cheng /* 15938275SEric Cheng * If there is no change in the group state, just return. 15948275SEric Cheng */ 15958275SEric Cheng if (grp->mrg_state == state) 15968275SEric Cheng return; 15978275SEric Cheng 15988275SEric Cheng switch (state) { 15998275SEric Cheng case MAC_GROUP_STATE_RESERVED: 16008275SEric Cheng /* 16018275SEric Cheng * Successfully reserved the group. 16028275SEric Cheng * 16038275SEric Cheng * Given that there is an exclusive client controlling this 16048275SEric Cheng * group, we enable the group level polling when available, 16058275SEric Cheng * so that SRSs get to turn on/off individual rings they's 16068275SEric Cheng * assigned to. 16078275SEric Cheng */ 16088275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16098275SEric Cheng 16108275SEric Cheng if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) 16118275SEric Cheng GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16128275SEric Cheng 16138275SEric Cheng break; 16148275SEric Cheng 16158275SEric Cheng case MAC_GROUP_STATE_SHARED: 16168275SEric Cheng /* 16178275SEric Cheng * Set all rings of this group to software classified. 16188275SEric Cheng * If the group has an overriding interrupt, then re-enable it. 
16198275SEric Cheng */ 16208275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16218275SEric Cheng 16228275SEric Cheng if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) 16238275SEric Cheng GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16248275SEric Cheng 16258275SEric Cheng /* The ring is not available for reservations any more */ 16268275SEric Cheng break; 16278275SEric Cheng 16288275SEric Cheng case MAC_GROUP_STATE_REGISTERED: 16298275SEric Cheng /* Also callable from mac_register, perim is not held */ 16308275SEric Cheng break; 16318275SEric Cheng 16328275SEric Cheng default: 16338275SEric Cheng ASSERT(B_FALSE); 16348275SEric Cheng break; 16358275SEric Cheng } 16368275SEric Cheng 16378275SEric Cheng grp->mrg_state = state; 16388275SEric Cheng } 16398275SEric Cheng 16408275SEric Cheng /* 16418275SEric Cheng * Quiesce future hardware classified packets for the specified Rx ring 16428275SEric Cheng */ 16438275SEric Cheng static void 16448275SEric Cheng mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag) 16458275SEric Cheng { 16468275SEric Cheng ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER); 16478275SEric Cheng ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE); 16488275SEric Cheng 16498275SEric Cheng mutex_enter(&rx_ring->mr_lock); 16508275SEric Cheng rx_ring->mr_flag |= ring_flag; 16518275SEric Cheng while (rx_ring->mr_refcnt != 0) 16528275SEric Cheng cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock); 16538275SEric Cheng mutex_exit(&rx_ring->mr_lock); 16540Sstevel@tonic-gate } 16550Sstevel@tonic-gate 16564913Sethindra /* 16578275SEric Cheng * Please see mac_tx for details about the per cpu locking scheme 16584913Sethindra */ 16598275SEric Cheng static void 16608275SEric Cheng mac_tx_lock_all(mac_client_impl_t *mcip) 16618275SEric Cheng { 16628275SEric Cheng int i; 16638275SEric Cheng 16648275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16658275SEric Cheng mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16668275SEric Cheng } 
16678275SEric Cheng 16688275SEric Cheng static void 16698275SEric Cheng mac_tx_unlock_all(mac_client_impl_t *mcip) 16708275SEric Cheng { 16718275SEric Cheng int i; 16728275SEric Cheng 16738275SEric Cheng for (i = mac_tx_percpu_cnt; i >= 0; i--) 16748275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16758275SEric Cheng } 16768275SEric Cheng 16778275SEric Cheng static void 16788275SEric Cheng mac_tx_unlock_allbutzero(mac_client_impl_t *mcip) 16798275SEric Cheng { 16808275SEric Cheng int i; 16818275SEric Cheng 16828275SEric Cheng for (i = mac_tx_percpu_cnt; i > 0; i--) 16838275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16848275SEric Cheng } 16858275SEric Cheng 16868275SEric Cheng static int 16878275SEric Cheng mac_tx_sum_refcnt(mac_client_impl_t *mcip) 16880Sstevel@tonic-gate { 16898275SEric Cheng int i; 16908275SEric Cheng int refcnt = 0; 16918275SEric Cheng 16928275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16938275SEric Cheng refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt; 16948275SEric Cheng 16958275SEric Cheng return (refcnt); 16960Sstevel@tonic-gate } 16970Sstevel@tonic-gate 16988275SEric Cheng /* 16998275SEric Cheng * Stop future Tx packets coming down from the client in preparation for 17008275SEric Cheng * quiescing the Tx side. 
This is needed for dynamic reclaim and reassignment 17018275SEric Cheng * of rings between clients 17028275SEric Cheng */ 17038275SEric Cheng void 17048275SEric Cheng mac_tx_client_block(mac_client_impl_t *mcip) 17055084Sjohnlev { 17068275SEric Cheng mac_tx_lock_all(mcip); 17078275SEric Cheng mcip->mci_tx_flag |= MCI_TX_QUIESCE; 17088275SEric Cheng while (mac_tx_sum_refcnt(mcip) != 0) { 17098275SEric Cheng mac_tx_unlock_allbutzero(mcip); 17108275SEric Cheng cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17118275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17128275SEric Cheng mac_tx_lock_all(mcip); 17138275SEric Cheng } 17148275SEric Cheng mac_tx_unlock_all(mcip); 17155084Sjohnlev } 17165084Sjohnlev 17178275SEric Cheng void 17188275SEric Cheng mac_tx_client_unblock(mac_client_impl_t *mcip) 17195084Sjohnlev { 17208275SEric Cheng mac_tx_lock_all(mcip); 17218275SEric Cheng mcip->mci_tx_flag &= ~MCI_TX_QUIESCE; 17228275SEric Cheng mac_tx_unlock_all(mcip); 17238833SVenu.Iyer@Sun.COM /* 17248833SVenu.Iyer@Sun.COM * We may fail to disable flow control for the last MAC_NOTE_TX 17258833SVenu.Iyer@Sun.COM * notification because the MAC client is quiesced. Send the 17268833SVenu.Iyer@Sun.COM * notification again. 17278833SVenu.Iyer@Sun.COM */ 17288833SVenu.Iyer@Sun.COM i_mac_notify(mcip->mci_mip, MAC_NOTE_TX); 17295084Sjohnlev } 17305084Sjohnlev 17310Sstevel@tonic-gate /* 17328275SEric Cheng * Wait for an SRS to quiesce. The SRS worker will signal us when the 17338275SEric Cheng * quiesce is done. 
17348275SEric Cheng */ 17358275SEric Cheng static void 17368275SEric Cheng mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag) 17378275SEric Cheng { 17388275SEric Cheng mutex_enter(&srs->srs_lock); 17398275SEric Cheng while (!(srs->srs_state & srs_flag)) 17408275SEric Cheng cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock); 17418275SEric Cheng mutex_exit(&srs->srs_lock); 17428275SEric Cheng } 17438275SEric Cheng 17448275SEric Cheng /* 17458275SEric Cheng * Quiescing an Rx SRS is achieved by the following sequence. The protocol 17468275SEric Cheng * works bottom up by cutting off packet flow from the bottommost point in the 17478275SEric Cheng * mac, then the SRS, and then the soft rings. There are 2 use cases of this 17488275SEric Cheng * mechanism. One is a temporary quiesce of the SRS, such as say while changing 17498275SEric Cheng * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 17508275SEric Cheng * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used 17518275SEric Cheng * for the SRS and MR flags. In the former case the threads pause waiting for 17528275SEric Cheng * a restart, while in the latter case the threads exit. The Tx SRS teardown 17538275SEric Cheng * is also mostly similar to the above. 17548275SEric Cheng * 17558275SEric Cheng * 1. Stop future hardware classified packets at the lowest level in the mac. 17568275SEric Cheng * Remove any hardware classification rule (CONDEMNED case) and mark the 17578275SEric Cheng * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 17588275SEric Cheng * from increasing. Upcalls from the driver that come through hardware 17598275SEric Cheng * classification will be dropped in mac_rx from now on. Then we wait for 17608275SEric Cheng * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 17618275SEric Cheng * sure there aren't any upcall threads from the driver through hardware 17628275SEric Cheng * classification. 
In the case of SRS teardown we also remove the
 *    classification rule in the driver.
 *
 * 2. Stop future software classified packets by marking the flow entry with
 *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
 *    increasing. We also remove the flow entry from the table in the latter
 *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
 *    that indicates there aren't any active threads using that flow entry.
 *
 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
 *    SRS worker thread, and the soft ring threads are quiesced in sequence
 *    with the SRS worker thread serving as a master controller. This
 *    mechanism is explained in mac_srs_worker_quiesce().
 *
 * The restart mechanism to reactivate the SRS and softrings is explained
 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
 * restart sequence.
17790Sstevel@tonic-gate */ 17800Sstevel@tonic-gate void 17818275SEric Cheng mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 17820Sstevel@tonic-gate { 17838275SEric Cheng flow_entry_t *flent = srs->srs_flent; 17848275SEric Cheng uint_t mr_flag, srs_done_flag; 17858275SEric Cheng 17868275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 17878275SEric Cheng ASSERT(!(srs->srs_type & SRST_TX)); 17888275SEric Cheng 17898275SEric Cheng if (srs_quiesce_flag == SRS_CONDEMNED) { 17908275SEric Cheng mr_flag = MR_CONDEMNED; 17918275SEric Cheng srs_done_flag = SRS_CONDEMNED_DONE; 17928275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17938275SEric Cheng mac_srs_client_poll_disable(srs->srs_mcip, srs); 17948275SEric Cheng } else { 17958275SEric Cheng ASSERT(srs_quiesce_flag == SRS_QUIESCE); 17968275SEric Cheng mr_flag = MR_QUIESCE; 17978275SEric Cheng srs_done_flag = SRS_QUIESCE_DONE; 17988275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17998275SEric Cheng mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 18008275SEric Cheng } 18018275SEric Cheng 18028275SEric Cheng if (srs->srs_ring != NULL) { 18038275SEric Cheng mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 18048275SEric Cheng } else { 18058275SEric Cheng /* 18068275SEric Cheng * SRS is driven by software classification. In case 18078275SEric Cheng * of CONDEMNED, the top level teardown functions will 18088275SEric Cheng * deal with flow removal. 18098275SEric Cheng */ 18108275SEric Cheng if (srs_quiesce_flag != SRS_CONDEMNED) { 18118275SEric Cheng FLOW_MARK(flent, FE_QUIESCE); 18128275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 18138275SEric Cheng } 18148275SEric Cheng } 18150Sstevel@tonic-gate 18160Sstevel@tonic-gate /* 18178275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 18188275SEric Cheng * SRS quiesce to complete. 
The SRS worker thread will wake us 18198275SEric Cheng * up when the quiesce is complete 18204913Sethindra */ 18218275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 18228275SEric Cheng mac_srs_quiesce_wait(srs, srs_done_flag); 18234913Sethindra } 18244913Sethindra 18254913Sethindra /* 18268275SEric Cheng * Remove an SRS. 18274913Sethindra */ 18284913Sethindra void 18298275SEric Cheng mac_rx_srs_remove(mac_soft_ring_set_t *srs) 18304913Sethindra { 18318275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18328275SEric Cheng int i; 18338275SEric Cheng 18348275SEric Cheng mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 18358275SEric Cheng /* 18368275SEric Cheng * Locate and remove our entry in the fe_rx_srs[] array, and 18378275SEric Cheng * adjust the fe_rx_srs array entries and array count by 18388275SEric Cheng * moving the last entry into the vacated spot. 18398275SEric Cheng */ 18408275SEric Cheng mutex_enter(&flent->fe_lock); 18418275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 18428275SEric Cheng if (flent->fe_rx_srs[i] == srs) 18438275SEric Cheng break; 18444913Sethindra } 18458275SEric Cheng 18468275SEric Cheng ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 18478275SEric Cheng if (i != flent->fe_rx_srs_cnt - 1) { 18488275SEric Cheng flent->fe_rx_srs[i] = 18498275SEric Cheng flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 18508275SEric Cheng i = flent->fe_rx_srs_cnt - 1; 18518275SEric Cheng } 18528275SEric Cheng 18538275SEric Cheng flent->fe_rx_srs[i] = NULL; 18548275SEric Cheng flent->fe_rx_srs_cnt--; 18558275SEric Cheng mutex_exit(&flent->fe_lock); 18568275SEric Cheng 18578275SEric Cheng mac_srs_free(srs); 18580Sstevel@tonic-gate } 18590Sstevel@tonic-gate 18608275SEric Cheng static void 18618275SEric Cheng mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 18620Sstevel@tonic-gate { 18638275SEric Cheng mutex_enter(&srs->srs_lock); 18648275SEric Cheng srs->srs_state &= ~flag; 18658275SEric Cheng mutex_exit(&srs->srs_lock); 18668275SEric Cheng } 
18678275SEric Cheng 18688275SEric Cheng void 18698275SEric Cheng mac_rx_srs_restart(mac_soft_ring_set_t *srs) 18708275SEric Cheng { 18718275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18728275SEric Cheng mac_ring_t *mr; 18738275SEric Cheng 18748275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 18758275SEric Cheng ASSERT((srs->srs_type & SRST_TX) == 0); 18760Sstevel@tonic-gate 18770Sstevel@tonic-gate /* 18788275SEric Cheng * This handles a change in the number of SRSs between the quiesce and 18798275SEric Cheng * and restart operation of a flow. 18808275SEric Cheng */ 18818275SEric Cheng if (!SRS_QUIESCED(srs)) 18828275SEric Cheng return; 18838275SEric Cheng 18848275SEric Cheng /* 18858275SEric Cheng * Signal the SRS to restart itself. Wait for the restart to complete 18868275SEric Cheng * Note that we only restart the SRS if it is not marked as 18878275SEric Cheng * permanently quiesced. 18880Sstevel@tonic-gate */ 18898275SEric Cheng if (!SRS_QUIESCED_PERMANENT(srs)) { 18908275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 18918275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 18928275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 18938275SEric Cheng 18948275SEric Cheng mac_srs_client_poll_restart(srs->srs_mcip, srs); 18958275SEric Cheng } 18968275SEric Cheng 18978275SEric Cheng /* Finally clear the flags to let the packets in */ 18988275SEric Cheng mr = srs->srs_ring; 18998275SEric Cheng if (mr != NULL) { 19008275SEric Cheng MAC_RING_UNMARK(mr, MR_QUIESCE); 19018275SEric Cheng /* In case the ring was stopped, safely restart it */ 19028275SEric Cheng (void) mac_start_ring(mr); 19038275SEric Cheng } else { 19048275SEric Cheng FLOW_UNMARK(flent, FE_QUIESCE); 19058275SEric Cheng } 19068275SEric Cheng } 19078275SEric Cheng 19088275SEric Cheng /* 19098275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 19108275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem. 
19118275SEric Cheng */ 19128275SEric Cheng /* ARGSUSED */ 19138275SEric Cheng int 19148275SEric Cheng mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 19158275SEric Cheng { 19168275SEric Cheng int i; 19178275SEric Cheng 19188275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19198275SEric Cheng mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 19208275SEric Cheng SRS_QUIESCE); 19218275SEric Cheng } 19228275SEric Cheng return (0); 19230Sstevel@tonic-gate } 19240Sstevel@tonic-gate 19250Sstevel@tonic-gate /* 19268275SEric Cheng * Restart a flow and associated Rx SRS that has been quiesced temporarily 19278275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem 19280Sstevel@tonic-gate */ 19298275SEric Cheng /* ARGSUSED */ 19308275SEric Cheng int 19318275SEric Cheng mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 19328275SEric Cheng { 19338275SEric Cheng int i; 19348275SEric Cheng 19358275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) 19368275SEric Cheng mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 19378275SEric Cheng 19388275SEric Cheng return (0); 19398275SEric Cheng } 19408275SEric Cheng 19410Sstevel@tonic-gate void 19428275SEric Cheng mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 19430Sstevel@tonic-gate { 19448275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19458275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 19468275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19478275SEric Cheng mac_soft_ring_set_t *mac_srs; 19488275SEric Cheng int i; 19498275SEric Cheng 19508275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19518275SEric Cheng 19528275SEric Cheng if (flent == NULL) 19538275SEric Cheng return; 19548275SEric Cheng 19558275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19568275SEric Cheng mac_srs = flent->fe_rx_srs[i]; 19578275SEric Cheng mutex_enter(&mac_srs->srs_lock); 19588275SEric Cheng if (on) 19598275SEric Cheng 
mac_srs->srs_state |= SRS_QUIESCE_PERM; 19608275SEric Cheng else 19618275SEric Cheng mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 19628275SEric Cheng mutex_exit(&mac_srs->srs_lock); 19630Sstevel@tonic-gate } 19648275SEric Cheng } 19658275SEric Cheng 19668275SEric Cheng void 19678275SEric Cheng mac_rx_client_quiesce(mac_client_handle_t mch) 19688275SEric Cheng { 19698275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19708275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19718275SEric Cheng 19728275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19738275SEric Cheng 19748275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19758275SEric Cheng (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 19768275SEric Cheng NULL); 19778275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19788275SEric Cheng mac_rx_classify_flow_quiesce, NULL); 19798275SEric Cheng } 19800Sstevel@tonic-gate } 19810Sstevel@tonic-gate 19820Sstevel@tonic-gate void 19838275SEric Cheng mac_rx_client_restart(mac_client_handle_t mch) 19840Sstevel@tonic-gate { 19858275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19868275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19878275SEric Cheng 19888275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19898275SEric Cheng 19908275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19918275SEric Cheng (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 19928275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19938275SEric Cheng mac_rx_classify_flow_restart, NULL); 19948275SEric Cheng } 19958275SEric Cheng } 19968275SEric Cheng 19978275SEric Cheng /* 19988275SEric Cheng * This function only quiesces the Tx SRS and softring worker threads. Callers 19998275SEric Cheng * need to make sure that there aren't any mac client threads doing current or 20008275SEric Cheng * future transmits in the mac before calling this function. 
20018275SEric Cheng */ 20028275SEric Cheng void 20038275SEric Cheng mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 20048275SEric Cheng { 20058275SEric Cheng mac_client_impl_t *mcip = srs->srs_mcip; 20068275SEric Cheng 20078275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20088275SEric Cheng 20098275SEric Cheng ASSERT(srs->srs_type & SRST_TX); 20108275SEric Cheng ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 20118275SEric Cheng srs_quiesce_flag == SRS_QUIESCE); 20120Sstevel@tonic-gate 20130Sstevel@tonic-gate /* 20148275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 20158275SEric Cheng * SRS quiesce to complete. The SRS worker thread will wake us 20168275SEric Cheng * up when the quiesce is complete 20170Sstevel@tonic-gate */ 20188275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 20198275SEric Cheng mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 20208275SEric Cheng SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 20218275SEric Cheng } 20228275SEric Cheng 20238275SEric Cheng void 20248275SEric Cheng mac_tx_srs_restart(mac_soft_ring_set_t *srs) 20258275SEric Cheng { 20268275SEric Cheng /* 20278275SEric Cheng * Resizing the fanout could result in creation of new SRSs. 20288275SEric Cheng * They may not necessarily be in the quiesced state in which 20298275SEric Cheng * case it need be restarted 20308275SEric Cheng */ 20318275SEric Cheng if (!SRS_QUIESCED(srs)) 20328275SEric Cheng return; 20338275SEric Cheng 20348275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 20358275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 20368275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 20370Sstevel@tonic-gate } 20380Sstevel@tonic-gate 20390Sstevel@tonic-gate /* 20408275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 
20418275SEric Cheng * Please see block comment above mac_rx_srs_quiesce 20420Sstevel@tonic-gate */ 20438275SEric Cheng /* ARGSUSED */ 20448275SEric Cheng int 20458275SEric Cheng mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 20460Sstevel@tonic-gate { 20472311Sseb /* 20488275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20498275SEric Cheng * not plumbed 20502311Sseb */ 20518275SEric Cheng if (flent->fe_tx_srs != NULL) 20528275SEric Cheng mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 20538275SEric Cheng return (0); 20548275SEric Cheng } 20558275SEric Cheng 20568275SEric Cheng /* ARGSUSED */ 20578275SEric Cheng int 20588275SEric Cheng mac_tx_flow_restart(flow_entry_t *flent, void *arg) 20598275SEric Cheng { 20608275SEric Cheng /* 20618275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20628275SEric Cheng * not plumbed 20638275SEric Cheng */ 20648275SEric Cheng if (flent->fe_tx_srs != NULL) 20658275SEric Cheng mac_tx_srs_restart(flent->fe_tx_srs); 20668275SEric Cheng return (0); 20672311Sseb } 20682311Sseb 20692311Sseb void 20708275SEric Cheng mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) 20718275SEric Cheng { 20728275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20738275SEric Cheng 20748275SEric Cheng mac_tx_client_block(mcip); 20758275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20768275SEric Cheng mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 20778275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20788275SEric Cheng mac_tx_flow_quiesce, NULL); 20798275SEric Cheng } 20808275SEric Cheng } 20818275SEric Cheng 20828275SEric Cheng void 20838275SEric Cheng mac_tx_client_restart(mac_client_impl_t *mcip) 20842311Sseb { 20858275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20868275SEric Cheng 20878275SEric Cheng mac_tx_client_unblock(mcip); 20888275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20898275SEric Cheng 
mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 20908275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20918275SEric Cheng mac_tx_flow_restart, NULL); 20928275SEric Cheng } 20938275SEric Cheng } 20948275SEric Cheng 20958275SEric Cheng void 20968275SEric Cheng mac_tx_client_flush(mac_client_impl_t *mcip) 20978275SEric Cheng { 20988275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20998275SEric Cheng 21008275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 21018275SEric Cheng mac_tx_client_restart(mcip); 21028275SEric Cheng } 21038275SEric Cheng 21048275SEric Cheng void 21058275SEric Cheng mac_client_quiesce(mac_client_impl_t *mcip) 21068275SEric Cheng { 21078275SEric Cheng mac_rx_client_quiesce((mac_client_handle_t)mcip); 21088275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 21098275SEric Cheng } 21108275SEric Cheng 21118275SEric Cheng void 21128275SEric Cheng mac_client_restart(mac_client_impl_t *mcip) 21138275SEric Cheng { 21148275SEric Cheng mac_rx_client_restart((mac_client_handle_t)mcip); 21158275SEric Cheng mac_tx_client_restart(mcip); 21162311Sseb } 21172311Sseb 21182311Sseb /* 21195895Syz147064 * Allocate a minor number. 21205895Syz147064 */ 21215895Syz147064 minor_t 21225895Syz147064 mac_minor_hold(boolean_t sleep) 21235895Syz147064 { 21245895Syz147064 minor_t minor; 21255895Syz147064 21265895Syz147064 /* 21275895Syz147064 * Grab a value from the arena. 
21285895Syz147064 */ 21295895Syz147064 atomic_add_32(&minor_count, 1); 21305895Syz147064 21315895Syz147064 if (sleep) 21325895Syz147064 minor = (uint_t)id_alloc(minor_ids); 21335895Syz147064 else 21345895Syz147064 minor = (uint_t)id_alloc_nosleep(minor_ids); 21355895Syz147064 21365895Syz147064 if (minor == 0) { 21375895Syz147064 atomic_add_32(&minor_count, -1); 21385895Syz147064 return (0); 21395895Syz147064 } 21405895Syz147064 21415895Syz147064 return (minor); 21425895Syz147064 } 21435895Syz147064 21445895Syz147064 /* 21455895Syz147064 * Release a previously allocated minor number. 21465895Syz147064 */ 21475895Syz147064 void 21485895Syz147064 mac_minor_rele(minor_t minor) 21495895Syz147064 { 21505895Syz147064 /* 21515895Syz147064 * Return the value to the arena. 21525895Syz147064 */ 21535895Syz147064 id_free(minor_ids, minor); 21545895Syz147064 atomic_add_32(&minor_count, -1); 21555895Syz147064 } 21565895Syz147064 21575895Syz147064 uint32_t 21585895Syz147064 mac_no_notification(mac_handle_t mh) 21595895Syz147064 { 21605895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 21619073SCathy.Zhou@Sun.COM 21629073SCathy.Zhou@Sun.COM return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 21639073SCathy.Zhou@Sun.COM mip->mi_capab_legacy.ml_unsup_note : 0); 21645895Syz147064 } 21655895Syz147064 21665895Syz147064 /* 21678275SEric Cheng * Prevent any new opens of this mac in preparation for unregister 21682311Sseb */ 21692311Sseb int 21708275SEric Cheng i_mac_disable(mac_impl_t *mip) 21712311Sseb { 21728275SEric Cheng mac_client_impl_t *mcip; 21738275SEric Cheng 21748275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 21758275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 21768275SEric Cheng /* Already disabled, return success */ 21778275SEric Cheng rw_exit(&i_mac_impl_lock); 21788275SEric Cheng return (0); 21795895Syz147064 } 21802311Sseb /* 21818275SEric Cheng * See if there are any other references to this mac_t (e.g., VLAN's). 21828275SEric Cheng * If so return failure. 
If all the other checks below pass, then 21838275SEric Cheng * set mi_disabled atomically under the i_mac_impl_lock to prevent 21848275SEric Cheng * any new VLAN's from being created or new mac client opens of this 21858275SEric Cheng * mac end point. 21862311Sseb */ 21878275SEric Cheng if (mip->mi_ref > 0) { 21888275SEric Cheng rw_exit(&i_mac_impl_lock); 21898275SEric Cheng return (EBUSY); 21902311Sseb } 21912311Sseb 21922311Sseb /* 21938275SEric Cheng * mac clients must delete all multicast groups they join before 21948275SEric Cheng * closing. bcast groups are reference counted, the last client 21958275SEric Cheng * to delete the group will wait till the group is physically 21968275SEric Cheng * deleted. Since all clients have closed this mac end point 21978275SEric Cheng * mi_bcast_ngrps must be zero at this point 21982311Sseb */ 21998275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0); 22005009Sgd78059 22015009Sgd78059 /* 22028275SEric Cheng * Don't let go of this if it has some flows. 22038275SEric Cheng * All other code guarantees no flows are added to a disabled 22048275SEric Cheng * mac, therefore it is sufficient to check for the flow table 22058275SEric Cheng * only here. 
22062311Sseb */ 22078275SEric Cheng mcip = mac_primary_client_handle(mip); 22088275SEric Cheng if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 22098275SEric Cheng rw_exit(&i_mac_impl_lock); 22108275SEric Cheng return (ENOTEMPTY); 22115895Syz147064 } 22125895Syz147064 22138275SEric Cheng mip->mi_state_flags |= MIS_DISABLED; 22141852Syz147064 rw_exit(&i_mac_impl_lock); 2215269Sericheng return (0); 22168275SEric Cheng } 22178275SEric Cheng 22188275SEric Cheng int 22198275SEric Cheng mac_disable_nowait(mac_handle_t mh) 22208275SEric Cheng { 22218275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22228275SEric Cheng int err; 22238275SEric Cheng 22248275SEric Cheng if ((err = i_mac_perim_enter_nowait(mip)) != 0) 22258275SEric Cheng return (err); 22268275SEric Cheng err = i_mac_disable(mip); 22278275SEric Cheng i_mac_perim_exit(mip); 2228269Sericheng return (err); 22290Sstevel@tonic-gate } 22300Sstevel@tonic-gate 22310Sstevel@tonic-gate int 22325084Sjohnlev mac_disable(mac_handle_t mh) 22330Sstevel@tonic-gate { 22348275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22358275SEric Cheng int err; 22368275SEric Cheng 22378275SEric Cheng i_mac_perim_enter(mip); 22388275SEric Cheng err = i_mac_disable(mip); 22398275SEric Cheng i_mac_perim_exit(mip); 22405084Sjohnlev 22410Sstevel@tonic-gate /* 22428275SEric Cheng * Clean up notification thread and wait for it to exit. 22435009Sgd78059 */ 22448275SEric Cheng if (err == 0) 22458275SEric Cheng i_mac_notify_exit(mip); 22468275SEric Cheng 22478275SEric Cheng return (err); 22480Sstevel@tonic-gate } 22490Sstevel@tonic-gate 22504913Sethindra /* 22518275SEric Cheng * Called when the MAC instance has a non empty flow table, to de-multiplex 22528275SEric Cheng * incoming packets to the right flow. 22538275SEric Cheng * The MAC's rw lock is assumed held as a READER. 
22544913Sethindra */ 22558275SEric Cheng /* ARGSUSED */ 22568275SEric Cheng static mblk_t * 22578275SEric Cheng mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 22580Sstevel@tonic-gate { 22598275SEric Cheng flow_entry_t *flent = NULL; 22608275SEric Cheng uint_t flags = FLOW_INBOUND; 22618275SEric Cheng int err; 22624913Sethindra 22634913Sethindra /* 22648275SEric Cheng * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 22658275SEric Cheng * to mac_flow_lookup() so that the VLAN packets can be successfully 22668275SEric Cheng * passed to the non-VLAN aggregation flows. 22678275SEric Cheng * 22688275SEric Cheng * Note that there is possibly a race between this and 22698275SEric Cheng * mac_unicast_remove/add() and VLAN packets could be incorrectly 22708275SEric Cheng * classified to non-VLAN flows of non-aggregation mac clients. These 22718275SEric Cheng * VLAN packets will be then filtered out by the mac module. 22724913Sethindra */ 22738275SEric Cheng if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 22748275SEric Cheng flags |= FLOW_IGNORE_VLAN; 22758275SEric Cheng 22768275SEric Cheng err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 22778275SEric Cheng if (err != 0) { 22788275SEric Cheng /* no registered receive function */ 22798275SEric Cheng return (mp); 22808275SEric Cheng } else { 22818275SEric Cheng mac_client_impl_t *mcip; 22824913Sethindra 22834913Sethindra /* 22848275SEric Cheng * This flent might just be an additional one on the MAC client, 22858275SEric Cheng * i.e. for classification purposes (different fdesc), however 22868275SEric Cheng * the resources, SRS et. al., are in the mci_flent, so if 22878275SEric Cheng * this isn't the mci_flent, we need to get it. 
22884913Sethindra */ 22898275SEric Cheng if ((mcip = flent->fe_mcip) != NULL && 22908275SEric Cheng mcip->mci_flent != flent) { 22918275SEric Cheng FLOW_REFRELE(flent); 22928275SEric Cheng flent = mcip->mci_flent; 22938275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 22948275SEric Cheng if (err != 0) 22958275SEric Cheng return (mp); 22968275SEric Cheng } 22978275SEric Cheng (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 22988275SEric Cheng B_FALSE); 22998275SEric Cheng FLOW_REFRELE(flent); 23005084Sjohnlev } 23015084Sjohnlev return (NULL); 23025084Sjohnlev } 23035084Sjohnlev 23045084Sjohnlev mblk_t * 23058275SEric Cheng mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 23060Sstevel@tonic-gate { 23072311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 23088275SEric Cheng mblk_t *bp, *bp1, **bpp, *list = NULL; 23090Sstevel@tonic-gate 23100Sstevel@tonic-gate /* 23118275SEric Cheng * We walk the chain and attempt to classify each packet. 23128275SEric Cheng * The packets that couldn't be classified will be returned 23138275SEric Cheng * back to the caller. 
23140Sstevel@tonic-gate */ 23158275SEric Cheng bp = mp_chain; 23168275SEric Cheng bpp = &list; 23178275SEric Cheng while (bp != NULL) { 23188275SEric Cheng bp1 = bp; 23198275SEric Cheng bp = bp->b_next; 23208275SEric Cheng bp1->b_next = NULL; 23218275SEric Cheng 23228275SEric Cheng if (mac_rx_classify(mip, mrh, bp1) != NULL) { 23238275SEric Cheng *bpp = bp1; 23248275SEric Cheng bpp = &bp1->b_next; 23258275SEric Cheng } 23268275SEric Cheng } 23278275SEric Cheng return (list); 23280Sstevel@tonic-gate } 23290Sstevel@tonic-gate 23308275SEric Cheng static int 23318275SEric Cheng mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 23320Sstevel@tonic-gate { 23338275SEric Cheng mac_ring_handle_t ring = arg; 23348275SEric Cheng 23358275SEric Cheng if (flent->fe_tx_srs) 23368275SEric Cheng mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 23372311Sseb return (0); 23382311Sseb } 23392311Sseb 23400Sstevel@tonic-gate void 23418275SEric Cheng i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 23428275SEric Cheng { 23438275SEric Cheng mac_client_impl_t *cclient; 23448275SEric Cheng mac_soft_ring_set_t *mac_srs; 23458275SEric Cheng 23468275SEric Cheng /* 23478275SEric Cheng * After grabbing the mi_rw_lock, the list of clients can't change. 23488275SEric Cheng * If there are any clients mi_disabled must be B_FALSE and can't 23498275SEric Cheng * get set since there are clients. If there aren't any clients we 23508275SEric Cheng * don't do anything. In any case the mip has to be valid. The driver 23518275SEric Cheng * must make sure that it goes single threaded (with respect to mac 23528275SEric Cheng * calls) and wait for all pending mac calls to finish before calling 23538275SEric Cheng * mac_unregister. 
23548275SEric Cheng */ 23558275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 23568275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 23578275SEric Cheng rw_exit(&i_mac_impl_lock); 23588275SEric Cheng return; 23598275SEric Cheng } 23608275SEric Cheng 23618275SEric Cheng /* 23628275SEric Cheng * Get MAC tx srs from walking mac_client_handle list. 23638275SEric Cheng */ 23648275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 23658275SEric Cheng for (cclient = mip->mi_clients_list; cclient != NULL; 23668275SEric Cheng cclient = cclient->mci_client_next) { 23678275SEric Cheng if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) 23688275SEric Cheng mac_tx_srs_wakeup(mac_srs, ring); 23698833SVenu.Iyer@Sun.COM (void) mac_flow_walk(cclient->mci_subflow_tab, 23708833SVenu.Iyer@Sun.COM mac_tx_flow_srs_wakeup, ring); 23718275SEric Cheng } 23728275SEric Cheng rw_exit(&mip->mi_rw_lock); 23738275SEric Cheng rw_exit(&i_mac_impl_lock); 23748275SEric Cheng } 23758275SEric Cheng 23768275SEric Cheng /* ARGSUSED */ 23778275SEric Cheng void 23788275SEric Cheng mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 23790Sstevel@tonic-gate boolean_t add) 23800Sstevel@tonic-gate { 23818275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 23828275SEric Cheng 23838275SEric Cheng i_mac_perim_enter((mac_impl_t *)mh); 23840Sstevel@tonic-gate /* 23850Sstevel@tonic-gate * If no specific refresh function was given then default to the 23860Sstevel@tonic-gate * driver's m_multicst entry point. 
23870Sstevel@tonic-gate */ 23880Sstevel@tonic-gate if (refresh == NULL) { 23892311Sseb refresh = mip->mi_multicst; 23902311Sseb arg = mip->mi_driver; 23910Sstevel@tonic-gate } 23928275SEric Cheng 23938275SEric Cheng mac_bcast_refresh(mip, refresh, arg, add); 23948275SEric Cheng i_mac_perim_exit((mac_impl_t *)mh); 23950Sstevel@tonic-gate } 23960Sstevel@tonic-gate 23970Sstevel@tonic-gate void 23982311Sseb mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 23990Sstevel@tonic-gate { 24002311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 24010Sstevel@tonic-gate 24020Sstevel@tonic-gate /* 24030Sstevel@tonic-gate * If no specific refresh function was given then default to the 24040Sstevel@tonic-gate * driver's m_promisc entry point. 24050Sstevel@tonic-gate */ 24060Sstevel@tonic-gate if (refresh == NULL) { 24072311Sseb refresh = mip->mi_setpromisc; 24082311Sseb arg = mip->mi_driver; 24090Sstevel@tonic-gate } 24100Sstevel@tonic-gate ASSERT(refresh != NULL); 24110Sstevel@tonic-gate 24120Sstevel@tonic-gate /* 24130Sstevel@tonic-gate * Call the refresh function with the current promiscuity. 24140Sstevel@tonic-gate */ 24150Sstevel@tonic-gate refresh(arg, (mip->mi_devpromisc != 0)); 24160Sstevel@tonic-gate } 24170Sstevel@tonic-gate 24185895Syz147064 /* 24195895Syz147064 * The mac client requests that the mac not to change its margin size to 24205895Syz147064 * be less than the specified value. If "current" is B_TRUE, then the client 24215895Syz147064 * requests the mac not to change its margin size to be smaller than the 24225895Syz147064 * current size. Further, return the current margin size value in this case. 24235895Syz147064 * 24245895Syz147064 * We keep every requested size in an ordered list from largest to smallest. 
24255895Syz147064 */ 24265895Syz147064 int 24275895Syz147064 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 24285895Syz147064 { 24295895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24305895Syz147064 mac_margin_req_t **pp, *p; 24315895Syz147064 int err = 0; 24325895Syz147064 24338275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24345895Syz147064 if (current) 24355895Syz147064 *marginp = mip->mi_margin; 24365895Syz147064 24375895Syz147064 /* 24385895Syz147064 * If the current margin value cannot satisfy the margin requested, 24395895Syz147064 * return ENOTSUP directly. 24405895Syz147064 */ 24415895Syz147064 if (*marginp > mip->mi_margin) { 24425895Syz147064 err = ENOTSUP; 24435895Syz147064 goto done; 24445895Syz147064 } 24455895Syz147064 24465895Syz147064 /* 24475895Syz147064 * Check whether the given margin is already in the list. If so, 24485895Syz147064 * bump the reference count. 24495895Syz147064 */ 24508275SEric Cheng for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 24515895Syz147064 if (p->mmr_margin == *marginp) { 24525895Syz147064 /* 24535895Syz147064 * The margin requested is already in the list, 24545895Syz147064 * so just bump the reference count. 24555895Syz147064 */ 24565895Syz147064 p->mmr_ref++; 24575895Syz147064 goto done; 24585895Syz147064 } 24595895Syz147064 if (p->mmr_margin < *marginp) 24605895Syz147064 break; 24615895Syz147064 } 24625895Syz147064 24635895Syz147064 24648275SEric Cheng p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 24655895Syz147064 p->mmr_margin = *marginp; 24665895Syz147064 p->mmr_ref++; 24675895Syz147064 p->mmr_nextp = *pp; 24685895Syz147064 *pp = p; 24695895Syz147064 24705895Syz147064 done: 24718275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 24725895Syz147064 return (err); 24735895Syz147064 } 24745895Syz147064 24755895Syz147064 /* 24765895Syz147064 * The mac client requests to cancel its previous mac_margin_add() request. 
24775895Syz147064 * We remove the requested margin size from the list. 24785895Syz147064 */ 24795895Syz147064 int 24805895Syz147064 mac_margin_remove(mac_handle_t mh, uint32_t margin) 24815895Syz147064 { 24825895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24835895Syz147064 mac_margin_req_t **pp, *p; 24845895Syz147064 int err = 0; 24855895Syz147064 24868275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24875895Syz147064 /* 24885895Syz147064 * Find the entry in the list for the given margin. 24895895Syz147064 */ 24905895Syz147064 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 24915895Syz147064 if (p->mmr_margin == margin) { 24925895Syz147064 if (--p->mmr_ref == 0) 24935895Syz147064 break; 24945895Syz147064 24955895Syz147064 /* 24965895Syz147064 * There is still a reference to this address so 24975895Syz147064 * there's nothing more to do. 24985895Syz147064 */ 24995895Syz147064 goto done; 25005895Syz147064 } 25015895Syz147064 } 25025895Syz147064 25035895Syz147064 /* 25045895Syz147064 * We did not find an entry for the given margin. 25055895Syz147064 */ 25065895Syz147064 if (p == NULL) { 25075895Syz147064 err = ENOENT; 25085895Syz147064 goto done; 25095895Syz147064 } 25105895Syz147064 25115895Syz147064 ASSERT(p->mmr_ref == 0); 25125895Syz147064 25135895Syz147064 /* 25145895Syz147064 * Remove it from the list. 
25155895Syz147064 */ 25165895Syz147064 *pp = p->mmr_nextp; 25175895Syz147064 kmem_free(p, sizeof (mac_margin_req_t)); 25185895Syz147064 done: 25198275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25205895Syz147064 return (err); 25215895Syz147064 } 25225895Syz147064 25235895Syz147064 boolean_t 25245895Syz147064 mac_margin_update(mac_handle_t mh, uint32_t margin) 25255895Syz147064 { 25265895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 25275895Syz147064 uint32_t margin_needed = 0; 25285895Syz147064 25298275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 25305895Syz147064 25315895Syz147064 if (mip->mi_mmrp != NULL) 25325895Syz147064 margin_needed = mip->mi_mmrp->mmr_margin; 25335895Syz147064 25345895Syz147064 if (margin_needed <= margin) 25355895Syz147064 mip->mi_margin = margin; 25365895Syz147064 25378275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25385895Syz147064 25395895Syz147064 if (margin_needed <= margin) 25405895Syz147064 i_mac_notify(mip, MAC_NOTE_MARGIN); 25415895Syz147064 25425895Syz147064 return (margin_needed <= margin); 25435895Syz147064 } 25445895Syz147064 25452311Sseb /* 25462311Sseb * MAC Type Plugin functions. 25472311Sseb */ 25482311Sseb 25498275SEric Cheng mactype_t * 25508275SEric Cheng mactype_getplugin(const char *pname) 25518275SEric Cheng { 25528275SEric Cheng mactype_t *mtype = NULL; 25538275SEric Cheng boolean_t tried_modload = B_FALSE; 25548275SEric Cheng 25558275SEric Cheng mutex_enter(&i_mactype_lock); 25568275SEric Cheng 25578275SEric Cheng find_registered_mactype: 25588275SEric Cheng if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 25598275SEric Cheng (mod_hash_val_t *)&mtype) != 0) { 25608275SEric Cheng if (!tried_modload) { 25618275SEric Cheng /* 25628275SEric Cheng * If the plugin has not yet been loaded, then 25638275SEric Cheng * attempt to load it now. 
If modload() succeeds, 25648275SEric Cheng * the plugin should have registered using 25658275SEric Cheng * mactype_register(), in which case we can go back 25668275SEric Cheng * and attempt to find it again. 25678275SEric Cheng */ 25688275SEric Cheng if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 25698275SEric Cheng tried_modload = B_TRUE; 25708275SEric Cheng goto find_registered_mactype; 25718275SEric Cheng } 25728275SEric Cheng } 25738275SEric Cheng } else { 25748275SEric Cheng /* 25758275SEric Cheng * Note that there's no danger that the plugin we've loaded 25768275SEric Cheng * could be unloaded between the modload() step and the 25778275SEric Cheng * reference count bump here, as we're holding 25788275SEric Cheng * i_mactype_lock, which mactype_unregister() also holds. 25798275SEric Cheng */ 25808275SEric Cheng atomic_inc_32(&mtype->mt_ref); 25818275SEric Cheng } 25828275SEric Cheng 25838275SEric Cheng mutex_exit(&i_mactype_lock); 25848275SEric Cheng return (mtype); 25858275SEric Cheng } 25868275SEric Cheng 25872311Sseb mactype_register_t * 25882311Sseb mactype_alloc(uint_t mactype_version) 25892311Sseb { 25902311Sseb mactype_register_t *mtrp; 25912311Sseb 25922311Sseb /* 25932311Sseb * Make sure there isn't a version mismatch between the plugin and 25942311Sseb * the framework. In the future, if multiple versions are 25952311Sseb * supported, this check could become more sophisticated. 
25962311Sseb */ 25972311Sseb if (mactype_version != MACTYPE_VERSION) 25982311Sseb return (NULL); 25992311Sseb 26002311Sseb mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 26012311Sseb mtrp->mtr_version = mactype_version; 26022311Sseb return (mtrp); 26032311Sseb } 26042311Sseb 26052311Sseb void 26062311Sseb mactype_free(mactype_register_t *mtrp) 26072311Sseb { 26082311Sseb kmem_free(mtrp, sizeof (mactype_register_t)); 26092311Sseb } 26102311Sseb 26112311Sseb int 26122311Sseb mactype_register(mactype_register_t *mtrp) 26132311Sseb { 26142311Sseb mactype_t *mtp; 26152311Sseb mactype_ops_t *ops = mtrp->mtr_ops; 26162311Sseb 26172311Sseb /* Do some sanity checking before we register this MAC type. */ 26186353Sdr146992 if (mtrp->mtr_ident == NULL || ops == NULL) 26192311Sseb return (EINVAL); 26202311Sseb 26212311Sseb /* 26222311Sseb * Verify that all mandatory callbacks are set in the ops 26232311Sseb * vector. 26242311Sseb */ 26252311Sseb if (ops->mtops_unicst_verify == NULL || 26262311Sseb ops->mtops_multicst_verify == NULL || 26272311Sseb ops->mtops_sap_verify == NULL || 26282311Sseb ops->mtops_header == NULL || 26292311Sseb ops->mtops_header_info == NULL) { 26302311Sseb return (EINVAL); 26312311Sseb } 26322311Sseb 26332311Sseb mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 26342311Sseb mtp->mt_ident = mtrp->mtr_ident; 26352311Sseb mtp->mt_ops = *ops; 26362311Sseb mtp->mt_type = mtrp->mtr_mactype; 26373147Sxc151355 mtp->mt_nativetype = mtrp->mtr_nativetype; 26382311Sseb mtp->mt_addr_length = mtrp->mtr_addrlen; 26392311Sseb if (mtrp->mtr_brdcst_addr != NULL) { 26402311Sseb mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 26412311Sseb bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 26422311Sseb mtrp->mtr_addrlen); 26432311Sseb } 26442311Sseb 26452311Sseb mtp->mt_stats = mtrp->mtr_stats; 26462311Sseb mtp->mt_statcount = mtrp->mtr_statcount; 26472311Sseb 26486512Ssowmini mtp->mt_mapping = mtrp->mtr_mapping; 26496512Ssowmini mtp->mt_mappingcount = 
mtrp->mtr_mappingcount; 26506512Ssowmini 26512311Sseb if (mod_hash_insert(i_mactype_hash, 26522311Sseb (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 26532311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26542311Sseb kmem_free(mtp, sizeof (*mtp)); 26552311Sseb return (EEXIST); 26562311Sseb } 26572311Sseb return (0); 26582311Sseb } 26592311Sseb 26602311Sseb int 26612311Sseb mactype_unregister(const char *ident) 26622311Sseb { 26632311Sseb mactype_t *mtp; 26642311Sseb mod_hash_val_t val; 26652311Sseb int err; 26662311Sseb 26672311Sseb /* 26682311Sseb * Let's not allow MAC drivers to use this plugin while we're 26693288Sseb * trying to unregister it. Holding i_mactype_lock also prevents a 26703288Sseb * plugin from unregistering while a MAC driver is attempting to 26713288Sseb * hold a reference to it in i_mactype_getplugin(). 26722311Sseb */ 26733288Sseb mutex_enter(&i_mactype_lock); 26742311Sseb 26752311Sseb if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 26762311Sseb (mod_hash_val_t *)&mtp)) != 0) { 26772311Sseb /* A plugin is trying to unregister, but it never registered. */ 26783288Sseb err = ENXIO; 26793288Sseb goto done; 26802311Sseb } 26812311Sseb 26823288Sseb if (mtp->mt_ref != 0) { 26833288Sseb err = EBUSY; 26843288Sseb goto done; 26852311Sseb } 26862311Sseb 26872311Sseb err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 26882311Sseb ASSERT(err == 0); 26892311Sseb if (err != 0) { 26902311Sseb /* This should never happen, thus the ASSERT() above. 
*/ 26913288Sseb err = EINVAL; 26923288Sseb goto done; 26932311Sseb } 26942311Sseb ASSERT(mtp == (mactype_t *)val); 26952311Sseb 26962311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26972311Sseb kmem_free(mtp, sizeof (mactype_t)); 26983288Sseb done: 26993288Sseb mutex_exit(&i_mactype_lock); 27003288Sseb return (err); 27012311Sseb } 27025903Ssowmini 27038275SEric Cheng /* 27048275SEric Cheng * Returns TRUE when the specified property is intended for the MAC framework, 27058275SEric Cheng * as opposed to driver defined properties. 27068275SEric Cheng */ 27078275SEric Cheng static boolean_t 27088275SEric Cheng mac_is_macprop(mac_prop_t *macprop) 27098275SEric Cheng { 27108275SEric Cheng switch (macprop->mp_id) { 27118275SEric Cheng case MAC_PROP_MAXBW: 27128275SEric Cheng case MAC_PROP_PRIO: 27138275SEric Cheng case MAC_PROP_BIND_CPU: 27148275SEric Cheng return (B_TRUE); 27158275SEric Cheng default: 27168275SEric Cheng return (B_FALSE); 27178275SEric Cheng } 27188275SEric Cheng } 27198275SEric Cheng 27208275SEric Cheng /* 27218275SEric Cheng * mac_set_prop() sets mac or hardware driver properties: 27228275SEric Cheng * mac properties include maxbw, priority, and cpu binding list. Driver 27238275SEric Cheng * properties are private properties to the hardware, such as mtu, speed 27248275SEric Cheng * etc. 27258275SEric Cheng * If the property is a driver property, mac_set_prop() calls driver's callback 27268275SEric Cheng * function to set it. 27278275SEric Cheng * If the property is a mac property, mac_set_prop() invokes mac_set_resources() 27288275SEric Cheng * which will cache the property value in mac_impl_t and may call 27298275SEric Cheng * mac_client_set_resource() to update property value of the primary mac client, 27308275SEric Cheng * if it exists. 
27318275SEric Cheng */ 27325903Ssowmini int 27335903Ssowmini mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) 27345903Ssowmini { 27355903Ssowmini int err = ENOTSUP; 27365903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27375903Ssowmini 27388275SEric Cheng ASSERT(MAC_PERIM_HELD(mh)); 27398275SEric Cheng 27408275SEric Cheng /* If it is mac property, call mac_set_resources() */ 27418275SEric Cheng if (mac_is_macprop(macprop)) { 27428275SEric Cheng mac_resource_props_t mrp; 27438275SEric Cheng 27448275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27458275SEric Cheng return (EINVAL); 27468275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27478275SEric Cheng bcopy(val, &mrp, sizeof (mrp)); 27488275SEric Cheng return (mac_set_resources(mh, &mrp)); 27498275SEric Cheng } 27508603SGirish.Moodalbail@Sun.COM switch (macprop->mp_id) { 27518603SGirish.Moodalbail@Sun.COM case MAC_PROP_MTU: { 27528603SGirish.Moodalbail@Sun.COM uint32_t mtu; 27538603SGirish.Moodalbail@Sun.COM 27548603SGirish.Moodalbail@Sun.COM if (valsize < sizeof (mtu)) 27558603SGirish.Moodalbail@Sun.COM return (EINVAL); 27568603SGirish.Moodalbail@Sun.COM bcopy(val, &mtu, sizeof (mtu)); 27578603SGirish.Moodalbail@Sun.COM err = mac_set_mtu(mh, mtu, NULL); 27588603SGirish.Moodalbail@Sun.COM break; 27595903Ssowmini } 27608603SGirish.Moodalbail@Sun.COM default: 27618603SGirish.Moodalbail@Sun.COM /* For other driver properties, call driver's callback */ 27628603SGirish.Moodalbail@Sun.COM if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 27638603SGirish.Moodalbail@Sun.COM err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 27648603SGirish.Moodalbail@Sun.COM macprop->mp_name, macprop->mp_id, valsize, val); 27658603SGirish.Moodalbail@Sun.COM } 27668603SGirish.Moodalbail@Sun.COM } 27675903Ssowmini return (err); 27685903Ssowmini } 27695903Ssowmini 27708275SEric Cheng /* 27718275SEric Cheng * mac_get_prop() gets mac or hardware driver properties. 
27728275SEric Cheng * 27738275SEric Cheng * If the property is a driver property, mac_get_prop() calls driver's callback 27748275SEric Cheng * function to get it. 27758275SEric Cheng * If the property is a mac property, mac_get_prop() invokes mac_get_resources() 27768275SEric Cheng * which returns the cached value in mac_impl_t. 27778275SEric Cheng */ 27785903Ssowmini int 27798118SVasumathi.Sundaram@Sun.COM mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, 27808118SVasumathi.Sundaram@Sun.COM uint_t *perm) 27815903Ssowmini { 27825903Ssowmini int err = ENOTSUP; 27835903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27846512Ssowmini link_state_t link_state; 2785*9514SGirish.Moodalbail@Sun.COM boolean_t is_getprop, is_setprop; 2786*9514SGirish.Moodalbail@Sun.COM 2787*9514SGirish.Moodalbail@Sun.COM is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP); 2788*9514SGirish.Moodalbail@Sun.COM is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP); 27896512Ssowmini 27908275SEric Cheng /* If mac property, read from cache */ 27918275SEric Cheng if (mac_is_macprop(macprop)) { 27928275SEric Cheng mac_resource_props_t mrp; 27938275SEric Cheng 27948275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27958275SEric Cheng return (EINVAL); 27968275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27978275SEric Cheng mac_get_resources(mh, &mrp); 27988275SEric Cheng bcopy(&mrp, val, sizeof (mac_resource_props_t)); 27998275SEric Cheng return (0); 28008275SEric Cheng } 28018275SEric Cheng 28026512Ssowmini switch (macprop->mp_id) { 2803*9514SGirish.Moodalbail@Sun.COM case MAC_PROP_MTU: { 2804*9514SGirish.Moodalbail@Sun.COM uint32_t sdu; 2805*9514SGirish.Moodalbail@Sun.COM mac_propval_range_t range; 2806*9514SGirish.Moodalbail@Sun.COM 2807*9514SGirish.Moodalbail@Sun.COM if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) { 2808*9514SGirish.Moodalbail@Sun.COM if (valsize < sizeof (mac_propval_range_t)) 2809*9514SGirish.Moodalbail@Sun.COM return 
(EINVAL); 2810*9514SGirish.Moodalbail@Sun.COM if (is_getprop) { 2811*9514SGirish.Moodalbail@Sun.COM err = mip->mi_callbacks->mc_getprop(mip-> 2812*9514SGirish.Moodalbail@Sun.COM mi_driver, macprop->mp_name, macprop->mp_id, 2813*9514SGirish.Moodalbail@Sun.COM macprop->mp_flags, valsize, val, perm); 2814*9514SGirish.Moodalbail@Sun.COM } 2815*9514SGirish.Moodalbail@Sun.COM /* 2816*9514SGirish.Moodalbail@Sun.COM * If the driver doesn't have *_m_getprop defined or 2817*9514SGirish.Moodalbail@Sun.COM * if the driver doesn't support setting MTU then 2818*9514SGirish.Moodalbail@Sun.COM * return the CURRENT value as POSSIBLE value. 2819*9514SGirish.Moodalbail@Sun.COM */ 2820*9514SGirish.Moodalbail@Sun.COM if (!is_getprop || err == ENOTSUP) { 2821*9514SGirish.Moodalbail@Sun.COM mac_sdu_get(mh, NULL, &sdu); 2822*9514SGirish.Moodalbail@Sun.COM range.mpr_count = 1; 2823*9514SGirish.Moodalbail@Sun.COM range.mpr_type = MAC_PROPVAL_UINT32; 2824*9514SGirish.Moodalbail@Sun.COM range.range_uint32[0].mpur_min = 2825*9514SGirish.Moodalbail@Sun.COM range.range_uint32[0].mpur_max = sdu; 2826*9514SGirish.Moodalbail@Sun.COM bcopy(&range, val, sizeof (range)); 2827*9514SGirish.Moodalbail@Sun.COM err = 0; 2828*9514SGirish.Moodalbail@Sun.COM } 2829*9514SGirish.Moodalbail@Sun.COM return (err); 2830*9514SGirish.Moodalbail@Sun.COM } 28316512Ssowmini if (valsize < sizeof (sdu)) 28326512Ssowmini return (EINVAL); 28336789Sam223141 if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { 28346512Ssowmini mac_sdu_get(mh, NULL, &sdu); 28356512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 2836*9514SGirish.Moodalbail@Sun.COM if (is_setprop && (mip->mi_callbacks->mc_setprop(mip-> 2837*9514SGirish.Moodalbail@Sun.COM mi_driver, macprop->mp_name, macprop->mp_id, 2838*9514SGirish.Moodalbail@Sun.COM valsize, val) == 0)) { 28398603SGirish.Moodalbail@Sun.COM *perm = MAC_PROP_PERM_RW; 28408603SGirish.Moodalbail@Sun.COM } else { 28418118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 
28428603SGirish.Moodalbail@Sun.COM } 28436512Ssowmini return (0); 28446512Ssowmini } else { 28456512Ssowmini if (mip->mi_info.mi_media == DL_ETHER) { 28466512Ssowmini sdu = ETHERMTU; 28476512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 28488603SGirish.Moodalbail@Sun.COM 28496512Ssowmini return (0); 28506512Ssowmini } 28516512Ssowmini /* 28526512Ssowmini * ask driver for its default. 28536512Ssowmini */ 28546512Ssowmini break; 28556512Ssowmini } 2856*9514SGirish.Moodalbail@Sun.COM } 28576789Sam223141 case MAC_PROP_STATUS: 28586512Ssowmini if (valsize < sizeof (link_state)) 28596512Ssowmini return (EINVAL); 28608118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 28616512Ssowmini link_state = mac_link_get(mh); 28626512Ssowmini bcopy(&link_state, val, sizeof (link_state)); 28636512Ssowmini return (0); 28646512Ssowmini default: 28656512Ssowmini break; 28668275SEric Cheng 28676512Ssowmini } 28688275SEric Cheng /* If driver property, request from driver */ 2869*9514SGirish.Moodalbail@Sun.COM if (is_getprop) { 28705903Ssowmini err = mip->mi_callbacks->mc_getprop(mip->mi_driver, 28716512Ssowmini macprop->mp_name, macprop->mp_id, macprop->mp_flags, 28728118SVasumathi.Sundaram@Sun.COM valsize, val, perm); 28735903Ssowmini } 28745903Ssowmini return (err); 28755903Ssowmini } 28765903Ssowmini 28779073SCathy.Zhou@Sun.COM int 28789073SCathy.Zhou@Sun.COM mac_fastpath_disable(mac_handle_t mh) 28799073SCathy.Zhou@Sun.COM { 28809073SCathy.Zhou@Sun.COM mac_impl_t *mip = (mac_impl_t *)mh; 28819073SCathy.Zhou@Sun.COM 28829073SCathy.Zhou@Sun.COM if ((mip->mi_state_flags & MIS_LEGACY) == 0) 28839073SCathy.Zhou@Sun.COM return (0); 28849073SCathy.Zhou@Sun.COM 28859073SCathy.Zhou@Sun.COM return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); 28869073SCathy.Zhou@Sun.COM } 28879073SCathy.Zhou@Sun.COM 28889073SCathy.Zhou@Sun.COM void 28899073SCathy.Zhou@Sun.COM mac_fastpath_enable(mac_handle_t mh) 28909073SCathy.Zhou@Sun.COM { 28919073SCathy.Zhou@Sun.COM mac_impl_t *mip = 
(mac_impl_t *)mh; 28929073SCathy.Zhou@Sun.COM 28939073SCathy.Zhou@Sun.COM if ((mip->mi_state_flags & MIS_LEGACY) == 0) 28949073SCathy.Zhou@Sun.COM return; 28959073SCathy.Zhou@Sun.COM 28969073SCathy.Zhou@Sun.COM mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); 28979073SCathy.Zhou@Sun.COM } 28989073SCathy.Zhou@Sun.COM 28998275SEric Cheng void 29006512Ssowmini mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) 29016512Ssowmini { 29026512Ssowmini mac_priv_prop_t *mpriv; 29036512Ssowmini 29046512Ssowmini if (mpp == NULL) 29056512Ssowmini return; 29066512Ssowmini 29076512Ssowmini mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); 29086512Ssowmini (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); 29096512Ssowmini mip->mi_priv_prop = mpriv; 29106512Ssowmini mip->mi_priv_prop_count = nprop; 29116512Ssowmini } 29127406SSowmini.Varadhan@Sun.COM 29138275SEric Cheng void 29147406SSowmini.Varadhan@Sun.COM mac_unregister_priv_prop(mac_impl_t *mip) 29157406SSowmini.Varadhan@Sun.COM { 29167406SSowmini.Varadhan@Sun.COM mac_priv_prop_t *mpriv; 29177406SSowmini.Varadhan@Sun.COM 29187406SSowmini.Varadhan@Sun.COM mpriv = mip->mi_priv_prop; 29197406SSowmini.Varadhan@Sun.COM if (mpriv != NULL) { 29207406SSowmini.Varadhan@Sun.COM kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv)); 29217406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop = NULL; 29227406SSowmini.Varadhan@Sun.COM } 29237406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop_count = 0; 29247406SSowmini.Varadhan@Sun.COM } 29258275SEric Cheng 29268275SEric Cheng /* 29278275SEric Cheng * mac_ring_t 'mr' macros. Some rogue drivers may access ring structure 29288275SEric Cheng * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 29298275SEric Cheng * cases if MAC free's the ring structure after mac_stop_ring(), any 29308275SEric Cheng * illegal access to the ring structure coming from the driver will panic 29318275SEric Cheng * the system. 
In order to protect the system from such inadverent access, 29328275SEric Cheng * we maintain a cache of rings in the mac_impl_t after they get free'd up. 29338275SEric Cheng * When packets are received on free'd up rings, MAC (through the generation 29348275SEric Cheng * count mechanism) will drop such packets. 29358275SEric Cheng */ 29368275SEric Cheng static mac_ring_t * 29378275SEric Cheng mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) 29388275SEric Cheng { 29398275SEric Cheng mac_ring_t *ring; 29408275SEric Cheng 29418275SEric Cheng if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 29428275SEric Cheng mutex_enter(&mip->mi_ring_lock); 29438275SEric Cheng if (mip->mi_ring_freelist != NULL) { 29448275SEric Cheng ring = mip->mi_ring_freelist; 29458275SEric Cheng mip->mi_ring_freelist = ring->mr_next; 29468275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 29478275SEric Cheng } else { 29488275SEric Cheng ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 29498275SEric Cheng } 29508275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29518275SEric Cheng } else { 29528275SEric Cheng ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); 29538275SEric Cheng } 29548275SEric Cheng ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 29558275SEric Cheng return (ring); 29568275SEric Cheng } 29578275SEric Cheng 29588275SEric Cheng static void 29598275SEric Cheng mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 29608275SEric Cheng { 29618275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 29628275SEric Cheng mutex_enter(&mip->mi_ring_lock); 29638275SEric Cheng ring->mr_state = MR_FREE; 29648275SEric Cheng ring->mr_flag = 0; 29658275SEric Cheng ring->mr_next = mip->mi_ring_freelist; 29668275SEric Cheng mip->mi_ring_freelist = ring; 29678275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29688275SEric Cheng } else { 29698275SEric Cheng kmem_free(ring, sizeof (mac_ring_t)); 29708275SEric Cheng } 29718275SEric Cheng } 29728275SEric Cheng 29738275SEric Cheng static void 
29748275SEric Cheng mac_ring_freeall(mac_impl_t *mip) 29758275SEric Cheng { 29768275SEric Cheng mac_ring_t *ring_next; 29778275SEric Cheng mutex_enter(&mip->mi_ring_lock); 29788275SEric Cheng mac_ring_t *ring = mip->mi_ring_freelist; 29798275SEric Cheng while (ring != NULL) { 29808275SEric Cheng ring_next = ring->mr_next; 29818275SEric Cheng kmem_cache_free(mac_ring_cache, ring); 29828275SEric Cheng ring = ring_next; 29838275SEric Cheng } 29848275SEric Cheng mip->mi_ring_freelist = NULL; 29858275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29868275SEric Cheng } 29878275SEric Cheng 29888275SEric Cheng int 29898275SEric Cheng mac_start_ring(mac_ring_t *ring) 29908275SEric Cheng { 29918275SEric Cheng int rv = 0; 29928275SEric Cheng 29938275SEric Cheng if (ring->mr_start != NULL) 29948275SEric Cheng rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 29958275SEric Cheng 29968275SEric Cheng return (rv); 29978275SEric Cheng } 29988275SEric Cheng 29998275SEric Cheng void 30008275SEric Cheng mac_stop_ring(mac_ring_t *ring) 30018275SEric Cheng { 30028275SEric Cheng if (ring->mr_stop != NULL) 30038275SEric Cheng ring->mr_stop(ring->mr_driver); 30048275SEric Cheng 30058275SEric Cheng /* 30068275SEric Cheng * Increment the ring generation number for this ring. 
30078275SEric Cheng */ 30088275SEric Cheng ring->mr_gen_num++; 30098275SEric Cheng } 30108275SEric Cheng 30118275SEric Cheng int 30128275SEric Cheng mac_start_group(mac_group_t *group) 30138275SEric Cheng { 30148275SEric Cheng int rv = 0; 30158275SEric Cheng 30168275SEric Cheng if (group->mrg_start != NULL) 30178275SEric Cheng rv = group->mrg_start(group->mrg_driver); 30188275SEric Cheng 30198275SEric Cheng return (rv); 30208275SEric Cheng } 30218275SEric Cheng 30228275SEric Cheng void 30238275SEric Cheng mac_stop_group(mac_group_t *group) 30248275SEric Cheng { 30258275SEric Cheng if (group->mrg_stop != NULL) 30268275SEric Cheng group->mrg_stop(group->mrg_driver); 30278275SEric Cheng } 30288275SEric Cheng 30298275SEric Cheng /* 30308275SEric Cheng * Called from mac_start() on the default Rx group. Broadcast and multicast 30318275SEric Cheng * packets are received only on the default group. Hence the default group 30328275SEric Cheng * needs to be up even if the primary client is not up, for the other groups 30338275SEric Cheng * to be functional. We do this by calling this function at mac_start time 30348275SEric Cheng * itself. However the broadcast packets that are received can't make their 30358275SEric Cheng * way beyond mac_rx until a mac client creates a broadcast flow. 
30368275SEric Cheng */ 30378275SEric Cheng static int 30388275SEric Cheng mac_start_group_and_rings(mac_group_t *group) 30398275SEric Cheng { 30408275SEric Cheng mac_ring_t *ring; 30418275SEric Cheng int rv = 0; 30428275SEric Cheng 30438275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 30448275SEric Cheng if ((rv = mac_start_group(group)) != 0) 30458275SEric Cheng return (rv); 30468275SEric Cheng 30478275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 30488275SEric Cheng ASSERT(ring->mr_state == MR_FREE); 30498275SEric Cheng if ((rv = mac_start_ring(ring)) != 0) 30508275SEric Cheng goto error; 30518275SEric Cheng ring->mr_state = MR_INUSE; 30528275SEric Cheng ring->mr_classify_type = MAC_SW_CLASSIFIER; 30538275SEric Cheng } 30548275SEric Cheng return (0); 30558275SEric Cheng 30568275SEric Cheng error: 30578275SEric Cheng mac_stop_group_and_rings(group); 30588275SEric Cheng return (rv); 30598275SEric Cheng } 30608275SEric Cheng 30618275SEric Cheng /* Called from mac_stop on the default Rx group */ 30628275SEric Cheng static void 30638275SEric Cheng mac_stop_group_and_rings(mac_group_t *group) 30648275SEric Cheng { 30658275SEric Cheng mac_ring_t *ring; 30668275SEric Cheng 30678275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 30688275SEric Cheng if (ring->mr_state != MR_FREE) { 30698275SEric Cheng mac_stop_ring(ring); 30708275SEric Cheng ring->mr_state = MR_FREE; 30718275SEric Cheng ring->mr_flag = 0; 30728275SEric Cheng ring->mr_classify_type = MAC_NO_CLASSIFIER; 30738275SEric Cheng } 30748275SEric Cheng } 30758275SEric Cheng mac_stop_group(group); 30768275SEric Cheng } 30778275SEric Cheng 30788275SEric Cheng 30798275SEric Cheng static mac_ring_t * 30808275SEric Cheng mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 30818275SEric Cheng mac_capab_rings_t *cap_rings) 30828275SEric Cheng { 30838275SEric Cheng mac_ring_t *ring; 30848275SEric Cheng mac_ring_info_t 
ring_info; 30858275SEric Cheng 30868275SEric Cheng ring = mac_ring_alloc(mip, cap_rings); 30878275SEric Cheng 30888275SEric Cheng /* Prepare basic information of ring */ 30898275SEric Cheng ring->mr_index = index; 30908275SEric Cheng ring->mr_type = group->mrg_type; 30918275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 30928275SEric Cheng 30938275SEric Cheng /* Insert the new ring to the list. */ 30948275SEric Cheng ring->mr_next = group->mrg_rings; 30958275SEric Cheng group->mrg_rings = ring; 30968275SEric Cheng 30978275SEric Cheng /* Zero to reuse the info data structure */ 30988275SEric Cheng bzero(&ring_info, sizeof (ring_info)); 30998275SEric Cheng 31008275SEric Cheng /* Query ring information from driver */ 31018275SEric Cheng cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 31028275SEric Cheng index, &ring_info, (mac_ring_handle_t)ring); 31038275SEric Cheng 31048275SEric Cheng ring->mr_info = ring_info; 31058275SEric Cheng 31068275SEric Cheng /* Update ring's status */ 31078275SEric Cheng ring->mr_state = MR_FREE; 31088275SEric Cheng ring->mr_flag = 0; 31098275SEric Cheng 31108275SEric Cheng /* Update the ring count of the group */ 31118275SEric Cheng group->mrg_cur_count++; 31128275SEric Cheng return (ring); 31138275SEric Cheng } 31148275SEric Cheng 31158275SEric Cheng /* 31168275SEric Cheng * Rings are chained together for easy regrouping. 31178275SEric Cheng */ 31188275SEric Cheng static void 31198275SEric Cheng mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 31208275SEric Cheng mac_capab_rings_t *cap_rings) 31218275SEric Cheng { 31228275SEric Cheng int index; 31238275SEric Cheng 31248275SEric Cheng /* 31258275SEric Cheng * Initialize all ring members of this group. Size of zero will not 31268275SEric Cheng * enter the loop, so it's safe for initializing an empty group. 
31278275SEric Cheng */ 31288275SEric Cheng for (index = size - 1; index >= 0; index--) 31298275SEric Cheng (void) mac_init_ring(mip, group, index, cap_rings); 31308275SEric Cheng } 31318275SEric Cheng 31328275SEric Cheng int 31338275SEric Cheng mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 31348275SEric Cheng { 31358275SEric Cheng mac_capab_rings_t *cap_rings; 31368275SEric Cheng mac_group_t *group, *groups; 31378275SEric Cheng mac_group_info_t group_info; 31388275SEric Cheng uint_t group_free = 0; 31398275SEric Cheng uint_t ring_left; 31408275SEric Cheng mac_ring_t *ring; 31418275SEric Cheng int g, err = 0; 31428275SEric Cheng 31438275SEric Cheng switch (rtype) { 31448275SEric Cheng case MAC_RING_TYPE_RX: 31458275SEric Cheng ASSERT(mip->mi_rx_groups == NULL); 31468275SEric Cheng 31478275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 31488275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_RX; 31498275SEric Cheng break; 31508275SEric Cheng case MAC_RING_TYPE_TX: 31518275SEric Cheng ASSERT(mip->mi_tx_groups == NULL); 31528275SEric Cheng 31538275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 31548275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_TX; 31558275SEric Cheng break; 31568275SEric Cheng default: 31578275SEric Cheng ASSERT(B_FALSE); 31588275SEric Cheng } 31598275SEric Cheng 31608275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, 31618275SEric Cheng cap_rings)) 31628275SEric Cheng return (0); 31638275SEric Cheng 31648275SEric Cheng /* 31658275SEric Cheng * Allocate a contiguous buffer for all groups. 31668275SEric Cheng */ 31678275SEric Cheng groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), 31688275SEric Cheng KM_SLEEP); 31698275SEric Cheng 31708275SEric Cheng ring_left = cap_rings->mr_rnum; 31718275SEric Cheng 31728275SEric Cheng /* 31738275SEric Cheng * Get all ring groups if any, and get their ring members 31748275SEric Cheng * if any. 
31758275SEric Cheng */ 31768275SEric Cheng for (g = 0; g < cap_rings->mr_gnum; g++) { 31778275SEric Cheng group = groups + g; 31788275SEric Cheng 31798275SEric Cheng /* Prepare basic information of the group */ 31808275SEric Cheng group->mrg_index = g; 31818275SEric Cheng group->mrg_type = rtype; 31828275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 31838275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 31848275SEric Cheng group->mrg_next = group + 1; 31858275SEric Cheng 31868275SEric Cheng /* Zero to reuse the info data structure */ 31878275SEric Cheng bzero(&group_info, sizeof (group_info)); 31888275SEric Cheng 31898275SEric Cheng /* Query group information from driver */ 31908275SEric Cheng cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, 31918275SEric Cheng (mac_group_handle_t)group); 31928275SEric Cheng 31938275SEric Cheng switch (cap_rings->mr_group_type) { 31948275SEric Cheng case MAC_GROUP_TYPE_DYNAMIC: 31958275SEric Cheng if (cap_rings->mr_gaddring == NULL || 31968275SEric Cheng cap_rings->mr_gremring == NULL) { 31978275SEric Cheng DTRACE_PROBE3( 31988275SEric Cheng mac__init__rings_no_addremring, 31998275SEric Cheng char *, mip->mi_name, 32008275SEric Cheng mac_group_add_ring_t, 32018275SEric Cheng cap_rings->mr_gaddring, 32028275SEric Cheng mac_group_add_ring_t, 32038275SEric Cheng cap_rings->mr_gremring); 32048275SEric Cheng err = EINVAL; 32058275SEric Cheng goto bail; 32068275SEric Cheng } 32078275SEric Cheng 32088275SEric Cheng switch (rtype) { 32098275SEric Cheng case MAC_RING_TYPE_RX: 32108275SEric Cheng /* 32118275SEric Cheng * The first RX group must have non-zero 32128275SEric Cheng * rings, and the following groups must 32138275SEric Cheng * have zero rings. 
32148275SEric Cheng */ 32158275SEric Cheng if (g == 0 && group_info.mgi_count == 0) { 32168275SEric Cheng DTRACE_PROBE1( 32178275SEric Cheng mac__init__rings__rx__def__zero, 32188275SEric Cheng char *, mip->mi_name); 32198275SEric Cheng err = EINVAL; 32208275SEric Cheng goto bail; 32218275SEric Cheng } 32228275SEric Cheng if (g > 0 && group_info.mgi_count != 0) { 32238275SEric Cheng DTRACE_PROBE3( 32248275SEric Cheng mac__init__rings__rx__nonzero, 32258275SEric Cheng char *, mip->mi_name, 32268275SEric Cheng int, g, int, group_info.mgi_count); 32278275SEric Cheng err = EINVAL; 32288275SEric Cheng goto bail; 32298275SEric Cheng } 32308275SEric Cheng break; 32318275SEric Cheng case MAC_RING_TYPE_TX: 32328275SEric Cheng /* 32338275SEric Cheng * All TX ring groups must have zero rings. 32348275SEric Cheng */ 32358275SEric Cheng if (group_info.mgi_count != 0) { 32368275SEric Cheng DTRACE_PROBE3( 32378275SEric Cheng mac__init__rings__tx__nonzero, 32388275SEric Cheng char *, mip->mi_name, 32398275SEric Cheng int, g, int, group_info.mgi_count); 32408275SEric Cheng err = EINVAL; 32418275SEric Cheng goto bail; 32428275SEric Cheng } 32438275SEric Cheng break; 32448275SEric Cheng } 32458275SEric Cheng break; 32468275SEric Cheng case MAC_GROUP_TYPE_STATIC: 32478275SEric Cheng /* 32488275SEric Cheng * Note that an empty group is allowed, e.g., an aggr 32498275SEric Cheng * would start with an empty group. 32508275SEric Cheng */ 32518275SEric Cheng break; 32528275SEric Cheng default: 32538275SEric Cheng /* unknown group type */ 32548275SEric Cheng DTRACE_PROBE2(mac__init__rings__unknown__type, 32558275SEric Cheng char *, mip->mi_name, 32568275SEric Cheng int, cap_rings->mr_group_type); 32578275SEric Cheng err = EINVAL; 32588275SEric Cheng goto bail; 32598275SEric Cheng } 32608275SEric Cheng 32618275SEric Cheng 32628275SEric Cheng /* 32638275SEric Cheng * Driver must register group->mgi_addmac/remmac() for rx groups 32648275SEric Cheng * to support multiple MAC addresses. 
32658275SEric Cheng */ 32668275SEric Cheng if (rtype == MAC_RING_TYPE_RX) { 32678275SEric Cheng if ((group_info.mgi_addmac == NULL) || 32688275SEric Cheng (group_info.mgi_addmac == NULL)) 32698275SEric Cheng goto bail; 32708275SEric Cheng } 32718275SEric Cheng 32728275SEric Cheng /* Cache driver-supplied information */ 32738275SEric Cheng group->mrg_info = group_info; 32748275SEric Cheng 32758275SEric Cheng /* Update the group's status and group count. */ 32768275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 32778275SEric Cheng group_free++; 32788275SEric Cheng 32798275SEric Cheng group->mrg_rings = NULL; 32808275SEric Cheng group->mrg_cur_count = 0; 32818275SEric Cheng mac_init_group(mip, group, group_info.mgi_count, cap_rings); 32828275SEric Cheng ring_left -= group_info.mgi_count; 32838275SEric Cheng 32848275SEric Cheng /* The current group size should be equal to default value */ 32858275SEric Cheng ASSERT(group->mrg_cur_count == group_info.mgi_count); 32868275SEric Cheng } 32878275SEric Cheng 32888275SEric Cheng /* Build up a dummy group for free resources as a pool */ 32898275SEric Cheng group = groups + cap_rings->mr_gnum; 32908275SEric Cheng 32918275SEric Cheng /* Prepare basic information of the group */ 32928275SEric Cheng group->mrg_index = -1; 32938275SEric Cheng group->mrg_type = rtype; 32948275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 32958275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 32968275SEric Cheng group->mrg_next = NULL; 32978275SEric Cheng 32988275SEric Cheng /* 32998275SEric Cheng * If there are ungrouped rings, allocate a continuous buffer for 33008275SEric Cheng * remaining resources. 
33018275SEric Cheng */ 33028275SEric Cheng if (ring_left != 0) { 33038275SEric Cheng group->mrg_rings = NULL; 33048275SEric Cheng group->mrg_cur_count = 0; 33058275SEric Cheng mac_init_group(mip, group, ring_left, cap_rings); 33068275SEric Cheng 33078275SEric Cheng /* The current group size should be equal to ring_left */ 33088275SEric Cheng ASSERT(group->mrg_cur_count == ring_left); 33098275SEric Cheng 33108275SEric Cheng ring_left = 0; 33118275SEric Cheng 33128275SEric Cheng /* Update this group's status */ 33138275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 33148275SEric Cheng } else 33158275SEric Cheng group->mrg_rings = NULL; 33168275SEric Cheng 33178275SEric Cheng ASSERT(ring_left == 0); 33188275SEric Cheng 33198275SEric Cheng bail: 33208275SEric Cheng /* Cache other important information to finalize the initialization */ 33218275SEric Cheng switch (rtype) { 33228275SEric Cheng case MAC_RING_TYPE_RX: 33238275SEric Cheng mip->mi_rx_group_type = cap_rings->mr_group_type; 33248275SEric Cheng mip->mi_rx_group_count = cap_rings->mr_gnum; 33258275SEric Cheng mip->mi_rx_groups = groups; 33268275SEric Cheng break; 33278275SEric Cheng case MAC_RING_TYPE_TX: 33288275SEric Cheng mip->mi_tx_group_type = cap_rings->mr_group_type; 33298275SEric Cheng mip->mi_tx_group_count = cap_rings->mr_gnum; 33308275SEric Cheng mip->mi_tx_group_free = group_free; 33318275SEric Cheng mip->mi_tx_groups = groups; 33328275SEric Cheng 33338275SEric Cheng /* 33348275SEric Cheng * Ring 0 is used as the default one and it could be assigned 33358275SEric Cheng * to a client as well. 
33368275SEric Cheng */ 33378275SEric Cheng group = groups + cap_rings->mr_gnum; 33388275SEric Cheng ring = group->mrg_rings; 33398275SEric Cheng while ((ring->mr_index != 0) && (ring->mr_next != NULL)) 33408275SEric Cheng ring = ring->mr_next; 33418275SEric Cheng ASSERT(ring->mr_index == 0); 33428275SEric Cheng mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 33438275SEric Cheng break; 33448275SEric Cheng default: 33458275SEric Cheng ASSERT(B_FALSE); 33468275SEric Cheng } 33478275SEric Cheng 33488275SEric Cheng if (err != 0) 33498275SEric Cheng mac_free_rings(mip, rtype); 33508275SEric Cheng 33518275SEric Cheng return (err); 33528275SEric Cheng } 33538275SEric Cheng 33548275SEric Cheng /* 33558275SEric Cheng * Called to free all ring groups with particular type. It's supposed all groups 33568275SEric Cheng * have been released by clinet. 33578275SEric Cheng */ 33588275SEric Cheng void 33598275SEric Cheng mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) 33608275SEric Cheng { 33618275SEric Cheng mac_group_t *group, *groups; 33628275SEric Cheng uint_t group_count; 33638275SEric Cheng 33648275SEric Cheng switch (rtype) { 33658275SEric Cheng case MAC_RING_TYPE_RX: 33668275SEric Cheng if (mip->mi_rx_groups == NULL) 33678275SEric Cheng return; 33688275SEric Cheng 33698275SEric Cheng groups = mip->mi_rx_groups; 33708275SEric Cheng group_count = mip->mi_rx_group_count; 33718275SEric Cheng 33728275SEric Cheng mip->mi_rx_groups = NULL; 33738275SEric Cheng mip->mi_rx_group_count = 0; 33748275SEric Cheng break; 33758275SEric Cheng case MAC_RING_TYPE_TX: 33768275SEric Cheng ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free); 33778275SEric Cheng 33788275SEric Cheng if (mip->mi_tx_groups == NULL) 33798275SEric Cheng return; 33808275SEric Cheng 33818275SEric Cheng groups = mip->mi_tx_groups; 33828275SEric Cheng group_count = mip->mi_tx_group_count; 33838275SEric Cheng 33848275SEric Cheng mip->mi_tx_groups = NULL; 33858275SEric Cheng mip->mi_tx_group_count = 0; 
33868275SEric Cheng mip->mi_tx_group_free = 0; 33878275SEric Cheng mip->mi_default_tx_ring = NULL; 33888275SEric Cheng break; 33898275SEric Cheng default: 33908275SEric Cheng ASSERT(B_FALSE); 33918275SEric Cheng } 33928275SEric Cheng 33938275SEric Cheng for (group = groups; group != NULL; group = group->mrg_next) { 33948275SEric Cheng mac_ring_t *ring; 33958275SEric Cheng 33968275SEric Cheng if (group->mrg_cur_count == 0) 33978275SEric Cheng continue; 33988275SEric Cheng 33998275SEric Cheng ASSERT(group->mrg_rings != NULL); 34008275SEric Cheng 34018275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 34028275SEric Cheng group->mrg_rings = ring->mr_next; 34038275SEric Cheng mac_ring_free(mip, ring); 34048275SEric Cheng } 34058275SEric Cheng } 34068275SEric Cheng 34078275SEric Cheng /* Free all the cached rings */ 34088275SEric Cheng mac_ring_freeall(mip); 34098275SEric Cheng /* Free the block of group data strutures */ 34108275SEric Cheng kmem_free(groups, sizeof (mac_group_t) * (group_count + 1)); 34118275SEric Cheng } 34128275SEric Cheng 34138275SEric Cheng /* 34148275SEric Cheng * Associate a MAC address with a receive group. 34158275SEric Cheng * 34168275SEric Cheng * The return value of this function should always be checked properly, because 34178275SEric Cheng * any type of failure could cause unexpected results. A group can be added 34188275SEric Cheng * or removed with a MAC address only after it has been reserved. Ideally, 34198275SEric Cheng * a successful reservation always leads to calling mac_group_addmac() to 34208275SEric Cheng * steer desired traffic. Failure of adding an unicast MAC address doesn't 34218275SEric Cheng * always imply that the group is functioning abnormally. 34228275SEric Cheng * 34238275SEric Cheng * Currently this function is called everywhere, and it reflects assumptions 34248275SEric Cheng * about MAC addresses in the implementation. CR 6735196. 
34258275SEric Cheng */ 34268275SEric Cheng int 34278275SEric Cheng mac_group_addmac(mac_group_t *group, const uint8_t *addr) 34288275SEric Cheng { 34298275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 34308275SEric Cheng ASSERT(group->mrg_info.mgi_addmac != NULL); 34318275SEric Cheng 34328275SEric Cheng return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); 34338275SEric Cheng } 34348275SEric Cheng 34358275SEric Cheng /* 34368275SEric Cheng * Remove the association between MAC address and receive group. 34378275SEric Cheng */ 34388275SEric Cheng int 34398275SEric Cheng mac_group_remmac(mac_group_t *group, const uint8_t *addr) 34408275SEric Cheng { 34418275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 34428275SEric Cheng ASSERT(group->mrg_info.mgi_remmac != NULL); 34438275SEric Cheng 34448275SEric Cheng return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); 34458275SEric Cheng } 34468275SEric Cheng 34478275SEric Cheng /* 34488275SEric Cheng * Release a ring in use by marking it MR_FREE. 34498275SEric Cheng * Any other client may reserve it for its use. 34508275SEric Cheng */ 34518275SEric Cheng void 34528275SEric Cheng mac_release_tx_ring(mac_ring_handle_t rh) 34538275SEric Cheng { 34548275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 34558275SEric Cheng mac_group_t *group = (mac_group_t *)ring->mr_gh; 34568275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 34578275SEric Cheng 34588275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 34598275SEric Cheng ASSERT(ring->mr_state != MR_FREE); 34608275SEric Cheng 34618275SEric Cheng /* 34628275SEric Cheng * Default tx ring will be released by mac_stop(). 
34638275SEric Cheng */ 34648275SEric Cheng if (rh == mip->mi_default_tx_ring) 34658275SEric Cheng return; 34668275SEric Cheng 34678275SEric Cheng mac_stop_ring(ring); 34688275SEric Cheng 34698275SEric Cheng ring->mr_state = MR_FREE; 34708275SEric Cheng ring->mr_flag = 0; 34718275SEric Cheng } 34728275SEric Cheng 34738275SEric Cheng /* 34748275SEric Cheng * Send packets through a selected tx ring. 34758275SEric Cheng */ 34768275SEric Cheng mblk_t * 34778275SEric Cheng mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp) 34788275SEric Cheng { 34798275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 34808275SEric Cheng mac_ring_info_t *info = &ring->mr_info; 34818275SEric Cheng 34828275SEric Cheng ASSERT(ring->mr_type == MAC_RING_TYPE_TX); 34838275SEric Cheng ASSERT(ring->mr_state >= MR_INUSE); 34848275SEric Cheng ASSERT(info->mri_tx != NULL); 34858275SEric Cheng 34868275SEric Cheng return (info->mri_tx(info->mri_driver, mp)); 34878275SEric Cheng } 34888275SEric Cheng 34898275SEric Cheng /* 34908275SEric Cheng * Find a ring from its index. 34918275SEric Cheng */ 34928275SEric Cheng mac_ring_t * 34938275SEric Cheng mac_find_ring(mac_group_t *group, int index) 34948275SEric Cheng { 34958275SEric Cheng mac_ring_t *ring = group->mrg_rings; 34968275SEric Cheng 34978275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) 34988275SEric Cheng if (ring->mr_index == index) 34998275SEric Cheng break; 35008275SEric Cheng 35018275SEric Cheng return (ring); 35028275SEric Cheng } 35038275SEric Cheng /* 35048275SEric Cheng * Add a ring to an existing group. 35058275SEric Cheng * 35068275SEric Cheng * The ring must be either passed directly (for example if the ring 35078275SEric Cheng * movement is initiated by the framework), or specified through a driver 35088275SEric Cheng * index (for example when the ring is added by the driver. 35098275SEric Cheng * 35108275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
35118275SEric Cheng */ 35128275SEric Cheng int 35138275SEric Cheng i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) 35148275SEric Cheng { 35158275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 35168275SEric Cheng mac_capab_rings_t *cap_rings; 35178275SEric Cheng boolean_t driver_call = (ring == NULL); 35188275SEric Cheng mac_group_type_t group_type; 35198275SEric Cheng int ret = 0; 35208275SEric Cheng 35218275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 35228275SEric Cheng 35238275SEric Cheng switch (group->mrg_type) { 35248275SEric Cheng case MAC_RING_TYPE_RX: 35258275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 35268275SEric Cheng group_type = mip->mi_rx_group_type; 35278275SEric Cheng break; 35288275SEric Cheng case MAC_RING_TYPE_TX: 35298275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 35308275SEric Cheng group_type = mip->mi_tx_group_type; 35318275SEric Cheng break; 35328275SEric Cheng default: 35338275SEric Cheng ASSERT(B_FALSE); 35348275SEric Cheng } 35358275SEric Cheng 35368275SEric Cheng /* 35378275SEric Cheng * There should be no ring with the same ring index in the target 35388275SEric Cheng * group. 35398275SEric Cheng */ 35408275SEric Cheng ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == 35418275SEric Cheng NULL); 35428275SEric Cheng 35438275SEric Cheng if (driver_call) { 35448275SEric Cheng /* 35458275SEric Cheng * The function is called as a result of a request from 35468275SEric Cheng * a driver to add a ring to an existing group, for example 35478275SEric Cheng * from the aggregation driver. Allocate a new mac_ring_t 35488275SEric Cheng * for that ring. 
35498275SEric Cheng */ 35508275SEric Cheng ring = mac_init_ring(mip, group, index, cap_rings); 35518275SEric Cheng ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT); 35528275SEric Cheng } else { 35538275SEric Cheng /* 35548275SEric Cheng * The function is called as a result of a MAC layer request 35558275SEric Cheng * to add a ring to an existing group. In this case the 35568275SEric Cheng * ring is being moved between groups, which requires 35578275SEric Cheng * the underlying driver to support dynamic grouping, 35588275SEric Cheng * and the mac_ring_t already exists. 35598275SEric Cheng */ 35608275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 35618275SEric Cheng ASSERT(cap_rings->mr_gaddring != NULL); 35628275SEric Cheng ASSERT(ring->mr_gh == NULL); 35638275SEric Cheng } 35648275SEric Cheng 35658275SEric Cheng /* 35668275SEric Cheng * At this point the ring should not be in use, and it should be 35678275SEric Cheng * of the right for the target group. 35688275SEric Cheng */ 35698275SEric Cheng ASSERT(ring->mr_state < MR_INUSE); 35708275SEric Cheng ASSERT(ring->mr_srs == NULL); 35718275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 35728275SEric Cheng 35738275SEric Cheng if (!driver_call) { 35748275SEric Cheng /* 35758275SEric Cheng * Add the driver level hardware ring if the process was not 35768275SEric Cheng * initiated by the driver, and the target group is not the 35778275SEric Cheng * group. 35788275SEric Cheng */ 35798275SEric Cheng if (group->mrg_driver != NULL) { 35808275SEric Cheng cap_rings->mr_gaddring(group->mrg_driver, 35818275SEric Cheng ring->mr_driver, ring->mr_type); 35828275SEric Cheng } 35838275SEric Cheng 35848275SEric Cheng /* 35858275SEric Cheng * Insert the ring ahead existing rings. 
35868275SEric Cheng */ 35878275SEric Cheng ring->mr_next = group->mrg_rings; 35888275SEric Cheng group->mrg_rings = ring; 35898275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 35908275SEric Cheng group->mrg_cur_count++; 35918275SEric Cheng } 35928275SEric Cheng 35938275SEric Cheng /* 35948275SEric Cheng * If the group has not been actively used, we're done. 35958275SEric Cheng */ 35968275SEric Cheng if (group->mrg_index != -1 && 35978275SEric Cheng group->mrg_state < MAC_GROUP_STATE_RESERVED) 35988275SEric Cheng return (0); 35998275SEric Cheng 36008275SEric Cheng /* 36018275SEric Cheng * Set up SRS/SR according to the ring type. 36028275SEric Cheng */ 36038275SEric Cheng switch (ring->mr_type) { 36048275SEric Cheng case MAC_RING_TYPE_RX: 36058275SEric Cheng /* 36068275SEric Cheng * Setup SRS on top of the new ring if the group is 36078275SEric Cheng * reserved for someones exclusive use. 36088275SEric Cheng */ 36098275SEric Cheng if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { 36108275SEric Cheng flow_entry_t *flent; 36118275SEric Cheng mac_client_impl_t *mcip; 36128275SEric Cheng 36138275SEric Cheng mcip = MAC_RX_GROUP_ONLY_CLIENT(group); 36148275SEric Cheng ASSERT(mcip != NULL); 36158275SEric Cheng flent = mcip->mci_flent; 36168275SEric Cheng ASSERT(flent->fe_rx_srs_cnt > 0); 36178275SEric Cheng mac_srs_group_setup(mcip, flent, group, SRST_LINK); 36188275SEric Cheng } 36198275SEric Cheng break; 36208275SEric Cheng case MAC_RING_TYPE_TX: 36218275SEric Cheng /* 36228275SEric Cheng * For TX this function is only invoked during the 36238275SEric Cheng * initial creation of a group when a share is 36248275SEric Cheng * associated with a MAC client. So the datapath is not 36258275SEric Cheng * yet setup, and will be setup later after the 36268275SEric Cheng * group has been reserved and populated. 
36278275SEric Cheng */ 36288275SEric Cheng break; 36298275SEric Cheng default: 36308275SEric Cheng ASSERT(B_FALSE); 36318275SEric Cheng } 36328275SEric Cheng 36338275SEric Cheng /* 36348275SEric Cheng * Start the ring if needed. Failure causes to undo the grouping action. 36358275SEric Cheng */ 36368275SEric Cheng if ((ret = mac_start_ring(ring)) != 0) { 36378275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 36388275SEric Cheng if (ring->mr_srs != NULL) { 36398275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 36408275SEric Cheng ring->mr_srs = NULL; 36418275SEric Cheng } 36428275SEric Cheng } 36438275SEric Cheng if (!driver_call) { 36448275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 36458275SEric Cheng ring->mr_driver, ring->mr_type); 36468275SEric Cheng } 36478275SEric Cheng group->mrg_cur_count--; 36488275SEric Cheng group->mrg_rings = ring->mr_next; 36498275SEric Cheng 36508275SEric Cheng ring->mr_gh = NULL; 36518275SEric Cheng 36528275SEric Cheng if (driver_call) 36538275SEric Cheng mac_ring_free(mip, ring); 36548275SEric Cheng 36558275SEric Cheng return (ret); 36568275SEric Cheng } 36578275SEric Cheng 36588275SEric Cheng /* 36598275SEric Cheng * Update the ring's state. 36608275SEric Cheng */ 36618275SEric Cheng ring->mr_state = MR_INUSE; 36628275SEric Cheng MAC_RING_UNMARK(ring, MR_INCIPIENT); 36638275SEric Cheng return (0); 36648275SEric Cheng } 36658275SEric Cheng 36668275SEric Cheng /* 36678275SEric Cheng * Remove a ring from it's current group. MAC internal function for dynamic 36688275SEric Cheng * grouping. 36698275SEric Cheng * 36708275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
36718275SEric Cheng */ 36728275SEric Cheng void 36738275SEric Cheng i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, 36748275SEric Cheng boolean_t driver_call) 36758275SEric Cheng { 36768275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 36778275SEric Cheng mac_capab_rings_t *cap_rings = NULL; 36788275SEric Cheng mac_group_type_t group_type; 36798275SEric Cheng 36808275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 36818275SEric Cheng 36828275SEric Cheng ASSERT(mac_find_ring(group, ring->mr_index) == ring); 36838275SEric Cheng ASSERT((mac_group_t *)ring->mr_gh == group); 36848275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 36858275SEric Cheng 36868275SEric Cheng switch (ring->mr_type) { 36878275SEric Cheng case MAC_RING_TYPE_RX: 36888275SEric Cheng group_type = mip->mi_rx_group_type; 36898275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 36908275SEric Cheng 36918275SEric Cheng if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) 36928275SEric Cheng mac_stop_ring(ring); 36938275SEric Cheng 36948275SEric Cheng /* 36958275SEric Cheng * Only hardware classified packets hold a reference to the 36968275SEric Cheng * ring all the way up the Rx path. mac_rx_srs_remove() 36978275SEric Cheng * will take care of quiescing the Rx path and removing the 36988275SEric Cheng * SRS. The software classified path neither holds a reference 36998275SEric Cheng * nor any association with the ring in mac_rx. 
37008275SEric Cheng */ 37018275SEric Cheng if (ring->mr_srs != NULL) { 37028275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 37038275SEric Cheng ring->mr_srs = NULL; 37048275SEric Cheng } 37058275SEric Cheng ring->mr_state = MR_FREE; 37068275SEric Cheng ring->mr_flag = 0; 37078275SEric Cheng 37088275SEric Cheng break; 37098275SEric Cheng case MAC_RING_TYPE_TX: 37108275SEric Cheng /* 37118275SEric Cheng * For TX this function is only invoked in two 37128275SEric Cheng * cases: 37138275SEric Cheng * 37148275SEric Cheng * 1) In the case of a failure during the 37158275SEric Cheng * initial creation of a group when a share is 37168275SEric Cheng * associated with a MAC client. So the SRS is not 37178275SEric Cheng * yet setup, and will be setup later after the 37188275SEric Cheng * group has been reserved and populated. 37198275SEric Cheng * 37208275SEric Cheng * 2) From mac_release_tx_group() when freeing 37218275SEric Cheng * a TX SRS. 37228275SEric Cheng * 37238275SEric Cheng * In both cases the SRS and its soft rings are 37248275SEric Cheng * already quiesced. 37258275SEric Cheng */ 37268275SEric Cheng ASSERT(!driver_call); 37278275SEric Cheng group_type = mip->mi_tx_group_type; 37288275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 37298275SEric Cheng break; 37308275SEric Cheng default: 37318275SEric Cheng ASSERT(B_FALSE); 37328275SEric Cheng } 37338275SEric Cheng 37348275SEric Cheng /* 37358275SEric Cheng * Remove the ring from the group. 
37368275SEric Cheng */ 37378275SEric Cheng if (ring == group->mrg_rings) 37388275SEric Cheng group->mrg_rings = ring->mr_next; 37398275SEric Cheng else { 37408275SEric Cheng mac_ring_t *pre; 37418275SEric Cheng 37428275SEric Cheng pre = group->mrg_rings; 37438275SEric Cheng while (pre->mr_next != ring) 37448275SEric Cheng pre = pre->mr_next; 37458275SEric Cheng pre->mr_next = ring->mr_next; 37468275SEric Cheng } 37478275SEric Cheng group->mrg_cur_count--; 37488275SEric Cheng 37498275SEric Cheng if (!driver_call) { 37508275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 37518275SEric Cheng ASSERT(cap_rings->mr_gremring != NULL); 37528275SEric Cheng 37538275SEric Cheng /* 37548275SEric Cheng * Remove the driver level hardware ring. 37558275SEric Cheng */ 37568275SEric Cheng if (group->mrg_driver != NULL) { 37578275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 37588275SEric Cheng ring->mr_driver, ring->mr_type); 37598275SEric Cheng } 37608275SEric Cheng } 37618275SEric Cheng 37628275SEric Cheng ring->mr_gh = NULL; 37638275SEric Cheng if (driver_call) { 37648275SEric Cheng mac_ring_free(mip, ring); 37658275SEric Cheng } else { 37668275SEric Cheng ring->mr_state = MR_FREE; 37678275SEric Cheng ring->mr_flag = 0; 37688275SEric Cheng } 37698275SEric Cheng } 37708275SEric Cheng 37718275SEric Cheng /* 37728275SEric Cheng * Move a ring to the target group. If needed, remove the ring from the group 37738275SEric Cheng * that it currently belongs to. 37748275SEric Cheng * 37758275SEric Cheng * The caller need to enter MAC's perimeter by calling mac_perim_enter(). 
37768275SEric Cheng */ 37778275SEric Cheng static int 37788275SEric Cheng mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring) 37798275SEric Cheng { 37808275SEric Cheng mac_group_t *s_group = (mac_group_t *)ring->mr_gh; 37818275SEric Cheng int rv; 37828275SEric Cheng 37838275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37848275SEric Cheng ASSERT(d_group != NULL); 37858275SEric Cheng ASSERT(s_group->mrg_mh == d_group->mrg_mh); 37868275SEric Cheng 37878275SEric Cheng if (s_group == d_group) 37888275SEric Cheng return (0); 37898275SEric Cheng 37908275SEric Cheng /* 37918275SEric Cheng * Remove it from current group first. 37928275SEric Cheng */ 37938275SEric Cheng if (s_group != NULL) 37948275SEric Cheng i_mac_group_rem_ring(s_group, ring, B_FALSE); 37958275SEric Cheng 37968275SEric Cheng /* 37978275SEric Cheng * Add it to the new group. 37988275SEric Cheng */ 37998275SEric Cheng rv = i_mac_group_add_ring(d_group, ring, 0); 38008275SEric Cheng if (rv != 0) { 38018275SEric Cheng /* 38028275SEric Cheng * Failed to add ring back to source group. If 38038275SEric Cheng * that fails, the ring is stuck in limbo, log message. 38048275SEric Cheng */ 38058275SEric Cheng if (i_mac_group_add_ring(s_group, ring, 0)) { 38068275SEric Cheng cmn_err(CE_WARN, "%s: failed to move ring %p\n", 38078275SEric Cheng mip->mi_name, (void *)ring); 38088275SEric Cheng } 38098275SEric Cheng } 38108275SEric Cheng 38118275SEric Cheng return (rv); 38128275SEric Cheng } 38138275SEric Cheng 38148275SEric Cheng /* 38158275SEric Cheng * Find a MAC address according to its value. 
38168275SEric Cheng */ 38178275SEric Cheng mac_address_t * 38188275SEric Cheng mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr) 38198275SEric Cheng { 38208275SEric Cheng mac_address_t *map; 38218275SEric Cheng 38228275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 38238275SEric Cheng 38248275SEric Cheng for (map = mip->mi_addresses; map != NULL; map = map->ma_next) { 38258275SEric Cheng if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) 38268275SEric Cheng break; 38278275SEric Cheng } 38288275SEric Cheng 38298275SEric Cheng return (map); 38308275SEric Cheng } 38318275SEric Cheng 38328275SEric Cheng /* 38338275SEric Cheng * Check whether the MAC address is shared by multiple clients. 38348275SEric Cheng */ 38358275SEric Cheng boolean_t 38368275SEric Cheng mac_check_macaddr_shared(mac_address_t *map) 38378275SEric Cheng { 38388275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip)); 38398275SEric Cheng 38408275SEric Cheng return (map->ma_nusers > 1); 38418275SEric Cheng } 38428275SEric Cheng 38438275SEric Cheng /* 38448275SEric Cheng * Remove the specified MAC address from the MAC address list and free it. 
38458275SEric Cheng */ 38468275SEric Cheng static void 38478275SEric Cheng mac_free_macaddr(mac_address_t *map) 38488275SEric Cheng { 38498275SEric Cheng mac_impl_t *mip = map->ma_mip; 38508275SEric Cheng 38518275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 38528275SEric Cheng ASSERT(mip->mi_addresses != NULL); 38538275SEric Cheng 38548275SEric Cheng map = mac_find_macaddr(mip, map->ma_addr); 38558275SEric Cheng 38568275SEric Cheng ASSERT(map != NULL); 38578275SEric Cheng ASSERT(map->ma_nusers == 0); 38588275SEric Cheng 38598275SEric Cheng if (map == mip->mi_addresses) { 38608275SEric Cheng mip->mi_addresses = map->ma_next; 38618275SEric Cheng } else { 38628275SEric Cheng mac_address_t *pre; 38638275SEric Cheng 38648275SEric Cheng pre = mip->mi_addresses; 38658275SEric Cheng while (pre->ma_next != map) 38668275SEric Cheng pre = pre->ma_next; 38678275SEric Cheng pre->ma_next = map->ma_next; 38688275SEric Cheng } 38698275SEric Cheng 38708275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 38718275SEric Cheng } 38728275SEric Cheng 38738275SEric Cheng /* 38748275SEric Cheng * Add a MAC address reference for a client. If the desired MAC address 38758275SEric Cheng * exists, add a reference to it. Otherwise, add the new address by adding 38768275SEric Cheng * it to a reserved group or setting promiscuous mode. Won't try different 38778275SEric Cheng * group is the group is non-NULL, so the caller must explictly share 38788275SEric Cheng * default group when needed. 38798275SEric Cheng * 38808275SEric Cheng * Note, the primary MAC address is initialized at registration time, so 38818275SEric Cheng * to add it to default group only need to activate it if its reference 38828275SEric Cheng * count is still zero. Also, some drivers may not have advertised RINGS 38838275SEric Cheng * capability. 
38848275SEric Cheng */ 38858275SEric Cheng int 38868400SNicolas.Droux@Sun.COM mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, 38878400SNicolas.Droux@Sun.COM boolean_t use_hw) 38888275SEric Cheng { 38898275SEric Cheng mac_address_t *map; 38908275SEric Cheng int err = 0; 38918275SEric Cheng boolean_t allocated_map = B_FALSE; 38928275SEric Cheng 38938275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 38948275SEric Cheng 38958275SEric Cheng map = mac_find_macaddr(mip, mac_addr); 38968275SEric Cheng 38978275SEric Cheng /* 38988275SEric Cheng * If the new MAC address has not been added. Allocate a new one 38998275SEric Cheng * and set it up. 39008275SEric Cheng */ 39018275SEric Cheng if (map == NULL) { 39028275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 39038275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 39048275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 39058275SEric Cheng map->ma_nusers = 0; 39068275SEric Cheng map->ma_group = group; 39078275SEric Cheng map->ma_mip = mip; 39088275SEric Cheng 39098275SEric Cheng /* add the new MAC address to the head of the address list */ 39108275SEric Cheng map->ma_next = mip->mi_addresses; 39118275SEric Cheng mip->mi_addresses = map; 39128275SEric Cheng 39138275SEric Cheng allocated_map = B_TRUE; 39148275SEric Cheng } 39158275SEric Cheng 39168275SEric Cheng ASSERT(map->ma_group == group); 39178275SEric Cheng 39188275SEric Cheng /* 39198275SEric Cheng * If the MAC address is already in use, simply account for the 39208275SEric Cheng * new client. 39218275SEric Cheng */ 39228275SEric Cheng if (map->ma_nusers++ > 0) 39238275SEric Cheng return (0); 39248275SEric Cheng 39258275SEric Cheng /* 39268275SEric Cheng * Activate this MAC address by adding it to the reserved group. 
39278275SEric Cheng */ 39288275SEric Cheng if (group != NULL) { 39298275SEric Cheng err = mac_group_addmac(group, (const uint8_t *)mac_addr); 39308275SEric Cheng if (err == 0) { 39318275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 39328275SEric Cheng return (0); 39338275SEric Cheng } 39348275SEric Cheng } 39358275SEric Cheng 39368275SEric Cheng /* 39378400SNicolas.Droux@Sun.COM * The MAC address addition failed. If the client requires a 39388400SNicolas.Droux@Sun.COM * hardware classified MAC address, fail the operation. 39398400SNicolas.Droux@Sun.COM */ 39408400SNicolas.Droux@Sun.COM if (use_hw) { 39418400SNicolas.Droux@Sun.COM err = ENOSPC; 39428400SNicolas.Droux@Sun.COM goto bail; 39438400SNicolas.Droux@Sun.COM } 39448400SNicolas.Droux@Sun.COM 39458400SNicolas.Droux@Sun.COM /* 39468400SNicolas.Droux@Sun.COM * Try promiscuous mode. 39478400SNicolas.Droux@Sun.COM * 39488400SNicolas.Droux@Sun.COM * For drivers that don't advertise RINGS capability, do 39498400SNicolas.Droux@Sun.COM * nothing for the primary address. 39508275SEric Cheng */ 39518400SNicolas.Droux@Sun.COM if ((group == NULL) && 39528400SNicolas.Droux@Sun.COM (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { 39538400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 39548400SNicolas.Droux@Sun.COM return (0); 39558400SNicolas.Droux@Sun.COM } 39568400SNicolas.Droux@Sun.COM 39578400SNicolas.Droux@Sun.COM /* 39588400SNicolas.Droux@Sun.COM * Enable promiscuous mode in order to receive traffic 39598400SNicolas.Droux@Sun.COM * to the new MAC address. 39608400SNicolas.Droux@Sun.COM */ 39618400SNicolas.Droux@Sun.COM if ((err = i_mac_promisc_set(mip, B_TRUE, MAC_DEVPROMISC)) == 0) { 39628400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; 39638400SNicolas.Droux@Sun.COM return (0); 39648275SEric Cheng } 39658275SEric Cheng 39668275SEric Cheng /* 39678275SEric Cheng * Free the MAC address that could not be added. 
Don't free 39688275SEric Cheng * a pre-existing address, it could have been the entry 39698275SEric Cheng * for the primary MAC address which was pre-allocated by 39708275SEric Cheng * mac_init_macaddr(), and which must remain on the list. 39718275SEric Cheng */ 39728400SNicolas.Droux@Sun.COM bail: 39738275SEric Cheng map->ma_nusers--; 39748275SEric Cheng if (allocated_map) 39758275SEric Cheng mac_free_macaddr(map); 39768275SEric Cheng return (err); 39778275SEric Cheng } 39788275SEric Cheng 39798275SEric Cheng /* 39808275SEric Cheng * Remove a reference to a MAC address. This may cause to remove the MAC 39818275SEric Cheng * address from an associated group or to turn off promiscuous mode. 39828275SEric Cheng * The caller needs to handle the failure properly. 39838275SEric Cheng */ 39848275SEric Cheng int 39858275SEric Cheng mac_remove_macaddr(mac_address_t *map) 39868275SEric Cheng { 39878275SEric Cheng mac_impl_t *mip = map->ma_mip; 39888275SEric Cheng int err = 0; 39898275SEric Cheng 39908275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 39918275SEric Cheng 39928275SEric Cheng ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); 39938275SEric Cheng 39948275SEric Cheng /* 39958275SEric Cheng * If it's not the last client using this MAC address, only update 39968275SEric Cheng * the MAC clients count. 39978275SEric Cheng */ 39988275SEric Cheng if (--map->ma_nusers > 0) 39998275SEric Cheng return (0); 40008275SEric Cheng 40018275SEric Cheng /* 40028275SEric Cheng * The MAC address is no longer used by any MAC client, so remove 40038275SEric Cheng * it from its associated group, or turn off promiscuous mode 40048275SEric Cheng * if it was enabled for the MAC address. 40058275SEric Cheng */ 40068275SEric Cheng switch (map->ma_type) { 40078275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 40088275SEric Cheng /* 40098275SEric Cheng * Don't free the preset primary address for drivers that 40108275SEric Cheng * don't advertise RINGS capability. 
40118275SEric Cheng */ 40128275SEric Cheng if (map->ma_group == NULL) 40138275SEric Cheng return (0); 40148275SEric Cheng 40158275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 40168275SEric Cheng break; 40178275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 40188400SNicolas.Droux@Sun.COM err = i_mac_promisc_set(mip, B_FALSE, MAC_DEVPROMISC); 40198275SEric Cheng break; 40208275SEric Cheng default: 40218275SEric Cheng ASSERT(B_FALSE); 40228275SEric Cheng } 40238275SEric Cheng 40248275SEric Cheng if (err != 0) 40258275SEric Cheng return (err); 40268275SEric Cheng 40278275SEric Cheng /* 40288275SEric Cheng * We created MAC address for the primary one at registration, so we 40298275SEric Cheng * won't free it here. mac_fini_macaddr() will take care of it. 40308275SEric Cheng */ 40318275SEric Cheng if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0) 40328275SEric Cheng mac_free_macaddr(map); 40338275SEric Cheng 40348275SEric Cheng return (0); 40358275SEric Cheng } 40368275SEric Cheng 40378275SEric Cheng /* 40388275SEric Cheng * Update an existing MAC address. The caller need to make sure that the new 40398275SEric Cheng * value has not been used. 40408275SEric Cheng */ 40418275SEric Cheng int 40428275SEric Cheng mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) 40438275SEric Cheng { 40448275SEric Cheng mac_impl_t *mip = map->ma_mip; 40458275SEric Cheng int err = 0; 40468275SEric Cheng 40478275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 40488275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 40498275SEric Cheng 40508275SEric Cheng switch (map->ma_type) { 40518275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 40528275SEric Cheng /* 40538275SEric Cheng * Update the primary address for drivers that are not 40548275SEric Cheng * RINGS capable. 
40558275SEric Cheng */ 40568275SEric Cheng if (map->ma_group == NULL) { 40578275SEric Cheng err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) 40588275SEric Cheng mac_addr); 40598275SEric Cheng if (err != 0) 40608275SEric Cheng return (err); 40618275SEric Cheng break; 40628275SEric Cheng } 40638275SEric Cheng 40648275SEric Cheng /* 40658275SEric Cheng * If this MAC address is not currently in use, 40668275SEric Cheng * simply break out and update the value. 40678275SEric Cheng */ 40688275SEric Cheng if (map->ma_nusers == 0) 40698275SEric Cheng break; 40708275SEric Cheng 40718275SEric Cheng /* 40728275SEric Cheng * Need to replace the MAC address associated with a group. 40738275SEric Cheng */ 40748275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 40758275SEric Cheng if (err != 0) 40768275SEric Cheng return (err); 40778275SEric Cheng 40788275SEric Cheng err = mac_group_addmac(map->ma_group, mac_addr); 40798275SEric Cheng 40808275SEric Cheng /* 40818275SEric Cheng * Failure hints hardware error. The MAC layer needs to 40828275SEric Cheng * have error notification facility to handle this. 40838275SEric Cheng * Now, simply try to restore the value. 40848275SEric Cheng */ 40858275SEric Cheng if (err != 0) 40868275SEric Cheng (void) mac_group_addmac(map->ma_group, map->ma_addr); 40878275SEric Cheng 40888275SEric Cheng break; 40898275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 40908275SEric Cheng /* 40918275SEric Cheng * Need to do nothing more if in promiscuous mode. 40928275SEric Cheng */ 40938275SEric Cheng break; 40948275SEric Cheng default: 40958275SEric Cheng ASSERT(B_FALSE); 40968275SEric Cheng } 40978275SEric Cheng 40988275SEric Cheng /* 40998275SEric Cheng * Successfully replaced the MAC address. 
41008275SEric Cheng */ 41018275SEric Cheng if (err == 0) 41028275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 41038275SEric Cheng 41048275SEric Cheng return (err); 41058275SEric Cheng } 41068275SEric Cheng 41078275SEric Cheng /* 41088275SEric Cheng * Freshen the MAC address with new value. Its caller must have updated the 41098275SEric Cheng * hardware MAC address before calling this function. 41108275SEric Cheng * This funcitons is supposed to be used to handle the MAC address change 41118275SEric Cheng * notification from underlying drivers. 41128275SEric Cheng */ 41138275SEric Cheng void 41148275SEric Cheng mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr) 41158275SEric Cheng { 41168275SEric Cheng mac_impl_t *mip = map->ma_mip; 41178275SEric Cheng 41188275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 41198275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 41208275SEric Cheng 41218275SEric Cheng /* 41228275SEric Cheng * Freshen the MAC address with new value. 41238275SEric Cheng */ 41248275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 41258275SEric Cheng bcopy(mac_addr, mip->mi_addr, map->ma_len); 41268275SEric Cheng 41278275SEric Cheng /* 41288275SEric Cheng * Update all MAC clients that share this MAC address. 41298275SEric Cheng */ 41308275SEric Cheng mac_unicast_update_clients(mip, map); 41318275SEric Cheng } 41328275SEric Cheng 41338275SEric Cheng /* 41348275SEric Cheng * Set up the primary MAC address. 41358275SEric Cheng */ 41368275SEric Cheng void 41378275SEric Cheng mac_init_macaddr(mac_impl_t *mip) 41388275SEric Cheng { 41398275SEric Cheng mac_address_t *map; 41408275SEric Cheng 41418275SEric Cheng /* 41428275SEric Cheng * The reference count is initialized to zero, until it's really 41438275SEric Cheng * activated. 
41448275SEric Cheng */ 41458275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 41468275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 41478275SEric Cheng bcopy(mip->mi_addr, map->ma_addr, map->ma_len); 41488275SEric Cheng 41498275SEric Cheng /* 41508275SEric Cheng * If driver advertises RINGS capability, it shouldn't have initialized 41518275SEric Cheng * its primary MAC address. For other drivers, including VNIC, the 41528275SEric Cheng * primary address must work after registration. 41538275SEric Cheng */ 41548275SEric Cheng if (mip->mi_rx_groups == NULL) 41558275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 41568275SEric Cheng 41578275SEric Cheng /* 41588275SEric Cheng * The primary MAC address is reserved for default group according 41598275SEric Cheng * to current design. 41608275SEric Cheng */ 41618275SEric Cheng map->ma_group = mip->mi_rx_groups; 41628275SEric Cheng map->ma_mip = mip; 41638275SEric Cheng 41648275SEric Cheng mip->mi_addresses = map; 41658275SEric Cheng } 41668275SEric Cheng 41678275SEric Cheng /* 41688275SEric Cheng * Clean up the primary MAC address. Note, only one primary MAC address 41698275SEric Cheng * is allowed. All other MAC addresses must have been freed appropriately. 41708275SEric Cheng */ 41718275SEric Cheng void 41728275SEric Cheng mac_fini_macaddr(mac_impl_t *mip) 41738275SEric Cheng { 41748275SEric Cheng mac_address_t *map = mip->mi_addresses; 41758275SEric Cheng 41768833SVenu.Iyer@Sun.COM if (map == NULL) 41778833SVenu.Iyer@Sun.COM return; 41788833SVenu.Iyer@Sun.COM 41798833SVenu.Iyer@Sun.COM /* 41808833SVenu.Iyer@Sun.COM * If mi_addresses is initialized, there should be exactly one 41818833SVenu.Iyer@Sun.COM * entry left on the list with no users. 
41828833SVenu.Iyer@Sun.COM */ 41838275SEric Cheng ASSERT(map->ma_nusers == 0); 41848275SEric Cheng ASSERT(map->ma_next == NULL); 41858275SEric Cheng 41868275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 41878275SEric Cheng mip->mi_addresses = NULL; 41888275SEric Cheng } 41898275SEric Cheng 41908275SEric Cheng /* 41918275SEric Cheng * Logging related functions. 41928275SEric Cheng */ 41938275SEric Cheng 41948275SEric Cheng /* Write the Flow description to the log file */ 41958275SEric Cheng int 41968275SEric Cheng mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 41978275SEric Cheng { 41988275SEric Cheng flow_desc_t *fdesc; 41998275SEric Cheng mac_resource_props_t *mrp; 42008275SEric Cheng net_desc_t ndesc; 42018275SEric Cheng 42028275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 42038275SEric Cheng 42048275SEric Cheng /* 42058275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 42068275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 42078275SEric Cheng */ 42088275SEric Cheng mutex_enter(&flent->fe_lock); 42098275SEric Cheng fdesc = &flent->fe_flow_desc; 42108275SEric Cheng mrp = &flent->fe_resource_props; 42118275SEric Cheng 42128275SEric Cheng ndesc.nd_name = flent->fe_flow_name; 42138275SEric Cheng ndesc.nd_devname = mcip->mci_name; 42148275SEric Cheng bcopy(fdesc->fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 42158275SEric Cheng bcopy(fdesc->fd_dst_mac, ndesc.nd_edest, ETHERADDRL); 42168275SEric Cheng ndesc.nd_sap = htonl(fdesc->fd_sap); 42178275SEric Cheng ndesc.nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 42188275SEric Cheng ndesc.nd_bw_limit = mrp->mrp_maxbw; 42198275SEric Cheng if (ndesc.nd_isv4) { 42208275SEric Cheng ndesc.nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 42218275SEric Cheng ndesc.nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 42228275SEric Cheng } else { 42238275SEric Cheng bcopy(&fdesc->fd_local_addr, ndesc.nd_saddr, IPV6_ADDR_LEN); 42248275SEric Cheng 
bcopy(&fdesc->fd_remote_addr, ndesc.nd_daddr, IPV6_ADDR_LEN); 42258275SEric Cheng } 42268275SEric Cheng ndesc.nd_sport = htons(fdesc->fd_local_port); 42278275SEric Cheng ndesc.nd_dport = htons(fdesc->fd_remote_port); 42288275SEric Cheng ndesc.nd_protocol = (uint8_t)fdesc->fd_protocol; 42298275SEric Cheng mutex_exit(&flent->fe_lock); 42308275SEric Cheng 42318275SEric Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_FLDESC_REC)); 42328275SEric Cheng } 42338275SEric Cheng 42348275SEric Cheng /* Write the Flow statistics to the log file */ 42358275SEric Cheng int 42368275SEric Cheng mac_write_flow_stats(flow_entry_t *flent) 42378275SEric Cheng { 42388275SEric Cheng flow_stats_t *fl_stats; 42398275SEric Cheng net_stat_t nstat; 42408275SEric Cheng 42418275SEric Cheng fl_stats = &flent->fe_flowstats; 42428275SEric Cheng nstat.ns_name = flent->fe_flow_name; 42438275SEric Cheng nstat.ns_ibytes = fl_stats->fs_rbytes; 42448275SEric Cheng nstat.ns_obytes = fl_stats->fs_obytes; 42458275SEric Cheng nstat.ns_ipackets = fl_stats->fs_ipackets; 42468275SEric Cheng nstat.ns_opackets = fl_stats->fs_opackets; 42478275SEric Cheng nstat.ns_ierrors = fl_stats->fs_ierrors; 42488275SEric Cheng nstat.ns_oerrors = fl_stats->fs_oerrors; 42498275SEric Cheng 42508275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); 42518275SEric Cheng } 42528275SEric Cheng 42538275SEric Cheng /* Write the Link Description to the log file */ 42548275SEric Cheng int 42558275SEric Cheng mac_write_link_desc(mac_client_impl_t *mcip) 42568275SEric Cheng { 42578275SEric Cheng net_desc_t ndesc; 42588275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 42598275SEric Cheng 42608275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 42618275SEric Cheng 42628275SEric Cheng ndesc.nd_name = mcip->mci_name; 42638275SEric Cheng ndesc.nd_devname = mcip->mci_name; 42648275SEric Cheng ndesc.nd_isv4 = B_TRUE; 42658275SEric Cheng /* 42668275SEric Cheng * Grab the fe_lock to see a self-consistent 
fe_flow_desc. 42678275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 42688275SEric Cheng * after removing the flent from the flow table. 42698275SEric Cheng */ 42708275SEric Cheng mutex_enter(&flent->fe_lock); 42718275SEric Cheng bcopy(flent->fe_flow_desc.fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 42728275SEric Cheng mutex_exit(&flent->fe_lock); 42738275SEric Cheng 42748275SEric Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_LNDESC_REC)); 42758275SEric Cheng } 42768275SEric Cheng 42778275SEric Cheng /* Write the Link statistics to the log file */ 42788275SEric Cheng int 42798275SEric Cheng mac_write_link_stats(mac_client_impl_t *mcip) 42808275SEric Cheng { 42818275SEric Cheng net_stat_t nstat; 42828275SEric Cheng 42838275SEric Cheng nstat.ns_name = mcip->mci_name; 42848275SEric Cheng nstat.ns_ibytes = mcip->mci_stat_ibytes; 42858275SEric Cheng nstat.ns_obytes = mcip->mci_stat_obytes; 42868275SEric Cheng nstat.ns_ipackets = mcip->mci_stat_ipackets; 42878275SEric Cheng nstat.ns_opackets = mcip->mci_stat_opackets; 42888275SEric Cheng nstat.ns_ierrors = mcip->mci_stat_ierrors; 42898275SEric Cheng nstat.ns_oerrors = mcip->mci_stat_oerrors; 42908275SEric Cheng 42918275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); 42928275SEric Cheng } 42938275SEric Cheng 42948275SEric Cheng /* 42958275SEric Cheng * For a given flow, if the descrition has not been logged before, do it now. 42968275SEric Cheng * If it is a VNIC, then we have collected information about it from the MAC 42978275SEric Cheng * table, so skip it. 
42988275SEric Cheng */ 42998275SEric Cheng /*ARGSUSED*/ 43008275SEric Cheng static int 43018275SEric Cheng mac_log_flowinfo(flow_entry_t *flent, void *args) 43028275SEric Cheng { 43038275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 43048275SEric Cheng 43058275SEric Cheng if (mcip == NULL) 43068275SEric Cheng return (0); 43078275SEric Cheng 43088275SEric Cheng /* 43098275SEric Cheng * If the name starts with "vnic", and fe_user_generated is true (to 43108275SEric Cheng * exclude the mcast and active flow entries created implicitly for 43118275SEric Cheng * a vnic, it is a VNIC flow. i.e. vnic1 is a vnic flow, 43128275SEric Cheng * vnic/bge1/mcast1 is not and neither is vnic/bge1/active. 43138275SEric Cheng */ 43148275SEric Cheng if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 && 43158275SEric Cheng (flent->fe_type & FLOW_USER) != 0) { 43168275SEric Cheng return (0); 43178275SEric Cheng } 43188275SEric Cheng 43198275SEric Cheng if (!flent->fe_desc_logged) { 43208275SEric Cheng /* 43218275SEric Cheng * We don't return error because we want to continu the 43228275SEric Cheng * walk in case this is the last walk which means we 43238275SEric Cheng * need to reset fe_desc_logged in all the flows. 43248275SEric Cheng */ 43258275SEric Cheng if (mac_write_flow_desc(flent, mcip) != 0) 43268275SEric Cheng return (0); 43278275SEric Cheng flent->fe_desc_logged = B_TRUE; 43288275SEric Cheng } 43298275SEric Cheng 43308275SEric Cheng /* 43318275SEric Cheng * Regardless of the error, we want to proceed in case we have to 43328275SEric Cheng * reset fe_desc_logged. 
43338275SEric Cheng */ 43348275SEric Cheng (void) mac_write_flow_stats(flent); 43358275SEric Cheng 43368275SEric Cheng if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED)) 43378275SEric Cheng flent->fe_desc_logged = B_FALSE; 43388275SEric Cheng 43398275SEric Cheng return (0); 43408275SEric Cheng } 43418275SEric Cheng 43428275SEric Cheng typedef struct i_mac_log_state_s { 43438275SEric Cheng boolean_t mi_last; 43448275SEric Cheng int mi_fenable; 43458275SEric Cheng int mi_lenable; 43468275SEric Cheng } i_mac_log_state_t; 43478275SEric Cheng 43488275SEric Cheng /* 43498275SEric Cheng * Walk the mac_impl_ts and log the description for each mac client of this mac, 43508275SEric Cheng * if it hasn't already been done. Additionally, log statistics for the link as 43518275SEric Cheng * well. Walk the flow table and log information for each flow as well. 43528275SEric Cheng * If it is the last walk (mci_last), then we turn off mci_desc_logged (and 43538275SEric Cheng * also fe_desc_logged, if flow logging is on) since we want to log the 43548275SEric Cheng * description if and when logging is restarted. 
43558275SEric Cheng */ 43568275SEric Cheng /*ARGSUSED*/ 43578275SEric Cheng static uint_t 43588275SEric Cheng i_mac_log_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 43598275SEric Cheng { 43608275SEric Cheng mac_impl_t *mip = (mac_impl_t *)val; 43618275SEric Cheng i_mac_log_state_t *lstate = (i_mac_log_state_t *)arg; 43628275SEric Cheng int ret; 43638275SEric Cheng mac_client_impl_t *mcip; 43648275SEric Cheng 43658275SEric Cheng /* 43668275SEric Cheng * Only walk the client list for NIC and etherstub 43678275SEric Cheng */ 43688275SEric Cheng if ((mip->mi_state_flags & MIS_DISABLED) || 43698275SEric Cheng ((mip->mi_state_flags & MIS_IS_VNIC) && 43708275SEric Cheng (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) 43718275SEric Cheng return (MH_WALK_CONTINUE); 43728275SEric Cheng 43738275SEric Cheng for (mcip = mip->mi_clients_list; mcip != NULL; 43748275SEric Cheng mcip = mcip->mci_client_next) { 43758275SEric Cheng if (!MCIP_DATAPATH_SETUP(mcip)) 43768275SEric Cheng continue; 43778275SEric Cheng if (lstate->mi_lenable) { 43788275SEric Cheng if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) { 43798275SEric Cheng ret = mac_write_link_desc(mcip); 43808275SEric Cheng if (ret != 0) { 43818275SEric Cheng /* 43828275SEric Cheng * We can't terminate it if this is the last 43838275SEric Cheng * walk, else there might be some links with 43848275SEric Cheng * mi_desc_logged set to true, which means 43858275SEric Cheng * their description won't be logged the next 43868275SEric Cheng * time logging is started (similarly for the 43878275SEric Cheng * flows within such links). We can continue 43888275SEric Cheng * without walking the flow table (i.e. to 43898275SEric Cheng * set fe_desc_logged to false) because we 43908275SEric Cheng * won't have written any flow stuff for this 43918275SEric Cheng * link as we haven't logged the link itself. 
43928275SEric Cheng */ 43938275SEric Cheng if (lstate->mi_last) 43948275SEric Cheng return (MH_WALK_CONTINUE); 43958275SEric Cheng else 43968275SEric Cheng return (MH_WALK_TERMINATE); 43978275SEric Cheng } 43988275SEric Cheng mcip->mci_state_flags |= MCIS_DESC_LOGGED; 43998275SEric Cheng } 44008275SEric Cheng } 44018275SEric Cheng 44028275SEric Cheng if (mac_write_link_stats(mcip) != 0 && !lstate->mi_last) 44038275SEric Cheng return (MH_WALK_TERMINATE); 44048275SEric Cheng 44058275SEric Cheng if (lstate->mi_last) 44068275SEric Cheng mcip->mci_state_flags &= ~MCIS_DESC_LOGGED; 44078275SEric Cheng 44088275SEric Cheng if (lstate->mi_fenable) { 44098275SEric Cheng if (mcip->mci_subflow_tab != NULL) { 44108275SEric Cheng (void) mac_flow_walk(mcip->mci_subflow_tab, 44118275SEric Cheng mac_log_flowinfo, mip); 44128275SEric Cheng } 44138275SEric Cheng } 44148275SEric Cheng } 44158275SEric Cheng return (MH_WALK_CONTINUE); 44168275SEric Cheng } 44178275SEric Cheng 44188275SEric Cheng /* 44198275SEric Cheng * The timer thread that runs every mac_logging_interval seconds and logs 44208275SEric Cheng * link and/or flow information. 
44218275SEric Cheng */ 44228275SEric Cheng /* ARGSUSED */ 44238275SEric Cheng void 44248275SEric Cheng mac_log_linkinfo(void *arg) 44258275SEric Cheng { 44268275SEric Cheng i_mac_log_state_t lstate; 44278275SEric Cheng 44288275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 44298275SEric Cheng if (!mac_flow_log_enable && !mac_link_log_enable) { 44308275SEric Cheng rw_exit(&i_mac_impl_lock); 44318275SEric Cheng return; 44328275SEric Cheng } 44338275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 44348275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 44358275SEric Cheng lstate.mi_last = B_FALSE; 44368275SEric Cheng rw_exit(&i_mac_impl_lock); 44378275SEric Cheng 44388275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 44398275SEric Cheng 44408275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 44418275SEric Cheng if (mac_flow_log_enable || mac_link_log_enable) { 44428275SEric Cheng mac_logging_timer = timeout(mac_log_linkinfo, NULL, 44438275SEric Cheng SEC_TO_TICK(mac_logging_interval)); 44448275SEric Cheng } 44458275SEric Cheng rw_exit(&i_mac_impl_lock); 44468275SEric Cheng } 44478275SEric Cheng 44489073SCathy.Zhou@Sun.COM typedef struct i_mac_fastpath_state_s { 44499073SCathy.Zhou@Sun.COM boolean_t mf_disable; 44509073SCathy.Zhou@Sun.COM int mf_err; 44519073SCathy.Zhou@Sun.COM } i_mac_fastpath_state_t; 44529073SCathy.Zhou@Sun.COM 44539073SCathy.Zhou@Sun.COM /*ARGSUSED*/ 44549073SCathy.Zhou@Sun.COM static uint_t 44559073SCathy.Zhou@Sun.COM i_mac_fastpath_disable_walker(mod_hash_key_t key, mod_hash_val_t *val, 44569073SCathy.Zhou@Sun.COM void *arg) 44579073SCathy.Zhou@Sun.COM { 44589073SCathy.Zhou@Sun.COM i_mac_fastpath_state_t *state = arg; 44599073SCathy.Zhou@Sun.COM mac_handle_t mh = (mac_handle_t)val; 44609073SCathy.Zhou@Sun.COM 44619073SCathy.Zhou@Sun.COM if (state->mf_disable) 44629073SCathy.Zhou@Sun.COM state->mf_err = mac_fastpath_disable(mh); 44639073SCathy.Zhou@Sun.COM else 44649073SCathy.Zhou@Sun.COM 
mac_fastpath_enable(mh); 44659073SCathy.Zhou@Sun.COM 44669073SCathy.Zhou@Sun.COM return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 44679073SCathy.Zhou@Sun.COM } 44689073SCathy.Zhou@Sun.COM 44698275SEric Cheng /* 44708275SEric Cheng * Start the logging timer. 44718275SEric Cheng */ 44729073SCathy.Zhou@Sun.COM int 44738275SEric Cheng mac_start_logusage(mac_logtype_t type, uint_t interval) 44748275SEric Cheng { 44759073SCathy.Zhou@Sun.COM i_mac_fastpath_state_t state = {B_TRUE, 0}; 44769073SCathy.Zhou@Sun.COM int err; 44779073SCathy.Zhou@Sun.COM 44788275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 44798275SEric Cheng switch (type) { 44808275SEric Cheng case MAC_LOGTYPE_FLOW: 44818275SEric Cheng if (mac_flow_log_enable) { 44828275SEric Cheng rw_exit(&i_mac_impl_lock); 44839073SCathy.Zhou@Sun.COM return (0); 44848275SEric Cheng } 44858275SEric Cheng /* FALLTHRU */ 44868275SEric Cheng case MAC_LOGTYPE_LINK: 44878275SEric Cheng if (mac_link_log_enable) { 44888275SEric Cheng rw_exit(&i_mac_impl_lock); 44899073SCathy.Zhou@Sun.COM return (0); 44908275SEric Cheng } 44918275SEric Cheng break; 44928275SEric Cheng default: 44938275SEric Cheng ASSERT(0); 44948275SEric Cheng } 44959073SCathy.Zhou@Sun.COM 44969073SCathy.Zhou@Sun.COM /* Disable fastpath */ 44979073SCathy.Zhou@Sun.COM mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state); 44989073SCathy.Zhou@Sun.COM if ((err = state.mf_err) != 0) { 44999073SCathy.Zhou@Sun.COM /* Reenable fastpath */ 45009073SCathy.Zhou@Sun.COM state.mf_disable = B_FALSE; 45019073SCathy.Zhou@Sun.COM state.mf_err = 0; 45029073SCathy.Zhou@Sun.COM mod_hash_walk(i_mac_impl_hash, 45039073SCathy.Zhou@Sun.COM i_mac_fastpath_disable_walker, &state); 45049073SCathy.Zhou@Sun.COM rw_exit(&i_mac_impl_lock); 45059073SCathy.Zhou@Sun.COM return (err); 45069073SCathy.Zhou@Sun.COM } 45079073SCathy.Zhou@Sun.COM 45089073SCathy.Zhou@Sun.COM switch (type) { 45099073SCathy.Zhou@Sun.COM case MAC_LOGTYPE_FLOW: 
45109073SCathy.Zhou@Sun.COM mac_flow_log_enable = B_TRUE; 45119073SCathy.Zhou@Sun.COM /* FALLTHRU */ 45129073SCathy.Zhou@Sun.COM case MAC_LOGTYPE_LINK: 45139073SCathy.Zhou@Sun.COM mac_link_log_enable = B_TRUE; 45149073SCathy.Zhou@Sun.COM break; 45159073SCathy.Zhou@Sun.COM } 45169073SCathy.Zhou@Sun.COM 45178275SEric Cheng mac_logging_interval = interval; 45188275SEric Cheng rw_exit(&i_mac_impl_lock); 45198275SEric Cheng mac_log_linkinfo(NULL); 45209073SCathy.Zhou@Sun.COM return (0); 45218275SEric Cheng } 45228275SEric Cheng 45238275SEric Cheng /* 45248275SEric Cheng * Stop the logging timer if both Link and Flow logging are turned off. 45258275SEric Cheng */ 45268275SEric Cheng void 45278275SEric Cheng mac_stop_logusage(mac_logtype_t type) 45288275SEric Cheng { 45298275SEric Cheng i_mac_log_state_t lstate; 45309073SCathy.Zhou@Sun.COM i_mac_fastpath_state_t state = {B_FALSE, 0}; 45318275SEric Cheng 45328275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 45338275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 45348275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 45358275SEric Cheng 45368275SEric Cheng /* Last walk */ 45378275SEric Cheng lstate.mi_last = B_TRUE; 45388275SEric Cheng 45398275SEric Cheng switch (type) { 45408275SEric Cheng case MAC_LOGTYPE_FLOW: 45418275SEric Cheng if (lstate.mi_fenable) { 45428275SEric Cheng ASSERT(mac_link_log_enable); 45438275SEric Cheng mac_flow_log_enable = B_FALSE; 45448275SEric Cheng mac_link_log_enable = B_FALSE; 45458275SEric Cheng break; 45468275SEric Cheng } 45478275SEric Cheng /* FALLTHRU */ 45488275SEric Cheng case MAC_LOGTYPE_LINK: 45498275SEric Cheng if (!lstate.mi_lenable || mac_flow_log_enable) { 45508275SEric Cheng rw_exit(&i_mac_impl_lock); 45518275SEric Cheng return; 45528275SEric Cheng } 45538275SEric Cheng mac_link_log_enable = B_FALSE; 45548275SEric Cheng break; 45558275SEric Cheng default: 45568275SEric Cheng ASSERT(0); 45578275SEric Cheng } 45589073SCathy.Zhou@Sun.COM 45599073SCathy.Zhou@Sun.COM /* 
Reenable fastpath */ 45609073SCathy.Zhou@Sun.COM mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state); 45619073SCathy.Zhou@Sun.COM 45628275SEric Cheng rw_exit(&i_mac_impl_lock); 45638275SEric Cheng (void) untimeout(mac_logging_timer); 45648275SEric Cheng mac_logging_timer = 0; 45658275SEric Cheng 45668275SEric Cheng /* Last walk */ 45678275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 45688275SEric Cheng } 45698275SEric Cheng 45708275SEric Cheng /* 45718275SEric Cheng * Walk the rx and tx SRS/SRs for a flow and update the priority value. 45728275SEric Cheng */ 45738275SEric Cheng void 45748275SEric Cheng mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) 45758275SEric Cheng { 45768275SEric Cheng pri_t pri; 45778275SEric Cheng int count; 45788275SEric Cheng mac_soft_ring_set_t *mac_srs; 45798275SEric Cheng 45808275SEric Cheng if (flent->fe_rx_srs_cnt <= 0) 45818275SEric Cheng return; 45828275SEric Cheng 45838275SEric Cheng if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type == 45848275SEric Cheng SRST_FLOW) { 45858275SEric Cheng pri = FLOW_PRIORITY(mcip->mci_min_pri, 45868275SEric Cheng mcip->mci_max_pri, 45878275SEric Cheng flent->fe_resource_props.mrp_priority); 45888275SEric Cheng } else { 45898275SEric Cheng pri = mcip->mci_max_pri; 45908275SEric Cheng } 45918275SEric Cheng 45928275SEric Cheng for (count = 0; count < flent->fe_rx_srs_cnt; count++) { 45938275SEric Cheng mac_srs = flent->fe_rx_srs[count]; 45948275SEric Cheng mac_update_srs_priority(mac_srs, pri); 45958275SEric Cheng } 45968275SEric Cheng /* 45978275SEric Cheng * If we have a Tx SRS, we need to modify all the threads associated 45988275SEric Cheng * with it. 
45998275SEric Cheng */ 46008275SEric Cheng if (flent->fe_tx_srs != NULL) 46018275SEric Cheng mac_update_srs_priority(flent->fe_tx_srs, pri); 46028275SEric Cheng } 46038275SEric Cheng 46048275SEric Cheng /* 46058275SEric Cheng * RX and TX rings are reserved according to different semantics depending 46068275SEric Cheng * on the requests from the MAC clients and type of rings: 46078275SEric Cheng * 46088275SEric Cheng * On the Tx side, by default we reserve individual rings, independently from 46098275SEric Cheng * the groups. 46108275SEric Cheng * 46118275SEric Cheng * On the Rx side, the reservation is at the granularity of the group 46128275SEric Cheng * of rings, and used for v12n level 1 only. It has a special case for the 46138275SEric Cheng * primary client. 46148275SEric Cheng * 46158275SEric Cheng * If a share is allocated to a MAC client, we allocate a TX group and an 46168275SEric Cheng * RX group to the client, and assign TX rings and RX rings to these 46178275SEric Cheng * groups according to information gathered from the driver through 46188275SEric Cheng * the share capability. 46198275SEric Cheng * 46208275SEric Cheng * The foreseable evolution of Rx rings will handle v12n level 2 and higher 46218275SEric Cheng * to allocate individual rings out of a group and program the hw classifier 46228275SEric Cheng * based on IP address or higher level criteria. 46238275SEric Cheng */ 46248275SEric Cheng 46258275SEric Cheng /* 46268275SEric Cheng * mac_reserve_tx_ring() 46278275SEric Cheng * Reserve a unused ring by marking it with MR_INUSE state. 46288275SEric Cheng * As reserved, the ring is ready to function. 46298275SEric Cheng * 46308275SEric Cheng * Notes for Hybrid I/O: 46318275SEric Cheng * 46328275SEric Cheng * If a specific ring is needed, it is specified through the desired_ring 46338275SEric Cheng * argument. Otherwise that argument is set to NULL. 
46348275SEric Cheng * If the desired ring was previous allocated to another client, this 46358275SEric Cheng * function swaps it with a new ring from the group of unassigned rings. 46368275SEric Cheng */ 46378275SEric Cheng mac_ring_t * 46388275SEric Cheng mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) 46398275SEric Cheng { 46408275SEric Cheng mac_group_t *group; 46418275SEric Cheng mac_ring_t *ring; 46428275SEric Cheng 46438275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 46448275SEric Cheng 46458275SEric Cheng if (mip->mi_tx_groups == NULL) 46468275SEric Cheng return (NULL); 46478275SEric Cheng 46488275SEric Cheng /* 46498275SEric Cheng * Find an available ring and start it before changing its status. 46508275SEric Cheng * The unassigned rings are at the end of the mi_tx_groups 46518275SEric Cheng * array. 46528275SEric Cheng */ 46538275SEric Cheng group = mip->mi_tx_groups + mip->mi_tx_group_count; 46548275SEric Cheng 46558275SEric Cheng for (ring = group->mrg_rings; ring != NULL; 46568275SEric Cheng ring = ring->mr_next) { 46578275SEric Cheng if (desired_ring == NULL) { 46588275SEric Cheng if (ring->mr_state == MR_FREE) 46598275SEric Cheng /* wanted any free ring and found one */ 46608275SEric Cheng break; 46618275SEric Cheng } else { 46628275SEric Cheng mac_ring_t *sring; 46638275SEric Cheng mac_client_impl_t *client; 46648275SEric Cheng mac_soft_ring_set_t *srs; 46658275SEric Cheng 46668275SEric Cheng if (ring != desired_ring) 46678275SEric Cheng /* wants a desired ring but this one ain't it */ 46688275SEric Cheng continue; 46698275SEric Cheng 46708275SEric Cheng if (ring->mr_state == MR_FREE) 46718275SEric Cheng break; 46728275SEric Cheng 46738275SEric Cheng /* 46748275SEric Cheng * Found the desired ring but it's already in use. 46758275SEric Cheng * Swap it with a new ring. 
46768275SEric Cheng */ 46778275SEric Cheng 46788275SEric Cheng /* find the client which owns that ring */ 46798275SEric Cheng for (client = mip->mi_clients_list; client != NULL; 46808275SEric Cheng client = client->mci_client_next) { 46818275SEric Cheng srs = MCIP_TX_SRS(client); 46828275SEric Cheng if (srs != NULL && mac_tx_srs_ring_present(srs, 46838275SEric Cheng desired_ring)) { 46848275SEric Cheng /* found our ring */ 46858275SEric Cheng break; 46868275SEric Cheng } 46878275SEric Cheng } 46888400SNicolas.Droux@Sun.COM if (client == NULL) { 46898400SNicolas.Droux@Sun.COM /* 46908400SNicolas.Droux@Sun.COM * The TX ring is in use, but it's not 46918400SNicolas.Droux@Sun.COM * associated with any clients, so it 46928400SNicolas.Droux@Sun.COM * has to be the default ring. In that 46938400SNicolas.Droux@Sun.COM * case we can simply assign a new ring 46948400SNicolas.Droux@Sun.COM * as the default ring, and we're done. 46958400SNicolas.Droux@Sun.COM */ 46968400SNicolas.Droux@Sun.COM ASSERT(mip->mi_default_tx_ring == 46978400SNicolas.Droux@Sun.COM (mac_ring_handle_t)desired_ring); 46988400SNicolas.Droux@Sun.COM 46998400SNicolas.Droux@Sun.COM /* 47008400SNicolas.Droux@Sun.COM * Quiesce all clients on top of 47018400SNicolas.Droux@Sun.COM * the NIC to make sure there are no 47028400SNicolas.Droux@Sun.COM * pending threads still relying on 47038400SNicolas.Droux@Sun.COM * that default ring, for example 47048400SNicolas.Droux@Sun.COM * the multicast path. 
47058400SNicolas.Droux@Sun.COM */ 47068400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 47078400SNicolas.Droux@Sun.COM client != NULL; 47088400SNicolas.Droux@Sun.COM client = client->mci_client_next) { 47098400SNicolas.Droux@Sun.COM mac_tx_client_quiesce(client, 47108400SNicolas.Droux@Sun.COM SRS_QUIESCE); 47118400SNicolas.Droux@Sun.COM } 47128400SNicolas.Droux@Sun.COM 47138400SNicolas.Droux@Sun.COM mip->mi_default_tx_ring = (mac_ring_handle_t) 47148400SNicolas.Droux@Sun.COM mac_reserve_tx_ring(mip, NULL); 47158400SNicolas.Droux@Sun.COM 47168400SNicolas.Droux@Sun.COM /* resume the clients */ 47178400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 47188400SNicolas.Droux@Sun.COM client != NULL; 47198400SNicolas.Droux@Sun.COM client = client->mci_client_next) 47208400SNicolas.Droux@Sun.COM mac_tx_client_restart(client); 47218400SNicolas.Droux@Sun.COM 47228400SNicolas.Droux@Sun.COM break; 47238400SNicolas.Droux@Sun.COM } 47248275SEric Cheng 47258275SEric Cheng /* 47268275SEric Cheng * Note that we cannot simply invoke the group 47278275SEric Cheng * add/rem routines since the client doesn't have a 47288275SEric Cheng * TX group. So we need to instead add/remove 47298275SEric Cheng * the rings from the SRS. 47308275SEric Cheng */ 47318275SEric Cheng ASSERT(client->mci_share == NULL); 47328275SEric Cheng 47338275SEric Cheng /* first quiece the client */ 47348275SEric Cheng mac_tx_client_quiesce(client, SRS_QUIESCE); 47358275SEric Cheng 47368275SEric Cheng /* give a new ring to the client... */ 47378275SEric Cheng sring = mac_reserve_tx_ring(mip, NULL); 47388275SEric Cheng if (sring != NULL) { 47398275SEric Cheng /* 47408275SEric Cheng * There are no other available ring 47418275SEric Cheng * on that MAC instance. The client 47428275SEric Cheng * will fallback to the shared TX 47438275SEric Cheng * ring. 47448275SEric Cheng */ 47458275SEric Cheng mac_tx_srs_add_ring(srs, sring); 47468275SEric Cheng } 47478275SEric Cheng 47488275SEric Cheng /* ... 
in exchange for our desired ring */ 47498275SEric Cheng mac_tx_srs_del_ring(srs, desired_ring); 47508275SEric Cheng 47518275SEric Cheng /* restart the client */ 47528275SEric Cheng mac_tx_client_restart(client); 47538275SEric Cheng 47548400SNicolas.Droux@Sun.COM if (mip->mi_default_tx_ring == 47558400SNicolas.Droux@Sun.COM (mac_ring_handle_t)desired_ring) { 47568400SNicolas.Droux@Sun.COM /* 47578400SNicolas.Droux@Sun.COM * The desired ring is the default ring, 47588400SNicolas.Droux@Sun.COM * and there are one or more clients 47598400SNicolas.Droux@Sun.COM * using that default ring directly. 47608400SNicolas.Droux@Sun.COM */ 47618400SNicolas.Droux@Sun.COM mip->mi_default_tx_ring = 47628400SNicolas.Droux@Sun.COM (mac_ring_handle_t)sring; 47638400SNicolas.Droux@Sun.COM /* 47648400SNicolas.Droux@Sun.COM * Find clients using default ring and 47658400SNicolas.Droux@Sun.COM * swap it with the new default ring. 47668400SNicolas.Droux@Sun.COM */ 47678400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 47688400SNicolas.Droux@Sun.COM client != NULL; 47698400SNicolas.Droux@Sun.COM client = client->mci_client_next) { 47708400SNicolas.Droux@Sun.COM srs = MCIP_TX_SRS(client); 47718400SNicolas.Droux@Sun.COM if (srs != NULL && 47728400SNicolas.Droux@Sun.COM mac_tx_srs_ring_present(srs, 47738400SNicolas.Droux@Sun.COM desired_ring)) { 47748400SNicolas.Droux@Sun.COM /* first quiece the client */ 47758400SNicolas.Droux@Sun.COM mac_tx_client_quiesce(client, 47768400SNicolas.Droux@Sun.COM SRS_QUIESCE); 47778400SNicolas.Droux@Sun.COM 47788400SNicolas.Droux@Sun.COM /* 47798400SNicolas.Droux@Sun.COM * Give it the new default 47808400SNicolas.Droux@Sun.COM * ring, and remove the old 47818400SNicolas.Droux@Sun.COM * one. 
47828400SNicolas.Droux@Sun.COM */ 47838400SNicolas.Droux@Sun.COM if (sring != NULL) { 47848400SNicolas.Droux@Sun.COM mac_tx_srs_add_ring(srs, 47858400SNicolas.Droux@Sun.COM sring); 47868400SNicolas.Droux@Sun.COM } 47878400SNicolas.Droux@Sun.COM mac_tx_srs_del_ring(srs, 47888400SNicolas.Droux@Sun.COM desired_ring); 47898400SNicolas.Droux@Sun.COM 47908400SNicolas.Droux@Sun.COM /* restart the client */ 47918400SNicolas.Droux@Sun.COM mac_tx_client_restart(client); 47928400SNicolas.Droux@Sun.COM } 47938400SNicolas.Droux@Sun.COM } 47948400SNicolas.Droux@Sun.COM } 47958275SEric Cheng break; 47968275SEric Cheng } 47978275SEric Cheng } 47988275SEric Cheng 47998275SEric Cheng if (ring != NULL) { 48008275SEric Cheng if (mac_start_ring(ring) != 0) 48018275SEric Cheng return (NULL); 48028275SEric Cheng ring->mr_state = MR_INUSE; 48038275SEric Cheng } 48048275SEric Cheng 48058275SEric Cheng return (ring); 48068275SEric Cheng } 48078275SEric Cheng 48088275SEric Cheng /* 48098275SEric Cheng * Minimum number of rings to leave in the default TX group when allocating 48108275SEric Cheng * rings to new clients. 48118275SEric Cheng */ 48128275SEric Cheng static uint_t mac_min_rx_default_rings = 1; 48138275SEric Cheng 48148275SEric Cheng /* 48158275SEric Cheng * Populate a zero-ring group with rings. If the share is non-NULL, 48168275SEric Cheng * the rings are chosen according to that share. 48178275SEric Cheng * Invoked after allocating a new RX or TX group through 48188275SEric Cheng * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 48198275SEric Cheng * Returns zero on success, an errno otherwise. 
48208275SEric Cheng */ 48218275SEric Cheng int 48228275SEric Cheng i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 48238275SEric Cheng mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) 48248275SEric Cheng { 48258275SEric Cheng mac_ring_t **rings, *tmp_ring[1], *ring; 48268275SEric Cheng uint_t nrings; 48278275SEric Cheng int rv, i, j; 48288275SEric Cheng 48298275SEric Cheng ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && 48308275SEric Cheng mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 48318275SEric Cheng ASSERT(new_group->mrg_cur_count == 0); 48328275SEric Cheng 48338275SEric Cheng /* 48348275SEric Cheng * First find the rings to allocate to the group. 48358275SEric Cheng */ 48368275SEric Cheng if (share != NULL) { 48378275SEric Cheng /* get rings through ms_squery() */ 48388275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 48398275SEric Cheng ASSERT(nrings != 0); 48408275SEric Cheng rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 48418275SEric Cheng KM_SLEEP); 48428275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, 48438275SEric Cheng (mac_ring_handle_t *)rings, &nrings); 48448275SEric Cheng } else { 48458275SEric Cheng /* this function is called for TX only with a share */ 48468275SEric Cheng ASSERT(ring_type == MAC_RING_TYPE_RX); 48478275SEric Cheng /* 48488275SEric Cheng * Pick one ring from default group. 48498275SEric Cheng * 48508275SEric Cheng * for now pick the second ring which requires the first ring 48518275SEric Cheng * at index 0 to stay in the default group, since it is the 48528275SEric Cheng * ring which carries the multicast traffic. 48538275SEric Cheng * We need a better way for a driver to indicate this, 48548275SEric Cheng * for example a per-ring flag. 
48558275SEric Cheng */ 48568275SEric Cheng for (ring = src_group->mrg_rings; ring != NULL; 48578275SEric Cheng ring = ring->mr_next) { 48588275SEric Cheng if (ring->mr_index != 0) 48598275SEric Cheng break; 48608275SEric Cheng } 48618275SEric Cheng ASSERT(ring != NULL); 48628275SEric Cheng nrings = 1; 48638275SEric Cheng tmp_ring[0] = ring; 48648275SEric Cheng rings = tmp_ring; 48658275SEric Cheng } 48668275SEric Cheng 48678275SEric Cheng switch (ring_type) { 48688275SEric Cheng case MAC_RING_TYPE_RX: 48698275SEric Cheng if (src_group->mrg_cur_count - nrings < 48708275SEric Cheng mac_min_rx_default_rings) { 48718275SEric Cheng /* we ran out of rings */ 48728275SEric Cheng return (ENOSPC); 48738275SEric Cheng } 48748275SEric Cheng 48758275SEric Cheng /* move receive rings to new group */ 48768275SEric Cheng for (i = 0; i < nrings; i++) { 48778275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 48788275SEric Cheng if (rv != 0) { 48798275SEric Cheng /* move rings back on failure */ 48808275SEric Cheng for (j = 0; j < i; j++) { 48818275SEric Cheng (void) mac_group_mov_ring(mip, 48828275SEric Cheng src_group, rings[j]); 48838275SEric Cheng } 48848275SEric Cheng return (rv); 48858275SEric Cheng } 48868275SEric Cheng } 48878275SEric Cheng break; 48888275SEric Cheng 48898275SEric Cheng case MAC_RING_TYPE_TX: { 48908275SEric Cheng mac_ring_t *tmp_ring; 48918275SEric Cheng 48928275SEric Cheng /* move the TX rings to the new group */ 48938275SEric Cheng ASSERT(src_group == NULL); 48948275SEric Cheng for (i = 0; i < nrings; i++) { 48958275SEric Cheng /* get the desired ring */ 48968275SEric Cheng tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 48978275SEric Cheng ASSERT(tmp_ring == rings[i]); 48988275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 48998275SEric Cheng if (rv != 0) { 49008275SEric Cheng /* cleanup on failure */ 49018275SEric Cheng for (j = 0; j < i; j++) { 49028275SEric Cheng (void) mac_group_mov_ring(mip, 49038275SEric Cheng 
mip->mi_tx_groups + 49048275SEric Cheng mip->mi_tx_group_count, rings[j]); 49058275SEric Cheng } 49068275SEric Cheng } 49078275SEric Cheng } 49088275SEric Cheng break; 49098275SEric Cheng } 49108275SEric Cheng } 49118275SEric Cheng 49128275SEric Cheng if (share != NULL) { 49138275SEric Cheng /* add group to share */ 49148275SEric Cheng mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 49158275SEric Cheng /* free temporary array of rings */ 49168275SEric Cheng kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 49178275SEric Cheng } 49188275SEric Cheng 49198275SEric Cheng return (0); 49208275SEric Cheng } 49218275SEric Cheng 49228275SEric Cheng void 49238275SEric Cheng mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 49248275SEric Cheng { 49258275SEric Cheng mac_grp_client_t *mgcp; 49268275SEric Cheng 49278275SEric Cheng for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 49288275SEric Cheng if (mgcp->mgc_client == mcip) 49298275SEric Cheng break; 49308275SEric Cheng } 49318275SEric Cheng 49328275SEric Cheng VERIFY(mgcp == NULL); 49338275SEric Cheng 49348275SEric Cheng mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 49358275SEric Cheng mgcp->mgc_client = mcip; 49368275SEric Cheng mgcp->mgc_next = grp->mrg_clients; 49378275SEric Cheng grp->mrg_clients = mgcp; 49388275SEric Cheng 49398275SEric Cheng } 49408275SEric Cheng 49418275SEric Cheng void 49428275SEric Cheng mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 49438275SEric Cheng { 49448275SEric Cheng mac_grp_client_t *mgcp, **pprev; 49458275SEric Cheng 49468275SEric Cheng for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 49478275SEric Cheng pprev = &mgcp->mgc_next, mgcp = *pprev) { 49488275SEric Cheng if (mgcp->mgc_client == mcip) 49498275SEric Cheng break; 49508275SEric Cheng } 49518275SEric Cheng 49528275SEric Cheng ASSERT(mgcp != NULL); 49538275SEric Cheng 49548275SEric Cheng *pprev = mgcp->mgc_next; 49558275SEric Cheng 
kmem_free(mgcp, sizeof (mac_grp_client_t)); 49568275SEric Cheng } 49578275SEric Cheng 49588275SEric Cheng /* 49598275SEric Cheng * mac_reserve_rx_group() 49608275SEric Cheng * 49618275SEric Cheng * Finds an available group and exclusively reserves it for a client. 49628275SEric Cheng * The group is chosen to suit the flow's resource controls (bandwidth and 49638275SEric Cheng * fanout requirements) and the address type. 49648275SEric Cheng * If the requestor is the pimary MAC then return the group with the 49658275SEric Cheng * largest number of rings, otherwise the default ring when available. 49668275SEric Cheng */ 49678275SEric Cheng mac_group_t * 49688275SEric Cheng mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, 49698275SEric Cheng mac_rx_group_reserve_type_t rtype) 49708275SEric Cheng { 49718275SEric Cheng mac_share_handle_t share = mcip->mci_share; 49728275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 49738275SEric Cheng mac_group_t *grp = NULL; 49748275SEric Cheng int i, start, loopcount; 49758275SEric Cheng int err; 49768275SEric Cheng mac_address_t *map; 49778275SEric Cheng 49788275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 49798275SEric Cheng 49808275SEric Cheng /* Check if a group already has this mac address (case of VLANs) */ 49818275SEric Cheng if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) 49828275SEric Cheng return (map->ma_group); 49838275SEric Cheng 49848275SEric Cheng if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || 49858275SEric Cheng rtype == MAC_RX_NO_RESERVE) 49868275SEric Cheng return (NULL); 49878275SEric Cheng 49888275SEric Cheng /* 49898275SEric Cheng * Try to exclusively reserve a RX group. 
49908275SEric Cheng * 49918275SEric Cheng * For flows requires SW_RING it always goes to the default group 49928275SEric Cheng * (Until we can explicitely call out default groups (CR 6695600), 49938275SEric Cheng * we assume that the default group is always at position zero); 49948275SEric Cheng * 49958275SEric Cheng * For flows requires HW_DEFAULT_RING (unicast flow of the primary 49968275SEric Cheng * client), try to reserve the default RX group only. 49978275SEric Cheng * 49988275SEric Cheng * For flows requires HW_RING (unicast flow of other clients), try 49998275SEric Cheng * to reserve non-default RX group then the default group. 50008275SEric Cheng */ 50018275SEric Cheng switch (rtype) { 50028275SEric Cheng case MAC_RX_RESERVE_DEFAULT: 50038275SEric Cheng start = 0; 50048275SEric Cheng loopcount = 1; 50058275SEric Cheng break; 50068275SEric Cheng case MAC_RX_RESERVE_NONDEFAULT: 50078275SEric Cheng start = 1; 50088275SEric Cheng loopcount = mip->mi_rx_group_count; 50098275SEric Cheng } 50108275SEric Cheng 50118275SEric Cheng for (i = start; i < start + loopcount; i++) { 50128275SEric Cheng grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; 50138275SEric Cheng 50148275SEric Cheng DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 50158275SEric Cheng int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 50168275SEric Cheng 50178275SEric Cheng /* 50188275SEric Cheng * Check to see whether this mac client is the only client 50198275SEric Cheng * on this RX group. If not, we cannot exclusively reserve 50208275SEric Cheng * this RX group. 50218275SEric Cheng */ 50228275SEric Cheng if (!MAC_RX_GROUP_NO_CLIENT(grp) && 50238275SEric Cheng (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { 50248275SEric Cheng continue; 50258275SEric Cheng } 50268275SEric Cheng 50278275SEric Cheng /* 50288275SEric Cheng * This group could already be SHARED by other multicast 50298275SEric Cheng * flows on this client. 
In that case, the group would 50308275SEric Cheng * be shared and has already been started. 50318275SEric Cheng */ 50328275SEric Cheng ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 50338275SEric Cheng 50348275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 50358275SEric Cheng (mac_start_group(grp) != 0)) { 50368275SEric Cheng continue; 50378275SEric Cheng } 50388275SEric Cheng 50398275SEric Cheng if ((i % mip->mi_rx_group_count) == 0 || 50408275SEric Cheng mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 50418275SEric Cheng break; 50428275SEric Cheng } 50438275SEric Cheng 50448275SEric Cheng ASSERT(grp->mrg_cur_count == 0); 50458275SEric Cheng 50468275SEric Cheng /* 50478275SEric Cheng * Populate the group. Rings should be taken 50488275SEric Cheng * from the default group at position 0 for now. 50498275SEric Cheng */ 50508275SEric Cheng 50518275SEric Cheng err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 50528275SEric Cheng &mip->mi_rx_groups[0], grp, share); 50538275SEric Cheng if (err == 0) 50548275SEric Cheng break; 50558275SEric Cheng 50568275SEric Cheng DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 50578275SEric Cheng mip->mi_name, int, grp->mrg_index, int, err); 50588275SEric Cheng 50598275SEric Cheng /* 50608275SEric Cheng * It's a dynamic group but the grouping operation failed. 50618275SEric Cheng */ 50628275SEric Cheng mac_stop_group(grp); 50638275SEric Cheng } 50648275SEric Cheng 50658275SEric Cheng if (i == start + loopcount) 50668275SEric Cheng return (NULL); 50678275SEric Cheng 50688275SEric Cheng ASSERT(grp != NULL); 50698275SEric Cheng 50708275SEric Cheng DTRACE_PROBE2(rx__group__reserved, 50718275SEric Cheng char *, mip->mi_name, int, grp->mrg_index); 50728275SEric Cheng return (grp); 50738275SEric Cheng } 50748275SEric Cheng 50758275SEric Cheng /* 50768275SEric Cheng * mac_rx_release_group() 50778275SEric Cheng * 50788275SEric Cheng * This is called when there are no clients left for the group. 
50798275SEric Cheng * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 50808275SEric Cheng * and if it is a non default group, the shares are removed and 50818275SEric Cheng * all rings are assigned back to default group. 50828275SEric Cheng */ 50838275SEric Cheng void 50848275SEric Cheng mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 50858275SEric Cheng { 50868275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 50878275SEric Cheng mac_ring_t *ring; 50888275SEric Cheng 50898275SEric Cheng ASSERT(group != &mip->mi_rx_groups[0]); 50908275SEric Cheng 50918275SEric Cheng /* 50928275SEric Cheng * This is the case where there are no clients left. Any 50938275SEric Cheng * SRS etc on this group have also be quiesced. 50948275SEric Cheng */ 50958275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 50968275SEric Cheng if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 50978275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 50988275SEric Cheng /* 50998275SEric Cheng * Remove the SRS associated with the HW ring. 51008275SEric Cheng * As a result, polling will be disabled. 51018275SEric Cheng */ 51028275SEric Cheng ring->mr_srs = NULL; 51038275SEric Cheng } 51048275SEric Cheng ASSERT(ring->mr_state == MR_INUSE); 51058275SEric Cheng mac_stop_ring(ring); 51068275SEric Cheng ring->mr_state = MR_FREE; 51078275SEric Cheng ring->mr_flag = 0; 51088275SEric Cheng } 51098275SEric Cheng 51108275SEric Cheng /* remove group from share */ 51118275SEric Cheng if (mcip->mci_share != NULL) { 51128275SEric Cheng mip->mi_share_capab.ms_sremove(mcip->mci_share, 51138275SEric Cheng group->mrg_driver); 51148275SEric Cheng } 51158275SEric Cheng 51168275SEric Cheng if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 51178275SEric Cheng mac_ring_t *ring; 51188275SEric Cheng 51198275SEric Cheng /* 51208275SEric Cheng * Rings were dynamically allocated to group. 51218275SEric Cheng * Move rings back to default group. 
51228275SEric Cheng */ 51238275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 51248275SEric Cheng (void) mac_group_mov_ring(mip, 51258275SEric Cheng &mip->mi_rx_groups[0], ring); 51268275SEric Cheng } 51278275SEric Cheng } 51288275SEric Cheng mac_stop_group(group); 51298275SEric Cheng /* 51308275SEric Cheng * Possible improvement: See if we can assign the group just released 51318275SEric Cheng * to a another client of the mip 51328275SEric Cheng */ 51338275SEric Cheng } 51348275SEric Cheng 51358275SEric Cheng /* 51368275SEric Cheng * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 51378275SEric Cheng * when a share was allocated to the client. 51388275SEric Cheng */ 51398275SEric Cheng mac_group_t * 51408275SEric Cheng mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) 51418275SEric Cheng { 51428275SEric Cheng mac_group_t *grp; 51438275SEric Cheng int rv, i; 51448275SEric Cheng 51458275SEric Cheng /* 51468275SEric Cheng * TX groups are currently allocated only to MAC clients 51478275SEric Cheng * which are associated with a share. Since we have a fixed 51488275SEric Cheng * number of share and groups, and we already successfully 51498275SEric Cheng * allocated a share, find an available TX group. 
51508275SEric Cheng */ 51518275SEric Cheng ASSERT(share != NULL); 51528275SEric Cheng ASSERT(mip->mi_tx_group_free > 0); 51538275SEric Cheng 51548275SEric Cheng for (i = 0; i < mip->mi_tx_group_count; i++) { 51558275SEric Cheng grp = &mip->mi_tx_groups[i]; 51568275SEric Cheng 51578275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 51588275SEric Cheng (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) 51598275SEric Cheng continue; 51608275SEric Cheng 51618275SEric Cheng rv = mac_start_group(grp); 51628275SEric Cheng ASSERT(rv == 0); 51638275SEric Cheng 51648275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_RESERVED; 51658275SEric Cheng break; 51668275SEric Cheng } 51678275SEric Cheng 51688275SEric Cheng ASSERT(grp != NULL); 51698275SEric Cheng 51708275SEric Cheng /* 51718275SEric Cheng * Populate the group. Rings should be taken from the group 51728275SEric Cheng * of unassigned rings, which is past the array of TX 51738275SEric Cheng * groups adversized by the driver. 51748275SEric Cheng */ 51758275SEric Cheng rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, 51768275SEric Cheng grp, share); 51778275SEric Cheng if (rv != 0) { 51788275SEric Cheng DTRACE_PROBE3(tx__group__reserve__alloc__rings, 51798275SEric Cheng char *, mip->mi_name, int, grp->mrg_index, int, rv); 51808275SEric Cheng 51818275SEric Cheng mac_stop_group(grp); 51828275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_UNINIT; 51838275SEric Cheng 51848275SEric Cheng return (NULL); 51858275SEric Cheng } 51868275SEric Cheng 51878275SEric Cheng mip->mi_tx_group_free--; 51888275SEric Cheng 51898275SEric Cheng return (grp); 51908275SEric Cheng } 51918275SEric Cheng 51928275SEric Cheng void 51938275SEric Cheng mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) 51948275SEric Cheng { 51958275SEric Cheng mac_client_impl_t *mcip = grp->mrg_tx_client; 51968275SEric Cheng mac_share_handle_t share = mcip->mci_share; 51978275SEric Cheng mac_ring_t *ring; 51988275SEric Cheng 51998275SEric Cheng 
ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 52008275SEric Cheng ASSERT(share != NULL); 52018275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); 52028275SEric Cheng 52038275SEric Cheng mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 52048275SEric Cheng while ((ring = grp->mrg_rings) != NULL) { 52058275SEric Cheng /* move the ring back to the pool */ 52068275SEric Cheng (void) mac_group_mov_ring(mip, mip->mi_tx_groups + 52078275SEric Cheng mip->mi_tx_group_count, ring); 52088275SEric Cheng } 52098275SEric Cheng mac_stop_group(grp); 52108275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 52118275SEric Cheng grp->mrg_tx_client = NULL; 52128275SEric Cheng mip->mi_tx_group_free++; 52138275SEric Cheng } 52148275SEric Cheng 52158275SEric Cheng /* 52168275SEric Cheng * This is a 1-time control path activity initiated by the client (IP). 52178275SEric Cheng * The mac perimeter protects against other simultaneous control activities, 52188275SEric Cheng * for example an ioctl that attempts to change the degree of fanout and 52198275SEric Cheng * increase or decrease the number of softrings associated with this Tx SRS. 
52208275SEric Cheng */ 52218275SEric Cheng static mac_tx_notify_cb_t * 52228275SEric Cheng mac_client_tx_notify_add(mac_client_impl_t *mcip, 52238275SEric Cheng mac_tx_notify_t notify, void *arg) 52248275SEric Cheng { 52258275SEric Cheng mac_cb_info_t *mcbi; 52268275SEric Cheng mac_tx_notify_cb_t *mtnfp; 52278275SEric Cheng 52288275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 52298275SEric Cheng 52308275SEric Cheng mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 52318275SEric Cheng mtnfp->mtnf_fn = notify; 52328275SEric Cheng mtnfp->mtnf_arg = arg; 52338275SEric Cheng mtnfp->mtnf_link.mcb_objp = mtnfp; 52348275SEric Cheng mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 52358275SEric Cheng mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 52368275SEric Cheng 52378275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 52388275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 52398275SEric Cheng mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 52408275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 52418275SEric Cheng return (mtnfp); 52428275SEric Cheng } 52438275SEric Cheng 52448275SEric Cheng static void 52458275SEric Cheng mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 52468275SEric Cheng { 52478275SEric Cheng mac_cb_info_t *mcbi; 52488275SEric Cheng mac_cb_t **cblist; 52498275SEric Cheng 52508275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 52518275SEric Cheng 52528275SEric Cheng if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 52538275SEric Cheng &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 52548275SEric Cheng cmn_err(CE_WARN, 52558275SEric Cheng "mac_client_tx_notify_remove: callback not " 52568275SEric Cheng "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 52578275SEric Cheng return; 52588275SEric Cheng } 52598275SEric Cheng 52608275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 52618275SEric Cheng cblist = 
&mcip->mci_tx_notify_cb_list; 52628275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 52638275SEric Cheng if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 52648275SEric Cheng kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 52658275SEric Cheng else 52668275SEric Cheng mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 52678275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 52688275SEric Cheng } 52698275SEric Cheng 52708275SEric Cheng /* 52718275SEric Cheng * mac_client_tx_notify(): 52728275SEric Cheng * call to add and remove flow control callback routine. 52738275SEric Cheng */ 52748275SEric Cheng mac_tx_notify_handle_t 52758275SEric Cheng mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 52768275SEric Cheng void *ptr) 52778275SEric Cheng { 52788275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 52798275SEric Cheng mac_tx_notify_cb_t *mtnfp = NULL; 52808275SEric Cheng 52818275SEric Cheng i_mac_perim_enter(mcip->mci_mip); 52828275SEric Cheng 52838275SEric Cheng if (callb_func != NULL) { 52848275SEric Cheng /* Add a notify callback */ 52858275SEric Cheng mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 52868275SEric Cheng } else { 52878275SEric Cheng mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 52888275SEric Cheng } 52898275SEric Cheng i_mac_perim_exit(mcip->mci_mip); 52908275SEric Cheng 52918275SEric Cheng return ((mac_tx_notify_handle_t)mtnfp); 52928275SEric Cheng } 5293