10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51852Syz147064 * Common Development and Distribution License (the "License"). 61852Syz147064 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 215084Sjohnlev 220Sstevel@tonic-gate /* 238603SGirish.Moodalbail@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * MAC Services Module 298275SEric Cheng * 308275SEric Cheng * The GLDv3 framework locking - The MAC layer 318275SEric Cheng * -------------------------------------------- 328275SEric Cheng * 338275SEric Cheng * The MAC layer is central to the GLD framework and can provide the locking 348275SEric Cheng * framework needed for itself and for the use of MAC clients. MAC end points 358275SEric Cheng * are fairly disjoint and don't share a lot of state. 
So a coarse grained 368275SEric Cheng * multi-threading scheme is to single thread all create/modify/delete or set 378275SEric Cheng * type of control operations on a per mac end point while allowing data threads 388275SEric Cheng * concurrently. 398275SEric Cheng * 408275SEric Cheng * Control operations (set) that modify a mac end point are always serialized on 418275SEric Cheng * a per mac end point basis, We have at most 1 such thread per mac end point 428275SEric Cheng * at a time. 438275SEric Cheng * 448275SEric Cheng * All other operations that are not serialized are essentially multi-threaded. 458275SEric Cheng * For example a control operation (get) like getting statistics which may not 468275SEric Cheng * care about reading values atomically or data threads sending or receiving 478275SEric Cheng * data. Mostly these type of operations don't modify the control state. Any 488275SEric Cheng * state these operations care about are protected using traditional locks. 498275SEric Cheng * 508275SEric Cheng * The perimeter only serializes serial operations. It does not imply there 518275SEric Cheng * aren't any other concurrent operations. However a serialized operation may 528275SEric Cheng * sometimes need to make sure it is the only thread. In this case it needs 538275SEric Cheng * to use reference counting mechanisms to cv_wait until any current data 548275SEric Cheng * threads are done. 558275SEric Cheng * 568275SEric Cheng * The mac layer itself does not hold any locks across a call to another layer. 578275SEric Cheng * The perimeter is however held across a down call to the driver to make the 588275SEric Cheng * whole control operation atomic with respect to other control operations. 598275SEric Cheng * Also the data path and get type control operations may proceed concurrently. 608275SEric Cheng * These operations synchronize with the single serial operation on a given mac 618275SEric Cheng * end point using regular locks. 
The perimeter ensures that conflicting 628275SEric Cheng * operations like say a mac_multicast_add and a mac_multicast_remove on the 638275SEric Cheng * same mac end point don't interfere with each other and also ensures that the 648275SEric Cheng * changes in the mac layer and the call to the underlying driver to say add a 658275SEric Cheng * multicast address are done atomically without interference from a thread 668275SEric Cheng * trying to delete the same address. 678275SEric Cheng * 688275SEric Cheng * For example, consider 698275SEric Cheng * mac_multicast_add() 708275SEric Cheng * { 718275SEric Cheng * mac_perimeter_enter(); serialize all control operations 728275SEric Cheng * 738275SEric Cheng * grab list lock protect against access by data threads 748275SEric Cheng * add to list 758275SEric Cheng * drop list lock 768275SEric Cheng * 778275SEric Cheng * call driver's mi_multicst 788275SEric Cheng * 798275SEric Cheng * mac_perimeter_exit(); 808275SEric Cheng * } 818275SEric Cheng * 828275SEric Cheng * To lessen the number of serialization locks and simplify the lock hierarchy, 838275SEric Cheng * we serialize all the control operations on a per mac end point by using a 848275SEric Cheng * single serialization lock called the perimeter. We allow recursive entry into 858275SEric Cheng * the perimeter to facilitate use of this mechanism by both the mac client and 868275SEric Cheng * the MAC layer itself. 878275SEric Cheng * 888275SEric Cheng * MAC client means an entity that does an operation on a mac handle 898275SEric Cheng * obtained from a mac_open/mac_client_open. Similarly MAC driver means 908275SEric Cheng * an entity that does an operation on a mac handle obtained from a 918275SEric Cheng * mac_register. An entity could be both client and driver but on different 928275SEric Cheng * handles, e.g. aggr, and should only make the corresponding mac interface calls 938275SEric Cheng * i.e.
mac driver interface or mac client interface as appropriate for that 948275SEric Cheng * mac handle. 958275SEric Cheng * 968275SEric Cheng * General rules. 978275SEric Cheng * ------------- 988275SEric Cheng * 998275SEric Cheng * R1. The lock order of upcall threads is naturally opposite to downcall 1008275SEric Cheng * threads. Hence upcalls must not hold any locks across layers for fear of 1018275SEric Cheng * recursive lock enter and lock order violation. This applies to all layers. 1028275SEric Cheng * 1038275SEric Cheng * R2. The perimeter is just another lock. Since it is held in the down 1048275SEric Cheng * direction, acquiring the perimeter in an upcall is prohibited as it would 1058275SEric Cheng * cause a deadlock. This applies to all layers. 1068275SEric Cheng * 1078275SEric Cheng * Note that upcalls that need to grab the mac perimeter (for example 1088275SEric Cheng * mac_notify upcalls) can still achieve that by posting the request to a 1098275SEric Cheng * thread, which can then grab all the required perimeters and locks in the 1108275SEric Cheng * right global order. Note that in the above example the mac layer itself 1118275SEric Cheng * won't grab the mac perimeter in the mac_notify upcall, instead the upcall 1128275SEric Cheng * to the client must do that. Please see the aggr code for an example. 1138275SEric Cheng * 1148275SEric Cheng * MAC client rules 1158275SEric Cheng * ---------------- 1168275SEric Cheng * 1178275SEric Cheng * R3. A MAC client may use the MAC provided perimeter facility to serialize 1188275SEric Cheng * control operations on a per mac end point. It does this by acquiring 1198275SEric Cheng * and holding the perimeter across a sequence of calls to the mac layer. 1208275SEric Cheng * This ensures atomicity across the entire block of mac calls. In this 1218275SEric Cheng * model the MAC client must not hold any client locks across the calls to 1228275SEric Cheng * the mac layer. This model is the preferred solution.
1238275SEric Cheng * 1248275SEric Cheng * R4. However if a MAC client has a lot of global state across all mac end 1258275SEric Cheng * points the per mac end point serialization may not be sufficient. In this 1268275SEric Cheng * case the client may choose to use global locks or use its own serialization. 1278275SEric Cheng * To avoid deadlocks, these client layer locks held across the mac calls 1288275SEric Cheng * in the control path must never be acquired by the data path for the reason 1298275SEric Cheng * mentioned below. 1308275SEric Cheng * 1318275SEric Cheng * (Assume that a control operation that holds a client lock blocks in the 1328275SEric Cheng * mac layer waiting for upcall reference counts to drop to zero. If an upcall 1338275SEric Cheng * data thread that holds this reference count, tries to acquire the same 1348275SEric Cheng * client lock subsequently it will deadlock). 1358275SEric Cheng * 1368275SEric Cheng * A MAC client may follow either the R3 model or the R4 model, but can't 1378275SEric Cheng * mix both. In the former, the hierarchy is Perim -> client locks, but in 1388275SEric Cheng * the latter it is client locks -> Perim. 1398275SEric Cheng * 1408275SEric Cheng * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able 1418275SEric Cheng * context since they may block while trying to acquire the perimeter. 1428275SEric Cheng * In addition some calls may block waiting for upcall refcnts to come down to 1438275SEric Cheng * zero. 1448275SEric Cheng * 1458275SEric Cheng * R6. MAC clients must make sure that they are single threaded and all threads 1468275SEric Cheng * from the top (in particular data threads) have finished before calling 1478275SEric Cheng * mac_client_close. The MAC framework does not track the number of client 1488275SEric Cheng * threads using the mac client handle. Also mac clients must make sure 1498275SEric Cheng * they have undone all the control operations before calling mac_client_close. 
1508275SEric Cheng * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding 1518275SEric Cheng * mac_unicast_add/mac_multicast_add. 1528275SEric Cheng * 1538275SEric Cheng * MAC framework rules 1548275SEric Cheng * ------------------- 1558275SEric Cheng * 1568275SEric Cheng * R7. The mac layer itself must not hold any mac layer locks (except the mac 1578275SEric Cheng * perimeter) across a call to any other layer from the mac layer. The call to 1588275SEric Cheng * any other layer could be via mi_* entry points, classifier entry points into 1598275SEric Cheng * the driver or via upcall pointers into layers above. The mac perimeter may 1608275SEric Cheng * be acquired or held only in the down direction, e.g. when calling into 1618275SEric Cheng * a mi_* driver entry point to provide atomicity of the operation. 1628275SEric Cheng * 1638275SEric Cheng * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across 1648275SEric Cheng * mac driver interfaces, the MAC layer must provide a cut out for control 1658275SEric Cheng * interfaces like upcall notifications and start them in a separate thread. 1668275SEric Cheng * 1678275SEric Cheng * R9. Note that locking order also implies a plumbing order. For example 1688275SEric Cheng * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt 1698275SEric Cheng * to plumb in any other order must be failed at mac_open time, otherwise it 1708275SEric Cheng * could lead to deadlocks due to inverse locking order. 1718275SEric Cheng * 1728275SEric Cheng * R10. MAC driver interfaces must not block since the driver could call them 1738275SEric Cheng * in interrupt context. 1748275SEric Cheng * 1758275SEric Cheng * R11. Walkers must preferably not hold any locks while calling walker 1768275SEric Cheng * callbacks. Instead these can operate on reference counts.
In simple 1778275SEric Cheng * callbacks it may be ok to hold a lock and call the callbacks, but this is 1788275SEric Cheng * harder to maintain in the general case of arbitrary callbacks. 1798275SEric Cheng * 1808275SEric Cheng * R12. The MAC layer must protect upcall notification callbacks using reference 1818275SEric Cheng * counts rather than holding locks across the callbacks. 1828275SEric Cheng * 1838275SEric Cheng * R13. Given the variety of drivers, it is preferable if the MAC layer can make 1848275SEric Cheng * sure that any pointers (such as mac ring pointers) it passes to the driver 1858275SEric Cheng * remain valid until mac unregister time. Currently the mac layer achieves 1868275SEric Cheng * this by using generation numbers for rings and freeing the mac rings only 1878275SEric Cheng * at unregister time. The MAC layer must provide a layer of indirection and 1888275SEric Cheng * must not expose underlying driver rings or driver data structures/pointers 1898275SEric Cheng * directly to MAC clients. 1908275SEric Cheng * 1918275SEric Cheng * MAC driver rules 1928275SEric Cheng * ---------------- 1938275SEric Cheng * 1948275SEric Cheng * R14. It would be preferable if MAC drivers don't hold any locks across any 1958275SEric Cheng * mac call. However at a minimum they must not hold any locks across data 1968275SEric Cheng * upcalls. They must also make sure that all references to mac data structures 1978275SEric Cheng * are cleaned up and that it is single threaded at mac_unregister time. 1988275SEric Cheng * 1998275SEric Cheng * R15. MAC driver interfaces don't block and so the action may be done 2008275SEric Cheng * asynchronously in a separate thread as for example handling notifications. 2018275SEric Cheng * The driver must not assume that the action is complete when the call 2028275SEric Cheng * returns. 2038275SEric Cheng * 2048275SEric Cheng * R16. 
Drivers must maintain a generation number per Rx ring, and pass it 2058275SEric Cheng * back to mac_rx_ring(). They are expected to increment the generation 2068275SEric Cheng * number whenever the ring's stop routine is invoked. 2078275SEric Cheng * See comments in mac_rx_ring(). 2088275SEric Cheng * 2098275SEric Cheng * R17. Similarly, mi_stop is another synchronization point and the driver must 2108275SEric Cheng * ensure that all upcalls are done and there won't be any future upcall 2118275SEric Cheng * before returning from mi_stop. 2128275SEric Cheng * 2138275SEric Cheng * R18. The driver may assume that all set/modify control operations via 2148275SEric Cheng * the mi_* entry points are single threaded on a per mac end point. 2158275SEric Cheng * 2168275SEric Cheng * Lock and Perimeter hierarchy scenarios 2178275SEric Cheng * --------------------------------------- 2188275SEric Cheng * 2198275SEric Cheng * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify] 2208275SEric Cheng * 2218275SEric Cheng * ft_lock -> fe_lock [mac_flow_lookup] 2228275SEric Cheng * 2238275SEric Cheng * mi_rw_lock -> fe_lock [mac_bcast_send] 2248275SEric Cheng * 2258275SEric Cheng * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw] 2268275SEric Cheng * 2278275SEric Cheng * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind] 2288275SEric Cheng * 2298275SEric Cheng * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename] 2308275SEric Cheng * 2318275SEric Cheng * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac 2328275SEric Cheng * client to driver. In the case of clients that explicitly use the mac provided 2338275SEric Cheng * perimeter mechanism for their serialization, the hierarchy is 2348275SEric Cheng * Perimeter -> mac layer locks, since the client never holds any locks across 2358275SEric Cheng * the mac calls.
In the case of clients that use its own locks the hierarchy 2368275SEric Cheng * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly 2378275SEric Cheng * calls mac_perim_enter/exit in this case. 2388275SEric Cheng * 2398275SEric Cheng * Subflow creation rules 2408275SEric Cheng * --------------------------- 2418275SEric Cheng * o In case of a user specified cpulist present on underlying link and flows, 2428275SEric Cheng * the flows cpulist must be a subset of the underlying link. 2438275SEric Cheng * o In case of a user specified fanout mode present on link and flow, the 2448275SEric Cheng * subflow fanout count has to be less than or equal to that of the 2458275SEric Cheng * underlying link. The cpu-bindings for the subflows will be a subset of 2468275SEric Cheng * the underlying link. 2478275SEric Cheng * o In case if no cpulist specified on both underlying link and flow, the 2488275SEric Cheng * underlying link relies on a MAC tunable to provide out of box fanout. 2498275SEric Cheng * The subflow will have no cpulist (the subflow will be unbound) 2508275SEric Cheng * o In case if no cpulist is specified on the underlying link, a subflow can 2518275SEric Cheng * carry either a user-specified cpulist or fanout count. The cpu-bindings 2528275SEric Cheng * for the subflow will not adhere to restriction that they need to be subset 2538275SEric Cheng * of the underlying link. 2548275SEric Cheng * o In case where the underlying link is carrying either a user specified 2558275SEric Cheng * cpulist or fanout mode and for a unspecified subflow, the subflow will be 2568275SEric Cheng * created unbound. 2578275SEric Cheng * o While creating unbound subflows, bandwidth mode changes attempt to 2588275SEric Cheng * figure a right fanout count. In such cases the fanout count will override 2598275SEric Cheng * the unbound cpu-binding behavior. 
2608275SEric Cheng * o In addition to this, while cycling between flow and link properties, we 2618275SEric Cheng * impose a restriction that if a link property has a subflow with 2628275SEric Cheng * user-specified attributes, we will not allow changing the link property. 2638275SEric Cheng * The administrator needs to reset all the user specified properties for the 2648275SEric Cheng * subflows before attempting a link property change. 2658275SEric Cheng * Some of the above rules can be overridden by specifying additional command 2668275SEric Cheng * line options while creating or modifying link or subflow properties. 2670Sstevel@tonic-gate */ 2680Sstevel@tonic-gate 2690Sstevel@tonic-gate #include <sys/types.h> 2700Sstevel@tonic-gate #include <sys/conf.h> 2715895Syz147064 #include <sys/id_space.h> 2726077Syz147064 #include <sys/esunddi.h> 2730Sstevel@tonic-gate #include <sys/stat.h> 2745895Syz147064 #include <sys/mkdev.h> 2750Sstevel@tonic-gate #include <sys/stream.h> 2760Sstevel@tonic-gate #include <sys/strsun.h> 2770Sstevel@tonic-gate #include <sys/strsubr.h> 2780Sstevel@tonic-gate #include <sys/dlpi.h> 2798275SEric Cheng #include <sys/modhash.h> 2808275SEric Cheng #include <sys/mac_provider.h> 2818275SEric Cheng #include <sys/mac_client_impl.h> 2828275SEric Cheng #include <sys/mac_soft_ring.h> 2838275SEric Cheng #include <sys/mac_impl.h> 2848275SEric Cheng #include <sys/mac.h> 2855895Syz147064 #include <sys/dls.h> 286269Sericheng #include <sys/dld.h> 2872311Sseb #include <sys/modctl.h> 2883448Sdh155122 #include <sys/fs/dv_node.h> 2895009Sgd78059 #include <sys/thread.h> 2905009Sgd78059 #include <sys/proc.h> 2915009Sgd78059 #include <sys/callb.h> 2925009Sgd78059 #include <sys/cpuvar.h> 2933288Sseb #include <sys/atomic.h> 2948275SEric Cheng #include <sys/bitmap.h> 2954913Sethindra #include <sys/sdt.h> 2968275SEric Cheng #include <sys/mac_flow.h> 2978275SEric Cheng #include <sys/ddi_intr_impl.h> 2988275SEric Cheng #include <sys/disp.h> 2998275SEric Cheng #include 
<sys/sdt.h> 3008275SEric Cheng #include <sys/vnic.h> 3018275SEric Cheng #include <sys/vnic_impl.h> 3028275SEric Cheng #include <sys/vlan.h> 3038275SEric Cheng #include <inet/ip.h> 3048275SEric Cheng #include <inet/ip6.h> 3058275SEric Cheng #include <sys/exacct.h> 3068275SEric Cheng #include <sys/exacct_impl.h> 3075903Ssowmini #include <inet/nd.h> 3086512Ssowmini #include <sys/ethernet.h> 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate #define IMPL_HASHSZ 67 /* prime */ 3110Sstevel@tonic-gate 3128275SEric Cheng kmem_cache_t *i_mac_impl_cachep; 3138275SEric Cheng mod_hash_t *i_mac_impl_hash; 314269Sericheng krwlock_t i_mac_impl_lock; 315269Sericheng uint_t i_mac_impl_count; 3168275SEric Cheng static kmem_cache_t *mac_ring_cache; 3175895Syz147064 static id_space_t *minor_ids; 3185895Syz147064 static uint32_t minor_count; 3190Sstevel@tonic-gate 3208275SEric Cheng /* 3218275SEric Cheng * Logging stuff. Perhaps mac_logging_interval could be broken into 3228275SEric Cheng * mac_flow_log_interval and mac_link_log_interval if we want to be 3238275SEric Cheng * able to schedule them differently. 3248275SEric Cheng */ 3258275SEric Cheng uint_t mac_logging_interval; 3268275SEric Cheng boolean_t mac_flow_log_enable; 3278275SEric Cheng boolean_t mac_link_log_enable; 3288275SEric Cheng timeout_id_t mac_logging_timer; 3298275SEric Cheng 3308275SEric Cheng /* for debugging, see MAC_DBG_PRT() in mac_impl.h */ 3318275SEric Cheng int mac_dbg = 0; 3328275SEric Cheng 3332311Sseb #define MACTYPE_KMODDIR "mac" 3342311Sseb #define MACTYPE_HASHSZ 67 3352311Sseb static mod_hash_t *i_mactype_hash; 3363288Sseb /* 3373288Sseb * i_mactype_lock synchronizes threads that obtain references to mactype_t 3383288Sseb * structures through i_mactype_getplugin(). 3393288Sseb */ 3403288Sseb static kmutex_t i_mactype_lock; 3412311Sseb 3420Sstevel@tonic-gate /* 3438275SEric Cheng * mac_tx_percpu_cnt 3448275SEric Cheng * 3458275SEric Cheng * Number of per cpu locks per mac_client_impl_t. 
Used by the transmit side 3468275SEric Cheng * in mac_tx to reduce lock contention. This is sized at boot time in mac_init. 3478275SEric Cheng * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2. 3488275SEric Cheng * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1. 3495084Sjohnlev */ 3508275SEric Cheng int mac_tx_percpu_cnt; 3518275SEric Cheng int mac_tx_percpu_cnt_max = 128; 3528275SEric Cheng 3538275SEric Cheng static int i_mac_constructor(void *, void *, int); 3548275SEric Cheng static void i_mac_destructor(void *, void *); 3558275SEric Cheng static int i_mac_ring_ctor(void *, void *, int); 3568275SEric Cheng static void i_mac_ring_dtor(void *, void *); 3578275SEric Cheng static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *); 3588275SEric Cheng void mac_tx_client_flush(mac_client_impl_t *); 3598275SEric Cheng void mac_tx_client_block(mac_client_impl_t *); 3608275SEric Cheng static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); 3618275SEric Cheng static int mac_start_group_and_rings(mac_group_t *); 3628275SEric Cheng static void mac_stop_group_and_rings(mac_group_t *); 3632311Sseb 3640Sstevel@tonic-gate /* 3650Sstevel@tonic-gate * Module initialization functions. 3660Sstevel@tonic-gate */ 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate void 3690Sstevel@tonic-gate mac_init(void) 3700Sstevel@tonic-gate { 3718275SEric Cheng mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? 
max_ncpus : 3728275SEric Cheng boot_max_ncpus); 3738275SEric Cheng 3748275SEric Cheng /* Upper bound is mac_tx_percpu_cnt_max */ 3758275SEric Cheng if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max) 3768275SEric Cheng mac_tx_percpu_cnt = mac_tx_percpu_cnt_max; 3778275SEric Cheng 3788275SEric Cheng if (mac_tx_percpu_cnt < 1) { 3798275SEric Cheng /* Someone set max_tx_percpu_cnt_max to 0 or less */ 3808275SEric Cheng mac_tx_percpu_cnt = 1; 3818275SEric Cheng } 3828275SEric Cheng 3838275SEric Cheng ASSERT(mac_tx_percpu_cnt >= 1); 3848275SEric Cheng mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1)); 3858275SEric Cheng /* 3868275SEric Cheng * Make it of the form 2**N - 1 in the range 3878275SEric Cheng * [0 .. mac_tx_percpu_cnt_max - 1] 3888275SEric Cheng */ 3898275SEric Cheng mac_tx_percpu_cnt--; 3908275SEric Cheng 3910Sstevel@tonic-gate i_mac_impl_cachep = kmem_cache_create("mac_impl_cache", 3922311Sseb sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor, 3932311Sseb NULL, NULL, NULL, 0); 3940Sstevel@tonic-gate ASSERT(i_mac_impl_cachep != NULL); 3950Sstevel@tonic-gate 3968275SEric Cheng mac_ring_cache = kmem_cache_create("mac_ring_cache", 3978275SEric Cheng sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL, 3988275SEric Cheng NULL, NULL, 0); 3998275SEric Cheng ASSERT(mac_ring_cache != NULL); 4005084Sjohnlev 401269Sericheng i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash", 402269Sericheng IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 403269Sericheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 404269Sericheng rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL); 4058275SEric Cheng 4068275SEric Cheng mac_flow_init(); 4078275SEric Cheng mac_soft_ring_init(); 4088275SEric Cheng mac_bcast_init(); 4098275SEric Cheng mac_client_init(); 4108275SEric Cheng 411269Sericheng i_mac_impl_count = 0; 4122311Sseb 4132311Sseb i_mactype_hash = mod_hash_create_extended("mactype_hash", 4142311Sseb MACTYPE_HASHSZ, 4152311Sseb 
mod_hash_null_keydtor, mod_hash_null_valdtor, 4162311Sseb mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 4175895Syz147064 4185895Syz147064 /* 4195895Syz147064 * Allocate an id space to manage minor numbers. The range of the 4205895Syz147064 * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal 4215895Syz147064 * minor number is MAXMIN, but id_t is type of integer and does not 4225895Syz147064 * allow MAXMIN). 4235895Syz147064 */ 4245895Syz147064 minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32); 4255895Syz147064 ASSERT(minor_ids != NULL); 4265895Syz147064 minor_count = 0; 4278275SEric Cheng 4288275SEric Cheng /* Let's default to 20 seconds */ 4298275SEric Cheng mac_logging_interval = 20; 4308275SEric Cheng mac_flow_log_enable = B_FALSE; 4318275SEric Cheng mac_link_log_enable = B_FALSE; 4328275SEric Cheng mac_logging_timer = 0; 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate 4350Sstevel@tonic-gate int 4360Sstevel@tonic-gate mac_fini(void) 4370Sstevel@tonic-gate { 4385895Syz147064 if (i_mac_impl_count > 0 || minor_count > 0) 439269Sericheng return (EBUSY); 4400Sstevel@tonic-gate 4415895Syz147064 id_space_destroy(minor_ids); 4428275SEric Cheng mac_flow_fini(); 4435895Syz147064 444269Sericheng mod_hash_destroy_hash(i_mac_impl_hash); 445269Sericheng rw_destroy(&i_mac_impl_lock); 4460Sstevel@tonic-gate 4478275SEric Cheng mac_client_fini(); 4488275SEric Cheng kmem_cache_destroy(mac_ring_cache); 4492311Sseb 4502311Sseb mod_hash_destroy_hash(i_mactype_hash); 4518275SEric Cheng mac_soft_ring_finish(); 4520Sstevel@tonic-gate return (0); 4530Sstevel@tonic-gate } 4540Sstevel@tonic-gate 4558275SEric Cheng void 4568275SEric Cheng mac_init_ops(struct dev_ops *ops, const char *name) 4578275SEric Cheng { 4588275SEric Cheng dld_init_ops(ops, name); 4598275SEric Cheng } 4608275SEric Cheng 4618275SEric Cheng void 4628275SEric Cheng mac_fini_ops(struct dev_ops *ops) 4638275SEric Cheng { 4648275SEric Cheng dld_fini_ops(ops); 4658275SEric Cheng } 
4668275SEric Cheng 4678275SEric Cheng /*ARGSUSED*/ 4688275SEric Cheng static int 4698275SEric Cheng i_mac_constructor(void *buf, void *arg, int kmflag) 4708275SEric Cheng { 4718275SEric Cheng mac_impl_t *mip = buf; 4728275SEric Cheng 4738275SEric Cheng bzero(buf, sizeof (mac_impl_t)); 4748275SEric Cheng 4758275SEric Cheng mip->mi_linkstate = LINK_STATE_UNKNOWN; 4768275SEric Cheng mip->mi_nclients = 0; 4778275SEric Cheng 4788275SEric Cheng mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); 4798275SEric Cheng rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); 4808275SEric Cheng mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); 4818275SEric Cheng mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); 4828275SEric Cheng mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL); 4838275SEric Cheng 4848275SEric Cheng mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock; 4858275SEric Cheng cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4868275SEric Cheng mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock; 4878275SEric Cheng cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); 4888275SEric Cheng return (0); 4898275SEric Cheng } 4908275SEric Cheng 4918275SEric Cheng /*ARGSUSED*/ 4928275SEric Cheng static void 4938275SEric Cheng i_mac_destructor(void *buf, void *arg) 4948275SEric Cheng { 4958275SEric Cheng mac_impl_t *mip = buf; 4968275SEric Cheng mac_cb_info_t *mcbi; 4978275SEric Cheng 4988275SEric Cheng ASSERT(mip->mi_ref == 0); 4998275SEric Cheng ASSERT(mip->mi_active == 0); 5008275SEric Cheng ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN); 5018275SEric Cheng ASSERT(mip->mi_devpromisc == 0); 5028275SEric Cheng ASSERT(mip->mi_promisc == 0); 5038275SEric Cheng ASSERT(mip->mi_ksp == NULL); 5048275SEric Cheng ASSERT(mip->mi_kstat_count == 0); 5058275SEric Cheng ASSERT(mip->mi_nclients == 0); 5068275SEric Cheng ASSERT(mip->mi_nactiveclients == 0); 507*8833SVenu.Iyer@Sun.COM ASSERT(mip->mi_single_active_client == 
NULL); 5088275SEric Cheng ASSERT(mip->mi_state_flags == 0); 5098275SEric Cheng ASSERT(mip->mi_factory_addr == NULL); 5108275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 5118275SEric Cheng ASSERT(mip->mi_default_tx_ring == NULL); 5128275SEric Cheng 5138275SEric Cheng mcbi = &mip->mi_notify_cb_info; 5148275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0); 5158275SEric Cheng ASSERT(mip->mi_notify_bits == 0); 5168275SEric Cheng ASSERT(mip->mi_notify_thread == NULL); 5178275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock); 5188275SEric Cheng mcbi->mcbi_lockp = NULL; 5198275SEric Cheng 5208275SEric Cheng mcbi = &mip->mi_promisc_cb_info; 5218275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL); 5228275SEric Cheng ASSERT(mip->mi_promisc_list == NULL); 5238275SEric Cheng ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock); 5248275SEric Cheng mcbi->mcbi_lockp = NULL; 5258275SEric Cheng 5268275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); 5278275SEric Cheng ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); 5288275SEric Cheng 5298275SEric Cheng mutex_destroy(&mip->mi_lock); 5308275SEric Cheng rw_destroy(&mip->mi_rw_lock); 5318275SEric Cheng 5328275SEric Cheng mutex_destroy(&mip->mi_promisc_lock); 5338275SEric Cheng cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv); 5348275SEric Cheng mutex_destroy(&mip->mi_notify_lock); 5358275SEric Cheng cv_destroy(&mip->mi_notify_cb_info.mcbi_cv); 5368275SEric Cheng mutex_destroy(&mip->mi_ring_lock); 5378275SEric Cheng } 5388275SEric Cheng 5398275SEric Cheng /* ARGSUSED */ 5408275SEric Cheng static int 5418275SEric Cheng i_mac_ring_ctor(void *buf, void *arg, int kmflag) 5428275SEric Cheng { 5438275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5448275SEric Cheng 5458275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 5468275SEric Cheng cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL); 5478275SEric Cheng mutex_init(&ring->mr_lock, NULL, 
MUTEX_DEFAULT, NULL); 5488275SEric Cheng ring->mr_state = MR_FREE; 5498275SEric Cheng return (0); 5508275SEric Cheng } 5518275SEric Cheng 5528275SEric Cheng /* ARGSUSED */ 5538275SEric Cheng static void 5548275SEric Cheng i_mac_ring_dtor(void *buf, void *arg) 5558275SEric Cheng { 5568275SEric Cheng mac_ring_t *ring = (mac_ring_t *)buf; 5578275SEric Cheng 5588275SEric Cheng cv_destroy(&ring->mr_cv); 5598275SEric Cheng mutex_destroy(&ring->mr_lock); 5608275SEric Cheng } 5618275SEric Cheng 5628275SEric Cheng /* 5638275SEric Cheng * Common functions to do mac callback addition and deletion. Currently this is 5648275SEric Cheng * used by promisc callbacks and notify callbacks. List addition and deletion 5658275SEric Cheng * need to take care of list walkers. List walkers in general, can't hold list 5668275SEric Cheng * locks and make upcall callbacks due to potential lock order and recursive 5678275SEric Cheng * reentry issues. Instead list walkers increment the list walker count to mark 5688275SEric Cheng * the presence of a walker thread. Addition can be carefully done to ensure 5698275SEric Cheng * that the list walker always sees either the old list or the new list. 5708275SEric Cheng * However the deletion can't be done while the walker is active, instead the 5718275SEric Cheng * deleting thread simply marks the entry as logically deleted. The last walker 5728275SEric Cheng * physically deletes and frees up the logically deleted entries when the walk 5738275SEric Cheng * is complete. 
5748275SEric Cheng */ 5758275SEric Cheng void 5768275SEric Cheng mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 5778275SEric Cheng mac_cb_t *mcb_elem) 5788275SEric Cheng { 5798275SEric Cheng mac_cb_t *p; 5808275SEric Cheng mac_cb_t **pp; 5818275SEric Cheng 5828275SEric Cheng /* Verify it is not already in the list */ 5838275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 5848275SEric Cheng if (p == mcb_elem) 5858275SEric Cheng break; 5868275SEric Cheng } 5878275SEric Cheng VERIFY(p == NULL); 5888275SEric Cheng 5898275SEric Cheng /* 5908275SEric Cheng * Add it to the head of the callback list. The membar ensures that 5918275SEric Cheng * the following list pointer manipulations reach global visibility 5928275SEric Cheng * in exactly the program order below. 5938275SEric Cheng */ 5948275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 5958275SEric Cheng 5968275SEric Cheng mcb_elem->mcb_nextp = *mcb_head; 5978275SEric Cheng membar_producer(); 5988275SEric Cheng *mcb_head = mcb_elem; 5998275SEric Cheng } 6008275SEric Cheng 6018275SEric Cheng /* 6028275SEric Cheng * Mark the entry as logically deleted. If there aren't any walkers unlink 6038275SEric Cheng * from the list. In either case return the corresponding status. 
6048275SEric Cheng */ 6058275SEric Cheng boolean_t 6068275SEric Cheng mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, 6078275SEric Cheng mac_cb_t *mcb_elem) 6088275SEric Cheng { 6098275SEric Cheng mac_cb_t *p; 6108275SEric Cheng mac_cb_t **pp; 6118275SEric Cheng 6128275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6138275SEric Cheng /* 6148275SEric Cheng * Search the callback list for the entry to be removed 6158275SEric Cheng */ 6168275SEric Cheng for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { 6178275SEric Cheng if (p == mcb_elem) 6188275SEric Cheng break; 6198275SEric Cheng } 6208275SEric Cheng VERIFY(p != NULL); 6218275SEric Cheng 6228275SEric Cheng /* 6238275SEric Cheng * If there are walkers just mark it as deleted and the last walker 6248275SEric Cheng * will remove from the list and free it. 6258275SEric Cheng */ 6268275SEric Cheng if (mcbi->mcbi_walker_cnt != 0) { 6278275SEric Cheng p->mcb_flags |= MCB_CONDEMNED; 6288275SEric Cheng mcbi->mcbi_del_cnt++; 6298275SEric Cheng return (B_FALSE); 6308275SEric Cheng } 6318275SEric Cheng 6328275SEric Cheng ASSERT(mcbi->mcbi_del_cnt == 0); 6338275SEric Cheng *pp = p->mcb_nextp; 6348275SEric Cheng p->mcb_nextp = NULL; 6358275SEric Cheng return (B_TRUE); 6368275SEric Cheng } 6378275SEric Cheng 6388275SEric Cheng /* 6398275SEric Cheng * Wait for all pending callback removals to be completed 6408275SEric Cheng */ 6418275SEric Cheng void 6428275SEric Cheng mac_callback_remove_wait(mac_cb_info_t *mcbi) 6438275SEric Cheng { 6448275SEric Cheng ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); 6458275SEric Cheng while (mcbi->mcbi_del_cnt != 0) { 6468275SEric Cheng DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi); 6478275SEric Cheng cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 6488275SEric Cheng } 6498275SEric Cheng } 6508275SEric Cheng 6510Sstevel@tonic-gate /* 6528275SEric Cheng * The last mac callback walker does the cleanup. 
Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 *
 * The caller must hold the callback list lock, and there must be at least
 * one pending deletion and no remaining walkers (both are asserted).
 */
mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;   /* List of removed elements */
	int	cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			/*
			 * Unlink the condemned entry and push it onto the
			 * removed list. 'pp' is deliberately not advanced:
			 * it now refers to the successor of the entry that
			 * was just unlinked.
			 */
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	/* Every pending deletion must have been found on this list. */
	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

/*
 * Returns B_TRUE if mcb_elem is on the list headed by *mcb_headp, B_FALSE
 * otherwise. No lock is taken here; see mac_callback_find for the variant
 * that acquires the callback list lock around the lookup.
 */
boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Walk the list looking for the element */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Same as mac_callback_lookup, but performed while holding the callback
 * list lock (mcbi_lockp), which is acquired and released here.
 */
boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		/*
		 * Fetch the next pointer before freeing the object.
		 * NOTE(review): presumably the mac_cb_t is embedded in the
		 * object at mcb_objp, so 'mcb' is invalid after the free --
		 * confirm against the structure definitions.
		 */
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}

/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t	*rmlist;
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		/* Save the successor; the entry is freed at the loop end. */
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		/*
		 * The walker count is zero at this point (asserted by
		 * mac_callback_walker_cleanup), so the removal from the
		 * per-client list must physically unlink and return B_TRUE.
		 */
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
		mcb->mcb_flags = 0;
		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}

/*
 * Record a pending notification of the given type in mi_notify_bits and
 * wake up whoever is waiting on the notify callback condition variable.
 * Out-of-range types and disabled macs are silently ignored.
 */
void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t	*mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister
	 *
	 * NOTE(review): the code below returns without signalling when
	 * MIS_DISABLED is set, which appears to contradict the first
	 * sentence above -- confirm which one is intended.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 *
 * i_mac_perim_enter acquires the perimeter of 'mip' (or of the lower mac if
 * 'mip' is a VNIC), sleeping until it is free. The perimeter is recursive:
 * a thread that already owns it only has its ownership count incremented.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		/* Recursive entry by the current owner. */
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	/* Remember where the perimeter was entered, as a debugging aid. */
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Non-blocking variant of i_mac_perim_enter. Returns 0 once the perimeter
 * has been entered, or EBUSY if it is currently owned. Note that for a
 * non-VNIC mac this path has no recursive-entry check: EBUSY is returned
 * whenever there is an owner, even if the owner is the calling thread.
 */
int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/* May block: this waits for the lower mac's perimeter. */
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}

/*
 * Leave the perimeter of 'mip' (or of the lower mac if 'mip' is a VNIC).
 * The calling thread must be the current owner. Ownership is given up, and
 * one waiter is woken, only when the ownership count drops to zero.
 */
void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t *mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
8848275SEric Cheng */ 8858275SEric Cheng boolean_t 8868275SEric Cheng mac_perim_held(mac_handle_t mh) 8878275SEric Cheng { 8888275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 8898275SEric Cheng mac_client_impl_t *mcip; 8908275SEric Cheng 8918275SEric Cheng if (mip->mi_state_flags & MIS_IS_VNIC) { 8928275SEric Cheng /* 8938275SEric Cheng * This is a VNIC. Return the lower mac since that is what 8948275SEric Cheng * we want to serialize on. 8958275SEric Cheng */ 8968275SEric Cheng mcip = mac_vnic_lower(mip); 8978275SEric Cheng mip = mcip->mci_mip; 8988275SEric Cheng } 8998275SEric Cheng return (mip->mi_perim_owner == curthread); 9008275SEric Cheng } 9018275SEric Cheng 9028275SEric Cheng /* 9038275SEric Cheng * mac client interfaces to enter the mac perimeter of a mac end point, given 9048275SEric Cheng * its mac handle, or macname or linkid. 9058275SEric Cheng */ 9068275SEric Cheng void 9078275SEric Cheng mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp) 9088275SEric Cheng { 9098275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 9108275SEric Cheng 9118275SEric Cheng i_mac_perim_enter(mip); 9128275SEric Cheng /* 9138275SEric Cheng * The mac_perim_handle_t returned encodes the 'mip' and whether a 9148275SEric Cheng * mac_open has been done internally while entering the perimeter. 
9158275SEric Cheng * This information is used in mac_perim_exit 9168275SEric Cheng */ 9178275SEric Cheng MAC_ENCODE_MPH(*mphp, mip, 0); 9188275SEric Cheng } 9198275SEric Cheng 9208275SEric Cheng int 9218275SEric Cheng mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp) 9228275SEric Cheng { 9238275SEric Cheng int err; 9248275SEric Cheng mac_handle_t mh; 9258275SEric Cheng 9268275SEric Cheng if ((err = mac_open(name, &mh)) != 0) 9278275SEric Cheng return (err); 9288275SEric Cheng 9298275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9308275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9318275SEric Cheng return (0); 9328275SEric Cheng } 9338275SEric Cheng 9348275SEric Cheng int 9358275SEric Cheng mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp) 9368275SEric Cheng { 9378275SEric Cheng int err; 9388275SEric Cheng mac_handle_t mh; 9398275SEric Cheng 9408275SEric Cheng if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 9418275SEric Cheng return (err); 9428275SEric Cheng 9438275SEric Cheng mac_perim_enter_by_mh(mh, mphp); 9448275SEric Cheng MAC_ENCODE_MPH(*mphp, mh, 1); 9458275SEric Cheng return (0); 9468275SEric Cheng } 9478275SEric Cheng 9488275SEric Cheng void 9498275SEric Cheng mac_perim_exit(mac_perim_handle_t mph) 9508275SEric Cheng { 9518275SEric Cheng mac_impl_t *mip; 9528275SEric Cheng boolean_t need_close; 9538275SEric Cheng 9548275SEric Cheng MAC_DECODE_MPH(mph, mip, need_close); 9558275SEric Cheng i_mac_perim_exit(mip); 9568275SEric Cheng if (need_close) 9578275SEric Cheng mac_close((mac_handle_t)mip); 9588275SEric Cheng } 9598275SEric Cheng 9608275SEric Cheng int 9615895Syz147064 mac_hold(const char *macname, mac_impl_t **pmip) 9620Sstevel@tonic-gate { 9630Sstevel@tonic-gate mac_impl_t *mip; 9640Sstevel@tonic-gate int err; 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate /* 9670Sstevel@tonic-gate * Check the device name length to make sure it won't overflow our 9680Sstevel@tonic-gate * buffer. 
9690Sstevel@tonic-gate */ 9702311Sseb if (strlen(macname) >= MAXNAMELEN) 9710Sstevel@tonic-gate return (EINVAL); 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate /* 9745895Syz147064 * Look up its entry in the global hash table. 9750Sstevel@tonic-gate */ 9765895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 9775895Syz147064 err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, 9785895Syz147064 (mod_hash_val_t *)&mip); 9795895Syz147064 9805895Syz147064 if (err != 0) { 9815895Syz147064 rw_exit(&i_mac_impl_lock); 9825895Syz147064 return (ENOENT); 9835895Syz147064 } 9845895Syz147064 9858275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 9865895Syz147064 rw_exit(&i_mac_impl_lock); 9875895Syz147064 return (ENOENT); 9885895Syz147064 } 9895895Syz147064 9908275SEric Cheng if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) { 9915895Syz147064 rw_exit(&i_mac_impl_lock); 9925895Syz147064 return (EBUSY); 9935895Syz147064 } 9945895Syz147064 9955895Syz147064 mip->mi_ref++; 9965895Syz147064 rw_exit(&i_mac_impl_lock); 9975895Syz147064 9985895Syz147064 *pmip = mip; 9995895Syz147064 return (0); 10005895Syz147064 } 10015895Syz147064 10028275SEric Cheng void 10035895Syz147064 mac_rele(mac_impl_t *mip) 10045895Syz147064 { 10055895Syz147064 rw_enter(&i_mac_impl_lock, RW_WRITER); 10065895Syz147064 ASSERT(mip->mi_ref != 0); 10078275SEric Cheng if (--mip->mi_ref == 0) { 10088275SEric Cheng ASSERT(mip->mi_nactiveclients == 0 && 10098275SEric Cheng !(mip->mi_state_flags & MIS_EXCLUSIVE)); 10105895Syz147064 } 10115895Syz147064 rw_exit(&i_mac_impl_lock); 10125895Syz147064 } 10135895Syz147064 10148275SEric Cheng /* 10158275SEric Cheng * This function is called only by mac_client_open. 
10168275SEric Cheng */ 10175895Syz147064 int 10188275SEric Cheng mac_start(mac_impl_t *mip) 10190Sstevel@tonic-gate { 10208275SEric Cheng int err = 0; 10218275SEric Cheng 10228275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10232311Sseb ASSERT(mip->mi_start != NULL); 10240Sstevel@tonic-gate 10250Sstevel@tonic-gate /* 10260Sstevel@tonic-gate * Check whether the device is already started. 10270Sstevel@tonic-gate */ 10288275SEric Cheng if (mip->mi_active++ == 0) { 10298275SEric Cheng mac_ring_t *ring = NULL; 10308275SEric Cheng 10318275SEric Cheng /* 10328275SEric Cheng * Start the device. 10338275SEric Cheng */ 10348275SEric Cheng err = mip->mi_start(mip->mi_driver); 10358275SEric Cheng if (err != 0) { 10368275SEric Cheng mip->mi_active--; 10378275SEric Cheng return (err); 10388275SEric Cheng } 10398275SEric Cheng 10400Sstevel@tonic-gate /* 10418275SEric Cheng * Start the default tx ring. 10420Sstevel@tonic-gate */ 10438275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 10448275SEric Cheng 10458275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 10468275SEric Cheng err = mac_start_ring(ring); 10478275SEric Cheng if (err != 0) { 10488275SEric Cheng mip->mi_active--; 10498275SEric Cheng return (err); 10508275SEric Cheng } 10518275SEric Cheng ring->mr_state = MR_INUSE; 10528275SEric Cheng } 10538275SEric Cheng 10548275SEric Cheng if (mip->mi_rx_groups != NULL) { 10558275SEric Cheng /* 10568275SEric Cheng * Start the default ring, since it will be needed 10578275SEric Cheng * to receive broadcast and multicast traffic for 10588275SEric Cheng * both primary and non-primary MAC clients. 
10598275SEric Cheng */ 10608275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 10618275SEric Cheng 10628275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 10638275SEric Cheng err = mac_start_group_and_rings(grp); 10648275SEric Cheng if (err != 0) { 10658275SEric Cheng mip->mi_active--; 10668275SEric Cheng if (ring != NULL) { 10678275SEric Cheng mac_stop_ring(ring); 10688275SEric Cheng ring->mr_state = MR_FREE; 10698275SEric Cheng } 10708275SEric Cheng return (err); 10718275SEric Cheng } 10728275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); 10738275SEric Cheng } 10740Sstevel@tonic-gate } 10750Sstevel@tonic-gate 10760Sstevel@tonic-gate return (err); 10770Sstevel@tonic-gate } 10780Sstevel@tonic-gate 10798275SEric Cheng /* 10808275SEric Cheng * This function is called only by mac_client_close. 10818275SEric Cheng */ 10820Sstevel@tonic-gate void 10838275SEric Cheng mac_stop(mac_impl_t *mip) 10840Sstevel@tonic-gate { 10852311Sseb ASSERT(mip->mi_stop != NULL); 10868275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 10870Sstevel@tonic-gate 10880Sstevel@tonic-gate /* 10890Sstevel@tonic-gate * Check whether the device is still needed. 10900Sstevel@tonic-gate */ 10910Sstevel@tonic-gate ASSERT(mip->mi_active != 0); 10928275SEric Cheng if (--mip->mi_active == 0) { 10938275SEric Cheng if (mip->mi_rx_groups != NULL) { 10940Sstevel@tonic-gate /* 10958275SEric Cheng * There should be no more active clients since the 10968275SEric Cheng * MAC is being stopped. Stop the default RX group 10978275SEric Cheng * and transition it back to registered state. 10980Sstevel@tonic-gate */ 10998275SEric Cheng mac_group_t *grp = &mip->mi_rx_groups[0]; 11000Sstevel@tonic-gate 11010Sstevel@tonic-gate /* 11028275SEric Cheng * When clients are torn down, the groups 11038275SEric Cheng * are release via mac_release_rx_group which 11048275SEric Cheng * knows the the default group is always in 11058275SEric Cheng * started mode since broadcast uses it. 
So 11068275SEric Cheng * we can assert that their are no clients 11078275SEric Cheng * (since mac_bcast_add doesn't register itself 11088275SEric Cheng * as a client) and group is in SHARED state. 11090Sstevel@tonic-gate */ 11108275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); 11118275SEric Cheng ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && 11128275SEric Cheng mip->mi_nactiveclients == 0); 11138275SEric Cheng mac_stop_group_and_rings(grp); 11148275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 11150Sstevel@tonic-gate } 11168275SEric Cheng 11178275SEric Cheng if (mip->mi_default_tx_ring != NULL) { 11188275SEric Cheng mac_ring_t *ring; 11198275SEric Cheng 11208275SEric Cheng ring = (mac_ring_t *)mip->mi_default_tx_ring; 11218275SEric Cheng mac_stop_ring(ring); 11228275SEric Cheng ring->mr_state = MR_FREE; 11238275SEric Cheng } 11248275SEric Cheng 11258275SEric Cheng /* 11268275SEric Cheng * Stop the device. 11278275SEric Cheng */ 11288275SEric Cheng mip->mi_stop(mip->mi_driver); 11292331Skrgopi } 11302331Skrgopi } 11312331Skrgopi 11320Sstevel@tonic-gate int 11338275SEric Cheng i_mac_promisc_set(mac_impl_t *mip, boolean_t on, mac_promisc_type_t ptype) 11340Sstevel@tonic-gate { 11350Sstevel@tonic-gate int err = 0; 11360Sstevel@tonic-gate 11378275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 11382311Sseb ASSERT(mip->mi_setpromisc != NULL); 11390Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 11400Sstevel@tonic-gate 11410Sstevel@tonic-gate /* 11420Sstevel@tonic-gate * Determine whether we should enable or disable promiscuous mode. 11430Sstevel@tonic-gate * For details on the distinction between "device promiscuous mode" 11440Sstevel@tonic-gate * and "MAC promiscuous mode", see PSARC/2005/289. 11450Sstevel@tonic-gate */ 11460Sstevel@tonic-gate if (on) { 11470Sstevel@tonic-gate /* 11480Sstevel@tonic-gate * Enable promiscuous mode on the device if not yet enabled. 
11490Sstevel@tonic-gate */ 11500Sstevel@tonic-gate if (mip->mi_devpromisc++ == 0) { 11512311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_TRUE); 11522311Sseb if (err != 0) { 11530Sstevel@tonic-gate mip->mi_devpromisc--; 11548275SEric Cheng return (err); 11550Sstevel@tonic-gate } 11560Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11570Sstevel@tonic-gate } 11580Sstevel@tonic-gate 11590Sstevel@tonic-gate /* 11600Sstevel@tonic-gate * Enable promiscuous mode on the MAC if not yet enabled. 11610Sstevel@tonic-gate */ 11620Sstevel@tonic-gate if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0) 11630Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11640Sstevel@tonic-gate } else { 11658275SEric Cheng if (mip->mi_devpromisc == 0) 11668275SEric Cheng return (EPROTO); 11678275SEric Cheng 11680Sstevel@tonic-gate /* 11690Sstevel@tonic-gate * Disable promiscuous mode on the device if this is the last 11700Sstevel@tonic-gate * enabling. 11710Sstevel@tonic-gate */ 11720Sstevel@tonic-gate if (--mip->mi_devpromisc == 0) { 11732311Sseb err = mip->mi_setpromisc(mip->mi_driver, B_FALSE); 11742311Sseb if (err != 0) { 11750Sstevel@tonic-gate mip->mi_devpromisc++; 11768275SEric Cheng return (err); 11770Sstevel@tonic-gate } 11780Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_DEVPROMISC); 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate 11810Sstevel@tonic-gate /* 11820Sstevel@tonic-gate * Disable promiscuous mode on the MAC if this is the last 11830Sstevel@tonic-gate * enabling. 
11840Sstevel@tonic-gate */ 11850Sstevel@tonic-gate if (ptype == MAC_PROMISC && --mip->mi_promisc == 0) 11860Sstevel@tonic-gate i_mac_notify(mip, MAC_NOTE_PROMISC); 11870Sstevel@tonic-gate } 11880Sstevel@tonic-gate 11898275SEric Cheng return (0); 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11928275SEric Cheng int 11938275SEric Cheng mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype) 11948275SEric Cheng { 11958275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 11968275SEric Cheng int rv; 11978275SEric Cheng 11988275SEric Cheng i_mac_perim_enter(mip); 11998275SEric Cheng rv = i_mac_promisc_set(mip, on, ptype); 12008275SEric Cheng i_mac_perim_exit(mip); 12018275SEric Cheng 12028275SEric Cheng return (rv); 12038275SEric Cheng } 12048275SEric Cheng 12058275SEric Cheng /* 12068275SEric Cheng * The promiscuity state can change any time. If the caller needs to take 12078275SEric Cheng * actions that are atomic with the promiscuity state, then the caller needs 12088275SEric Cheng * to bracket the entire sequence with mac_perim_enter/exit 12098275SEric Cheng */ 12100Sstevel@tonic-gate boolean_t 12110Sstevel@tonic-gate mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype) 12120Sstevel@tonic-gate { 12130Sstevel@tonic-gate mac_impl_t *mip = (mac_impl_t *)mh; 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC); 12160Sstevel@tonic-gate 12170Sstevel@tonic-gate /* 12180Sstevel@tonic-gate * Return the current promiscuity. 12190Sstevel@tonic-gate */ 12200Sstevel@tonic-gate if (ptype == MAC_DEVPROMISC) 12210Sstevel@tonic-gate return (mip->mi_devpromisc != 0); 12220Sstevel@tonic-gate else 12230Sstevel@tonic-gate return (mip->mi_promisc != 0); 12240Sstevel@tonic-gate } 12250Sstevel@tonic-gate 12268275SEric Cheng /* 12278275SEric Cheng * Invoked at MAC instance attach time to initialize the list 12288275SEric Cheng * of factory MAC addresses supported by a MAC instance. 
This function 12298275SEric Cheng * builds a local cache in the mac_impl_t for the MAC addresses 12308275SEric Cheng * supported by the underlying hardware. The MAC clients themselves 12318275SEric Cheng * use the mac_addr_factory*() functions to query and reserve 12328275SEric Cheng * factory MAC addresses. 12338275SEric Cheng */ 12340Sstevel@tonic-gate void 12358275SEric Cheng mac_addr_factory_init(mac_impl_t *mip) 12365903Ssowmini { 12378275SEric Cheng mac_capab_multifactaddr_t capab; 12388275SEric Cheng uint8_t *addr; 12398275SEric Cheng int i; 12400Sstevel@tonic-gate 12410Sstevel@tonic-gate /* 12428275SEric Cheng * First round to see how many factory MAC addresses are available. 12430Sstevel@tonic-gate */ 12448275SEric Cheng bzero(&capab, sizeof (capab)); 12458275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR, 12468275SEric Cheng &capab) || (capab.mcm_naddr == 0)) { 12476512Ssowmini /* 12488275SEric Cheng * The MAC instance doesn't support multiple factory 12498275SEric Cheng * MAC addresses, we're done here. 12506512Ssowmini */ 12516512Ssowmini return; 12525903Ssowmini } 12536512Ssowmini 12540Sstevel@tonic-gate /* 12558275SEric Cheng * Allocate the space and get all the factory addresses. 
125656Smeem */ 12578275SEric Cheng addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP); 12588275SEric Cheng capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr); 12598275SEric Cheng 12608275SEric Cheng mip->mi_factory_addr_num = capab.mcm_naddr; 12618275SEric Cheng mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num * 12628275SEric Cheng sizeof (mac_factory_addr_t), KM_SLEEP); 12638275SEric Cheng 12648275SEric Cheng for (i = 0; i < capab.mcm_naddr; i++) { 12658275SEric Cheng bcopy(addr + i * MAXMACADDRLEN, 12668275SEric Cheng mip->mi_factory_addr[i].mfa_addr, 12678275SEric Cheng mip->mi_type->mt_addr_length); 12688275SEric Cheng mip->mi_factory_addr[i].mfa_in_use = B_FALSE; 126956Smeem } 127056Smeem 12718275SEric Cheng kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN); 12728275SEric Cheng } 12738275SEric Cheng 12748275SEric Cheng void 12758275SEric Cheng mac_addr_factory_fini(mac_impl_t *mip) 12768275SEric Cheng { 12778275SEric Cheng if (mip->mi_factory_addr == NULL) { 12788275SEric Cheng ASSERT(mip->mi_factory_addr_num == 0); 12798275SEric Cheng return; 12808275SEric Cheng } 12818275SEric Cheng 12828275SEric Cheng kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num * 12838275SEric Cheng sizeof (mac_factory_addr_t)); 12848275SEric Cheng 12858275SEric Cheng mip->mi_factory_addr = NULL; 12868275SEric Cheng mip->mi_factory_addr_num = 0; 12870Sstevel@tonic-gate } 12880Sstevel@tonic-gate 12895084Sjohnlev /* 12908275SEric Cheng * Reserve a factory MAC address. If *slot is set to -1, the function 12918275SEric Cheng * attempts to reserve any of the available factory MAC addresses and 12928275SEric Cheng * returns the reserved slot id. If no slots are available, the function 12938275SEric Cheng * returns ENOSPC. If *slot is not set to -1, the function reserves 12948275SEric Cheng * the specified slot if it is available, or returns EBUSY is the slot 12958275SEric Cheng * is already used. 
Returns ENOTSUP if the underlying MAC does not 12968275SEric Cheng * support multiple factory addresses. If the slot number is not -1 but 12978275SEric Cheng * is invalid, returns EINVAL. 12988275SEric Cheng */ 12998275SEric Cheng int 13008275SEric Cheng mac_addr_factory_reserve(mac_client_handle_t mch, int *slot) 13018275SEric Cheng { 13028275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13038275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13048275SEric Cheng int i, ret = 0; 13058275SEric Cheng 13068275SEric Cheng i_mac_perim_enter(mip); 13078275SEric Cheng /* 13088275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13098275SEric Cheng * view of the factory addresses 13108275SEric Cheng */ 13118275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13128275SEric Cheng 13138275SEric Cheng if (mip->mi_factory_addr_num == 0) { 13148275SEric Cheng ret = ENOTSUP; 13158275SEric Cheng goto bail; 13168275SEric Cheng } 13178275SEric Cheng 13188275SEric Cheng if (*slot != -1) { 13198275SEric Cheng /* check the specified slot */ 13208275SEric Cheng if (*slot < 1 || *slot > mip->mi_factory_addr_num) { 13218275SEric Cheng ret = EINVAL; 13228275SEric Cheng goto bail; 13238275SEric Cheng } 13248275SEric Cheng if (mip->mi_factory_addr[*slot-1].mfa_in_use) { 13258275SEric Cheng ret = EBUSY; 13268275SEric Cheng goto bail; 13278275SEric Cheng } 13288275SEric Cheng } else { 13298275SEric Cheng /* pick the next available slot */ 13308275SEric Cheng for (i = 0; i < mip->mi_factory_addr_num; i++) { 13318275SEric Cheng if (!mip->mi_factory_addr[i].mfa_in_use) 13328275SEric Cheng break; 13338275SEric Cheng } 13348275SEric Cheng 13358275SEric Cheng if (i == mip->mi_factory_addr_num) { 13368275SEric Cheng ret = ENOSPC; 13378275SEric Cheng goto bail; 13388275SEric Cheng } 13398275SEric Cheng *slot = i+1; 13408275SEric Cheng } 13418275SEric Cheng 13428275SEric Cheng mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE; 13438275SEric Cheng 
mip->mi_factory_addr[*slot-1].mfa_client = mcip; 13448275SEric Cheng 13458275SEric Cheng bail: 13468275SEric Cheng rw_exit(&mip->mi_rw_lock); 13478275SEric Cheng i_mac_perim_exit(mip); 13488275SEric Cheng return (ret); 13498275SEric Cheng } 13508275SEric Cheng 13518275SEric Cheng /* 13528275SEric Cheng * Release the specified factory MAC address slot. 13535084Sjohnlev */ 13548275SEric Cheng void 13558275SEric Cheng mac_addr_factory_release(mac_client_handle_t mch, uint_t slot) 13568275SEric Cheng { 13578275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 13588275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 13598275SEric Cheng 13608275SEric Cheng i_mac_perim_enter(mip); 13618275SEric Cheng /* 13628275SEric Cheng * Protect against concurrent readers that may need a self-consistent 13638275SEric Cheng * view of the factory addresses 13648275SEric Cheng */ 13658275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 13668275SEric Cheng 13678275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13688275SEric Cheng ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use); 13698275SEric Cheng 13708275SEric Cheng mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE; 13718275SEric Cheng 13728275SEric Cheng rw_exit(&mip->mi_rw_lock); 13738275SEric Cheng i_mac_perim_exit(mip); 13748275SEric Cheng } 13758275SEric Cheng 13768275SEric Cheng /* 13778275SEric Cheng * Stores in mac_addr the value of the specified MAC address. Returns 13788275SEric Cheng * 0 on success, or EINVAL if the slot number is not valid for the MAC. 13798275SEric Cheng * The caller must provide a string of at least MAXNAMELEN bytes. 
13808275SEric Cheng */ 13818275SEric Cheng void 13828275SEric Cheng mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr, 13838275SEric Cheng uint_t *addr_len, char *client_name, boolean_t *in_use_arg) 13845084Sjohnlev { 13858275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 13868275SEric Cheng boolean_t in_use; 13878275SEric Cheng 13888275SEric Cheng ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); 13898275SEric Cheng 13908275SEric Cheng /* 13918275SEric Cheng * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter 13928275SEric Cheng * and mi_rw_lock 13938275SEric Cheng */ 13948275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 13958275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN); 13968275SEric Cheng *addr_len = mip->mi_type->mt_addr_length; 13978275SEric Cheng in_use = mip->mi_factory_addr[slot-1].mfa_in_use; 13988275SEric Cheng if (in_use && client_name != NULL) { 13998275SEric Cheng bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name, 14008275SEric Cheng client_name, MAXNAMELEN); 14018275SEric Cheng } 14028275SEric Cheng if (in_use_arg != NULL) 14038275SEric Cheng *in_use_arg = in_use; 14048275SEric Cheng rw_exit(&mip->mi_rw_lock); 14058275SEric Cheng } 14068275SEric Cheng 14078275SEric Cheng /* 14088275SEric Cheng * Returns the number of factory MAC addresses (in addition to the 14098275SEric Cheng * primary MAC address), 0 if the underlying MAC doesn't support 14108275SEric Cheng * that feature. 
14118275SEric Cheng */ 14128275SEric Cheng uint_t 14138275SEric Cheng mac_addr_factory_num(mac_handle_t mh) 14148275SEric Cheng { 14158275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 14168275SEric Cheng 14178275SEric Cheng return (mip->mi_factory_addr_num); 14188275SEric Cheng } 14198275SEric Cheng 14208275SEric Cheng 14218275SEric Cheng void 14228275SEric Cheng mac_rx_group_unmark(mac_group_t *grp, uint_t flag) 14238275SEric Cheng { 14248275SEric Cheng mac_ring_t *ring; 14258275SEric Cheng 14268275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) 14278275SEric Cheng ring->mr_flag &= ~flag; 14285084Sjohnlev } 14295084Sjohnlev 14305084Sjohnlev /* 14318275SEric Cheng * The following mac_hwrings_xxx() functions are private mac client functions 14328275SEric Cheng * used by the aggr driver to access and control the underlying HW Rx group 14338275SEric Cheng * and rings. In this case, the aggr driver has exclusive control of the 14348275SEric Cheng * underlying HW Rx group/rings, it calls the following functions to 14358275SEric Cheng * start/stop the HW Rx rings, disable/enable polling, add/remove mac' 14368275SEric Cheng * addresses, or set up the Rx callback. 
14375084Sjohnlev */ 14388275SEric Cheng /* ARGSUSED */ 14398275SEric Cheng static void 14408275SEric Cheng mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs, 14418275SEric Cheng mblk_t *mp_chain, boolean_t loopback) 14420Sstevel@tonic-gate { 14438275SEric Cheng mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)srs; 14448275SEric Cheng mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; 14458275SEric Cheng mac_direct_rx_t proc; 14468275SEric Cheng void *arg1; 14478275SEric Cheng mac_resource_handle_t arg2; 14488275SEric Cheng 14498275SEric Cheng proc = srs_rx->sr_func; 14508275SEric Cheng arg1 = srs_rx->sr_arg1; 14518275SEric Cheng arg2 = mac_srs->srs_mrh; 14528275SEric Cheng 14538275SEric Cheng proc(arg1, arg2, mp_chain, NULL); 14540Sstevel@tonic-gate } 14550Sstevel@tonic-gate 14568275SEric Cheng /* 14578275SEric Cheng * This function is called to get the list of HW rings that are reserved by 14588275SEric Cheng * an exclusive mac client. 14598275SEric Cheng * 14608275SEric Cheng * Return value: the number of HW rings. 14618275SEric Cheng */ 14628275SEric Cheng int 14638275SEric Cheng mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, 14648275SEric Cheng mac_ring_handle_t *hwrh) 14650Sstevel@tonic-gate { 14668275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 14678275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 14688275SEric Cheng mac_group_t *grp = flent->fe_rx_ring_group; 14698275SEric Cheng mac_ring_t *ring; 14708275SEric Cheng int cnt = 0; 14710Sstevel@tonic-gate 14720Sstevel@tonic-gate /* 14738275SEric Cheng * The mac client did not reserve any RX group, return directly. 14748275SEric Cheng * This is probably because the underlying MAC does not support 14758275SEric Cheng * any RX groups. 
14768275SEric Cheng */ 14778275SEric Cheng *hwgh = NULL; 14788275SEric Cheng if (grp == NULL) 14798275SEric Cheng return (0); 14808275SEric Cheng 14818275SEric Cheng /* 14828275SEric Cheng * This RX group must be reserved by this mac client. 14830Sstevel@tonic-gate */ 14848275SEric Cheng ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && 14858275SEric Cheng (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp)))); 14868275SEric Cheng 14878275SEric Cheng for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) { 14888275SEric Cheng ASSERT(cnt < MAX_RINGS_PER_GROUP); 14898275SEric Cheng hwrh[cnt++] = (mac_ring_handle_t)ring; 14908275SEric Cheng } 14918275SEric Cheng *hwgh = (mac_group_handle_t)grp; 14928275SEric Cheng return (cnt); 14938275SEric Cheng } 14948275SEric Cheng 14958275SEric Cheng /* 14968275SEric Cheng * Setup the RX callback of the mac client which exclusively controls HW ring. 14978275SEric Cheng */ 14988275SEric Cheng void 14998275SEric Cheng mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) 15008275SEric Cheng { 15018275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15028275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15038275SEric Cheng 15048275SEric Cheng mac_srs->srs_mrh = prh; 15058275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; 15060Sstevel@tonic-gate } 15070Sstevel@tonic-gate 15080Sstevel@tonic-gate void 15098275SEric Cheng mac_hwring_teardown(mac_ring_handle_t hwrh) 15108275SEric Cheng { 15118275SEric Cheng mac_ring_t *hw_ring = (mac_ring_t *)hwrh; 15128275SEric Cheng mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; 15138275SEric Cheng 15148275SEric Cheng mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; 15158275SEric Cheng mac_srs->srs_mrh = NULL; 15168275SEric Cheng } 15178275SEric Cheng 15188275SEric Cheng int 15198275SEric Cheng mac_hwring_disable_intr(mac_ring_handle_t rh) 15200Sstevel@tonic-gate { 15218275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 
15228275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15238275SEric Cheng 15248275SEric Cheng return (intr->mi_disable(intr->mi_handle)); 15258275SEric Cheng } 15268275SEric Cheng 15278275SEric Cheng int 15288275SEric Cheng mac_hwring_enable_intr(mac_ring_handle_t rh) 15298275SEric Cheng { 15308275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15318275SEric Cheng mac_intr_t *intr = &rr_ring->mr_info.mri_intr; 15328275SEric Cheng 15338275SEric Cheng return (intr->mi_enable(intr->mi_handle)); 15348275SEric Cheng } 15358275SEric Cheng 15368275SEric Cheng int 15378275SEric Cheng mac_hwring_start(mac_ring_handle_t rh) 15388275SEric Cheng { 15398275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15408275SEric Cheng 15418275SEric Cheng MAC_RING_UNMARK(rr_ring, MR_QUIESCE); 15428275SEric Cheng return (0); 15430Sstevel@tonic-gate } 15440Sstevel@tonic-gate 15450Sstevel@tonic-gate void 15468275SEric Cheng mac_hwring_stop(mac_ring_handle_t rh) 15478275SEric Cheng { 15488275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15498275SEric Cheng 15508275SEric Cheng mac_rx_ring_quiesce(rr_ring, MR_QUIESCE); 15518275SEric Cheng } 15528275SEric Cheng 15538275SEric Cheng mblk_t * 15548275SEric Cheng mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) 15558275SEric Cheng { 15568275SEric Cheng mac_ring_t *rr_ring = (mac_ring_t *)rh; 15578275SEric Cheng mac_ring_info_t *info = &rr_ring->mr_info; 15588275SEric Cheng 15598275SEric Cheng return (info->mri_poll(info->mri_driver, bytes_to_pickup)); 15608275SEric Cheng } 15618275SEric Cheng 15628275SEric Cheng int 15638275SEric Cheng mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) 15648275SEric Cheng { 15658275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15668275SEric Cheng 15678275SEric Cheng return (mac_group_addmac(group, addr)); 15688275SEric Cheng } 15698275SEric Cheng 15708275SEric Cheng int 15718275SEric Cheng mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) 
15728275SEric Cheng { 15738275SEric Cheng mac_group_t *group = (mac_group_t *)gh; 15748275SEric Cheng 15758275SEric Cheng return (mac_group_remmac(group, addr)); 15768275SEric Cheng } 15778275SEric Cheng 15788275SEric Cheng /* 15798275SEric Cheng * Set the RX group to be shared/reserved. Note that the group must be 15808275SEric Cheng * started/stopped outside of this function. 15818275SEric Cheng */ 15828275SEric Cheng void 15838275SEric Cheng mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) 15840Sstevel@tonic-gate { 15858275SEric Cheng /* 15868275SEric Cheng * If there is no change in the group state, just return. 15878275SEric Cheng */ 15888275SEric Cheng if (grp->mrg_state == state) 15898275SEric Cheng return; 15908275SEric Cheng 15918275SEric Cheng switch (state) { 15928275SEric Cheng case MAC_GROUP_STATE_RESERVED: 15938275SEric Cheng /* 15948275SEric Cheng * Successfully reserved the group. 15958275SEric Cheng * 15968275SEric Cheng * Given that there is an exclusive client controlling this 15978275SEric Cheng * group, we enable the group level polling when available, 15988275SEric Cheng * so that SRSs get to turn on/off individual rings they's 15998275SEric Cheng * assigned to. 16008275SEric Cheng */ 16018275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16028275SEric Cheng 16038275SEric Cheng if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) 16048275SEric Cheng GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16058275SEric Cheng 16068275SEric Cheng break; 16078275SEric Cheng 16088275SEric Cheng case MAC_GROUP_STATE_SHARED: 16098275SEric Cheng /* 16108275SEric Cheng * Set all rings of this group to software classified. 16118275SEric Cheng * If the group has an overriding interrupt, then re-enable it. 
16128275SEric Cheng */ 16138275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 16148275SEric Cheng 16158275SEric Cheng if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) 16168275SEric Cheng GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 16178275SEric Cheng 16188275SEric Cheng /* The ring is not available for reservations any more */ 16198275SEric Cheng break; 16208275SEric Cheng 16218275SEric Cheng case MAC_GROUP_STATE_REGISTERED: 16228275SEric Cheng /* Also callable from mac_register, perim is not held */ 16238275SEric Cheng break; 16248275SEric Cheng 16258275SEric Cheng default: 16268275SEric Cheng ASSERT(B_FALSE); 16278275SEric Cheng break; 16288275SEric Cheng } 16298275SEric Cheng 16308275SEric Cheng grp->mrg_state = state; 16318275SEric Cheng } 16328275SEric Cheng 16338275SEric Cheng /* 16348275SEric Cheng * Quiesce future hardware classified packets for the specified Rx ring 16358275SEric Cheng */ 16368275SEric Cheng static void 16378275SEric Cheng mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag) 16388275SEric Cheng { 16398275SEric Cheng ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER); 16408275SEric Cheng ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE); 16418275SEric Cheng 16428275SEric Cheng mutex_enter(&rx_ring->mr_lock); 16438275SEric Cheng rx_ring->mr_flag |= ring_flag; 16448275SEric Cheng while (rx_ring->mr_refcnt != 0) 16458275SEric Cheng cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock); 16468275SEric Cheng mutex_exit(&rx_ring->mr_lock); 16470Sstevel@tonic-gate } 16480Sstevel@tonic-gate 16494913Sethindra /* 16508275SEric Cheng * Please see mac_tx for details about the per cpu locking scheme 16514913Sethindra */ 16528275SEric Cheng static void 16538275SEric Cheng mac_tx_lock_all(mac_client_impl_t *mcip) 16548275SEric Cheng { 16558275SEric Cheng int i; 16568275SEric Cheng 16578275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16588275SEric Cheng mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16598275SEric Cheng } 
16608275SEric Cheng 16618275SEric Cheng static void 16628275SEric Cheng mac_tx_unlock_all(mac_client_impl_t *mcip) 16638275SEric Cheng { 16648275SEric Cheng int i; 16658275SEric Cheng 16668275SEric Cheng for (i = mac_tx_percpu_cnt; i >= 0; i--) 16678275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16688275SEric Cheng } 16698275SEric Cheng 16708275SEric Cheng static void 16718275SEric Cheng mac_tx_unlock_allbutzero(mac_client_impl_t *mcip) 16728275SEric Cheng { 16738275SEric Cheng int i; 16748275SEric Cheng 16758275SEric Cheng for (i = mac_tx_percpu_cnt; i > 0; i--) 16768275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 16778275SEric Cheng } 16788275SEric Cheng 16798275SEric Cheng static int 16808275SEric Cheng mac_tx_sum_refcnt(mac_client_impl_t *mcip) 16810Sstevel@tonic-gate { 16828275SEric Cheng int i; 16838275SEric Cheng int refcnt = 0; 16848275SEric Cheng 16858275SEric Cheng for (i = 0; i <= mac_tx_percpu_cnt; i++) 16868275SEric Cheng refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt; 16878275SEric Cheng 16888275SEric Cheng return (refcnt); 16890Sstevel@tonic-gate } 16900Sstevel@tonic-gate 16918275SEric Cheng /* 16928275SEric Cheng * Stop future Tx packets coming down from the client in preparation for 16938275SEric Cheng * quiescing the Tx side. 
This is needed for dynamic reclaim and reassignment 16948275SEric Cheng * of rings between clients 16958275SEric Cheng */ 16968275SEric Cheng void 16978275SEric Cheng mac_tx_client_block(mac_client_impl_t *mcip) 16985084Sjohnlev { 16998275SEric Cheng mac_tx_lock_all(mcip); 17008275SEric Cheng mcip->mci_tx_flag |= MCI_TX_QUIESCE; 17018275SEric Cheng while (mac_tx_sum_refcnt(mcip) != 0) { 17028275SEric Cheng mac_tx_unlock_allbutzero(mcip); 17038275SEric Cheng cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17048275SEric Cheng mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock); 17058275SEric Cheng mac_tx_lock_all(mcip); 17068275SEric Cheng } 17078275SEric Cheng mac_tx_unlock_all(mcip); 17085084Sjohnlev } 17095084Sjohnlev 17108275SEric Cheng void 17118275SEric Cheng mac_tx_client_unblock(mac_client_impl_t *mcip) 17125084Sjohnlev { 17138275SEric Cheng mac_tx_lock_all(mcip); 17148275SEric Cheng mcip->mci_tx_flag &= ~MCI_TX_QUIESCE; 17158275SEric Cheng mac_tx_unlock_all(mcip); 1716*8833SVenu.Iyer@Sun.COM /* 1717*8833SVenu.Iyer@Sun.COM * We may fail to disable flow control for the last MAC_NOTE_TX 1718*8833SVenu.Iyer@Sun.COM * notification because the MAC client is quiesced. Send the 1719*8833SVenu.Iyer@Sun.COM * notification again. 1720*8833SVenu.Iyer@Sun.COM */ 1721*8833SVenu.Iyer@Sun.COM i_mac_notify(mcip->mci_mip, MAC_NOTE_TX); 17225084Sjohnlev } 17235084Sjohnlev 17240Sstevel@tonic-gate /* 17258275SEric Cheng * Wait for an SRS to quiesce. The SRS worker will signal us when the 17268275SEric Cheng * quiesce is done. 
17278275SEric Cheng */ 17288275SEric Cheng static void 17298275SEric Cheng mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag) 17308275SEric Cheng { 17318275SEric Cheng mutex_enter(&srs->srs_lock); 17328275SEric Cheng while (!(srs->srs_state & srs_flag)) 17338275SEric Cheng cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock); 17348275SEric Cheng mutex_exit(&srs->srs_lock); 17358275SEric Cheng } 17368275SEric Cheng 17378275SEric Cheng /* 17388275SEric Cheng * Quiescing an Rx SRS is achieved by the following sequence. The protocol 17398275SEric Cheng * works bottom up by cutting off packet flow from the bottommost point in the 17408275SEric Cheng * mac, then the SRS, and then the soft rings. There are 2 use cases of this 17418275SEric Cheng * mechanism. One is a temporary quiesce of the SRS, such as say while changing 17428275SEric Cheng * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 17438275SEric Cheng * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used 17448275SEric Cheng * for the SRS and MR flags. In the former case the threads pause waiting for 17458275SEric Cheng * a restart, while in the latter case the threads exit. The Tx SRS teardown 17468275SEric Cheng * is also mostly similar to the above. 17478275SEric Cheng * 17488275SEric Cheng * 1. Stop future hardware classified packets at the lowest level in the mac. 17498275SEric Cheng * Remove any hardware classification rule (CONDEMNED case) and mark the 17508275SEric Cheng * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 17518275SEric Cheng * from increasing. Upcalls from the driver that come through hardware 17528275SEric Cheng * classification will be dropped in mac_rx from now on. Then we wait for 17538275SEric Cheng * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 17548275SEric Cheng * sure there aren't any upcall threads from the driver through hardware 17558275SEric Cheng * classification. 
In the case of SRS teardown we also remove the
 *    classification rule in the driver.
 *
 * 2. Stop future software classified packets by marking the flow entry with
 *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
 *    increasing. We also remove the flow entry from the table in the latter
 *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
 *    that indicates there aren't any active threads using that flow entry.
 *
 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
 *    SRS worker thread, and the soft ring threads are quiesced in sequence
 *    with the SRS worker thread serving as a master controller. This
 *    mechanism is explained in mac_srs_worker_quiesce().
 *
 * The restart mechanism to reactivate the SRS and softrings is explained
 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
 * restart sequence.
17720Sstevel@tonic-gate */ 17730Sstevel@tonic-gate void 17748275SEric Cheng mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 17750Sstevel@tonic-gate { 17768275SEric Cheng flow_entry_t *flent = srs->srs_flent; 17778275SEric Cheng uint_t mr_flag, srs_done_flag; 17788275SEric Cheng 17798275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 17808275SEric Cheng ASSERT(!(srs->srs_type & SRST_TX)); 17818275SEric Cheng 17828275SEric Cheng if (srs_quiesce_flag == SRS_CONDEMNED) { 17838275SEric Cheng mr_flag = MR_CONDEMNED; 17848275SEric Cheng srs_done_flag = SRS_CONDEMNED_DONE; 17858275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17868275SEric Cheng mac_srs_client_poll_disable(srs->srs_mcip, srs); 17878275SEric Cheng } else { 17888275SEric Cheng ASSERT(srs_quiesce_flag == SRS_QUIESCE); 17898275SEric Cheng mr_flag = MR_QUIESCE; 17908275SEric Cheng srs_done_flag = SRS_QUIESCE_DONE; 17918275SEric Cheng if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 17928275SEric Cheng mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 17938275SEric Cheng } 17948275SEric Cheng 17958275SEric Cheng if (srs->srs_ring != NULL) { 17968275SEric Cheng mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 17978275SEric Cheng } else { 17988275SEric Cheng /* 17998275SEric Cheng * SRS is driven by software classification. In case 18008275SEric Cheng * of CONDEMNED, the top level teardown functions will 18018275SEric Cheng * deal with flow removal. 18028275SEric Cheng */ 18038275SEric Cheng if (srs_quiesce_flag != SRS_CONDEMNED) { 18048275SEric Cheng FLOW_MARK(flent, FE_QUIESCE); 18058275SEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 18068275SEric Cheng } 18078275SEric Cheng } 18080Sstevel@tonic-gate 18090Sstevel@tonic-gate /* 18108275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 18118275SEric Cheng * SRS quiesce to complete. 
The SRS worker thread will wake us 18128275SEric Cheng * up when the quiesce is complete 18134913Sethindra */ 18148275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 18158275SEric Cheng mac_srs_quiesce_wait(srs, srs_done_flag); 18164913Sethindra } 18174913Sethindra 18184913Sethindra /* 18198275SEric Cheng * Remove an SRS. 18204913Sethindra */ 18214913Sethindra void 18228275SEric Cheng mac_rx_srs_remove(mac_soft_ring_set_t *srs) 18234913Sethindra { 18248275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18258275SEric Cheng int i; 18268275SEric Cheng 18278275SEric Cheng mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 18288275SEric Cheng /* 18298275SEric Cheng * Locate and remove our entry in the fe_rx_srs[] array, and 18308275SEric Cheng * adjust the fe_rx_srs array entries and array count by 18318275SEric Cheng * moving the last entry into the vacated spot. 18328275SEric Cheng */ 18338275SEric Cheng mutex_enter(&flent->fe_lock); 18348275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 18358275SEric Cheng if (flent->fe_rx_srs[i] == srs) 18368275SEric Cheng break; 18374913Sethindra } 18388275SEric Cheng 18398275SEric Cheng ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 18408275SEric Cheng if (i != flent->fe_rx_srs_cnt - 1) { 18418275SEric Cheng flent->fe_rx_srs[i] = 18428275SEric Cheng flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 18438275SEric Cheng i = flent->fe_rx_srs_cnt - 1; 18448275SEric Cheng } 18458275SEric Cheng 18468275SEric Cheng flent->fe_rx_srs[i] = NULL; 18478275SEric Cheng flent->fe_rx_srs_cnt--; 18488275SEric Cheng mutex_exit(&flent->fe_lock); 18498275SEric Cheng 18508275SEric Cheng mac_srs_free(srs); 18510Sstevel@tonic-gate } 18520Sstevel@tonic-gate 18538275SEric Cheng static void 18548275SEric Cheng mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 18550Sstevel@tonic-gate { 18568275SEric Cheng mutex_enter(&srs->srs_lock); 18578275SEric Cheng srs->srs_state &= ~flag; 18588275SEric Cheng mutex_exit(&srs->srs_lock); 18598275SEric Cheng } 
18608275SEric Cheng 18618275SEric Cheng void 18628275SEric Cheng mac_rx_srs_restart(mac_soft_ring_set_t *srs) 18638275SEric Cheng { 18648275SEric Cheng flow_entry_t *flent = srs->srs_flent; 18658275SEric Cheng mac_ring_t *mr; 18668275SEric Cheng 18678275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 18688275SEric Cheng ASSERT((srs->srs_type & SRST_TX) == 0); 18690Sstevel@tonic-gate 18700Sstevel@tonic-gate /* 18718275SEric Cheng * This handles a change in the number of SRSs between the quiesce and 18728275SEric Cheng * and restart operation of a flow. 18738275SEric Cheng */ 18748275SEric Cheng if (!SRS_QUIESCED(srs)) 18758275SEric Cheng return; 18768275SEric Cheng 18778275SEric Cheng /* 18788275SEric Cheng * Signal the SRS to restart itself. Wait for the restart to complete 18798275SEric Cheng * Note that we only restart the SRS if it is not marked as 18808275SEric Cheng * permanently quiesced. 18810Sstevel@tonic-gate */ 18828275SEric Cheng if (!SRS_QUIESCED_PERMANENT(srs)) { 18838275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 18848275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 18858275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 18868275SEric Cheng 18878275SEric Cheng mac_srs_client_poll_restart(srs->srs_mcip, srs); 18888275SEric Cheng } 18898275SEric Cheng 18908275SEric Cheng /* Finally clear the flags to let the packets in */ 18918275SEric Cheng mr = srs->srs_ring; 18928275SEric Cheng if (mr != NULL) { 18938275SEric Cheng MAC_RING_UNMARK(mr, MR_QUIESCE); 18948275SEric Cheng /* In case the ring was stopped, safely restart it */ 18958275SEric Cheng (void) mac_start_ring(mr); 18968275SEric Cheng } else { 18978275SEric Cheng FLOW_UNMARK(flent, FE_QUIESCE); 18988275SEric Cheng } 18998275SEric Cheng } 19008275SEric Cheng 19018275SEric Cheng /* 19028275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 19038275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem. 
19048275SEric Cheng */ 19058275SEric Cheng /* ARGSUSED */ 19068275SEric Cheng int 19078275SEric Cheng mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 19088275SEric Cheng { 19098275SEric Cheng int i; 19108275SEric Cheng 19118275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19128275SEric Cheng mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 19138275SEric Cheng SRS_QUIESCE); 19148275SEric Cheng } 19158275SEric Cheng return (0); 19160Sstevel@tonic-gate } 19170Sstevel@tonic-gate 19180Sstevel@tonic-gate /* 19198275SEric Cheng * Restart a flow and associated Rx SRS that has been quiesced temporarily 19208275SEric Cheng * Please see block comment above mac_rx_classify_flow_rem 19210Sstevel@tonic-gate */ 19228275SEric Cheng /* ARGSUSED */ 19238275SEric Cheng int 19248275SEric Cheng mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 19258275SEric Cheng { 19268275SEric Cheng int i; 19278275SEric Cheng 19288275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) 19298275SEric Cheng mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 19308275SEric Cheng 19318275SEric Cheng return (0); 19328275SEric Cheng } 19338275SEric Cheng 19340Sstevel@tonic-gate void 19358275SEric Cheng mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 19360Sstevel@tonic-gate { 19378275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19388275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 19398275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19408275SEric Cheng mac_soft_ring_set_t *mac_srs; 19418275SEric Cheng int i; 19428275SEric Cheng 19438275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19448275SEric Cheng 19458275SEric Cheng if (flent == NULL) 19468275SEric Cheng return; 19478275SEric Cheng 19488275SEric Cheng for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 19498275SEric Cheng mac_srs = flent->fe_rx_srs[i]; 19508275SEric Cheng mutex_enter(&mac_srs->srs_lock); 19518275SEric Cheng if (on) 19528275SEric Cheng 
mac_srs->srs_state |= SRS_QUIESCE_PERM; 19538275SEric Cheng else 19548275SEric Cheng mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 19558275SEric Cheng mutex_exit(&mac_srs->srs_lock); 19560Sstevel@tonic-gate } 19578275SEric Cheng } 19588275SEric Cheng 19598275SEric Cheng void 19608275SEric Cheng mac_rx_client_quiesce(mac_client_handle_t mch) 19618275SEric Cheng { 19628275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19638275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19648275SEric Cheng 19658275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19668275SEric Cheng 19678275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19688275SEric Cheng (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 19698275SEric Cheng NULL); 19708275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19718275SEric Cheng mac_rx_classify_flow_quiesce, NULL); 19728275SEric Cheng } 19730Sstevel@tonic-gate } 19740Sstevel@tonic-gate 19750Sstevel@tonic-gate void 19768275SEric Cheng mac_rx_client_restart(mac_client_handle_t mch) 19770Sstevel@tonic-gate { 19788275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 19798275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 19808275SEric Cheng 19818275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 19828275SEric Cheng 19838275SEric Cheng if (MCIP_DATAPATH_SETUP(mcip)) { 19848275SEric Cheng (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 19858275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 19868275SEric Cheng mac_rx_classify_flow_restart, NULL); 19878275SEric Cheng } 19888275SEric Cheng } 19898275SEric Cheng 19908275SEric Cheng /* 19918275SEric Cheng * This function only quiesces the Tx SRS and softring worker threads. Callers 19928275SEric Cheng * need to make sure that there aren't any mac client threads doing current or 19938275SEric Cheng * future transmits in the mac before calling this function. 
19948275SEric Cheng */ 19958275SEric Cheng void 19968275SEric Cheng mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 19978275SEric Cheng { 19988275SEric Cheng mac_client_impl_t *mcip = srs->srs_mcip; 19998275SEric Cheng 20008275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20018275SEric Cheng 20028275SEric Cheng ASSERT(srs->srs_type & SRST_TX); 20038275SEric Cheng ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 20048275SEric Cheng srs_quiesce_flag == SRS_QUIESCE); 20050Sstevel@tonic-gate 20060Sstevel@tonic-gate /* 20078275SEric Cheng * Signal the SRS to quiesce itself, and then cv_wait for the 20088275SEric Cheng * SRS quiesce to complete. The SRS worker thread will wake us 20098275SEric Cheng * up when the quiesce is complete 20100Sstevel@tonic-gate */ 20118275SEric Cheng mac_srs_signal(srs, srs_quiesce_flag); 20128275SEric Cheng mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 20138275SEric Cheng SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 20148275SEric Cheng } 20158275SEric Cheng 20168275SEric Cheng void 20178275SEric Cheng mac_tx_srs_restart(mac_soft_ring_set_t *srs) 20188275SEric Cheng { 20198275SEric Cheng /* 20208275SEric Cheng * Resizing the fanout could result in creation of new SRSs. 20218275SEric Cheng * They may not necessarily be in the quiesced state in which 20228275SEric Cheng * case it need be restarted 20238275SEric Cheng */ 20248275SEric Cheng if (!SRS_QUIESCED(srs)) 20258275SEric Cheng return; 20268275SEric Cheng 20278275SEric Cheng mac_srs_signal(srs, SRS_RESTART); 20288275SEric Cheng mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 20298275SEric Cheng mac_srs_clear_flag(srs, SRS_RESTART_DONE); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate 20320Sstevel@tonic-gate /* 20338275SEric Cheng * Temporary quiesce of a flow and associated Rx SRS. 
20348275SEric Cheng * Please see block comment above mac_rx_srs_quiesce 20350Sstevel@tonic-gate */ 20368275SEric Cheng /* ARGSUSED */ 20378275SEric Cheng int 20388275SEric Cheng mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 20390Sstevel@tonic-gate { 20402311Sseb /* 20418275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20428275SEric Cheng * not plumbed 20432311Sseb */ 20448275SEric Cheng if (flent->fe_tx_srs != NULL) 20458275SEric Cheng mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 20468275SEric Cheng return (0); 20478275SEric Cheng } 20488275SEric Cheng 20498275SEric Cheng /* ARGSUSED */ 20508275SEric Cheng int 20518275SEric Cheng mac_tx_flow_restart(flow_entry_t *flent, void *arg) 20528275SEric Cheng { 20538275SEric Cheng /* 20548275SEric Cheng * The fe_tx_srs is null for a subflow on an interface that is 20558275SEric Cheng * not plumbed 20568275SEric Cheng */ 20578275SEric Cheng if (flent->fe_tx_srs != NULL) 20588275SEric Cheng mac_tx_srs_restart(flent->fe_tx_srs); 20598275SEric Cheng return (0); 20602311Sseb } 20612311Sseb 20622311Sseb void 20638275SEric Cheng mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) 20648275SEric Cheng { 20658275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20668275SEric Cheng 20678275SEric Cheng mac_tx_client_block(mcip); 20688275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20698275SEric Cheng mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 20708275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20718275SEric Cheng mac_tx_flow_quiesce, NULL); 20728275SEric Cheng } 20738275SEric Cheng } 20748275SEric Cheng 20758275SEric Cheng void 20768275SEric Cheng mac_tx_client_restart(mac_client_impl_t *mcip) 20772311Sseb { 20788275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20798275SEric Cheng 20808275SEric Cheng mac_tx_client_unblock(mcip); 20818275SEric Cheng if (MCIP_TX_SRS(mcip) != NULL) { 20828275SEric Cheng 
mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 20838275SEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 20848275SEric Cheng mac_tx_flow_restart, NULL); 20858275SEric Cheng } 20868275SEric Cheng } 20878275SEric Cheng 20888275SEric Cheng void 20898275SEric Cheng mac_tx_client_flush(mac_client_impl_t *mcip) 20908275SEric Cheng { 20918275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 20928275SEric Cheng 20938275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 20948275SEric Cheng mac_tx_client_restart(mcip); 20958275SEric Cheng } 20968275SEric Cheng 20978275SEric Cheng void 20988275SEric Cheng mac_client_quiesce(mac_client_impl_t *mcip) 20998275SEric Cheng { 21008275SEric Cheng mac_rx_client_quiesce((mac_client_handle_t)mcip); 21018275SEric Cheng mac_tx_client_quiesce(mcip, SRS_QUIESCE); 21028275SEric Cheng } 21038275SEric Cheng 21048275SEric Cheng void 21058275SEric Cheng mac_client_restart(mac_client_impl_t *mcip) 21068275SEric Cheng { 21078275SEric Cheng mac_rx_client_restart((mac_client_handle_t)mcip); 21088275SEric Cheng mac_tx_client_restart(mcip); 21092311Sseb } 21102311Sseb 21112311Sseb /* 21125895Syz147064 * Allocate a minor number. 21135895Syz147064 */ 21145895Syz147064 minor_t 21155895Syz147064 mac_minor_hold(boolean_t sleep) 21165895Syz147064 { 21175895Syz147064 minor_t minor; 21185895Syz147064 21195895Syz147064 /* 21205895Syz147064 * Grab a value from the arena. 
21215895Syz147064 */ 21225895Syz147064 atomic_add_32(&minor_count, 1); 21235895Syz147064 21245895Syz147064 if (sleep) 21255895Syz147064 minor = (uint_t)id_alloc(minor_ids); 21265895Syz147064 else 21275895Syz147064 minor = (uint_t)id_alloc_nosleep(minor_ids); 21285895Syz147064 21295895Syz147064 if (minor == 0) { 21305895Syz147064 atomic_add_32(&minor_count, -1); 21315895Syz147064 return (0); 21325895Syz147064 } 21335895Syz147064 21345895Syz147064 return (minor); 21355895Syz147064 } 21365895Syz147064 21375895Syz147064 /* 21385895Syz147064 * Release a previously allocated minor number. 21395895Syz147064 */ 21405895Syz147064 void 21415895Syz147064 mac_minor_rele(minor_t minor) 21425895Syz147064 { 21435895Syz147064 /* 21445895Syz147064 * Return the value to the arena. 21455895Syz147064 */ 21465895Syz147064 id_free(minor_ids, minor); 21475895Syz147064 atomic_add_32(&minor_count, -1); 21485895Syz147064 } 21495895Syz147064 21505895Syz147064 uint32_t 21515895Syz147064 mac_no_notification(mac_handle_t mh) 21525895Syz147064 { 21535895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 21545895Syz147064 return (mip->mi_unsup_note); 21555895Syz147064 } 21565895Syz147064 21575895Syz147064 /* 21588275SEric Cheng * Prevent any new opens of this mac in preparation for unregister 21592311Sseb */ 21602311Sseb int 21618275SEric Cheng i_mac_disable(mac_impl_t *mip) 21622311Sseb { 21638275SEric Cheng mac_client_impl_t *mcip; 21648275SEric Cheng 21658275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 21668275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 21678275SEric Cheng /* Already disabled, return success */ 21688275SEric Cheng rw_exit(&i_mac_impl_lock); 21698275SEric Cheng return (0); 21705895Syz147064 } 21712311Sseb /* 21728275SEric Cheng * See if there are any other references to this mac_t (e.g., VLAN's). 21738275SEric Cheng * If so return failure. 
If all the other checks below pass, then 21748275SEric Cheng * set mi_disabled atomically under the i_mac_impl_lock to prevent 21758275SEric Cheng * any new VLAN's from being created or new mac client opens of this 21768275SEric Cheng * mac end point. 21772311Sseb */ 21788275SEric Cheng if (mip->mi_ref > 0) { 21798275SEric Cheng rw_exit(&i_mac_impl_lock); 21808275SEric Cheng return (EBUSY); 21812311Sseb } 21822311Sseb 21832311Sseb /* 21848275SEric Cheng * mac clients must delete all multicast groups they join before 21858275SEric Cheng * closing. bcast groups are reference counted, the last client 21868275SEric Cheng * to delete the group will wait till the group is physically 21878275SEric Cheng * deleted. Since all clients have closed this mac end point 21888275SEric Cheng * mi_bcast_ngrps must be zero at this point 21892311Sseb */ 21908275SEric Cheng ASSERT(mip->mi_bcast_ngrps == 0); 21915009Sgd78059 21925009Sgd78059 /* 21938275SEric Cheng * Don't let go of this if it has some flows. 21948275SEric Cheng * All other code guarantees no flows are added to a disabled 21958275SEric Cheng * mac, therefore it is sufficient to check for the flow table 21968275SEric Cheng * only here. 
21972311Sseb */ 21988275SEric Cheng mcip = mac_primary_client_handle(mip); 21998275SEric Cheng if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 22008275SEric Cheng rw_exit(&i_mac_impl_lock); 22018275SEric Cheng return (ENOTEMPTY); 22025895Syz147064 } 22035895Syz147064 22048275SEric Cheng mip->mi_state_flags |= MIS_DISABLED; 22051852Syz147064 rw_exit(&i_mac_impl_lock); 2206269Sericheng return (0); 22078275SEric Cheng } 22088275SEric Cheng 22098275SEric Cheng int 22108275SEric Cheng mac_disable_nowait(mac_handle_t mh) 22118275SEric Cheng { 22128275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22138275SEric Cheng int err; 22148275SEric Cheng 22158275SEric Cheng if ((err = i_mac_perim_enter_nowait(mip)) != 0) 22168275SEric Cheng return (err); 22178275SEric Cheng err = i_mac_disable(mip); 22188275SEric Cheng i_mac_perim_exit(mip); 2219269Sericheng return (err); 22200Sstevel@tonic-gate } 22210Sstevel@tonic-gate 22220Sstevel@tonic-gate int 22235084Sjohnlev mac_disable(mac_handle_t mh) 22240Sstevel@tonic-gate { 22258275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 22268275SEric Cheng int err; 22278275SEric Cheng 22288275SEric Cheng i_mac_perim_enter(mip); 22298275SEric Cheng err = i_mac_disable(mip); 22308275SEric Cheng i_mac_perim_exit(mip); 22315084Sjohnlev 22320Sstevel@tonic-gate /* 22338275SEric Cheng * Clean up notification thread and wait for it to exit. 22345009Sgd78059 */ 22358275SEric Cheng if (err == 0) 22368275SEric Cheng i_mac_notify_exit(mip); 22378275SEric Cheng 22388275SEric Cheng return (err); 22390Sstevel@tonic-gate } 22400Sstevel@tonic-gate 22414913Sethindra /* 22428275SEric Cheng * Called when the MAC instance has a non empty flow table, to de-multiplex 22438275SEric Cheng * incoming packets to the right flow. 22448275SEric Cheng * The MAC's rw lock is assumed held as a READER. 
22454913Sethindra */ 22468275SEric Cheng /* ARGSUSED */ 22478275SEric Cheng static mblk_t * 22488275SEric Cheng mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 22490Sstevel@tonic-gate { 22508275SEric Cheng flow_entry_t *flent = NULL; 22518275SEric Cheng uint_t flags = FLOW_INBOUND; 22528275SEric Cheng int err; 22534913Sethindra 22544913Sethindra /* 22558275SEric Cheng * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 22568275SEric Cheng * to mac_flow_lookup() so that the VLAN packets can be successfully 22578275SEric Cheng * passed to the non-VLAN aggregation flows. 22588275SEric Cheng * 22598275SEric Cheng * Note that there is possibly a race between this and 22608275SEric Cheng * mac_unicast_remove/add() and VLAN packets could be incorrectly 22618275SEric Cheng * classified to non-VLAN flows of non-aggregation mac clients. These 22628275SEric Cheng * VLAN packets will be then filtered out by the mac module. 22634913Sethindra */ 22648275SEric Cheng if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 22658275SEric Cheng flags |= FLOW_IGNORE_VLAN; 22668275SEric Cheng 22678275SEric Cheng err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 22688275SEric Cheng if (err != 0) { 22698275SEric Cheng /* no registered receive function */ 22708275SEric Cheng return (mp); 22718275SEric Cheng } else { 22728275SEric Cheng mac_client_impl_t *mcip; 22734913Sethindra 22744913Sethindra /* 22758275SEric Cheng * This flent might just be an additional one on the MAC client, 22768275SEric Cheng * i.e. for classification purposes (different fdesc), however 22778275SEric Cheng * the resources, SRS et. al., are in the mci_flent, so if 22788275SEric Cheng * this isn't the mci_flent, we need to get it. 
22794913Sethindra */ 22808275SEric Cheng if ((mcip = flent->fe_mcip) != NULL && 22818275SEric Cheng mcip->mci_flent != flent) { 22828275SEric Cheng FLOW_REFRELE(flent); 22838275SEric Cheng flent = mcip->mci_flent; 22848275SEric Cheng FLOW_TRY_REFHOLD(flent, err); 22858275SEric Cheng if (err != 0) 22868275SEric Cheng return (mp); 22878275SEric Cheng } 22888275SEric Cheng (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 22898275SEric Cheng B_FALSE); 22908275SEric Cheng FLOW_REFRELE(flent); 22915084Sjohnlev } 22925084Sjohnlev return (NULL); 22935084Sjohnlev } 22945084Sjohnlev 22955084Sjohnlev mblk_t * 22968275SEric Cheng mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 22970Sstevel@tonic-gate { 22982311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 22998275SEric Cheng mblk_t *bp, *bp1, **bpp, *list = NULL; 23000Sstevel@tonic-gate 23010Sstevel@tonic-gate /* 23028275SEric Cheng * We walk the chain and attempt to classify each packet. 23038275SEric Cheng * The packets that couldn't be classified will be returned 23048275SEric Cheng * back to the caller. 
23050Sstevel@tonic-gate */ 23068275SEric Cheng bp = mp_chain; 23078275SEric Cheng bpp = &list; 23088275SEric Cheng while (bp != NULL) { 23098275SEric Cheng bp1 = bp; 23108275SEric Cheng bp = bp->b_next; 23118275SEric Cheng bp1->b_next = NULL; 23128275SEric Cheng 23138275SEric Cheng if (mac_rx_classify(mip, mrh, bp1) != NULL) { 23148275SEric Cheng *bpp = bp1; 23158275SEric Cheng bpp = &bp1->b_next; 23168275SEric Cheng } 23178275SEric Cheng } 23188275SEric Cheng return (list); 23190Sstevel@tonic-gate } 23200Sstevel@tonic-gate 23218275SEric Cheng static int 23228275SEric Cheng mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 23230Sstevel@tonic-gate { 23248275SEric Cheng mac_ring_handle_t ring = arg; 23258275SEric Cheng 23268275SEric Cheng if (flent->fe_tx_srs) 23278275SEric Cheng mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 23282311Sseb return (0); 23292311Sseb } 23302311Sseb 23310Sstevel@tonic-gate void 23328275SEric Cheng i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 23338275SEric Cheng { 23348275SEric Cheng mac_client_impl_t *cclient; 23358275SEric Cheng mac_soft_ring_set_t *mac_srs; 23368275SEric Cheng 23378275SEric Cheng /* 23388275SEric Cheng * After grabbing the mi_rw_lock, the list of clients can't change. 23398275SEric Cheng * If there are any clients mi_disabled must be B_FALSE and can't 23408275SEric Cheng * get set since there are clients. If there aren't any clients we 23418275SEric Cheng * don't do anything. In any case the mip has to be valid. The driver 23428275SEric Cheng * must make sure that it goes single threaded (with respect to mac 23438275SEric Cheng * calls) and wait for all pending mac calls to finish before calling 23448275SEric Cheng * mac_unregister. 
23458275SEric Cheng */ 23468275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 23478275SEric Cheng if (mip->mi_state_flags & MIS_DISABLED) { 23488275SEric Cheng rw_exit(&i_mac_impl_lock); 23498275SEric Cheng return; 23508275SEric Cheng } 23518275SEric Cheng 23528275SEric Cheng /* 23538275SEric Cheng * Get MAC tx srs from walking mac_client_handle list. 23548275SEric Cheng */ 23558275SEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 23568275SEric Cheng for (cclient = mip->mi_clients_list; cclient != NULL; 23578275SEric Cheng cclient = cclient->mci_client_next) { 23588275SEric Cheng if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) 23598275SEric Cheng mac_tx_srs_wakeup(mac_srs, ring); 2360*8833SVenu.Iyer@Sun.COM (void) mac_flow_walk(cclient->mci_subflow_tab, 2361*8833SVenu.Iyer@Sun.COM mac_tx_flow_srs_wakeup, ring); 23628275SEric Cheng } 23638275SEric Cheng rw_exit(&mip->mi_rw_lock); 23648275SEric Cheng rw_exit(&i_mac_impl_lock); 23658275SEric Cheng } 23668275SEric Cheng 23678275SEric Cheng /* ARGSUSED */ 23688275SEric Cheng void 23698275SEric Cheng mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 23700Sstevel@tonic-gate boolean_t add) 23710Sstevel@tonic-gate { 23728275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 23738275SEric Cheng 23748275SEric Cheng i_mac_perim_enter((mac_impl_t *)mh); 23750Sstevel@tonic-gate /* 23760Sstevel@tonic-gate * If no specific refresh function was given then default to the 23770Sstevel@tonic-gate * driver's m_multicst entry point. 
23780Sstevel@tonic-gate */ 23790Sstevel@tonic-gate if (refresh == NULL) { 23802311Sseb refresh = mip->mi_multicst; 23812311Sseb arg = mip->mi_driver; 23820Sstevel@tonic-gate } 23838275SEric Cheng 23848275SEric Cheng mac_bcast_refresh(mip, refresh, arg, add); 23858275SEric Cheng i_mac_perim_exit((mac_impl_t *)mh); 23860Sstevel@tonic-gate } 23870Sstevel@tonic-gate 23880Sstevel@tonic-gate void 23892311Sseb mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 23900Sstevel@tonic-gate { 23912311Sseb mac_impl_t *mip = (mac_impl_t *)mh; 23920Sstevel@tonic-gate 23930Sstevel@tonic-gate /* 23940Sstevel@tonic-gate * If no specific refresh function was given then default to the 23950Sstevel@tonic-gate * driver's m_promisc entry point. 23960Sstevel@tonic-gate */ 23970Sstevel@tonic-gate if (refresh == NULL) { 23982311Sseb refresh = mip->mi_setpromisc; 23992311Sseb arg = mip->mi_driver; 24000Sstevel@tonic-gate } 24010Sstevel@tonic-gate ASSERT(refresh != NULL); 24020Sstevel@tonic-gate 24030Sstevel@tonic-gate /* 24040Sstevel@tonic-gate * Call the refresh function with the current promiscuity. 24050Sstevel@tonic-gate */ 24060Sstevel@tonic-gate refresh(arg, (mip->mi_devpromisc != 0)); 24070Sstevel@tonic-gate } 24080Sstevel@tonic-gate 24095895Syz147064 /* 24105895Syz147064 * The mac client requests that the mac not to change its margin size to 24115895Syz147064 * be less than the specified value. If "current" is B_TRUE, then the client 24125895Syz147064 * requests the mac not to change its margin size to be smaller than the 24135895Syz147064 * current size. Further, return the current margin size value in this case. 24145895Syz147064 * 24155895Syz147064 * We keep every requested size in an ordered list from largest to smallest. 
24165895Syz147064 */ 24175895Syz147064 int 24185895Syz147064 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 24195895Syz147064 { 24205895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24215895Syz147064 mac_margin_req_t **pp, *p; 24225895Syz147064 int err = 0; 24235895Syz147064 24248275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24255895Syz147064 if (current) 24265895Syz147064 *marginp = mip->mi_margin; 24275895Syz147064 24285895Syz147064 /* 24295895Syz147064 * If the current margin value cannot satisfy the margin requested, 24305895Syz147064 * return ENOTSUP directly. 24315895Syz147064 */ 24325895Syz147064 if (*marginp > mip->mi_margin) { 24335895Syz147064 err = ENOTSUP; 24345895Syz147064 goto done; 24355895Syz147064 } 24365895Syz147064 24375895Syz147064 /* 24385895Syz147064 * Check whether the given margin is already in the list. If so, 24395895Syz147064 * bump the reference count. 24405895Syz147064 */ 24418275SEric Cheng for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 24425895Syz147064 if (p->mmr_margin == *marginp) { 24435895Syz147064 /* 24445895Syz147064 * The margin requested is already in the list, 24455895Syz147064 * so just bump the reference count. 24465895Syz147064 */ 24475895Syz147064 p->mmr_ref++; 24485895Syz147064 goto done; 24495895Syz147064 } 24505895Syz147064 if (p->mmr_margin < *marginp) 24515895Syz147064 break; 24525895Syz147064 } 24535895Syz147064 24545895Syz147064 24558275SEric Cheng p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 24565895Syz147064 p->mmr_margin = *marginp; 24575895Syz147064 p->mmr_ref++; 24585895Syz147064 p->mmr_nextp = *pp; 24595895Syz147064 *pp = p; 24605895Syz147064 24615895Syz147064 done: 24628275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 24635895Syz147064 return (err); 24645895Syz147064 } 24655895Syz147064 24665895Syz147064 /* 24675895Syz147064 * The mac client requests to cancel its previous mac_margin_add() request. 
24685895Syz147064 * We remove the requested margin size from the list. 24695895Syz147064 */ 24705895Syz147064 int 24715895Syz147064 mac_margin_remove(mac_handle_t mh, uint32_t margin) 24725895Syz147064 { 24735895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 24745895Syz147064 mac_margin_req_t **pp, *p; 24755895Syz147064 int err = 0; 24765895Syz147064 24778275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 24785895Syz147064 /* 24795895Syz147064 * Find the entry in the list for the given margin. 24805895Syz147064 */ 24815895Syz147064 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 24825895Syz147064 if (p->mmr_margin == margin) { 24835895Syz147064 if (--p->mmr_ref == 0) 24845895Syz147064 break; 24855895Syz147064 24865895Syz147064 /* 24875895Syz147064 * There is still a reference to this address so 24885895Syz147064 * there's nothing more to do. 24895895Syz147064 */ 24905895Syz147064 goto done; 24915895Syz147064 } 24925895Syz147064 } 24935895Syz147064 24945895Syz147064 /* 24955895Syz147064 * We did not find an entry for the given margin. 24965895Syz147064 */ 24975895Syz147064 if (p == NULL) { 24985895Syz147064 err = ENOENT; 24995895Syz147064 goto done; 25005895Syz147064 } 25015895Syz147064 25025895Syz147064 ASSERT(p->mmr_ref == 0); 25035895Syz147064 25045895Syz147064 /* 25055895Syz147064 * Remove it from the list. 
25065895Syz147064 */ 25075895Syz147064 *pp = p->mmr_nextp; 25085895Syz147064 kmem_free(p, sizeof (mac_margin_req_t)); 25095895Syz147064 done: 25108275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25115895Syz147064 return (err); 25125895Syz147064 } 25135895Syz147064 25145895Syz147064 boolean_t 25155895Syz147064 mac_margin_update(mac_handle_t mh, uint32_t margin) 25165895Syz147064 { 25175895Syz147064 mac_impl_t *mip = (mac_impl_t *)mh; 25185895Syz147064 uint32_t margin_needed = 0; 25195895Syz147064 25208275SEric Cheng rw_enter(&(mip->mi_rw_lock), RW_WRITER); 25215895Syz147064 25225895Syz147064 if (mip->mi_mmrp != NULL) 25235895Syz147064 margin_needed = mip->mi_mmrp->mmr_margin; 25245895Syz147064 25255895Syz147064 if (margin_needed <= margin) 25265895Syz147064 mip->mi_margin = margin; 25275895Syz147064 25288275SEric Cheng rw_exit(&(mip->mi_rw_lock)); 25295895Syz147064 25305895Syz147064 if (margin_needed <= margin) 25315895Syz147064 i_mac_notify(mip, MAC_NOTE_MARGIN); 25325895Syz147064 25335895Syz147064 return (margin_needed <= margin); 25345895Syz147064 } 25355895Syz147064 25362311Sseb /* 25372311Sseb * MAC Type Plugin functions. 25382311Sseb */ 25392311Sseb 25408275SEric Cheng mactype_t * 25418275SEric Cheng mactype_getplugin(const char *pname) 25428275SEric Cheng { 25438275SEric Cheng mactype_t *mtype = NULL; 25448275SEric Cheng boolean_t tried_modload = B_FALSE; 25458275SEric Cheng 25468275SEric Cheng mutex_enter(&i_mactype_lock); 25478275SEric Cheng 25488275SEric Cheng find_registered_mactype: 25498275SEric Cheng if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 25508275SEric Cheng (mod_hash_val_t *)&mtype) != 0) { 25518275SEric Cheng if (!tried_modload) { 25528275SEric Cheng /* 25538275SEric Cheng * If the plugin has not yet been loaded, then 25548275SEric Cheng * attempt to load it now. 
If modload() succeeds, 25558275SEric Cheng * the plugin should have registered using 25568275SEric Cheng * mactype_register(), in which case we can go back 25578275SEric Cheng * and attempt to find it again. 25588275SEric Cheng */ 25598275SEric Cheng if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 25608275SEric Cheng tried_modload = B_TRUE; 25618275SEric Cheng goto find_registered_mactype; 25628275SEric Cheng } 25638275SEric Cheng } 25648275SEric Cheng } else { 25658275SEric Cheng /* 25668275SEric Cheng * Note that there's no danger that the plugin we've loaded 25678275SEric Cheng * could be unloaded between the modload() step and the 25688275SEric Cheng * reference count bump here, as we're holding 25698275SEric Cheng * i_mactype_lock, which mactype_unregister() also holds. 25708275SEric Cheng */ 25718275SEric Cheng atomic_inc_32(&mtype->mt_ref); 25728275SEric Cheng } 25738275SEric Cheng 25748275SEric Cheng mutex_exit(&i_mactype_lock); 25758275SEric Cheng return (mtype); 25768275SEric Cheng } 25778275SEric Cheng 25782311Sseb mactype_register_t * 25792311Sseb mactype_alloc(uint_t mactype_version) 25802311Sseb { 25812311Sseb mactype_register_t *mtrp; 25822311Sseb 25832311Sseb /* 25842311Sseb * Make sure there isn't a version mismatch between the plugin and 25852311Sseb * the framework. In the future, if multiple versions are 25862311Sseb * supported, this check could become more sophisticated. 
25872311Sseb */ 25882311Sseb if (mactype_version != MACTYPE_VERSION) 25892311Sseb return (NULL); 25902311Sseb 25912311Sseb mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 25922311Sseb mtrp->mtr_version = mactype_version; 25932311Sseb return (mtrp); 25942311Sseb } 25952311Sseb 25962311Sseb void 25972311Sseb mactype_free(mactype_register_t *mtrp) 25982311Sseb { 25992311Sseb kmem_free(mtrp, sizeof (mactype_register_t)); 26002311Sseb } 26012311Sseb 26022311Sseb int 26032311Sseb mactype_register(mactype_register_t *mtrp) 26042311Sseb { 26052311Sseb mactype_t *mtp; 26062311Sseb mactype_ops_t *ops = mtrp->mtr_ops; 26072311Sseb 26082311Sseb /* Do some sanity checking before we register this MAC type. */ 26096353Sdr146992 if (mtrp->mtr_ident == NULL || ops == NULL) 26102311Sseb return (EINVAL); 26112311Sseb 26122311Sseb /* 26132311Sseb * Verify that all mandatory callbacks are set in the ops 26142311Sseb * vector. 26152311Sseb */ 26162311Sseb if (ops->mtops_unicst_verify == NULL || 26172311Sseb ops->mtops_multicst_verify == NULL || 26182311Sseb ops->mtops_sap_verify == NULL || 26192311Sseb ops->mtops_header == NULL || 26202311Sseb ops->mtops_header_info == NULL) { 26212311Sseb return (EINVAL); 26222311Sseb } 26232311Sseb 26242311Sseb mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 26252311Sseb mtp->mt_ident = mtrp->mtr_ident; 26262311Sseb mtp->mt_ops = *ops; 26272311Sseb mtp->mt_type = mtrp->mtr_mactype; 26283147Sxc151355 mtp->mt_nativetype = mtrp->mtr_nativetype; 26292311Sseb mtp->mt_addr_length = mtrp->mtr_addrlen; 26302311Sseb if (mtrp->mtr_brdcst_addr != NULL) { 26312311Sseb mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 26322311Sseb bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 26332311Sseb mtrp->mtr_addrlen); 26342311Sseb } 26352311Sseb 26362311Sseb mtp->mt_stats = mtrp->mtr_stats; 26372311Sseb mtp->mt_statcount = mtrp->mtr_statcount; 26382311Sseb 26396512Ssowmini mtp->mt_mapping = mtrp->mtr_mapping; 26406512Ssowmini mtp->mt_mappingcount = 
mtrp->mtr_mappingcount; 26416512Ssowmini 26422311Sseb if (mod_hash_insert(i_mactype_hash, 26432311Sseb (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 26442311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26452311Sseb kmem_free(mtp, sizeof (*mtp)); 26462311Sseb return (EEXIST); 26472311Sseb } 26482311Sseb return (0); 26492311Sseb } 26502311Sseb 26512311Sseb int 26522311Sseb mactype_unregister(const char *ident) 26532311Sseb { 26542311Sseb mactype_t *mtp; 26552311Sseb mod_hash_val_t val; 26562311Sseb int err; 26572311Sseb 26582311Sseb /* 26592311Sseb * Let's not allow MAC drivers to use this plugin while we're 26603288Sseb * trying to unregister it. Holding i_mactype_lock also prevents a 26613288Sseb * plugin from unregistering while a MAC driver is attempting to 26623288Sseb * hold a reference to it in i_mactype_getplugin(). 26632311Sseb */ 26643288Sseb mutex_enter(&i_mactype_lock); 26652311Sseb 26662311Sseb if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 26672311Sseb (mod_hash_val_t *)&mtp)) != 0) { 26682311Sseb /* A plugin is trying to unregister, but it never registered. */ 26693288Sseb err = ENXIO; 26703288Sseb goto done; 26712311Sseb } 26722311Sseb 26733288Sseb if (mtp->mt_ref != 0) { 26743288Sseb err = EBUSY; 26753288Sseb goto done; 26762311Sseb } 26772311Sseb 26782311Sseb err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 26792311Sseb ASSERT(err == 0); 26802311Sseb if (err != 0) { 26812311Sseb /* This should never happen, thus the ASSERT() above. 
*/ 26823288Sseb err = EINVAL; 26833288Sseb goto done; 26842311Sseb } 26852311Sseb ASSERT(mtp == (mactype_t *)val); 26862311Sseb 26872311Sseb kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 26882311Sseb kmem_free(mtp, sizeof (mactype_t)); 26893288Sseb done: 26903288Sseb mutex_exit(&i_mactype_lock); 26913288Sseb return (err); 26922311Sseb } 26935903Ssowmini 26948275SEric Cheng /* 26958275SEric Cheng * Returns TRUE when the specified property is intended for the MAC framework, 26968275SEric Cheng * as opposed to driver defined properties. 26978275SEric Cheng */ 26988275SEric Cheng static boolean_t 26998275SEric Cheng mac_is_macprop(mac_prop_t *macprop) 27008275SEric Cheng { 27018275SEric Cheng switch (macprop->mp_id) { 27028275SEric Cheng case MAC_PROP_MAXBW: 27038275SEric Cheng case MAC_PROP_PRIO: 27048275SEric Cheng case MAC_PROP_BIND_CPU: 27058275SEric Cheng return (B_TRUE); 27068275SEric Cheng default: 27078275SEric Cheng return (B_FALSE); 27088275SEric Cheng } 27098275SEric Cheng } 27108275SEric Cheng 27118275SEric Cheng /* 27128275SEric Cheng * mac_set_prop() sets mac or hardware driver properties: 27138275SEric Cheng * mac properties include maxbw, priority, and cpu binding list. Driver 27148275SEric Cheng * properties are private properties to the hardware, such as mtu, speed 27158275SEric Cheng * etc. 27168275SEric Cheng * If the property is a driver property, mac_set_prop() calls driver's callback 27178275SEric Cheng * function to set it. 27188275SEric Cheng * If the property is a mac property, mac_set_prop() invokes mac_set_resources() 27198275SEric Cheng * which will cache the property value in mac_impl_t and may call 27208275SEric Cheng * mac_client_set_resource() to update property value of the primary mac client, 27218275SEric Cheng * if it exists. 
27228275SEric Cheng */ 27235903Ssowmini int 27245903Ssowmini mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) 27255903Ssowmini { 27265903Ssowmini int err = ENOTSUP; 27275903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27285903Ssowmini 27298275SEric Cheng ASSERT(MAC_PERIM_HELD(mh)); 27308275SEric Cheng 27318275SEric Cheng /* If it is mac property, call mac_set_resources() */ 27328275SEric Cheng if (mac_is_macprop(macprop)) { 27338275SEric Cheng mac_resource_props_t mrp; 27348275SEric Cheng 27358275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27368275SEric Cheng return (EINVAL); 27378275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27388275SEric Cheng bcopy(val, &mrp, sizeof (mrp)); 27398275SEric Cheng return (mac_set_resources(mh, &mrp)); 27408275SEric Cheng } 27418603SGirish.Moodalbail@Sun.COM switch (macprop->mp_id) { 27428603SGirish.Moodalbail@Sun.COM case MAC_PROP_MTU: { 27438603SGirish.Moodalbail@Sun.COM uint32_t mtu; 27448603SGirish.Moodalbail@Sun.COM 27458603SGirish.Moodalbail@Sun.COM if (valsize < sizeof (mtu)) 27468603SGirish.Moodalbail@Sun.COM return (EINVAL); 27478603SGirish.Moodalbail@Sun.COM bcopy(val, &mtu, sizeof (mtu)); 27488603SGirish.Moodalbail@Sun.COM err = mac_set_mtu(mh, mtu, NULL); 27498603SGirish.Moodalbail@Sun.COM break; 27505903Ssowmini } 27518603SGirish.Moodalbail@Sun.COM default: 27528603SGirish.Moodalbail@Sun.COM /* For other driver properties, call driver's callback */ 27538603SGirish.Moodalbail@Sun.COM if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 27548603SGirish.Moodalbail@Sun.COM err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 27558603SGirish.Moodalbail@Sun.COM macprop->mp_name, macprop->mp_id, valsize, val); 27568603SGirish.Moodalbail@Sun.COM } 27578603SGirish.Moodalbail@Sun.COM } 27585903Ssowmini return (err); 27595903Ssowmini } 27605903Ssowmini 27618275SEric Cheng /* 27628275SEric Cheng * mac_get_prop() gets mac or hardware driver properties. 
27638275SEric Cheng * 27648275SEric Cheng * If the property is a driver property, mac_get_prop() calls driver's callback 27658275SEric Cheng * function to get it. 27668275SEric Cheng * If the property is a mac property, mac_get_prop() invokes mac_get_resources() 27678275SEric Cheng * which returns the cached value in mac_impl_t. 27688275SEric Cheng */ 27695903Ssowmini int 27708118SVasumathi.Sundaram@Sun.COM mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, 27718118SVasumathi.Sundaram@Sun.COM uint_t *perm) 27725903Ssowmini { 27735903Ssowmini int err = ENOTSUP; 27745903Ssowmini mac_impl_t *mip = (mac_impl_t *)mh; 27756512Ssowmini uint32_t sdu; 27766512Ssowmini link_state_t link_state; 27776512Ssowmini 27788275SEric Cheng /* If mac property, read from cache */ 27798275SEric Cheng if (mac_is_macprop(macprop)) { 27808275SEric Cheng mac_resource_props_t mrp; 27818275SEric Cheng 27828275SEric Cheng if (valsize < sizeof (mac_resource_props_t)) 27838275SEric Cheng return (EINVAL); 27848275SEric Cheng bzero(&mrp, sizeof (mac_resource_props_t)); 27858275SEric Cheng mac_get_resources(mh, &mrp); 27868275SEric Cheng bcopy(&mrp, val, sizeof (mac_resource_props_t)); 27878275SEric Cheng return (0); 27888275SEric Cheng } 27898275SEric Cheng 27906512Ssowmini switch (macprop->mp_id) { 27916789Sam223141 case MAC_PROP_MTU: 27926512Ssowmini if (valsize < sizeof (sdu)) 27936512Ssowmini return (EINVAL); 27946789Sam223141 if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { 27956512Ssowmini mac_sdu_get(mh, NULL, &sdu); 27966512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 27978603SGirish.Moodalbail@Sun.COM if ((mip->mi_callbacks->mc_callbacks & MC_SETPROP) && 27988603SGirish.Moodalbail@Sun.COM (mip->mi_callbacks->mc_setprop(mip->mi_driver, 27998603SGirish.Moodalbail@Sun.COM macprop->mp_name, macprop->mp_id, valsize, 28008603SGirish.Moodalbail@Sun.COM val) == 0)) { 28018603SGirish.Moodalbail@Sun.COM *perm = MAC_PROP_PERM_RW; 28028603SGirish.Moodalbail@Sun.COM } else { 
28038118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 28048603SGirish.Moodalbail@Sun.COM } 28056512Ssowmini return (0); 28066512Ssowmini } else { 28076512Ssowmini if (mip->mi_info.mi_media == DL_ETHER) { 28086512Ssowmini sdu = ETHERMTU; 28096512Ssowmini bcopy(&sdu, val, sizeof (sdu)); 28108603SGirish.Moodalbail@Sun.COM 28116512Ssowmini return (0); 28126512Ssowmini } 28136512Ssowmini /* 28146512Ssowmini * ask driver for its default. 28156512Ssowmini */ 28166512Ssowmini break; 28176512Ssowmini } 28186789Sam223141 case MAC_PROP_STATUS: 28196512Ssowmini if (valsize < sizeof (link_state)) 28206512Ssowmini return (EINVAL); 28218118SVasumathi.Sundaram@Sun.COM *perm = MAC_PROP_PERM_READ; 28226512Ssowmini link_state = mac_link_get(mh); 28236512Ssowmini bcopy(&link_state, val, sizeof (link_state)); 28246512Ssowmini return (0); 28256512Ssowmini default: 28266512Ssowmini break; 28278275SEric Cheng 28286512Ssowmini } 28298275SEric Cheng /* If driver property, request from driver */ 28305903Ssowmini if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 28315903Ssowmini err = mip->mi_callbacks->mc_getprop(mip->mi_driver, 28326512Ssowmini macprop->mp_name, macprop->mp_id, macprop->mp_flags, 28338118SVasumathi.Sundaram@Sun.COM valsize, val, perm); 28345903Ssowmini } 28355903Ssowmini return (err); 28365903Ssowmini } 28375903Ssowmini 28388275SEric Cheng void 28396512Ssowmini mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) 28406512Ssowmini { 28416512Ssowmini mac_priv_prop_t *mpriv; 28426512Ssowmini 28436512Ssowmini if (mpp == NULL) 28446512Ssowmini return; 28456512Ssowmini 28466512Ssowmini mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); 28476512Ssowmini (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); 28486512Ssowmini mip->mi_priv_prop = mpriv; 28496512Ssowmini mip->mi_priv_prop_count = nprop; 28506512Ssowmini } 28517406SSowmini.Varadhan@Sun.COM 28528275SEric Cheng void 28537406SSowmini.Varadhan@Sun.COM mac_unregister_priv_prop(mac_impl_t 
*mip) 28547406SSowmini.Varadhan@Sun.COM { 28557406SSowmini.Varadhan@Sun.COM mac_priv_prop_t *mpriv; 28567406SSowmini.Varadhan@Sun.COM 28577406SSowmini.Varadhan@Sun.COM mpriv = mip->mi_priv_prop; 28587406SSowmini.Varadhan@Sun.COM if (mpriv != NULL) { 28597406SSowmini.Varadhan@Sun.COM kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv)); 28607406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop = NULL; 28617406SSowmini.Varadhan@Sun.COM } 28627406SSowmini.Varadhan@Sun.COM mip->mi_priv_prop_count = 0; 28637406SSowmini.Varadhan@Sun.COM } 28648275SEric Cheng 28658275SEric Cheng /* 28668275SEric Cheng * mac_ring_t 'mr' macros. Some rogue drivers may access ring structure 28678275SEric Cheng * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 28688275SEric Cheng * cases if MAC free's the ring structure after mac_stop_ring(), any 28698275SEric Cheng * illegal access to the ring structure coming from the driver will panic 28708275SEric Cheng * the system. In order to protect the system from such inadverent access, 28718275SEric Cheng * we maintain a cache of rings in the mac_impl_t after they get free'd up. 28728275SEric Cheng * When packets are received on free'd up rings, MAC (through the generation 28738275SEric Cheng * count mechanism) will drop such packets. 
28748275SEric Cheng */ 28758275SEric Cheng static mac_ring_t * 28768275SEric Cheng mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) 28778275SEric Cheng { 28788275SEric Cheng mac_ring_t *ring; 28798275SEric Cheng 28808275SEric Cheng if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 28818275SEric Cheng mutex_enter(&mip->mi_ring_lock); 28828275SEric Cheng if (mip->mi_ring_freelist != NULL) { 28838275SEric Cheng ring = mip->mi_ring_freelist; 28848275SEric Cheng mip->mi_ring_freelist = ring->mr_next; 28858275SEric Cheng bzero(ring, sizeof (mac_ring_t)); 28868275SEric Cheng } else { 28878275SEric Cheng ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 28888275SEric Cheng } 28898275SEric Cheng mutex_exit(&mip->mi_ring_lock); 28908275SEric Cheng } else { 28918275SEric Cheng ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); 28928275SEric Cheng } 28938275SEric Cheng ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 28948275SEric Cheng return (ring); 28958275SEric Cheng } 28968275SEric Cheng 28978275SEric Cheng static void 28988275SEric Cheng mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 28998275SEric Cheng { 29008275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 29018275SEric Cheng mutex_enter(&mip->mi_ring_lock); 29028275SEric Cheng ring->mr_state = MR_FREE; 29038275SEric Cheng ring->mr_flag = 0; 29048275SEric Cheng ring->mr_next = mip->mi_ring_freelist; 29058275SEric Cheng mip->mi_ring_freelist = ring; 29068275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29078275SEric Cheng } else { 29088275SEric Cheng kmem_free(ring, sizeof (mac_ring_t)); 29098275SEric Cheng } 29108275SEric Cheng } 29118275SEric Cheng 29128275SEric Cheng static void 29138275SEric Cheng mac_ring_freeall(mac_impl_t *mip) 29148275SEric Cheng { 29158275SEric Cheng mac_ring_t *ring_next; 29168275SEric Cheng mutex_enter(&mip->mi_ring_lock); 29178275SEric Cheng mac_ring_t *ring = mip->mi_ring_freelist; 29188275SEric Cheng while (ring != NULL) { 29198275SEric Cheng ring_next = 
ring->mr_next; 29208275SEric Cheng kmem_cache_free(mac_ring_cache, ring); 29218275SEric Cheng ring = ring_next; 29228275SEric Cheng } 29238275SEric Cheng mip->mi_ring_freelist = NULL; 29248275SEric Cheng mutex_exit(&mip->mi_ring_lock); 29258275SEric Cheng } 29268275SEric Cheng 29278275SEric Cheng int 29288275SEric Cheng mac_start_ring(mac_ring_t *ring) 29298275SEric Cheng { 29308275SEric Cheng int rv = 0; 29318275SEric Cheng 29328275SEric Cheng if (ring->mr_start != NULL) 29338275SEric Cheng rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 29348275SEric Cheng 29358275SEric Cheng return (rv); 29368275SEric Cheng } 29378275SEric Cheng 29388275SEric Cheng void 29398275SEric Cheng mac_stop_ring(mac_ring_t *ring) 29408275SEric Cheng { 29418275SEric Cheng if (ring->mr_stop != NULL) 29428275SEric Cheng ring->mr_stop(ring->mr_driver); 29438275SEric Cheng 29448275SEric Cheng /* 29458275SEric Cheng * Increment the ring generation number for this ring. 29468275SEric Cheng */ 29478275SEric Cheng ring->mr_gen_num++; 29488275SEric Cheng } 29498275SEric Cheng 29508275SEric Cheng int 29518275SEric Cheng mac_start_group(mac_group_t *group) 29528275SEric Cheng { 29538275SEric Cheng int rv = 0; 29548275SEric Cheng 29558275SEric Cheng if (group->mrg_start != NULL) 29568275SEric Cheng rv = group->mrg_start(group->mrg_driver); 29578275SEric Cheng 29588275SEric Cheng return (rv); 29598275SEric Cheng } 29608275SEric Cheng 29618275SEric Cheng void 29628275SEric Cheng mac_stop_group(mac_group_t *group) 29638275SEric Cheng { 29648275SEric Cheng if (group->mrg_stop != NULL) 29658275SEric Cheng group->mrg_stop(group->mrg_driver); 29668275SEric Cheng } 29678275SEric Cheng 29688275SEric Cheng /* 29698275SEric Cheng * Called from mac_start() on the default Rx group. Broadcast and multicast 29708275SEric Cheng * packets are received only on the default group. 
Hence the default group 29718275SEric Cheng * needs to be up even if the primary client is not up, for the other groups 29728275SEric Cheng * to be functional. We do this by calling this function at mac_start time 29738275SEric Cheng * itself. However the broadcast packets that are received can't make their 29748275SEric Cheng * way beyond mac_rx until a mac client creates a broadcast flow. 29758275SEric Cheng */ 29768275SEric Cheng static int 29778275SEric Cheng mac_start_group_and_rings(mac_group_t *group) 29788275SEric Cheng { 29798275SEric Cheng mac_ring_t *ring; 29808275SEric Cheng int rv = 0; 29818275SEric Cheng 29828275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 29838275SEric Cheng if ((rv = mac_start_group(group)) != 0) 29848275SEric Cheng return (rv); 29858275SEric Cheng 29868275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 29878275SEric Cheng ASSERT(ring->mr_state == MR_FREE); 29888275SEric Cheng if ((rv = mac_start_ring(ring)) != 0) 29898275SEric Cheng goto error; 29908275SEric Cheng ring->mr_state = MR_INUSE; 29918275SEric Cheng ring->mr_classify_type = MAC_SW_CLASSIFIER; 29928275SEric Cheng } 29938275SEric Cheng return (0); 29948275SEric Cheng 29958275SEric Cheng error: 29968275SEric Cheng mac_stop_group_and_rings(group); 29978275SEric Cheng return (rv); 29988275SEric Cheng } 29998275SEric Cheng 30008275SEric Cheng /* Called from mac_stop on the default Rx group */ 30018275SEric Cheng static void 30028275SEric Cheng mac_stop_group_and_rings(mac_group_t *group) 30038275SEric Cheng { 30048275SEric Cheng mac_ring_t *ring; 30058275SEric Cheng 30068275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 30078275SEric Cheng if (ring->mr_state != MR_FREE) { 30088275SEric Cheng mac_stop_ring(ring); 30098275SEric Cheng ring->mr_state = MR_FREE; 30108275SEric Cheng ring->mr_flag = 0; 30118275SEric Cheng ring->mr_classify_type = MAC_NO_CLASSIFIER; 30128275SEric Cheng } 
30138275SEric Cheng } 30148275SEric Cheng mac_stop_group(group); 30158275SEric Cheng } 30168275SEric Cheng 30178275SEric Cheng 30188275SEric Cheng static mac_ring_t * 30198275SEric Cheng mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 30208275SEric Cheng mac_capab_rings_t *cap_rings) 30218275SEric Cheng { 30228275SEric Cheng mac_ring_t *ring; 30238275SEric Cheng mac_ring_info_t ring_info; 30248275SEric Cheng 30258275SEric Cheng ring = mac_ring_alloc(mip, cap_rings); 30268275SEric Cheng 30278275SEric Cheng /* Prepare basic information of ring */ 30288275SEric Cheng ring->mr_index = index; 30298275SEric Cheng ring->mr_type = group->mrg_type; 30308275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 30318275SEric Cheng 30328275SEric Cheng /* Insert the new ring to the list. */ 30338275SEric Cheng ring->mr_next = group->mrg_rings; 30348275SEric Cheng group->mrg_rings = ring; 30358275SEric Cheng 30368275SEric Cheng /* Zero to reuse the info data structure */ 30378275SEric Cheng bzero(&ring_info, sizeof (ring_info)); 30388275SEric Cheng 30398275SEric Cheng /* Query ring information from driver */ 30408275SEric Cheng cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 30418275SEric Cheng index, &ring_info, (mac_ring_handle_t)ring); 30428275SEric Cheng 30438275SEric Cheng ring->mr_info = ring_info; 30448275SEric Cheng 30458275SEric Cheng /* Update ring's status */ 30468275SEric Cheng ring->mr_state = MR_FREE; 30478275SEric Cheng ring->mr_flag = 0; 30488275SEric Cheng 30498275SEric Cheng /* Update the ring count of the group */ 30508275SEric Cheng group->mrg_cur_count++; 30518275SEric Cheng return (ring); 30528275SEric Cheng } 30538275SEric Cheng 30548275SEric Cheng /* 30558275SEric Cheng * Rings are chained together for easy regrouping. 
30568275SEric Cheng */ 30578275SEric Cheng static void 30588275SEric Cheng mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 30598275SEric Cheng mac_capab_rings_t *cap_rings) 30608275SEric Cheng { 30618275SEric Cheng int index; 30628275SEric Cheng 30638275SEric Cheng /* 30648275SEric Cheng * Initialize all ring members of this group. Size of zero will not 30658275SEric Cheng * enter the loop, so it's safe for initializing an empty group. 30668275SEric Cheng */ 30678275SEric Cheng for (index = size - 1; index >= 0; index--) 30688275SEric Cheng (void) mac_init_ring(mip, group, index, cap_rings); 30698275SEric Cheng } 30708275SEric Cheng 30718275SEric Cheng int 30728275SEric Cheng mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 30738275SEric Cheng { 30748275SEric Cheng mac_capab_rings_t *cap_rings; 30758275SEric Cheng mac_group_t *group, *groups; 30768275SEric Cheng mac_group_info_t group_info; 30778275SEric Cheng uint_t group_free = 0; 30788275SEric Cheng uint_t ring_left; 30798275SEric Cheng mac_ring_t *ring; 30808275SEric Cheng int g, err = 0; 30818275SEric Cheng 30828275SEric Cheng switch (rtype) { 30838275SEric Cheng case MAC_RING_TYPE_RX: 30848275SEric Cheng ASSERT(mip->mi_rx_groups == NULL); 30858275SEric Cheng 30868275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 30878275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_RX; 30888275SEric Cheng break; 30898275SEric Cheng case MAC_RING_TYPE_TX: 30908275SEric Cheng ASSERT(mip->mi_tx_groups == NULL); 30918275SEric Cheng 30928275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 30938275SEric Cheng cap_rings->mr_type = MAC_RING_TYPE_TX; 30948275SEric Cheng break; 30958275SEric Cheng default: 30968275SEric Cheng ASSERT(B_FALSE); 30978275SEric Cheng } 30988275SEric Cheng 30998275SEric Cheng if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, 31008275SEric Cheng cap_rings)) 31018275SEric Cheng return (0); 31028275SEric Cheng 31038275SEric Cheng /* 31048275SEric Cheng * Allocate a contiguous buffer 
for all groups. 31058275SEric Cheng */ 31068275SEric Cheng groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), 31078275SEric Cheng KM_SLEEP); 31088275SEric Cheng 31098275SEric Cheng ring_left = cap_rings->mr_rnum; 31108275SEric Cheng 31118275SEric Cheng /* 31128275SEric Cheng * Get all ring groups if any, and get their ring members 31138275SEric Cheng * if any. 31148275SEric Cheng */ 31158275SEric Cheng for (g = 0; g < cap_rings->mr_gnum; g++) { 31168275SEric Cheng group = groups + g; 31178275SEric Cheng 31188275SEric Cheng /* Prepare basic information of the group */ 31198275SEric Cheng group->mrg_index = g; 31208275SEric Cheng group->mrg_type = rtype; 31218275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 31228275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 31238275SEric Cheng group->mrg_next = group + 1; 31248275SEric Cheng 31258275SEric Cheng /* Zero to reuse the info data structure */ 31268275SEric Cheng bzero(&group_info, sizeof (group_info)); 31278275SEric Cheng 31288275SEric Cheng /* Query group information from driver */ 31298275SEric Cheng cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, 31308275SEric Cheng (mac_group_handle_t)group); 31318275SEric Cheng 31328275SEric Cheng switch (cap_rings->mr_group_type) { 31338275SEric Cheng case MAC_GROUP_TYPE_DYNAMIC: 31348275SEric Cheng if (cap_rings->mr_gaddring == NULL || 31358275SEric Cheng cap_rings->mr_gremring == NULL) { 31368275SEric Cheng DTRACE_PROBE3( 31378275SEric Cheng mac__init__rings_no_addremring, 31388275SEric Cheng char *, mip->mi_name, 31398275SEric Cheng mac_group_add_ring_t, 31408275SEric Cheng cap_rings->mr_gaddring, 31418275SEric Cheng mac_group_add_ring_t, 31428275SEric Cheng cap_rings->mr_gremring); 31438275SEric Cheng err = EINVAL; 31448275SEric Cheng goto bail; 31458275SEric Cheng } 31468275SEric Cheng 31478275SEric Cheng switch (rtype) { 31488275SEric Cheng case MAC_RING_TYPE_RX: 31498275SEric Cheng /* 31508275SEric Cheng * The first RX group must have 
non-zero 31518275SEric Cheng * rings, and the following groups must 31528275SEric Cheng * have zero rings. 31538275SEric Cheng */ 31548275SEric Cheng if (g == 0 && group_info.mgi_count == 0) { 31558275SEric Cheng DTRACE_PROBE1( 31568275SEric Cheng mac__init__rings__rx__def__zero, 31578275SEric Cheng char *, mip->mi_name); 31588275SEric Cheng err = EINVAL; 31598275SEric Cheng goto bail; 31608275SEric Cheng } 31618275SEric Cheng if (g > 0 && group_info.mgi_count != 0) { 31628275SEric Cheng DTRACE_PROBE3( 31638275SEric Cheng mac__init__rings__rx__nonzero, 31648275SEric Cheng char *, mip->mi_name, 31658275SEric Cheng int, g, int, group_info.mgi_count); 31668275SEric Cheng err = EINVAL; 31678275SEric Cheng goto bail; 31688275SEric Cheng } 31698275SEric Cheng break; 31708275SEric Cheng case MAC_RING_TYPE_TX: 31718275SEric Cheng /* 31728275SEric Cheng * All TX ring groups must have zero rings. 31738275SEric Cheng */ 31748275SEric Cheng if (group_info.mgi_count != 0) { 31758275SEric Cheng DTRACE_PROBE3( 31768275SEric Cheng mac__init__rings__tx__nonzero, 31778275SEric Cheng char *, mip->mi_name, 31788275SEric Cheng int, g, int, group_info.mgi_count); 31798275SEric Cheng err = EINVAL; 31808275SEric Cheng goto bail; 31818275SEric Cheng } 31828275SEric Cheng break; 31838275SEric Cheng } 31848275SEric Cheng break; 31858275SEric Cheng case MAC_GROUP_TYPE_STATIC: 31868275SEric Cheng /* 31878275SEric Cheng * Note that an empty group is allowed, e.g., an aggr 31888275SEric Cheng * would start with an empty group. 
31898275SEric Cheng */ 31908275SEric Cheng break; 31918275SEric Cheng default: 31928275SEric Cheng /* unknown group type */ 31938275SEric Cheng DTRACE_PROBE2(mac__init__rings__unknown__type, 31948275SEric Cheng char *, mip->mi_name, 31958275SEric Cheng int, cap_rings->mr_group_type); 31968275SEric Cheng err = EINVAL; 31978275SEric Cheng goto bail; 31988275SEric Cheng } 31998275SEric Cheng 32008275SEric Cheng 32018275SEric Cheng /* 32028275SEric Cheng * Driver must register group->mgi_addmac/remmac() for rx groups 32038275SEric Cheng * to support multiple MAC addresses. 32048275SEric Cheng */ 32058275SEric Cheng if (rtype == MAC_RING_TYPE_RX) { 32068275SEric Cheng if ((group_info.mgi_addmac == NULL) || 32078275SEric Cheng (group_info.mgi_addmac == NULL)) 32088275SEric Cheng goto bail; 32098275SEric Cheng } 32108275SEric Cheng 32118275SEric Cheng /* Cache driver-supplied information */ 32128275SEric Cheng group->mrg_info = group_info; 32138275SEric Cheng 32148275SEric Cheng /* Update the group's status and group count. 
*/ 32158275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 32168275SEric Cheng group_free++; 32178275SEric Cheng 32188275SEric Cheng group->mrg_rings = NULL; 32198275SEric Cheng group->mrg_cur_count = 0; 32208275SEric Cheng mac_init_group(mip, group, group_info.mgi_count, cap_rings); 32218275SEric Cheng ring_left -= group_info.mgi_count; 32228275SEric Cheng 32238275SEric Cheng /* The current group size should be equal to default value */ 32248275SEric Cheng ASSERT(group->mrg_cur_count == group_info.mgi_count); 32258275SEric Cheng } 32268275SEric Cheng 32278275SEric Cheng /* Build up a dummy group for free resources as a pool */ 32288275SEric Cheng group = groups + cap_rings->mr_gnum; 32298275SEric Cheng 32308275SEric Cheng /* Prepare basic information of the group */ 32318275SEric Cheng group->mrg_index = -1; 32328275SEric Cheng group->mrg_type = rtype; 32338275SEric Cheng group->mrg_state = MAC_GROUP_STATE_UNINIT; 32348275SEric Cheng group->mrg_mh = (mac_handle_t)mip; 32358275SEric Cheng group->mrg_next = NULL; 32368275SEric Cheng 32378275SEric Cheng /* 32388275SEric Cheng * If there are ungrouped rings, allocate a continuous buffer for 32398275SEric Cheng * remaining resources. 
32408275SEric Cheng */ 32418275SEric Cheng if (ring_left != 0) { 32428275SEric Cheng group->mrg_rings = NULL; 32438275SEric Cheng group->mrg_cur_count = 0; 32448275SEric Cheng mac_init_group(mip, group, ring_left, cap_rings); 32458275SEric Cheng 32468275SEric Cheng /* The current group size should be equal to ring_left */ 32478275SEric Cheng ASSERT(group->mrg_cur_count == ring_left); 32488275SEric Cheng 32498275SEric Cheng ring_left = 0; 32508275SEric Cheng 32518275SEric Cheng /* Update this group's status */ 32528275SEric Cheng mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); 32538275SEric Cheng } else 32548275SEric Cheng group->mrg_rings = NULL; 32558275SEric Cheng 32568275SEric Cheng ASSERT(ring_left == 0); 32578275SEric Cheng 32588275SEric Cheng bail: 32598275SEric Cheng /* Cache other important information to finalize the initialization */ 32608275SEric Cheng switch (rtype) { 32618275SEric Cheng case MAC_RING_TYPE_RX: 32628275SEric Cheng mip->mi_rx_group_type = cap_rings->mr_group_type; 32638275SEric Cheng mip->mi_rx_group_count = cap_rings->mr_gnum; 32648275SEric Cheng mip->mi_rx_groups = groups; 32658275SEric Cheng break; 32668275SEric Cheng case MAC_RING_TYPE_TX: 32678275SEric Cheng mip->mi_tx_group_type = cap_rings->mr_group_type; 32688275SEric Cheng mip->mi_tx_group_count = cap_rings->mr_gnum; 32698275SEric Cheng mip->mi_tx_group_free = group_free; 32708275SEric Cheng mip->mi_tx_groups = groups; 32718275SEric Cheng 32728275SEric Cheng /* 32738275SEric Cheng * Ring 0 is used as the default one and it could be assigned 32748275SEric Cheng * to a client as well. 
32758275SEric Cheng */ 32768275SEric Cheng group = groups + cap_rings->mr_gnum; 32778275SEric Cheng ring = group->mrg_rings; 32788275SEric Cheng while ((ring->mr_index != 0) && (ring->mr_next != NULL)) 32798275SEric Cheng ring = ring->mr_next; 32808275SEric Cheng ASSERT(ring->mr_index == 0); 32818275SEric Cheng mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 32828275SEric Cheng break; 32838275SEric Cheng default: 32848275SEric Cheng ASSERT(B_FALSE); 32858275SEric Cheng } 32868275SEric Cheng 32878275SEric Cheng if (err != 0) 32888275SEric Cheng mac_free_rings(mip, rtype); 32898275SEric Cheng 32908275SEric Cheng return (err); 32918275SEric Cheng } 32928275SEric Cheng 32938275SEric Cheng /* 32948275SEric Cheng * Called to free all ring groups with particular type. It's supposed all groups 32958275SEric Cheng * have been released by clinet. 32968275SEric Cheng */ 32978275SEric Cheng void 32988275SEric Cheng mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) 32998275SEric Cheng { 33008275SEric Cheng mac_group_t *group, *groups; 33018275SEric Cheng uint_t group_count; 33028275SEric Cheng 33038275SEric Cheng switch (rtype) { 33048275SEric Cheng case MAC_RING_TYPE_RX: 33058275SEric Cheng if (mip->mi_rx_groups == NULL) 33068275SEric Cheng return; 33078275SEric Cheng 33088275SEric Cheng groups = mip->mi_rx_groups; 33098275SEric Cheng group_count = mip->mi_rx_group_count; 33108275SEric Cheng 33118275SEric Cheng mip->mi_rx_groups = NULL; 33128275SEric Cheng mip->mi_rx_group_count = 0; 33138275SEric Cheng break; 33148275SEric Cheng case MAC_RING_TYPE_TX: 33158275SEric Cheng ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free); 33168275SEric Cheng 33178275SEric Cheng if (mip->mi_tx_groups == NULL) 33188275SEric Cheng return; 33198275SEric Cheng 33208275SEric Cheng groups = mip->mi_tx_groups; 33218275SEric Cheng group_count = mip->mi_tx_group_count; 33228275SEric Cheng 33238275SEric Cheng mip->mi_tx_groups = NULL; 33248275SEric Cheng mip->mi_tx_group_count = 0; 
33258275SEric Cheng mip->mi_tx_group_free = 0; 33268275SEric Cheng mip->mi_default_tx_ring = NULL; 33278275SEric Cheng break; 33288275SEric Cheng default: 33298275SEric Cheng ASSERT(B_FALSE); 33308275SEric Cheng } 33318275SEric Cheng 33328275SEric Cheng for (group = groups; group != NULL; group = group->mrg_next) { 33338275SEric Cheng mac_ring_t *ring; 33348275SEric Cheng 33358275SEric Cheng if (group->mrg_cur_count == 0) 33368275SEric Cheng continue; 33378275SEric Cheng 33388275SEric Cheng ASSERT(group->mrg_rings != NULL); 33398275SEric Cheng 33408275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 33418275SEric Cheng group->mrg_rings = ring->mr_next; 33428275SEric Cheng mac_ring_free(mip, ring); 33438275SEric Cheng } 33448275SEric Cheng } 33458275SEric Cheng 33468275SEric Cheng /* Free all the cached rings */ 33478275SEric Cheng mac_ring_freeall(mip); 33488275SEric Cheng /* Free the block of group data strutures */ 33498275SEric Cheng kmem_free(groups, sizeof (mac_group_t) * (group_count + 1)); 33508275SEric Cheng } 33518275SEric Cheng 33528275SEric Cheng /* 33538275SEric Cheng * Associate a MAC address with a receive group. 33548275SEric Cheng * 33558275SEric Cheng * The return value of this function should always be checked properly, because 33568275SEric Cheng * any type of failure could cause unexpected results. A group can be added 33578275SEric Cheng * or removed with a MAC address only after it has been reserved. Ideally, 33588275SEric Cheng * a successful reservation always leads to calling mac_group_addmac() to 33598275SEric Cheng * steer desired traffic. Failure of adding an unicast MAC address doesn't 33608275SEric Cheng * always imply that the group is functioning abnormally. 33618275SEric Cheng * 33628275SEric Cheng * Currently this function is called everywhere, and it reflects assumptions 33638275SEric Cheng * about MAC addresses in the implementation. CR 6735196. 
33648275SEric Cheng */ 33658275SEric Cheng int 33668275SEric Cheng mac_group_addmac(mac_group_t *group, const uint8_t *addr) 33678275SEric Cheng { 33688275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 33698275SEric Cheng ASSERT(group->mrg_info.mgi_addmac != NULL); 33708275SEric Cheng 33718275SEric Cheng return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); 33728275SEric Cheng } 33738275SEric Cheng 33748275SEric Cheng /* 33758275SEric Cheng * Remove the association between MAC address and receive group. 33768275SEric Cheng */ 33778275SEric Cheng int 33788275SEric Cheng mac_group_remmac(mac_group_t *group, const uint8_t *addr) 33798275SEric Cheng { 33808275SEric Cheng ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 33818275SEric Cheng ASSERT(group->mrg_info.mgi_remmac != NULL); 33828275SEric Cheng 33838275SEric Cheng return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); 33848275SEric Cheng } 33858275SEric Cheng 33868275SEric Cheng /* 33878275SEric Cheng * Release a ring in use by marking it MR_FREE. 33888275SEric Cheng * Any other client may reserve it for its use. 33898275SEric Cheng */ 33908275SEric Cheng void 33918275SEric Cheng mac_release_tx_ring(mac_ring_handle_t rh) 33928275SEric Cheng { 33938275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 33948275SEric Cheng mac_group_t *group = (mac_group_t *)ring->mr_gh; 33958275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 33968275SEric Cheng 33978275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 33988275SEric Cheng ASSERT(ring->mr_state != MR_FREE); 33998275SEric Cheng 34008275SEric Cheng /* 34018275SEric Cheng * Default tx ring will be released by mac_stop(). 
34028275SEric Cheng */ 34038275SEric Cheng if (rh == mip->mi_default_tx_ring) 34048275SEric Cheng return; 34058275SEric Cheng 34068275SEric Cheng mac_stop_ring(ring); 34078275SEric Cheng 34088275SEric Cheng ring->mr_state = MR_FREE; 34098275SEric Cheng ring->mr_flag = 0; 34108275SEric Cheng } 34118275SEric Cheng 34128275SEric Cheng /* 34138275SEric Cheng * Send packets through a selected tx ring. 34148275SEric Cheng */ 34158275SEric Cheng mblk_t * 34168275SEric Cheng mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp) 34178275SEric Cheng { 34188275SEric Cheng mac_ring_t *ring = (mac_ring_t *)rh; 34198275SEric Cheng mac_ring_info_t *info = &ring->mr_info; 34208275SEric Cheng 34218275SEric Cheng ASSERT(ring->mr_type == MAC_RING_TYPE_TX); 34228275SEric Cheng ASSERT(ring->mr_state >= MR_INUSE); 34238275SEric Cheng ASSERT(info->mri_tx != NULL); 34248275SEric Cheng 34258275SEric Cheng return (info->mri_tx(info->mri_driver, mp)); 34268275SEric Cheng } 34278275SEric Cheng 34288275SEric Cheng /* 34298275SEric Cheng * Find a ring from its index. 34308275SEric Cheng */ 34318275SEric Cheng mac_ring_t * 34328275SEric Cheng mac_find_ring(mac_group_t *group, int index) 34338275SEric Cheng { 34348275SEric Cheng mac_ring_t *ring = group->mrg_rings; 34358275SEric Cheng 34368275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) 34378275SEric Cheng if (ring->mr_index == index) 34388275SEric Cheng break; 34398275SEric Cheng 34408275SEric Cheng return (ring); 34418275SEric Cheng } 34428275SEric Cheng /* 34438275SEric Cheng * Add a ring to an existing group. 34448275SEric Cheng * 34458275SEric Cheng * The ring must be either passed directly (for example if the ring 34468275SEric Cheng * movement is initiated by the framework), or specified through a driver 34478275SEric Cheng * index (for example when the ring is added by the driver. 34488275SEric Cheng * 34498275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
34508275SEric Cheng */ 34518275SEric Cheng int 34528275SEric Cheng i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) 34538275SEric Cheng { 34548275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 34558275SEric Cheng mac_capab_rings_t *cap_rings; 34568275SEric Cheng boolean_t driver_call = (ring == NULL); 34578275SEric Cheng mac_group_type_t group_type; 34588275SEric Cheng int ret = 0; 34598275SEric Cheng 34608275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 34618275SEric Cheng 34628275SEric Cheng switch (group->mrg_type) { 34638275SEric Cheng case MAC_RING_TYPE_RX: 34648275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 34658275SEric Cheng group_type = mip->mi_rx_group_type; 34668275SEric Cheng break; 34678275SEric Cheng case MAC_RING_TYPE_TX: 34688275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 34698275SEric Cheng group_type = mip->mi_tx_group_type; 34708275SEric Cheng break; 34718275SEric Cheng default: 34728275SEric Cheng ASSERT(B_FALSE); 34738275SEric Cheng } 34748275SEric Cheng 34758275SEric Cheng /* 34768275SEric Cheng * There should be no ring with the same ring index in the target 34778275SEric Cheng * group. 34788275SEric Cheng */ 34798275SEric Cheng ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == 34808275SEric Cheng NULL); 34818275SEric Cheng 34828275SEric Cheng if (driver_call) { 34838275SEric Cheng /* 34848275SEric Cheng * The function is called as a result of a request from 34858275SEric Cheng * a driver to add a ring to an existing group, for example 34868275SEric Cheng * from the aggregation driver. Allocate a new mac_ring_t 34878275SEric Cheng * for that ring. 
34888275SEric Cheng */ 34898275SEric Cheng ring = mac_init_ring(mip, group, index, cap_rings); 34908275SEric Cheng ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT); 34918275SEric Cheng } else { 34928275SEric Cheng /* 34938275SEric Cheng * The function is called as a result of a MAC layer request 34948275SEric Cheng * to add a ring to an existing group. In this case the 34958275SEric Cheng * ring is being moved between groups, which requires 34968275SEric Cheng * the underlying driver to support dynamic grouping, 34978275SEric Cheng * and the mac_ring_t already exists. 34988275SEric Cheng */ 34998275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 35008275SEric Cheng ASSERT(cap_rings->mr_gaddring != NULL); 35018275SEric Cheng ASSERT(ring->mr_gh == NULL); 35028275SEric Cheng } 35038275SEric Cheng 35048275SEric Cheng /* 35058275SEric Cheng * At this point the ring should not be in use, and it should be 35068275SEric Cheng * of the right for the target group. 35078275SEric Cheng */ 35088275SEric Cheng ASSERT(ring->mr_state < MR_INUSE); 35098275SEric Cheng ASSERT(ring->mr_srs == NULL); 35108275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 35118275SEric Cheng 35128275SEric Cheng if (!driver_call) { 35138275SEric Cheng /* 35148275SEric Cheng * Add the driver level hardware ring if the process was not 35158275SEric Cheng * initiated by the driver, and the target group is not the 35168275SEric Cheng * group. 35178275SEric Cheng */ 35188275SEric Cheng if (group->mrg_driver != NULL) { 35198275SEric Cheng cap_rings->mr_gaddring(group->mrg_driver, 35208275SEric Cheng ring->mr_driver, ring->mr_type); 35218275SEric Cheng } 35228275SEric Cheng 35238275SEric Cheng /* 35248275SEric Cheng * Insert the ring ahead existing rings. 
35258275SEric Cheng */ 35268275SEric Cheng ring->mr_next = group->mrg_rings; 35278275SEric Cheng group->mrg_rings = ring; 35288275SEric Cheng ring->mr_gh = (mac_group_handle_t)group; 35298275SEric Cheng group->mrg_cur_count++; 35308275SEric Cheng } 35318275SEric Cheng 35328275SEric Cheng /* 35338275SEric Cheng * If the group has not been actively used, we're done. 35348275SEric Cheng */ 35358275SEric Cheng if (group->mrg_index != -1 && 35368275SEric Cheng group->mrg_state < MAC_GROUP_STATE_RESERVED) 35378275SEric Cheng return (0); 35388275SEric Cheng 35398275SEric Cheng /* 35408275SEric Cheng * Set up SRS/SR according to the ring type. 35418275SEric Cheng */ 35428275SEric Cheng switch (ring->mr_type) { 35438275SEric Cheng case MAC_RING_TYPE_RX: 35448275SEric Cheng /* 35458275SEric Cheng * Setup SRS on top of the new ring if the group is 35468275SEric Cheng * reserved for someones exclusive use. 35478275SEric Cheng */ 35488275SEric Cheng if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { 35498275SEric Cheng flow_entry_t *flent; 35508275SEric Cheng mac_client_impl_t *mcip; 35518275SEric Cheng 35528275SEric Cheng mcip = MAC_RX_GROUP_ONLY_CLIENT(group); 35538275SEric Cheng ASSERT(mcip != NULL); 35548275SEric Cheng flent = mcip->mci_flent; 35558275SEric Cheng ASSERT(flent->fe_rx_srs_cnt > 0); 35568275SEric Cheng mac_srs_group_setup(mcip, flent, group, SRST_LINK); 35578275SEric Cheng } 35588275SEric Cheng break; 35598275SEric Cheng case MAC_RING_TYPE_TX: 35608275SEric Cheng /* 35618275SEric Cheng * For TX this function is only invoked during the 35628275SEric Cheng * initial creation of a group when a share is 35638275SEric Cheng * associated with a MAC client. So the datapath is not 35648275SEric Cheng * yet setup, and will be setup later after the 35658275SEric Cheng * group has been reserved and populated. 
35668275SEric Cheng */ 35678275SEric Cheng break; 35688275SEric Cheng default: 35698275SEric Cheng ASSERT(B_FALSE); 35708275SEric Cheng } 35718275SEric Cheng 35728275SEric Cheng /* 35738275SEric Cheng * Start the ring if needed. Failure causes to undo the grouping action. 35748275SEric Cheng */ 35758275SEric Cheng if ((ret = mac_start_ring(ring)) != 0) { 35768275SEric Cheng if (ring->mr_type == MAC_RING_TYPE_RX) { 35778275SEric Cheng if (ring->mr_srs != NULL) { 35788275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 35798275SEric Cheng ring->mr_srs = NULL; 35808275SEric Cheng } 35818275SEric Cheng } 35828275SEric Cheng if (!driver_call) { 35838275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 35848275SEric Cheng ring->mr_driver, ring->mr_type); 35858275SEric Cheng } 35868275SEric Cheng group->mrg_cur_count--; 35878275SEric Cheng group->mrg_rings = ring->mr_next; 35888275SEric Cheng 35898275SEric Cheng ring->mr_gh = NULL; 35908275SEric Cheng 35918275SEric Cheng if (driver_call) 35928275SEric Cheng mac_ring_free(mip, ring); 35938275SEric Cheng 35948275SEric Cheng return (ret); 35958275SEric Cheng } 35968275SEric Cheng 35978275SEric Cheng /* 35988275SEric Cheng * Update the ring's state. 35998275SEric Cheng */ 36008275SEric Cheng ring->mr_state = MR_INUSE; 36018275SEric Cheng MAC_RING_UNMARK(ring, MR_INCIPIENT); 36028275SEric Cheng return (0); 36038275SEric Cheng } 36048275SEric Cheng 36058275SEric Cheng /* 36068275SEric Cheng * Remove a ring from it's current group. MAC internal function for dynamic 36078275SEric Cheng * grouping. 36088275SEric Cheng * 36098275SEric Cheng * The caller needs to call mac_perim_enter() before calling this function. 
36108275SEric Cheng */ 36118275SEric Cheng void 36128275SEric Cheng i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, 36138275SEric Cheng boolean_t driver_call) 36148275SEric Cheng { 36158275SEric Cheng mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 36168275SEric Cheng mac_capab_rings_t *cap_rings = NULL; 36178275SEric Cheng mac_group_type_t group_type; 36188275SEric Cheng 36198275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 36208275SEric Cheng 36218275SEric Cheng ASSERT(mac_find_ring(group, ring->mr_index) == ring); 36228275SEric Cheng ASSERT((mac_group_t *)ring->mr_gh == group); 36238275SEric Cheng ASSERT(ring->mr_type == group->mrg_type); 36248275SEric Cheng 36258275SEric Cheng switch (ring->mr_type) { 36268275SEric Cheng case MAC_RING_TYPE_RX: 36278275SEric Cheng group_type = mip->mi_rx_group_type; 36288275SEric Cheng cap_rings = &mip->mi_rx_rings_cap; 36298275SEric Cheng 36308275SEric Cheng if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) 36318275SEric Cheng mac_stop_ring(ring); 36328275SEric Cheng 36338275SEric Cheng /* 36348275SEric Cheng * Only hardware classified packets hold a reference to the 36358275SEric Cheng * ring all the way up the Rx path. mac_rx_srs_remove() 36368275SEric Cheng * will take care of quiescing the Rx path and removing the 36378275SEric Cheng * SRS. The software classified path neither holds a reference 36388275SEric Cheng * nor any association with the ring in mac_rx. 
36398275SEric Cheng */ 36408275SEric Cheng if (ring->mr_srs != NULL) { 36418275SEric Cheng mac_rx_srs_remove(ring->mr_srs); 36428275SEric Cheng ring->mr_srs = NULL; 36438275SEric Cheng } 36448275SEric Cheng ring->mr_state = MR_FREE; 36458275SEric Cheng ring->mr_flag = 0; 36468275SEric Cheng 36478275SEric Cheng break; 36488275SEric Cheng case MAC_RING_TYPE_TX: 36498275SEric Cheng /* 36508275SEric Cheng * For TX this function is only invoked in two 36518275SEric Cheng * cases: 36528275SEric Cheng * 36538275SEric Cheng * 1) In the case of a failure during the 36548275SEric Cheng * initial creation of a group when a share is 36558275SEric Cheng * associated with a MAC client. So the SRS is not 36568275SEric Cheng * yet setup, and will be setup later after the 36578275SEric Cheng * group has been reserved and populated. 36588275SEric Cheng * 36598275SEric Cheng * 2) From mac_release_tx_group() when freeing 36608275SEric Cheng * a TX SRS. 36618275SEric Cheng * 36628275SEric Cheng * In both cases the SRS and its soft rings are 36638275SEric Cheng * already quiesced. 36648275SEric Cheng */ 36658275SEric Cheng ASSERT(!driver_call); 36668275SEric Cheng group_type = mip->mi_tx_group_type; 36678275SEric Cheng cap_rings = &mip->mi_tx_rings_cap; 36688275SEric Cheng break; 36698275SEric Cheng default: 36708275SEric Cheng ASSERT(B_FALSE); 36718275SEric Cheng } 36728275SEric Cheng 36738275SEric Cheng /* 36748275SEric Cheng * Remove the ring from the group. 
36758275SEric Cheng */ 36768275SEric Cheng if (ring == group->mrg_rings) 36778275SEric Cheng group->mrg_rings = ring->mr_next; 36788275SEric Cheng else { 36798275SEric Cheng mac_ring_t *pre; 36808275SEric Cheng 36818275SEric Cheng pre = group->mrg_rings; 36828275SEric Cheng while (pre->mr_next != ring) 36838275SEric Cheng pre = pre->mr_next; 36848275SEric Cheng pre->mr_next = ring->mr_next; 36858275SEric Cheng } 36868275SEric Cheng group->mrg_cur_count--; 36878275SEric Cheng 36888275SEric Cheng if (!driver_call) { 36898275SEric Cheng ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 36908275SEric Cheng ASSERT(cap_rings->mr_gremring != NULL); 36918275SEric Cheng 36928275SEric Cheng /* 36938275SEric Cheng * Remove the driver level hardware ring. 36948275SEric Cheng */ 36958275SEric Cheng if (group->mrg_driver != NULL) { 36968275SEric Cheng cap_rings->mr_gremring(group->mrg_driver, 36978275SEric Cheng ring->mr_driver, ring->mr_type); 36988275SEric Cheng } 36998275SEric Cheng } 37008275SEric Cheng 37018275SEric Cheng ring->mr_gh = NULL; 37028275SEric Cheng if (driver_call) { 37038275SEric Cheng mac_ring_free(mip, ring); 37048275SEric Cheng } else { 37058275SEric Cheng ring->mr_state = MR_FREE; 37068275SEric Cheng ring->mr_flag = 0; 37078275SEric Cheng } 37088275SEric Cheng } 37098275SEric Cheng 37108275SEric Cheng /* 37118275SEric Cheng * Move a ring to the target group. If needed, remove the ring from the group 37128275SEric Cheng * that it currently belongs to. 37138275SEric Cheng * 37148275SEric Cheng * The caller need to enter MAC's perimeter by calling mac_perim_enter(). 
37158275SEric Cheng */ 37168275SEric Cheng static int 37178275SEric Cheng mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring) 37188275SEric Cheng { 37198275SEric Cheng mac_group_t *s_group = (mac_group_t *)ring->mr_gh; 37208275SEric Cheng int rv; 37218275SEric Cheng 37228275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37238275SEric Cheng ASSERT(d_group != NULL); 37248275SEric Cheng ASSERT(s_group->mrg_mh == d_group->mrg_mh); 37258275SEric Cheng 37268275SEric Cheng if (s_group == d_group) 37278275SEric Cheng return (0); 37288275SEric Cheng 37298275SEric Cheng /* 37308275SEric Cheng * Remove it from current group first. 37318275SEric Cheng */ 37328275SEric Cheng if (s_group != NULL) 37338275SEric Cheng i_mac_group_rem_ring(s_group, ring, B_FALSE); 37348275SEric Cheng 37358275SEric Cheng /* 37368275SEric Cheng * Add it to the new group. 37378275SEric Cheng */ 37388275SEric Cheng rv = i_mac_group_add_ring(d_group, ring, 0); 37398275SEric Cheng if (rv != 0) { 37408275SEric Cheng /* 37418275SEric Cheng * Failed to add ring back to source group. If 37428275SEric Cheng * that fails, the ring is stuck in limbo, log message. 37438275SEric Cheng */ 37448275SEric Cheng if (i_mac_group_add_ring(s_group, ring, 0)) { 37458275SEric Cheng cmn_err(CE_WARN, "%s: failed to move ring %p\n", 37468275SEric Cheng mip->mi_name, (void *)ring); 37478275SEric Cheng } 37488275SEric Cheng } 37498275SEric Cheng 37508275SEric Cheng return (rv); 37518275SEric Cheng } 37528275SEric Cheng 37538275SEric Cheng /* 37548275SEric Cheng * Find a MAC address according to its value. 
37558275SEric Cheng */ 37568275SEric Cheng mac_address_t * 37578275SEric Cheng mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr) 37588275SEric Cheng { 37598275SEric Cheng mac_address_t *map; 37608275SEric Cheng 37618275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37628275SEric Cheng 37638275SEric Cheng for (map = mip->mi_addresses; map != NULL; map = map->ma_next) { 37648275SEric Cheng if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) 37658275SEric Cheng break; 37668275SEric Cheng } 37678275SEric Cheng 37688275SEric Cheng return (map); 37698275SEric Cheng } 37708275SEric Cheng 37718275SEric Cheng /* 37728275SEric Cheng * Check whether the MAC address is shared by multiple clients. 37738275SEric Cheng */ 37748275SEric Cheng boolean_t 37758275SEric Cheng mac_check_macaddr_shared(mac_address_t *map) 37768275SEric Cheng { 37778275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip)); 37788275SEric Cheng 37798275SEric Cheng return (map->ma_nusers > 1); 37808275SEric Cheng } 37818275SEric Cheng 37828275SEric Cheng /* 37838275SEric Cheng * Remove the specified MAC address from the MAC address list and free it. 
37848275SEric Cheng */ 37858275SEric Cheng static void 37868275SEric Cheng mac_free_macaddr(mac_address_t *map) 37878275SEric Cheng { 37888275SEric Cheng mac_impl_t *mip = map->ma_mip; 37898275SEric Cheng 37908275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 37918275SEric Cheng ASSERT(mip->mi_addresses != NULL); 37928275SEric Cheng 37938275SEric Cheng map = mac_find_macaddr(mip, map->ma_addr); 37948275SEric Cheng 37958275SEric Cheng ASSERT(map != NULL); 37968275SEric Cheng ASSERT(map->ma_nusers == 0); 37978275SEric Cheng 37988275SEric Cheng if (map == mip->mi_addresses) { 37998275SEric Cheng mip->mi_addresses = map->ma_next; 38008275SEric Cheng } else { 38018275SEric Cheng mac_address_t *pre; 38028275SEric Cheng 38038275SEric Cheng pre = mip->mi_addresses; 38048275SEric Cheng while (pre->ma_next != map) 38058275SEric Cheng pre = pre->ma_next; 38068275SEric Cheng pre->ma_next = map->ma_next; 38078275SEric Cheng } 38088275SEric Cheng 38098275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 38108275SEric Cheng } 38118275SEric Cheng 38128275SEric Cheng /* 38138275SEric Cheng * Add a MAC address reference for a client. If the desired MAC address 38148275SEric Cheng * exists, add a reference to it. Otherwise, add the new address by adding 38158275SEric Cheng * it to a reserved group or setting promiscuous mode. Won't try different 38168275SEric Cheng * group is the group is non-NULL, so the caller must explictly share 38178275SEric Cheng * default group when needed. 38188275SEric Cheng * 38198275SEric Cheng * Note, the primary MAC address is initialized at registration time, so 38208275SEric Cheng * to add it to default group only need to activate it if its reference 38218275SEric Cheng * count is still zero. Also, some drivers may not have advertised RINGS 38228275SEric Cheng * capability. 
38238275SEric Cheng */ 38248275SEric Cheng int 38258400SNicolas.Droux@Sun.COM mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, 38268400SNicolas.Droux@Sun.COM boolean_t use_hw) 38278275SEric Cheng { 38288275SEric Cheng mac_address_t *map; 38298275SEric Cheng int err = 0; 38308275SEric Cheng boolean_t allocated_map = B_FALSE; 38318275SEric Cheng 38328275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 38338275SEric Cheng 38348275SEric Cheng map = mac_find_macaddr(mip, mac_addr); 38358275SEric Cheng 38368275SEric Cheng /* 38378275SEric Cheng * If the new MAC address has not been added. Allocate a new one 38388275SEric Cheng * and set it up. 38398275SEric Cheng */ 38408275SEric Cheng if (map == NULL) { 38418275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 38428275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 38438275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 38448275SEric Cheng map->ma_nusers = 0; 38458275SEric Cheng map->ma_group = group; 38468275SEric Cheng map->ma_mip = mip; 38478275SEric Cheng 38488275SEric Cheng /* add the new MAC address to the head of the address list */ 38498275SEric Cheng map->ma_next = mip->mi_addresses; 38508275SEric Cheng mip->mi_addresses = map; 38518275SEric Cheng 38528275SEric Cheng allocated_map = B_TRUE; 38538275SEric Cheng } 38548275SEric Cheng 38558275SEric Cheng ASSERT(map->ma_group == group); 38568275SEric Cheng 38578275SEric Cheng /* 38588275SEric Cheng * If the MAC address is already in use, simply account for the 38598275SEric Cheng * new client. 38608275SEric Cheng */ 38618275SEric Cheng if (map->ma_nusers++ > 0) 38628275SEric Cheng return (0); 38638275SEric Cheng 38648275SEric Cheng /* 38658275SEric Cheng * Activate this MAC address by adding it to the reserved group. 
38668275SEric Cheng */ 38678275SEric Cheng if (group != NULL) { 38688275SEric Cheng err = mac_group_addmac(group, (const uint8_t *)mac_addr); 38698275SEric Cheng if (err == 0) { 38708275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 38718275SEric Cheng return (0); 38728275SEric Cheng } 38738275SEric Cheng } 38748275SEric Cheng 38758275SEric Cheng /* 38768400SNicolas.Droux@Sun.COM * The MAC address addition failed. If the client requires a 38778400SNicolas.Droux@Sun.COM * hardware classified MAC address, fail the operation. 38788400SNicolas.Droux@Sun.COM */ 38798400SNicolas.Droux@Sun.COM if (use_hw) { 38808400SNicolas.Droux@Sun.COM err = ENOSPC; 38818400SNicolas.Droux@Sun.COM goto bail; 38828400SNicolas.Droux@Sun.COM } 38838400SNicolas.Droux@Sun.COM 38848400SNicolas.Droux@Sun.COM /* 38858400SNicolas.Droux@Sun.COM * Try promiscuous mode. 38868400SNicolas.Droux@Sun.COM * 38878400SNicolas.Droux@Sun.COM * For drivers that don't advertise RINGS capability, do 38888400SNicolas.Droux@Sun.COM * nothing for the primary address. 38898275SEric Cheng */ 38908400SNicolas.Droux@Sun.COM if ((group == NULL) && 38918400SNicolas.Droux@Sun.COM (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { 38928400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 38938400SNicolas.Droux@Sun.COM return (0); 38948400SNicolas.Droux@Sun.COM } 38958400SNicolas.Droux@Sun.COM 38968400SNicolas.Droux@Sun.COM /* 38978400SNicolas.Droux@Sun.COM * Enable promiscuous mode in order to receive traffic 38988400SNicolas.Droux@Sun.COM * to the new MAC address. 38998400SNicolas.Droux@Sun.COM */ 39008400SNicolas.Droux@Sun.COM if ((err = i_mac_promisc_set(mip, B_TRUE, MAC_DEVPROMISC)) == 0) { 39018400SNicolas.Droux@Sun.COM map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; 39028400SNicolas.Droux@Sun.COM return (0); 39038275SEric Cheng } 39048275SEric Cheng 39058275SEric Cheng /* 39068275SEric Cheng * Free the MAC address that could not be added. 
Don't free 39078275SEric Cheng * a pre-existing address, it could have been the entry 39088275SEric Cheng * for the primary MAC address which was pre-allocated by 39098275SEric Cheng * mac_init_macaddr(), and which must remain on the list. 39108275SEric Cheng */ 39118400SNicolas.Droux@Sun.COM bail: 39128275SEric Cheng map->ma_nusers--; 39138275SEric Cheng if (allocated_map) 39148275SEric Cheng mac_free_macaddr(map); 39158275SEric Cheng return (err); 39168275SEric Cheng } 39178275SEric Cheng 39188275SEric Cheng /* 39198275SEric Cheng * Remove a reference to a MAC address. This may cause to remove the MAC 39208275SEric Cheng * address from an associated group or to turn off promiscuous mode. 39218275SEric Cheng * The caller needs to handle the failure properly. 39228275SEric Cheng */ 39238275SEric Cheng int 39248275SEric Cheng mac_remove_macaddr(mac_address_t *map) 39258275SEric Cheng { 39268275SEric Cheng mac_impl_t *mip = map->ma_mip; 39278275SEric Cheng int err = 0; 39288275SEric Cheng 39298275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 39308275SEric Cheng 39318275SEric Cheng ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); 39328275SEric Cheng 39338275SEric Cheng /* 39348275SEric Cheng * If it's not the last client using this MAC address, only update 39358275SEric Cheng * the MAC clients count. 39368275SEric Cheng */ 39378275SEric Cheng if (--map->ma_nusers > 0) 39388275SEric Cheng return (0); 39398275SEric Cheng 39408275SEric Cheng /* 39418275SEric Cheng * The MAC address is no longer used by any MAC client, so remove 39428275SEric Cheng * it from its associated group, or turn off promiscuous mode 39438275SEric Cheng * if it was enabled for the MAC address. 39448275SEric Cheng */ 39458275SEric Cheng switch (map->ma_type) { 39468275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 39478275SEric Cheng /* 39488275SEric Cheng * Don't free the preset primary address for drivers that 39498275SEric Cheng * don't advertise RINGS capability. 
39508275SEric Cheng */ 39518275SEric Cheng if (map->ma_group == NULL) 39528275SEric Cheng return (0); 39538275SEric Cheng 39548275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 39558275SEric Cheng break; 39568275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 39578400SNicolas.Droux@Sun.COM err = i_mac_promisc_set(mip, B_FALSE, MAC_DEVPROMISC); 39588275SEric Cheng break; 39598275SEric Cheng default: 39608275SEric Cheng ASSERT(B_FALSE); 39618275SEric Cheng } 39628275SEric Cheng 39638275SEric Cheng if (err != 0) 39648275SEric Cheng return (err); 39658275SEric Cheng 39668275SEric Cheng /* 39678275SEric Cheng * We created MAC address for the primary one at registration, so we 39688275SEric Cheng * won't free it here. mac_fini_macaddr() will take care of it. 39698275SEric Cheng */ 39708275SEric Cheng if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0) 39718275SEric Cheng mac_free_macaddr(map); 39728275SEric Cheng 39738275SEric Cheng return (0); 39748275SEric Cheng } 39758275SEric Cheng 39768275SEric Cheng /* 39778275SEric Cheng * Update an existing MAC address. The caller need to make sure that the new 39788275SEric Cheng * value has not been used. 39798275SEric Cheng */ 39808275SEric Cheng int 39818275SEric Cheng mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) 39828275SEric Cheng { 39838275SEric Cheng mac_impl_t *mip = map->ma_mip; 39848275SEric Cheng int err = 0; 39858275SEric Cheng 39868275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 39878275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 39888275SEric Cheng 39898275SEric Cheng switch (map->ma_type) { 39908275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 39918275SEric Cheng /* 39928275SEric Cheng * Update the primary address for drivers that are not 39938275SEric Cheng * RINGS capable. 
39948275SEric Cheng */ 39958275SEric Cheng if (map->ma_group == NULL) { 39968275SEric Cheng err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) 39978275SEric Cheng mac_addr); 39988275SEric Cheng if (err != 0) 39998275SEric Cheng return (err); 40008275SEric Cheng break; 40018275SEric Cheng } 40028275SEric Cheng 40038275SEric Cheng /* 40048275SEric Cheng * If this MAC address is not currently in use, 40058275SEric Cheng * simply break out and update the value. 40068275SEric Cheng */ 40078275SEric Cheng if (map->ma_nusers == 0) 40088275SEric Cheng break; 40098275SEric Cheng 40108275SEric Cheng /* 40118275SEric Cheng * Need to replace the MAC address associated with a group. 40128275SEric Cheng */ 40138275SEric Cheng err = mac_group_remmac(map->ma_group, map->ma_addr); 40148275SEric Cheng if (err != 0) 40158275SEric Cheng return (err); 40168275SEric Cheng 40178275SEric Cheng err = mac_group_addmac(map->ma_group, mac_addr); 40188275SEric Cheng 40198275SEric Cheng /* 40208275SEric Cheng * Failure hints hardware error. The MAC layer needs to 40218275SEric Cheng * have error notification facility to handle this. 40228275SEric Cheng * Now, simply try to restore the value. 40238275SEric Cheng */ 40248275SEric Cheng if (err != 0) 40258275SEric Cheng (void) mac_group_addmac(map->ma_group, map->ma_addr); 40268275SEric Cheng 40278275SEric Cheng break; 40288275SEric Cheng case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 40298275SEric Cheng /* 40308275SEric Cheng * Need to do nothing more if in promiscuous mode. 40318275SEric Cheng */ 40328275SEric Cheng break; 40338275SEric Cheng default: 40348275SEric Cheng ASSERT(B_FALSE); 40358275SEric Cheng } 40368275SEric Cheng 40378275SEric Cheng /* 40388275SEric Cheng * Successfully replaced the MAC address. 
40398275SEric Cheng */ 40408275SEric Cheng if (err == 0) 40418275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 40428275SEric Cheng 40438275SEric Cheng return (err); 40448275SEric Cheng } 40458275SEric Cheng 40468275SEric Cheng /* 40478275SEric Cheng * Freshen the MAC address with new value. Its caller must have updated the 40488275SEric Cheng * hardware MAC address before calling this function. 40498275SEric Cheng * This funcitons is supposed to be used to handle the MAC address change 40508275SEric Cheng * notification from underlying drivers. 40518275SEric Cheng */ 40528275SEric Cheng void 40538275SEric Cheng mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr) 40548275SEric Cheng { 40558275SEric Cheng mac_impl_t *mip = map->ma_mip; 40568275SEric Cheng 40578275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 40588275SEric Cheng ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 40598275SEric Cheng 40608275SEric Cheng /* 40618275SEric Cheng * Freshen the MAC address with new value. 40628275SEric Cheng */ 40638275SEric Cheng bcopy(mac_addr, map->ma_addr, map->ma_len); 40648275SEric Cheng bcopy(mac_addr, mip->mi_addr, map->ma_len); 40658275SEric Cheng 40668275SEric Cheng /* 40678275SEric Cheng * Update all MAC clients that share this MAC address. 40688275SEric Cheng */ 40698275SEric Cheng mac_unicast_update_clients(mip, map); 40708275SEric Cheng } 40718275SEric Cheng 40728275SEric Cheng /* 40738275SEric Cheng * Set up the primary MAC address. 40748275SEric Cheng */ 40758275SEric Cheng void 40768275SEric Cheng mac_init_macaddr(mac_impl_t *mip) 40778275SEric Cheng { 40788275SEric Cheng mac_address_t *map; 40798275SEric Cheng 40808275SEric Cheng /* 40818275SEric Cheng * The reference count is initialized to zero, until it's really 40828275SEric Cheng * activated. 
40838275SEric Cheng */ 40848275SEric Cheng map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 40858275SEric Cheng map->ma_len = mip->mi_type->mt_addr_length; 40868275SEric Cheng bcopy(mip->mi_addr, map->ma_addr, map->ma_len); 40878275SEric Cheng 40888275SEric Cheng /* 40898275SEric Cheng * If driver advertises RINGS capability, it shouldn't have initialized 40908275SEric Cheng * its primary MAC address. For other drivers, including VNIC, the 40918275SEric Cheng * primary address must work after registration. 40928275SEric Cheng */ 40938275SEric Cheng if (mip->mi_rx_groups == NULL) 40948275SEric Cheng map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 40958275SEric Cheng 40968275SEric Cheng /* 40978275SEric Cheng * The primary MAC address is reserved for default group according 40988275SEric Cheng * to current design. 40998275SEric Cheng */ 41008275SEric Cheng map->ma_group = mip->mi_rx_groups; 41018275SEric Cheng map->ma_mip = mip; 41028275SEric Cheng 41038275SEric Cheng mip->mi_addresses = map; 41048275SEric Cheng } 41058275SEric Cheng 41068275SEric Cheng /* 41078275SEric Cheng * Clean up the primary MAC address. Note, only one primary MAC address 41088275SEric Cheng * is allowed. All other MAC addresses must have been freed appropriately. 41098275SEric Cheng */ 41108275SEric Cheng void 41118275SEric Cheng mac_fini_macaddr(mac_impl_t *mip) 41128275SEric Cheng { 41138275SEric Cheng mac_address_t *map = mip->mi_addresses; 41148275SEric Cheng 4115*8833SVenu.Iyer@Sun.COM if (map == NULL) 4116*8833SVenu.Iyer@Sun.COM return; 4117*8833SVenu.Iyer@Sun.COM 4118*8833SVenu.Iyer@Sun.COM /* 4119*8833SVenu.Iyer@Sun.COM * If mi_addresses is initialized, there should be exactly one 4120*8833SVenu.Iyer@Sun.COM * entry left on the list with no users. 
4121*8833SVenu.Iyer@Sun.COM */ 41228275SEric Cheng ASSERT(map->ma_nusers == 0); 41238275SEric Cheng ASSERT(map->ma_next == NULL); 41248275SEric Cheng 41258275SEric Cheng kmem_free(map, sizeof (mac_address_t)); 41268275SEric Cheng mip->mi_addresses = NULL; 41278275SEric Cheng } 41288275SEric Cheng 41298275SEric Cheng /* 41308275SEric Cheng * Logging related functions. 41318275SEric Cheng */ 41328275SEric Cheng 41338275SEric Cheng /* Write the Flow description to the log file */ 41348275SEric Cheng int 41358275SEric Cheng mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 41368275SEric Cheng { 41378275SEric Cheng flow_desc_t *fdesc; 41388275SEric Cheng mac_resource_props_t *mrp; 41398275SEric Cheng net_desc_t ndesc; 41408275SEric Cheng 41418275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 41428275SEric Cheng 41438275SEric Cheng /* 41448275SEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 41458275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 41468275SEric Cheng */ 41478275SEric Cheng mutex_enter(&flent->fe_lock); 41488275SEric Cheng fdesc = &flent->fe_flow_desc; 41498275SEric Cheng mrp = &flent->fe_resource_props; 41508275SEric Cheng 41518275SEric Cheng ndesc.nd_name = flent->fe_flow_name; 41528275SEric Cheng ndesc.nd_devname = mcip->mci_name; 41538275SEric Cheng bcopy(fdesc->fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 41548275SEric Cheng bcopy(fdesc->fd_dst_mac, ndesc.nd_edest, ETHERADDRL); 41558275SEric Cheng ndesc.nd_sap = htonl(fdesc->fd_sap); 41568275SEric Cheng ndesc.nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 41578275SEric Cheng ndesc.nd_bw_limit = mrp->mrp_maxbw; 41588275SEric Cheng if (ndesc.nd_isv4) { 41598275SEric Cheng ndesc.nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 41608275SEric Cheng ndesc.nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 41618275SEric Cheng } else { 41628275SEric Cheng bcopy(&fdesc->fd_local_addr, ndesc.nd_saddr, IPV6_ADDR_LEN); 41638275SEric Cheng 
bcopy(&fdesc->fd_remote_addr, ndesc.nd_daddr, IPV6_ADDR_LEN); 41648275SEric Cheng } 41658275SEric Cheng ndesc.nd_sport = htons(fdesc->fd_local_port); 41668275SEric Cheng ndesc.nd_dport = htons(fdesc->fd_remote_port); 41678275SEric Cheng ndesc.nd_protocol = (uint8_t)fdesc->fd_protocol; 41688275SEric Cheng mutex_exit(&flent->fe_lock); 41698275SEric Cheng 41708275SEric Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_FLDESC_REC)); 41718275SEric Cheng } 41728275SEric Cheng 41738275SEric Cheng /* Write the Flow statistics to the log file */ 41748275SEric Cheng int 41758275SEric Cheng mac_write_flow_stats(flow_entry_t *flent) 41768275SEric Cheng { 41778275SEric Cheng flow_stats_t *fl_stats; 41788275SEric Cheng net_stat_t nstat; 41798275SEric Cheng 41808275SEric Cheng fl_stats = &flent->fe_flowstats; 41818275SEric Cheng nstat.ns_name = flent->fe_flow_name; 41828275SEric Cheng nstat.ns_ibytes = fl_stats->fs_rbytes; 41838275SEric Cheng nstat.ns_obytes = fl_stats->fs_obytes; 41848275SEric Cheng nstat.ns_ipackets = fl_stats->fs_ipackets; 41858275SEric Cheng nstat.ns_opackets = fl_stats->fs_opackets; 41868275SEric Cheng nstat.ns_ierrors = fl_stats->fs_ierrors; 41878275SEric Cheng nstat.ns_oerrors = fl_stats->fs_oerrors; 41888275SEric Cheng 41898275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); 41908275SEric Cheng } 41918275SEric Cheng 41928275SEric Cheng /* Write the Link Description to the log file */ 41938275SEric Cheng int 41948275SEric Cheng mac_write_link_desc(mac_client_impl_t *mcip) 41958275SEric Cheng { 41968275SEric Cheng net_desc_t ndesc; 41978275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 41988275SEric Cheng 41998275SEric Cheng bzero(&ndesc, sizeof (net_desc_t)); 42008275SEric Cheng 42018275SEric Cheng ndesc.nd_name = mcip->mci_name; 42028275SEric Cheng ndesc.nd_devname = mcip->mci_name; 42038275SEric Cheng ndesc.nd_isv4 = B_TRUE; 42048275SEric Cheng /* 42058275SEric Cheng * Grab the fe_lock to see a self-consistent 
fe_flow_desc. 42068275SEric Cheng * Updates to the fe_flow_desc are done under the fe_lock 42078275SEric Cheng * after removing the flent from the flow table. 42088275SEric Cheng */ 42098275SEric Cheng mutex_enter(&flent->fe_lock); 42108275SEric Cheng bcopy(flent->fe_flow_desc.fd_src_mac, ndesc.nd_ehost, ETHERADDRL); 42118275SEric Cheng mutex_exit(&flent->fe_lock); 42128275SEric Cheng 42138275SEric Cheng return (exacct_commit_netinfo((void *)&ndesc, EX_NET_LNDESC_REC)); 42148275SEric Cheng } 42158275SEric Cheng 42168275SEric Cheng /* Write the Link statistics to the log file */ 42178275SEric Cheng int 42188275SEric Cheng mac_write_link_stats(mac_client_impl_t *mcip) 42198275SEric Cheng { 42208275SEric Cheng net_stat_t nstat; 42218275SEric Cheng 42228275SEric Cheng nstat.ns_name = mcip->mci_name; 42238275SEric Cheng nstat.ns_ibytes = mcip->mci_stat_ibytes; 42248275SEric Cheng nstat.ns_obytes = mcip->mci_stat_obytes; 42258275SEric Cheng nstat.ns_ipackets = mcip->mci_stat_ipackets; 42268275SEric Cheng nstat.ns_opackets = mcip->mci_stat_opackets; 42278275SEric Cheng nstat.ns_ierrors = mcip->mci_stat_ierrors; 42288275SEric Cheng nstat.ns_oerrors = mcip->mci_stat_oerrors; 42298275SEric Cheng 42308275SEric Cheng return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); 42318275SEric Cheng } 42328275SEric Cheng 42338275SEric Cheng /* 42348275SEric Cheng * For a given flow, if the descrition has not been logged before, do it now. 42358275SEric Cheng * If it is a VNIC, then we have collected information about it from the MAC 42368275SEric Cheng * table, so skip it. 
42378275SEric Cheng */ 42388275SEric Cheng /*ARGSUSED*/ 42398275SEric Cheng static int 42408275SEric Cheng mac_log_flowinfo(flow_entry_t *flent, void *args) 42418275SEric Cheng { 42428275SEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 42438275SEric Cheng 42448275SEric Cheng if (mcip == NULL) 42458275SEric Cheng return (0); 42468275SEric Cheng 42478275SEric Cheng /* 42488275SEric Cheng * If the name starts with "vnic", and fe_user_generated is true (to 42498275SEric Cheng * exclude the mcast and active flow entries created implicitly for 42508275SEric Cheng * a vnic, it is a VNIC flow. i.e. vnic1 is a vnic flow, 42518275SEric Cheng * vnic/bge1/mcast1 is not and neither is vnic/bge1/active. 42528275SEric Cheng */ 42538275SEric Cheng if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 && 42548275SEric Cheng (flent->fe_type & FLOW_USER) != 0) { 42558275SEric Cheng return (0); 42568275SEric Cheng } 42578275SEric Cheng 42588275SEric Cheng if (!flent->fe_desc_logged) { 42598275SEric Cheng /* 42608275SEric Cheng * We don't return error because we want to continu the 42618275SEric Cheng * walk in case this is the last walk which means we 42628275SEric Cheng * need to reset fe_desc_logged in all the flows. 42638275SEric Cheng */ 42648275SEric Cheng if (mac_write_flow_desc(flent, mcip) != 0) 42658275SEric Cheng return (0); 42668275SEric Cheng flent->fe_desc_logged = B_TRUE; 42678275SEric Cheng } 42688275SEric Cheng 42698275SEric Cheng /* 42708275SEric Cheng * Regardless of the error, we want to proceed in case we have to 42718275SEric Cheng * reset fe_desc_logged. 
42728275SEric Cheng */ 42738275SEric Cheng (void) mac_write_flow_stats(flent); 42748275SEric Cheng 42758275SEric Cheng if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED)) 42768275SEric Cheng flent->fe_desc_logged = B_FALSE; 42778275SEric Cheng 42788275SEric Cheng return (0); 42798275SEric Cheng } 42808275SEric Cheng 42818275SEric Cheng typedef struct i_mac_log_state_s { 42828275SEric Cheng boolean_t mi_last; 42838275SEric Cheng int mi_fenable; 42848275SEric Cheng int mi_lenable; 42858275SEric Cheng } i_mac_log_state_t; 42868275SEric Cheng 42878275SEric Cheng /* 42888275SEric Cheng * Walk the mac_impl_ts and log the description for each mac client of this mac, 42898275SEric Cheng * if it hasn't already been done. Additionally, log statistics for the link as 42908275SEric Cheng * well. Walk the flow table and log information for each flow as well. 42918275SEric Cheng * If it is the last walk (mci_last), then we turn off mci_desc_logged (and 42928275SEric Cheng * also fe_desc_logged, if flow logging is on) since we want to log the 42938275SEric Cheng * description if and when logging is restarted. 
42948275SEric Cheng */ 42958275SEric Cheng /*ARGSUSED*/ 42968275SEric Cheng static uint_t 42978275SEric Cheng i_mac_log_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 42988275SEric Cheng { 42998275SEric Cheng mac_impl_t *mip = (mac_impl_t *)val; 43008275SEric Cheng i_mac_log_state_t *lstate = (i_mac_log_state_t *)arg; 43018275SEric Cheng int ret; 43028275SEric Cheng mac_client_impl_t *mcip; 43038275SEric Cheng 43048275SEric Cheng /* 43058275SEric Cheng * Only walk the client list for NIC and etherstub 43068275SEric Cheng */ 43078275SEric Cheng if ((mip->mi_state_flags & MIS_DISABLED) || 43088275SEric Cheng ((mip->mi_state_flags & MIS_IS_VNIC) && 43098275SEric Cheng (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) 43108275SEric Cheng return (MH_WALK_CONTINUE); 43118275SEric Cheng 43128275SEric Cheng for (mcip = mip->mi_clients_list; mcip != NULL; 43138275SEric Cheng mcip = mcip->mci_client_next) { 43148275SEric Cheng if (!MCIP_DATAPATH_SETUP(mcip)) 43158275SEric Cheng continue; 43168275SEric Cheng if (lstate->mi_lenable) { 43178275SEric Cheng if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) { 43188275SEric Cheng ret = mac_write_link_desc(mcip); 43198275SEric Cheng if (ret != 0) { 43208275SEric Cheng /* 43218275SEric Cheng * We can't terminate it if this is the last 43228275SEric Cheng * walk, else there might be some links with 43238275SEric Cheng * mi_desc_logged set to true, which means 43248275SEric Cheng * their description won't be logged the next 43258275SEric Cheng * time logging is started (similarly for the 43268275SEric Cheng * flows within such links). We can continue 43278275SEric Cheng * without walking the flow table (i.e. to 43288275SEric Cheng * set fe_desc_logged to false) because we 43298275SEric Cheng * won't have written any flow stuff for this 43308275SEric Cheng * link as we haven't logged the link itself. 
43318275SEric Cheng */ 43328275SEric Cheng if (lstate->mi_last) 43338275SEric Cheng return (MH_WALK_CONTINUE); 43348275SEric Cheng else 43358275SEric Cheng return (MH_WALK_TERMINATE); 43368275SEric Cheng } 43378275SEric Cheng mcip->mci_state_flags |= MCIS_DESC_LOGGED; 43388275SEric Cheng } 43398275SEric Cheng } 43408275SEric Cheng 43418275SEric Cheng if (mac_write_link_stats(mcip) != 0 && !lstate->mi_last) 43428275SEric Cheng return (MH_WALK_TERMINATE); 43438275SEric Cheng 43448275SEric Cheng if (lstate->mi_last) 43458275SEric Cheng mcip->mci_state_flags &= ~MCIS_DESC_LOGGED; 43468275SEric Cheng 43478275SEric Cheng if (lstate->mi_fenable) { 43488275SEric Cheng if (mcip->mci_subflow_tab != NULL) { 43498275SEric Cheng (void) mac_flow_walk(mcip->mci_subflow_tab, 43508275SEric Cheng mac_log_flowinfo, mip); 43518275SEric Cheng } 43528275SEric Cheng } 43538275SEric Cheng } 43548275SEric Cheng return (MH_WALK_CONTINUE); 43558275SEric Cheng } 43568275SEric Cheng 43578275SEric Cheng /* 43588275SEric Cheng * The timer thread that runs every mac_logging_interval seconds and logs 43598275SEric Cheng * link and/or flow information. 
43608275SEric Cheng */ 43618275SEric Cheng /* ARGSUSED */ 43628275SEric Cheng void 43638275SEric Cheng mac_log_linkinfo(void *arg) 43648275SEric Cheng { 43658275SEric Cheng i_mac_log_state_t lstate; 43668275SEric Cheng 43678275SEric Cheng rw_enter(&i_mac_impl_lock, RW_READER); 43688275SEric Cheng if (!mac_flow_log_enable && !mac_link_log_enable) { 43698275SEric Cheng rw_exit(&i_mac_impl_lock); 43708275SEric Cheng return; 43718275SEric Cheng } 43728275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 43738275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 43748275SEric Cheng lstate.mi_last = B_FALSE; 43758275SEric Cheng rw_exit(&i_mac_impl_lock); 43768275SEric Cheng 43778275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 43788275SEric Cheng 43798275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 43808275SEric Cheng if (mac_flow_log_enable || mac_link_log_enable) { 43818275SEric Cheng mac_logging_timer = timeout(mac_log_linkinfo, NULL, 43828275SEric Cheng SEC_TO_TICK(mac_logging_interval)); 43838275SEric Cheng } 43848275SEric Cheng rw_exit(&i_mac_impl_lock); 43858275SEric Cheng } 43868275SEric Cheng 43878275SEric Cheng /* 43888275SEric Cheng * Start the logging timer. 
43898275SEric Cheng */ 43908275SEric Cheng void 43918275SEric Cheng mac_start_logusage(mac_logtype_t type, uint_t interval) 43928275SEric Cheng { 43938275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 43948275SEric Cheng switch (type) { 43958275SEric Cheng case MAC_LOGTYPE_FLOW: 43968275SEric Cheng if (mac_flow_log_enable) { 43978275SEric Cheng rw_exit(&i_mac_impl_lock); 43988275SEric Cheng return; 43998275SEric Cheng } 44008275SEric Cheng mac_flow_log_enable = B_TRUE; 44018275SEric Cheng /* FALLTHRU */ 44028275SEric Cheng case MAC_LOGTYPE_LINK: 44038275SEric Cheng if (mac_link_log_enable) { 44048275SEric Cheng rw_exit(&i_mac_impl_lock); 44058275SEric Cheng return; 44068275SEric Cheng } 44078275SEric Cheng mac_link_log_enable = B_TRUE; 44088275SEric Cheng break; 44098275SEric Cheng default: 44108275SEric Cheng ASSERT(0); 44118275SEric Cheng } 44128275SEric Cheng mac_logging_interval = interval; 44138275SEric Cheng rw_exit(&i_mac_impl_lock); 44148275SEric Cheng mac_log_linkinfo(NULL); 44158275SEric Cheng } 44168275SEric Cheng 44178275SEric Cheng /* 44188275SEric Cheng * Stop the logging timer if both Link and Flow logging are turned off. 
44198275SEric Cheng */ 44208275SEric Cheng void 44218275SEric Cheng mac_stop_logusage(mac_logtype_t type) 44228275SEric Cheng { 44238275SEric Cheng i_mac_log_state_t lstate; 44248275SEric Cheng 44258275SEric Cheng rw_enter(&i_mac_impl_lock, RW_WRITER); 44268275SEric Cheng lstate.mi_fenable = mac_flow_log_enable; 44278275SEric Cheng lstate.mi_lenable = mac_link_log_enable; 44288275SEric Cheng 44298275SEric Cheng /* Last walk */ 44308275SEric Cheng lstate.mi_last = B_TRUE; 44318275SEric Cheng 44328275SEric Cheng switch (type) { 44338275SEric Cheng case MAC_LOGTYPE_FLOW: 44348275SEric Cheng if (lstate.mi_fenable) { 44358275SEric Cheng ASSERT(mac_link_log_enable); 44368275SEric Cheng mac_flow_log_enable = B_FALSE; 44378275SEric Cheng mac_link_log_enable = B_FALSE; 44388275SEric Cheng break; 44398275SEric Cheng } 44408275SEric Cheng /* FALLTHRU */ 44418275SEric Cheng case MAC_LOGTYPE_LINK: 44428275SEric Cheng if (!lstate.mi_lenable || mac_flow_log_enable) { 44438275SEric Cheng rw_exit(&i_mac_impl_lock); 44448275SEric Cheng return; 44458275SEric Cheng } 44468275SEric Cheng mac_link_log_enable = B_FALSE; 44478275SEric Cheng break; 44488275SEric Cheng default: 44498275SEric Cheng ASSERT(0); 44508275SEric Cheng } 44518275SEric Cheng rw_exit(&i_mac_impl_lock); 44528275SEric Cheng (void) untimeout(mac_logging_timer); 44538275SEric Cheng mac_logging_timer = 0; 44548275SEric Cheng 44558275SEric Cheng /* Last walk */ 44568275SEric Cheng mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate); 44578275SEric Cheng } 44588275SEric Cheng 44598275SEric Cheng /* 44608275SEric Cheng * Walk the rx and tx SRS/SRs for a flow and update the priority value. 
44618275SEric Cheng */ 44628275SEric Cheng void 44638275SEric Cheng mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) 44648275SEric Cheng { 44658275SEric Cheng pri_t pri; 44668275SEric Cheng int count; 44678275SEric Cheng mac_soft_ring_set_t *mac_srs; 44688275SEric Cheng 44698275SEric Cheng if (flent->fe_rx_srs_cnt <= 0) 44708275SEric Cheng return; 44718275SEric Cheng 44728275SEric Cheng if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type == 44738275SEric Cheng SRST_FLOW) { 44748275SEric Cheng pri = FLOW_PRIORITY(mcip->mci_min_pri, 44758275SEric Cheng mcip->mci_max_pri, 44768275SEric Cheng flent->fe_resource_props.mrp_priority); 44778275SEric Cheng } else { 44788275SEric Cheng pri = mcip->mci_max_pri; 44798275SEric Cheng } 44808275SEric Cheng 44818275SEric Cheng for (count = 0; count < flent->fe_rx_srs_cnt; count++) { 44828275SEric Cheng mac_srs = flent->fe_rx_srs[count]; 44838275SEric Cheng mac_update_srs_priority(mac_srs, pri); 44848275SEric Cheng } 44858275SEric Cheng /* 44868275SEric Cheng * If we have a Tx SRS, we need to modify all the threads associated 44878275SEric Cheng * with it. 44888275SEric Cheng */ 44898275SEric Cheng if (flent->fe_tx_srs != NULL) 44908275SEric Cheng mac_update_srs_priority(flent->fe_tx_srs, pri); 44918275SEric Cheng } 44928275SEric Cheng 44938275SEric Cheng /* 44948275SEric Cheng * RX and TX rings are reserved according to different semantics depending 44958275SEric Cheng * on the requests from the MAC clients and type of rings: 44968275SEric Cheng * 44978275SEric Cheng * On the Tx side, by default we reserve individual rings, independently from 44988275SEric Cheng * the groups. 44998275SEric Cheng * 45008275SEric Cheng * On the Rx side, the reservation is at the granularity of the group 45018275SEric Cheng * of rings, and used for v12n level 1 only. It has a special case for the 45028275SEric Cheng * primary client. 
45038275SEric Cheng * 45048275SEric Cheng * If a share is allocated to a MAC client, we allocate a TX group and an 45058275SEric Cheng * RX group to the client, and assign TX rings and RX rings to these 45068275SEric Cheng * groups according to information gathered from the driver through 45078275SEric Cheng * the share capability. 45088275SEric Cheng * 45098275SEric Cheng * The foreseable evolution of Rx rings will handle v12n level 2 and higher 45108275SEric Cheng * to allocate individual rings out of a group and program the hw classifier 45118275SEric Cheng * based on IP address or higher level criteria. 45128275SEric Cheng */ 45138275SEric Cheng 45148275SEric Cheng /* 45158275SEric Cheng * mac_reserve_tx_ring() 45168275SEric Cheng * Reserve a unused ring by marking it with MR_INUSE state. 45178275SEric Cheng * As reserved, the ring is ready to function. 45188275SEric Cheng * 45198275SEric Cheng * Notes for Hybrid I/O: 45208275SEric Cheng * 45218275SEric Cheng * If a specific ring is needed, it is specified through the desired_ring 45228275SEric Cheng * argument. Otherwise that argument is set to NULL. 45238275SEric Cheng * If the desired ring was previous allocated to another client, this 45248275SEric Cheng * function swaps it with a new ring from the group of unassigned rings. 45258275SEric Cheng */ 45268275SEric Cheng mac_ring_t * 45278275SEric Cheng mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) 45288275SEric Cheng { 45298275SEric Cheng mac_group_t *group; 45308275SEric Cheng mac_ring_t *ring; 45318275SEric Cheng 45328275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 45338275SEric Cheng 45348275SEric Cheng if (mip->mi_tx_groups == NULL) 45358275SEric Cheng return (NULL); 45368275SEric Cheng 45378275SEric Cheng /* 45388275SEric Cheng * Find an available ring and start it before changing its status. 45398275SEric Cheng * The unassigned rings are at the end of the mi_tx_groups 45408275SEric Cheng * array. 
45418275SEric Cheng */ 45428275SEric Cheng group = mip->mi_tx_groups + mip->mi_tx_group_count; 45438275SEric Cheng 45448275SEric Cheng for (ring = group->mrg_rings; ring != NULL; 45458275SEric Cheng ring = ring->mr_next) { 45468275SEric Cheng if (desired_ring == NULL) { 45478275SEric Cheng if (ring->mr_state == MR_FREE) 45488275SEric Cheng /* wanted any free ring and found one */ 45498275SEric Cheng break; 45508275SEric Cheng } else { 45518275SEric Cheng mac_ring_t *sring; 45528275SEric Cheng mac_client_impl_t *client; 45538275SEric Cheng mac_soft_ring_set_t *srs; 45548275SEric Cheng 45558275SEric Cheng if (ring != desired_ring) 45568275SEric Cheng /* wants a desired ring but this one ain't it */ 45578275SEric Cheng continue; 45588275SEric Cheng 45598275SEric Cheng if (ring->mr_state == MR_FREE) 45608275SEric Cheng break; 45618275SEric Cheng 45628275SEric Cheng /* 45638275SEric Cheng * Found the desired ring but it's already in use. 45648275SEric Cheng * Swap it with a new ring. 45658275SEric Cheng */ 45668275SEric Cheng 45678275SEric Cheng /* find the client which owns that ring */ 45688275SEric Cheng for (client = mip->mi_clients_list; client != NULL; 45698275SEric Cheng client = client->mci_client_next) { 45708275SEric Cheng srs = MCIP_TX_SRS(client); 45718275SEric Cheng if (srs != NULL && mac_tx_srs_ring_present(srs, 45728275SEric Cheng desired_ring)) { 45738275SEric Cheng /* found our ring */ 45748275SEric Cheng break; 45758275SEric Cheng } 45768275SEric Cheng } 45778400SNicolas.Droux@Sun.COM if (client == NULL) { 45788400SNicolas.Droux@Sun.COM /* 45798400SNicolas.Droux@Sun.COM * The TX ring is in use, but it's not 45808400SNicolas.Droux@Sun.COM * associated with any clients, so it 45818400SNicolas.Droux@Sun.COM * has to be the default ring. In that 45828400SNicolas.Droux@Sun.COM * case we can simply assign a new ring 45838400SNicolas.Droux@Sun.COM * as the default ring, and we're done. 
45848400SNicolas.Droux@Sun.COM */ 45858400SNicolas.Droux@Sun.COM ASSERT(mip->mi_default_tx_ring == 45868400SNicolas.Droux@Sun.COM (mac_ring_handle_t)desired_ring); 45878400SNicolas.Droux@Sun.COM 45888400SNicolas.Droux@Sun.COM /* 45898400SNicolas.Droux@Sun.COM * Quiesce all clients on top of 45908400SNicolas.Droux@Sun.COM * the NIC to make sure there are no 45918400SNicolas.Droux@Sun.COM * pending threads still relying on 45928400SNicolas.Droux@Sun.COM * that default ring, for example 45938400SNicolas.Droux@Sun.COM * the multicast path. 45948400SNicolas.Droux@Sun.COM */ 45958400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 45968400SNicolas.Droux@Sun.COM client != NULL; 45978400SNicolas.Droux@Sun.COM client = client->mci_client_next) { 45988400SNicolas.Droux@Sun.COM mac_tx_client_quiesce(client, 45998400SNicolas.Droux@Sun.COM SRS_QUIESCE); 46008400SNicolas.Droux@Sun.COM } 46018400SNicolas.Droux@Sun.COM 46028400SNicolas.Droux@Sun.COM mip->mi_default_tx_ring = (mac_ring_handle_t) 46038400SNicolas.Droux@Sun.COM mac_reserve_tx_ring(mip, NULL); 46048400SNicolas.Droux@Sun.COM 46058400SNicolas.Droux@Sun.COM /* resume the clients */ 46068400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 46078400SNicolas.Droux@Sun.COM client != NULL; 46088400SNicolas.Droux@Sun.COM client = client->mci_client_next) 46098400SNicolas.Droux@Sun.COM mac_tx_client_restart(client); 46108400SNicolas.Droux@Sun.COM 46118400SNicolas.Droux@Sun.COM break; 46128400SNicolas.Droux@Sun.COM } 46138275SEric Cheng 46148275SEric Cheng /* 46158275SEric Cheng * Note that we cannot simply invoke the group 46168275SEric Cheng * add/rem routines since the client doesn't have a 46178275SEric Cheng * TX group. So we need to instead add/remove 46188275SEric Cheng * the rings from the SRS. 
46198275SEric Cheng */ 46208275SEric Cheng ASSERT(client->mci_share == NULL); 46218275SEric Cheng 46228275SEric Cheng /* first quiece the client */ 46238275SEric Cheng mac_tx_client_quiesce(client, SRS_QUIESCE); 46248275SEric Cheng 46258275SEric Cheng /* give a new ring to the client... */ 46268275SEric Cheng sring = mac_reserve_tx_ring(mip, NULL); 46278275SEric Cheng if (sring != NULL) { 46288275SEric Cheng /* 46298275SEric Cheng * There are no other available ring 46308275SEric Cheng * on that MAC instance. The client 46318275SEric Cheng * will fallback to the shared TX 46328275SEric Cheng * ring. 46338275SEric Cheng */ 46348275SEric Cheng mac_tx_srs_add_ring(srs, sring); 46358275SEric Cheng } 46368275SEric Cheng 46378275SEric Cheng /* ... in exchange for our desired ring */ 46388275SEric Cheng mac_tx_srs_del_ring(srs, desired_ring); 46398275SEric Cheng 46408275SEric Cheng /* restart the client */ 46418275SEric Cheng mac_tx_client_restart(client); 46428275SEric Cheng 46438400SNicolas.Droux@Sun.COM if (mip->mi_default_tx_ring == 46448400SNicolas.Droux@Sun.COM (mac_ring_handle_t)desired_ring) { 46458400SNicolas.Droux@Sun.COM /* 46468400SNicolas.Droux@Sun.COM * The desired ring is the default ring, 46478400SNicolas.Droux@Sun.COM * and there are one or more clients 46488400SNicolas.Droux@Sun.COM * using that default ring directly. 46498400SNicolas.Droux@Sun.COM */ 46508400SNicolas.Droux@Sun.COM mip->mi_default_tx_ring = 46518400SNicolas.Droux@Sun.COM (mac_ring_handle_t)sring; 46528400SNicolas.Droux@Sun.COM /* 46538400SNicolas.Droux@Sun.COM * Find clients using default ring and 46548400SNicolas.Droux@Sun.COM * swap it with the new default ring. 
46558400SNicolas.Droux@Sun.COM */ 46568400SNicolas.Droux@Sun.COM for (client = mip->mi_clients_list; 46578400SNicolas.Droux@Sun.COM client != NULL; 46588400SNicolas.Droux@Sun.COM client = client->mci_client_next) { 46598400SNicolas.Droux@Sun.COM srs = MCIP_TX_SRS(client); 46608400SNicolas.Droux@Sun.COM if (srs != NULL && 46618400SNicolas.Droux@Sun.COM mac_tx_srs_ring_present(srs, 46628400SNicolas.Droux@Sun.COM desired_ring)) { 46638400SNicolas.Droux@Sun.COM /* first quiece the client */ 46648400SNicolas.Droux@Sun.COM mac_tx_client_quiesce(client, 46658400SNicolas.Droux@Sun.COM SRS_QUIESCE); 46668400SNicolas.Droux@Sun.COM 46678400SNicolas.Droux@Sun.COM /* 46688400SNicolas.Droux@Sun.COM * Give it the new default 46698400SNicolas.Droux@Sun.COM * ring, and remove the old 46708400SNicolas.Droux@Sun.COM * one. 46718400SNicolas.Droux@Sun.COM */ 46728400SNicolas.Droux@Sun.COM if (sring != NULL) { 46738400SNicolas.Droux@Sun.COM mac_tx_srs_add_ring(srs, 46748400SNicolas.Droux@Sun.COM sring); 46758400SNicolas.Droux@Sun.COM } 46768400SNicolas.Droux@Sun.COM mac_tx_srs_del_ring(srs, 46778400SNicolas.Droux@Sun.COM desired_ring); 46788400SNicolas.Droux@Sun.COM 46798400SNicolas.Droux@Sun.COM /* restart the client */ 46808400SNicolas.Droux@Sun.COM mac_tx_client_restart(client); 46818400SNicolas.Droux@Sun.COM } 46828400SNicolas.Droux@Sun.COM } 46838400SNicolas.Droux@Sun.COM } 46848275SEric Cheng break; 46858275SEric Cheng } 46868275SEric Cheng } 46878275SEric Cheng 46888275SEric Cheng if (ring != NULL) { 46898275SEric Cheng if (mac_start_ring(ring) != 0) 46908275SEric Cheng return (NULL); 46918275SEric Cheng ring->mr_state = MR_INUSE; 46928275SEric Cheng } 46938275SEric Cheng 46948275SEric Cheng return (ring); 46958275SEric Cheng } 46968275SEric Cheng 46978275SEric Cheng /* 46988275SEric Cheng * Minimum number of rings to leave in the default TX group when allocating 46998275SEric Cheng * rings to new clients. 
47008275SEric Cheng */ 47018275SEric Cheng static uint_t mac_min_rx_default_rings = 1; 47028275SEric Cheng 47038275SEric Cheng /* 47048275SEric Cheng * Populate a zero-ring group with rings. If the share is non-NULL, 47058275SEric Cheng * the rings are chosen according to that share. 47068275SEric Cheng * Invoked after allocating a new RX or TX group through 47078275SEric Cheng * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 47088275SEric Cheng * Returns zero on success, an errno otherwise. 47098275SEric Cheng */ 47108275SEric Cheng int 47118275SEric Cheng i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 47128275SEric Cheng mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) 47138275SEric Cheng { 47148275SEric Cheng mac_ring_t **rings, *tmp_ring[1], *ring; 47158275SEric Cheng uint_t nrings; 47168275SEric Cheng int rv, i, j; 47178275SEric Cheng 47188275SEric Cheng ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && 47198275SEric Cheng mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 47208275SEric Cheng ASSERT(new_group->mrg_cur_count == 0); 47218275SEric Cheng 47228275SEric Cheng /* 47238275SEric Cheng * First find the rings to allocate to the group. 47248275SEric Cheng */ 47258275SEric Cheng if (share != NULL) { 47268275SEric Cheng /* get rings through ms_squery() */ 47278275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 47288275SEric Cheng ASSERT(nrings != 0); 47298275SEric Cheng rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 47308275SEric Cheng KM_SLEEP); 47318275SEric Cheng mip->mi_share_capab.ms_squery(share, ring_type, 47328275SEric Cheng (mac_ring_handle_t *)rings, &nrings); 47338275SEric Cheng } else { 47348275SEric Cheng /* this function is called for TX only with a share */ 47358275SEric Cheng ASSERT(ring_type == MAC_RING_TYPE_RX); 47368275SEric Cheng /* 47378275SEric Cheng * Pick one ring from default group. 
47388275SEric Cheng * 47398275SEric Cheng * for now pick the second ring which requires the first ring 47408275SEric Cheng * at index 0 to stay in the default group, since it is the 47418275SEric Cheng * ring which carries the multicast traffic. 47428275SEric Cheng * We need a better way for a driver to indicate this, 47438275SEric Cheng * for example a per-ring flag. 47448275SEric Cheng */ 47458275SEric Cheng for (ring = src_group->mrg_rings; ring != NULL; 47468275SEric Cheng ring = ring->mr_next) { 47478275SEric Cheng if (ring->mr_index != 0) 47488275SEric Cheng break; 47498275SEric Cheng } 47508275SEric Cheng ASSERT(ring != NULL); 47518275SEric Cheng nrings = 1; 47528275SEric Cheng tmp_ring[0] = ring; 47538275SEric Cheng rings = tmp_ring; 47548275SEric Cheng } 47558275SEric Cheng 47568275SEric Cheng switch (ring_type) { 47578275SEric Cheng case MAC_RING_TYPE_RX: 47588275SEric Cheng if (src_group->mrg_cur_count - nrings < 47598275SEric Cheng mac_min_rx_default_rings) { 47608275SEric Cheng /* we ran out of rings */ 47618275SEric Cheng return (ENOSPC); 47628275SEric Cheng } 47638275SEric Cheng 47648275SEric Cheng /* move receive rings to new group */ 47658275SEric Cheng for (i = 0; i < nrings; i++) { 47668275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 47678275SEric Cheng if (rv != 0) { 47688275SEric Cheng /* move rings back on failure */ 47698275SEric Cheng for (j = 0; j < i; j++) { 47708275SEric Cheng (void) mac_group_mov_ring(mip, 47718275SEric Cheng src_group, rings[j]); 47728275SEric Cheng } 47738275SEric Cheng return (rv); 47748275SEric Cheng } 47758275SEric Cheng } 47768275SEric Cheng break; 47778275SEric Cheng 47788275SEric Cheng case MAC_RING_TYPE_TX: { 47798275SEric Cheng mac_ring_t *tmp_ring; 47808275SEric Cheng 47818275SEric Cheng /* move the TX rings to the new group */ 47828275SEric Cheng ASSERT(src_group == NULL); 47838275SEric Cheng for (i = 0; i < nrings; i++) { 47848275SEric Cheng /* get the desired ring */ 47858275SEric Cheng 
tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 47868275SEric Cheng ASSERT(tmp_ring == rings[i]); 47878275SEric Cheng rv = mac_group_mov_ring(mip, new_group, rings[i]); 47888275SEric Cheng if (rv != 0) { 47898275SEric Cheng /* cleanup on failure */ 47908275SEric Cheng for (j = 0; j < i; j++) { 47918275SEric Cheng (void) mac_group_mov_ring(mip, 47928275SEric Cheng mip->mi_tx_groups + 47938275SEric Cheng mip->mi_tx_group_count, rings[j]); 47948275SEric Cheng } 47958275SEric Cheng } 47968275SEric Cheng } 47978275SEric Cheng break; 47988275SEric Cheng } 47998275SEric Cheng } 48008275SEric Cheng 48018275SEric Cheng if (share != NULL) { 48028275SEric Cheng /* add group to share */ 48038275SEric Cheng mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 48048275SEric Cheng /* free temporary array of rings */ 48058275SEric Cheng kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 48068275SEric Cheng } 48078275SEric Cheng 48088275SEric Cheng return (0); 48098275SEric Cheng } 48108275SEric Cheng 48118275SEric Cheng void 48128275SEric Cheng mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 48138275SEric Cheng { 48148275SEric Cheng mac_grp_client_t *mgcp; 48158275SEric Cheng 48168275SEric Cheng for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 48178275SEric Cheng if (mgcp->mgc_client == mcip) 48188275SEric Cheng break; 48198275SEric Cheng } 48208275SEric Cheng 48218275SEric Cheng VERIFY(mgcp == NULL); 48228275SEric Cheng 48238275SEric Cheng mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 48248275SEric Cheng mgcp->mgc_client = mcip; 48258275SEric Cheng mgcp->mgc_next = grp->mrg_clients; 48268275SEric Cheng grp->mrg_clients = mgcp; 48278275SEric Cheng 48288275SEric Cheng } 48298275SEric Cheng 48308275SEric Cheng void 48318275SEric Cheng mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 48328275SEric Cheng { 48338275SEric Cheng mac_grp_client_t *mgcp, **pprev; 48348275SEric Cheng 48358275SEric Cheng for 
(pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 48368275SEric Cheng pprev = &mgcp->mgc_next, mgcp = *pprev) { 48378275SEric Cheng if (mgcp->mgc_client == mcip) 48388275SEric Cheng break; 48398275SEric Cheng } 48408275SEric Cheng 48418275SEric Cheng ASSERT(mgcp != NULL); 48428275SEric Cheng 48438275SEric Cheng *pprev = mgcp->mgc_next; 48448275SEric Cheng kmem_free(mgcp, sizeof (mac_grp_client_t)); 48458275SEric Cheng } 48468275SEric Cheng 48478275SEric Cheng /* 48488275SEric Cheng * mac_reserve_rx_group() 48498275SEric Cheng * 48508275SEric Cheng * Finds an available group and exclusively reserves it for a client. 48518275SEric Cheng * The group is chosen to suit the flow's resource controls (bandwidth and 48528275SEric Cheng * fanout requirements) and the address type. 48538275SEric Cheng * If the requestor is the pimary MAC then return the group with the 48548275SEric Cheng * largest number of rings, otherwise the default ring when available. 48558275SEric Cheng */ 48568275SEric Cheng mac_group_t * 48578275SEric Cheng mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, 48588275SEric Cheng mac_rx_group_reserve_type_t rtype) 48598275SEric Cheng { 48608275SEric Cheng mac_share_handle_t share = mcip->mci_share; 48618275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 48628275SEric Cheng mac_group_t *grp = NULL; 48638275SEric Cheng int i, start, loopcount; 48648275SEric Cheng int err; 48658275SEric Cheng mac_address_t *map; 48668275SEric Cheng 48678275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 48688275SEric Cheng 48698275SEric Cheng /* Check if a group already has this mac address (case of VLANs) */ 48708275SEric Cheng if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) 48718275SEric Cheng return (map->ma_group); 48728275SEric Cheng 48738275SEric Cheng if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || 48748275SEric Cheng rtype == MAC_RX_NO_RESERVE) 48758275SEric Cheng return (NULL); 48768275SEric Cheng 48778275SEric Cheng /* 
48788275SEric Cheng * Try to exclusively reserve a RX group. 48798275SEric Cheng * 48808275SEric Cheng * For flows requires SW_RING it always goes to the default group 48818275SEric Cheng * (Until we can explicitely call out default groups (CR 6695600), 48828275SEric Cheng * we assume that the default group is always at position zero); 48838275SEric Cheng * 48848275SEric Cheng * For flows requires HW_DEFAULT_RING (unicast flow of the primary 48858275SEric Cheng * client), try to reserve the default RX group only. 48868275SEric Cheng * 48878275SEric Cheng * For flows requires HW_RING (unicast flow of other clients), try 48888275SEric Cheng * to reserve non-default RX group then the default group. 48898275SEric Cheng */ 48908275SEric Cheng switch (rtype) { 48918275SEric Cheng case MAC_RX_RESERVE_DEFAULT: 48928275SEric Cheng start = 0; 48938275SEric Cheng loopcount = 1; 48948275SEric Cheng break; 48958275SEric Cheng case MAC_RX_RESERVE_NONDEFAULT: 48968275SEric Cheng start = 1; 48978275SEric Cheng loopcount = mip->mi_rx_group_count; 48988275SEric Cheng } 48998275SEric Cheng 49008275SEric Cheng for (i = start; i < start + loopcount; i++) { 49018275SEric Cheng grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; 49028275SEric Cheng 49038275SEric Cheng DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 49048275SEric Cheng int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 49058275SEric Cheng 49068275SEric Cheng /* 49078275SEric Cheng * Check to see whether this mac client is the only client 49088275SEric Cheng * on this RX group. If not, we cannot exclusively reserve 49098275SEric Cheng * this RX group. 49108275SEric Cheng */ 49118275SEric Cheng if (!MAC_RX_GROUP_NO_CLIENT(grp) && 49128275SEric Cheng (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { 49138275SEric Cheng continue; 49148275SEric Cheng } 49158275SEric Cheng 49168275SEric Cheng /* 49178275SEric Cheng * This group could already be SHARED by other multicast 49188275SEric Cheng * flows on this client. 
In that case, the group would 49198275SEric Cheng * be shared and has already been started. 49208275SEric Cheng */ 49218275SEric Cheng ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 49228275SEric Cheng 49238275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 49248275SEric Cheng (mac_start_group(grp) != 0)) { 49258275SEric Cheng continue; 49268275SEric Cheng } 49278275SEric Cheng 49288275SEric Cheng if ((i % mip->mi_rx_group_count) == 0 || 49298275SEric Cheng mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 49308275SEric Cheng break; 49318275SEric Cheng } 49328275SEric Cheng 49338275SEric Cheng ASSERT(grp->mrg_cur_count == 0); 49348275SEric Cheng 49358275SEric Cheng /* 49368275SEric Cheng * Populate the group. Rings should be taken 49378275SEric Cheng * from the default group at position 0 for now. 49388275SEric Cheng */ 49398275SEric Cheng 49408275SEric Cheng err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 49418275SEric Cheng &mip->mi_rx_groups[0], grp, share); 49428275SEric Cheng if (err == 0) 49438275SEric Cheng break; 49448275SEric Cheng 49458275SEric Cheng DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 49468275SEric Cheng mip->mi_name, int, grp->mrg_index, int, err); 49478275SEric Cheng 49488275SEric Cheng /* 49498275SEric Cheng * It's a dynamic group but the grouping operation failed. 49508275SEric Cheng */ 49518275SEric Cheng mac_stop_group(grp); 49528275SEric Cheng } 49538275SEric Cheng 49548275SEric Cheng if (i == start + loopcount) 49558275SEric Cheng return (NULL); 49568275SEric Cheng 49578275SEric Cheng ASSERT(grp != NULL); 49588275SEric Cheng 49598275SEric Cheng DTRACE_PROBE2(rx__group__reserved, 49608275SEric Cheng char *, mip->mi_name, int, grp->mrg_index); 49618275SEric Cheng return (grp); 49628275SEric Cheng } 49638275SEric Cheng 49648275SEric Cheng /* 49658275SEric Cheng * mac_rx_release_group() 49668275SEric Cheng * 49678275SEric Cheng * This is called when there are no clients left for the group. 
49688275SEric Cheng * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 49698275SEric Cheng * and if it is a non default group, the shares are removed and 49708275SEric Cheng * all rings are assigned back to default group. 49718275SEric Cheng */ 49728275SEric Cheng void 49738275SEric Cheng mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 49748275SEric Cheng { 49758275SEric Cheng mac_impl_t *mip = mcip->mci_mip; 49768275SEric Cheng mac_ring_t *ring; 49778275SEric Cheng 49788275SEric Cheng ASSERT(group != &mip->mi_rx_groups[0]); 49798275SEric Cheng 49808275SEric Cheng /* 49818275SEric Cheng * This is the case where there are no clients left. Any 49828275SEric Cheng * SRS etc on this group have also be quiesced. 49838275SEric Cheng */ 49848275SEric Cheng for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 49858275SEric Cheng if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 49868275SEric Cheng ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 49878275SEric Cheng /* 49888275SEric Cheng * Remove the SRS associated with the HW ring. 49898275SEric Cheng * As a result, polling will be disabled. 49908275SEric Cheng */ 49918275SEric Cheng ring->mr_srs = NULL; 49928275SEric Cheng } 49938275SEric Cheng ASSERT(ring->mr_state == MR_INUSE); 49948275SEric Cheng mac_stop_ring(ring); 49958275SEric Cheng ring->mr_state = MR_FREE; 49968275SEric Cheng ring->mr_flag = 0; 49978275SEric Cheng } 49988275SEric Cheng 49998275SEric Cheng /* remove group from share */ 50008275SEric Cheng if (mcip->mci_share != NULL) { 50018275SEric Cheng mip->mi_share_capab.ms_sremove(mcip->mci_share, 50028275SEric Cheng group->mrg_driver); 50038275SEric Cheng } 50048275SEric Cheng 50058275SEric Cheng if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 50068275SEric Cheng mac_ring_t *ring; 50078275SEric Cheng 50088275SEric Cheng /* 50098275SEric Cheng * Rings were dynamically allocated to group. 50108275SEric Cheng * Move rings back to default group. 
50118275SEric Cheng */ 50128275SEric Cheng while ((ring = group->mrg_rings) != NULL) { 50138275SEric Cheng (void) mac_group_mov_ring(mip, 50148275SEric Cheng &mip->mi_rx_groups[0], ring); 50158275SEric Cheng } 50168275SEric Cheng } 50178275SEric Cheng mac_stop_group(group); 50188275SEric Cheng /* 50198275SEric Cheng * Possible improvement: See if we can assign the group just released 50208275SEric Cheng * to a another client of the mip 50218275SEric Cheng */ 50228275SEric Cheng } 50238275SEric Cheng 50248275SEric Cheng /* 50258275SEric Cheng * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 50268275SEric Cheng * when a share was allocated to the client. 50278275SEric Cheng */ 50288275SEric Cheng mac_group_t * 50298275SEric Cheng mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) 50308275SEric Cheng { 50318275SEric Cheng mac_group_t *grp; 50328275SEric Cheng int rv, i; 50338275SEric Cheng 50348275SEric Cheng /* 50358275SEric Cheng * TX groups are currently allocated only to MAC clients 50368275SEric Cheng * which are associated with a share. Since we have a fixed 50378275SEric Cheng * number of share and groups, and we already successfully 50388275SEric Cheng * allocated a share, find an available TX group. 
50398275SEric Cheng */ 50408275SEric Cheng ASSERT(share != NULL); 50418275SEric Cheng ASSERT(mip->mi_tx_group_free > 0); 50428275SEric Cheng 50438275SEric Cheng for (i = 0; i < mip->mi_tx_group_count; i++) { 50448275SEric Cheng grp = &mip->mi_tx_groups[i]; 50458275SEric Cheng 50468275SEric Cheng if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 50478275SEric Cheng (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) 50488275SEric Cheng continue; 50498275SEric Cheng 50508275SEric Cheng rv = mac_start_group(grp); 50518275SEric Cheng ASSERT(rv == 0); 50528275SEric Cheng 50538275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_RESERVED; 50548275SEric Cheng break; 50558275SEric Cheng } 50568275SEric Cheng 50578275SEric Cheng ASSERT(grp != NULL); 50588275SEric Cheng 50598275SEric Cheng /* 50608275SEric Cheng * Populate the group. Rings should be taken from the group 50618275SEric Cheng * of unassigned rings, which is past the array of TX 50628275SEric Cheng * groups adversized by the driver. 50638275SEric Cheng */ 50648275SEric Cheng rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, 50658275SEric Cheng grp, share); 50668275SEric Cheng if (rv != 0) { 50678275SEric Cheng DTRACE_PROBE3(tx__group__reserve__alloc__rings, 50688275SEric Cheng char *, mip->mi_name, int, grp->mrg_index, int, rv); 50698275SEric Cheng 50708275SEric Cheng mac_stop_group(grp); 50718275SEric Cheng grp->mrg_state = MAC_GROUP_STATE_UNINIT; 50728275SEric Cheng 50738275SEric Cheng return (NULL); 50748275SEric Cheng } 50758275SEric Cheng 50768275SEric Cheng mip->mi_tx_group_free--; 50778275SEric Cheng 50788275SEric Cheng return (grp); 50798275SEric Cheng } 50808275SEric Cheng 50818275SEric Cheng void 50828275SEric Cheng mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) 50838275SEric Cheng { 50848275SEric Cheng mac_client_impl_t *mcip = grp->mrg_tx_client; 50858275SEric Cheng mac_share_handle_t share = mcip->mci_share; 50868275SEric Cheng mac_ring_t *ring; 50878275SEric Cheng 50888275SEric Cheng 
ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); 50898275SEric Cheng ASSERT(share != NULL); 50908275SEric Cheng ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); 50918275SEric Cheng 50928275SEric Cheng mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 50938275SEric Cheng while ((ring = grp->mrg_rings) != NULL) { 50948275SEric Cheng /* move the ring back to the pool */ 50958275SEric Cheng (void) mac_group_mov_ring(mip, mip->mi_tx_groups + 50968275SEric Cheng mip->mi_tx_group_count, ring); 50978275SEric Cheng } 50988275SEric Cheng mac_stop_group(grp); 50998275SEric Cheng mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); 51008275SEric Cheng grp->mrg_tx_client = NULL; 51018275SEric Cheng mip->mi_tx_group_free++; 51028275SEric Cheng } 51038275SEric Cheng 51048275SEric Cheng /* 51058275SEric Cheng * This is a 1-time control path activity initiated by the client (IP). 51068275SEric Cheng * The mac perimeter protects against other simultaneous control activities, 51078275SEric Cheng * for example an ioctl that attempts to change the degree of fanout and 51088275SEric Cheng * increase or decrease the number of softrings associated with this Tx SRS. 
51098275SEric Cheng */ 51108275SEric Cheng static mac_tx_notify_cb_t * 51118275SEric Cheng mac_client_tx_notify_add(mac_client_impl_t *mcip, 51128275SEric Cheng mac_tx_notify_t notify, void *arg) 51138275SEric Cheng { 51148275SEric Cheng mac_cb_info_t *mcbi; 51158275SEric Cheng mac_tx_notify_cb_t *mtnfp; 51168275SEric Cheng 51178275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 51188275SEric Cheng 51198275SEric Cheng mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 51208275SEric Cheng mtnfp->mtnf_fn = notify; 51218275SEric Cheng mtnfp->mtnf_arg = arg; 51228275SEric Cheng mtnfp->mtnf_link.mcb_objp = mtnfp; 51238275SEric Cheng mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 51248275SEric Cheng mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 51258275SEric Cheng 51268275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 51278275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 51288275SEric Cheng mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 51298275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 51308275SEric Cheng return (mtnfp); 51318275SEric Cheng } 51328275SEric Cheng 51338275SEric Cheng static void 51348275SEric Cheng mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 51358275SEric Cheng { 51368275SEric Cheng mac_cb_info_t *mcbi; 51378275SEric Cheng mac_cb_t **cblist; 51388275SEric Cheng 51398275SEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 51408275SEric Cheng 51418275SEric Cheng if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 51428275SEric Cheng &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 51438275SEric Cheng cmn_err(CE_WARN, 51448275SEric Cheng "mac_client_tx_notify_remove: callback not " 51458275SEric Cheng "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 51468275SEric Cheng return; 51478275SEric Cheng } 51488275SEric Cheng 51498275SEric Cheng mcbi = &mcip->mci_tx_notify_cb_info; 51508275SEric Cheng cblist = 
&mcip->mci_tx_notify_cb_list; 51518275SEric Cheng mutex_enter(mcbi->mcbi_lockp); 51528275SEric Cheng if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 51538275SEric Cheng kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 51548275SEric Cheng else 51558275SEric Cheng mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 51568275SEric Cheng mutex_exit(mcbi->mcbi_lockp); 51578275SEric Cheng } 51588275SEric Cheng 51598275SEric Cheng /* 51608275SEric Cheng * mac_client_tx_notify(): 51618275SEric Cheng * call to add and remove flow control callback routine. 51628275SEric Cheng */ 51638275SEric Cheng mac_tx_notify_handle_t 51648275SEric Cheng mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 51658275SEric Cheng void *ptr) 51668275SEric Cheng { 51678275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 51688275SEric Cheng mac_tx_notify_cb_t *mtnfp = NULL; 51698275SEric Cheng 51708275SEric Cheng i_mac_perim_enter(mcip->mci_mip); 51718275SEric Cheng 51728275SEric Cheng if (callb_func != NULL) { 51738275SEric Cheng /* Add a notify callback */ 51748275SEric Cheng mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 51758275SEric Cheng } else { 51768275SEric Cheng mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 51778275SEric Cheng } 51788275SEric Cheng i_mac_perim_exit(mcip->mci_mip); 51798275SEric Cheng 51808275SEric Cheng return ((mac_tx_notify_handle_t)mtnfp); 51818275SEric Cheng } 5182