18275SEric Cheng /* 28275SEric Cheng * CDDL HEADER START 38275SEric Cheng * 48275SEric Cheng * The contents of this file are subject to the terms of the 58275SEric Cheng * Common Development and Distribution License (the "License"). 68275SEric Cheng * You may not use this file except in compliance with the License. 78275SEric Cheng * 88275SEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 98275SEric Cheng * or http://www.opensolaris.org/os/licensing. 108275SEric Cheng * See the License for the specific language governing permissions 118275SEric Cheng * and limitations under the License. 128275SEric Cheng * 138275SEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 148275SEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 158275SEric Cheng * If applicable, add the following below this CDDL HEADER, with the 168275SEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 178275SEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 188275SEric Cheng * 198275SEric Cheng * CDDL HEADER END 208275SEric Cheng */ 218275SEric Cheng 228275SEric Cheng /* 23*11878SVenu.Iyer@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 248275SEric Cheng * Use is subject to license terms. 258275SEric Cheng */ 268275SEric Cheng 278275SEric Cheng #ifndef _SYS_MAC_SOFT_RING_H 288275SEric Cheng #define _SYS_MAC_SOFT_RING_H 298275SEric Cheng 308275SEric Cheng #ifdef __cplusplus 318275SEric Cheng extern "C" { 328275SEric Cheng #endif 338275SEric Cheng 348275SEric Cheng #include <sys/types.h> 358275SEric Cheng #include <sys/cpuvar.h> 36*11878SVenu.Iyer@Sun.COM #include <sys/cpupart.h> 378275SEric Cheng #include <sys/processor.h> 388275SEric Cheng #include <sys/stream.h> 398275SEric Cheng #include <sys/squeue.h> 408275SEric Cheng #include <sys/dlpi.h> 418275SEric Cheng #include <sys/mac_impl.h> 42*11878SVenu.Iyer@Sun.COM #include <sys/mac_stat.h> 438275SEric Cheng 448275SEric Cheng #define S_RING_NAMELEN 64 458275SEric Cheng 4610616SSebastien.Roy@Sun.COM #define MAX_SR_FANOUT 24 478275SEric Cheng 488275SEric Cheng extern boolean_t mac_soft_ring_enable; 498275SEric Cheng extern boolean_t mac_latency_optimize; 508275SEric Cheng 518275SEric Cheng typedef struct mac_soft_ring_s mac_soft_ring_t; 528275SEric Cheng typedef struct mac_soft_ring_set_s mac_soft_ring_set_t; 538275SEric Cheng 548275SEric Cheng typedef void (*mac_soft_ring_drain_func_t)(mac_soft_ring_t *); 558275SEric Cheng typedef mac_tx_cookie_t (*mac_tx_func_t)(mac_soft_ring_set_t *, mblk_t *, 568275SEric Cheng uintptr_t, uint16_t, mblk_t **); 578275SEric Cheng 588275SEric Cheng 598275SEric Cheng /* Tx notify callback */ 608275SEric Cheng typedef struct mac_tx_notify_cb_s { 618275SEric Cheng mac_cb_t mtnf_link; /* Linked list of callbacks */ 628275SEric Cheng mac_tx_notify_t mtnf_fn; /* The callback function */ 638275SEric Cheng void *mtnf_arg; /* Callback function argument */ 648275SEric Cheng } mac_tx_notify_cb_t; 658275SEric Cheng 668275SEric Cheng struct mac_soft_ring_s { 678275SEric Cheng /* Keep the most used members 64bytes cache aligned */ 688275SEric Cheng kmutex_t s_ring_lock; /* lock before using any member */ 698275SEric Cheng uint16_t s_ring_type; /* processing model of the sq */ 708275SEric Cheng uint16_t s_ring_state; /* state flags and message count */ 718275SEric Cheng int s_ring_count; /* # of mblocks in mac_soft_ring */ 728275SEric Cheng size_t s_ring_size; /* Size of data queued */ 738275SEric Cheng mblk_t *s_ring_first; /* first mblk chain or NULL */ 748275SEric Cheng mblk_t *s_ring_last; /* last mblk chain or NULL */ 758275SEric Cheng 768275SEric Cheng mac_direct_rx_t s_ring_rx_func; 778275SEric Cheng void *s_ring_rx_arg1; 788275SEric Cheng mac_resource_handle_t s_ring_rx_arg2; 798275SEric Cheng 808275SEric Cheng /* 818275SEric Cheng * Threshold after which packets get dropped. 828275SEric Cheng * Is always greater than s_ring_tx_hiwat 838275SEric Cheng */ 848275SEric Cheng int s_ring_tx_max_q_cnt; 858275SEric Cheng /* # of mblocks after which to apply flow control */ 868275SEric Cheng int s_ring_tx_hiwat; 878275SEric Cheng /* # of mblocks after which to relieve flow control */ 888275SEric Cheng int s_ring_tx_lowat; 898275SEric Cheng boolean_t s_ring_tx_woken_up; 908275SEric Cheng uint32_t s_ring_hiwat_cnt; /* times blocked for Tx descs */ 918275SEric Cheng 928275SEric Cheng void *s_ring_tx_arg1; 938275SEric Cheng void *s_ring_tx_arg2; 948275SEric Cheng 958275SEric Cheng /* Tx notify callback */ 968275SEric Cheng mac_cb_info_t s_ring_notify_cb_info; /* cb list info */ 978275SEric Cheng mac_cb_t *s_ring_notify_cb_list; /* The cb list */ 988275SEric Cheng 998275SEric Cheng clock_t s_ring_awaken; /* time async thread was awakened */ 1008275SEric Cheng 1018275SEric Cheng kthread_t *s_ring_run; /* Current thread processing sq */ 1028275SEric Cheng processorid_t s_ring_cpuid; /* processor to bind to */ 1038275SEric Cheng processorid_t s_ring_cpuid_save; /* saved cpuid during offline */ 1048275SEric Cheng kcondvar_t s_ring_async; /* async thread blocks on */ 1058275SEric Cheng clock_t s_ring_wait; /* lbolts to wait after a fill() */ 1068275SEric Cheng timeout_id_t s_ring_tid; /* timer id of pending timeout() */ 1078275SEric Cheng kthread_t *s_ring_worker; /* kernel thread id */ 1088275SEric Cheng char s_ring_name[S_RING_NAMELEN + 1]; 1098275SEric Cheng uint32_t s_ring_total_inpkt; 110*11878SVenu.Iyer@Sun.COM uint32_t s_ring_total_rbytes; 1118275SEric Cheng uint32_t s_ring_drops; 1128275SEric Cheng struct mac_client_impl_s *s_ring_mcip; 1138275SEric Cheng kstat_t *s_ring_ksp; 1148275SEric Cheng 1158275SEric Cheng /* Teardown, poll disable control ops */ 1168275SEric Cheng kcondvar_t s_ring_client_cv; /* Client wait for control op */ 1178275SEric Cheng 1188275SEric Cheng mac_soft_ring_set_t *s_ring_set; /* The SRS this ring belongs to */ 1198275SEric Cheng mac_soft_ring_t *s_ring_next; 1208275SEric Cheng mac_soft_ring_t *s_ring_prev; 1218275SEric Cheng mac_soft_ring_drain_func_t s_ring_drain_func; 122*11878SVenu.Iyer@Sun.COM 123*11878SVenu.Iyer@Sun.COM mac_tx_stats_t s_st_stat; 1248275SEric Cheng }; 1258275SEric Cheng 1268275SEric Cheng typedef void (*mac_srs_drain_proc_t)(mac_soft_ring_set_t *, uint_t); 1278275SEric Cheng 1288275SEric Cheng /* Transmit side Soft Ring Set */ 1298275SEric Cheng typedef struct mac_srs_tx_s { 1308275SEric Cheng /* Members for Tx size processing */ 1318275SEric Cheng uint32_t st_mode; 1328275SEric Cheng mac_tx_func_t st_func; 1338275SEric Cheng void *st_arg1; 1348275SEric Cheng void *st_arg2; 1358275SEric Cheng mac_group_t *st_group; /* TX group for share */ 1368275SEric Cheng boolean_t st_woken_up; 1378275SEric Cheng 1388275SEric Cheng /* 1398275SEric Cheng * st_max_q_cnt is the queue depth threshold to limit 1408275SEric Cheng * outstanding packets on the Tx SRS. Once the limit 1418275SEric Cheng * is reached, Tx SRS will drop packets until the 1428275SEric Cheng * limit goes below the threshold. 1438275SEric Cheng */ 1448275SEric Cheng uint32_t st_max_q_cnt; /* max. outstanding packets */ 1458275SEric Cheng /* 1468275SEric Cheng * st_hiwat is used Tx serializer and bandwidth mode. 1478275SEric Cheng * This is the queue depth threshold upto which 1488275SEric Cheng * packets will get buffered with no flow-control 1498275SEric Cheng * back pressure applied to the caller. Once this 1508275SEric Cheng * threshold is reached, back pressure will be 1518275SEric Cheng * applied to the caller of mac_tx() (mac_tx() starts 1528275SEric Cheng * returning a cookie to indicate a blocked SRS). 1538275SEric Cheng * st_hiwat should always be lesser than or equal to 1548275SEric Cheng * st_max_q_cnt. 1558275SEric Cheng */ 1568275SEric Cheng uint32_t st_hiwat; /* mblk cnt to apply flow control */ 1578275SEric Cheng uint32_t st_lowat; /* mblk cnt to relieve flow control */ 158*11878SVenu.Iyer@Sun.COM uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ 159*11878SVenu.Iyer@Sun.COM mac_tx_stats_t st_stat; 160*11878SVenu.Iyer@Sun.COM mac_capab_aggr_t st_capab_aggr; 1618275SEric Cheng /* 162*11878SVenu.Iyer@Sun.COM * st_soft_rings is used as an array to store aggr Tx soft 163*11878SVenu.Iyer@Sun.COM * rings. When aggr_find_tx_ring() returns a pseudo ring, 164*11878SVenu.Iyer@Sun.COM * the associated soft ring has to be found. st_soft_rings 165*11878SVenu.Iyer@Sun.COM * array stores the soft ring associated with a pseudo Tx 166*11878SVenu.Iyer@Sun.COM * ring and it can be accessed using the pseudo ring 167*11878SVenu.Iyer@Sun.COM * index (mr_index). Note that the ring index is unique 168*11878SVenu.Iyer@Sun.COM * for each ring in a group. 1698275SEric Cheng */ 170*11878SVenu.Iyer@Sun.COM mac_soft_ring_t **st_soft_rings; 1718275SEric Cheng } mac_srs_tx_t; 1728275SEric Cheng 1738275SEric Cheng /* Receive side Soft Ring Set */ 1748275SEric Cheng typedef struct mac_srs_rx_s { 1758275SEric Cheng /* 1768275SEric Cheng * Upcall Function for fanout, Rx processing etc. Perhaps 1778275SEric Cheng * the same 3 members below can be used for Tx 1788275SEric Cheng * processing, but looking around, mac_rx_func_t has 1798275SEric Cheng * proliferated too much into various files at different 1808275SEric Cheng * places. I am leaving the consolidation battle for 1818275SEric Cheng * another day. 1828275SEric Cheng */ 1838275SEric Cheng mac_direct_rx_t sr_func; /* srs_lock */ 1848275SEric Cheng void *sr_arg1; /* srs_lock */ 1858275SEric Cheng mac_resource_handle_t sr_arg2; /* srs_lock */ 1868275SEric Cheng mac_rx_func_t sr_lower_proc; /* Atomically changed */ 1878275SEric Cheng uint32_t sr_poll_pkt_cnt; 1888275SEric Cheng uint32_t sr_poll_thres; 1898275SEric Cheng 1908275SEric Cheng /* mblk cnt to apply flow control */ 1918275SEric Cheng uint32_t sr_hiwat; 1928275SEric Cheng /* mblk cnt to relieve flow control */ 1938275SEric Cheng uint32_t sr_lowat; 194*11878SVenu.Iyer@Sun.COM mac_rx_stats_t sr_stat; 1958275SEric Cheng 1968275SEric Cheng /* Times polling was enabled */ 1978275SEric Cheng uint32_t sr_poll_on; 1988275SEric Cheng /* Times polling was enabled by worker thread */ 1998275SEric Cheng uint32_t sr_worker_poll_on; 2008275SEric Cheng /* Times polling was disabled */ 2018275SEric Cheng uint32_t sr_poll_off; 2028275SEric Cheng /* Poll thread signalled count */ 2038275SEric Cheng uint32_t sr_poll_thr_sig; 2048275SEric Cheng /* Poll thread busy */ 2058275SEric Cheng uint32_t sr_poll_thr_busy; 2068275SEric Cheng /* SRS drains, stays in poll mode but doesn't poll */ 2078275SEric Cheng uint32_t sr_poll_drain_no_poll; 2088275SEric Cheng /* 2098275SEric Cheng * SRS has nothing to do and no packets in H/W but 2108275SEric Cheng * there is a backlog in softrings. SRS stays in 2118275SEric Cheng * poll mode but doesn't do polling. 2128275SEric Cheng */ 2138275SEric Cheng uint32_t sr_poll_no_poll; 2148275SEric Cheng /* Active polling restarted */ 2158275SEric Cheng uint32_t sr_below_hiwat; 2168275SEric Cheng /* Found packets in last poll so try and poll again */ 2178275SEric Cheng uint32_t sr_poll_again; 2188275SEric Cheng /* 2198275SEric Cheng * Packets in queue but poll thread not allowed to process so 2208275SEric Cheng * signal the worker thread. 2218275SEric Cheng */ 2228275SEric Cheng uint32_t sr_poll_sig_worker; 2238275SEric Cheng /* 2248275SEric Cheng * Poll thread has nothing to do and H/W has nothing so 2258275SEric Cheng * reenable the interrupts. 2268275SEric Cheng */ 2278275SEric Cheng uint32_t sr_poll_intr_enable; 2288275SEric Cheng /* 2298275SEric Cheng * Poll thread has nothing to do and worker thread was already 2308275SEric Cheng * running so it can decide to reenable interrupt or poll again. 2318275SEric Cheng */ 2328275SEric Cheng uint32_t sr_poll_goto_sleep; 2338275SEric Cheng /* Worker thread goes back to draining the queue */ 2348275SEric Cheng uint32_t sr_drain_again; 2359820SEric Cheng /* More Packets in queue so signal the poll thread to drain */ 2369820SEric Cheng uint32_t sr_drain_poll_sig; 2378275SEric Cheng /* More Packets in queue so signal the worker thread to drain */ 2389820SEric Cheng uint32_t sr_drain_worker_sig; 2398275SEric Cheng /* Poll thread is already running so worker has nothing to do */ 2408275SEric Cheng uint32_t sr_drain_poll_running; 2418275SEric Cheng /* We have packets already queued so keep polling */ 2428275SEric Cheng uint32_t sr_drain_keep_polling; 2438275SEric Cheng /* Drain is done and interrupts are reenabled */ 2448275SEric Cheng uint32_t sr_drain_finish_intr; 2458275SEric Cheng /* Polling thread needs to schedule worker wakeup */ 2468275SEric Cheng uint32_t sr_poll_worker_wakeup; 2478275SEric Cheng } mac_srs_rx_t; 2488275SEric Cheng 2498275SEric Cheng /* 2508275SEric Cheng * mac_soft_ring_set_s: 2518275SEric Cheng * This is used both for Tx and Rx side. The srs_type identifies Rx or 2528275SEric Cheng * Tx type. 2538275SEric Cheng * 2548275SEric Cheng * Note that the structure is carefully crafted, with Rx elements coming 2558275SEric Cheng * first followed by Tx specific members. Future additions to this 2568275SEric Cheng * structure should follow the same guidelines. 2578275SEric Cheng * 2588275SEric Cheng * Rx-side notes: 2598275SEric Cheng * mac_rx_classify_flow_add() always creates a mac_soft_ring_set_t and fn_flow 2608275SEric Cheng * points to info from it (func = srs_lower_proc, arg = soft_ring_set). On 2618275SEric Cheng * interrupt path, srs_lower_proc does B/W adjustment and switch to polling mode 2628275SEric Cheng * (if poll capable) and feeds the packets to soft_ring_list via choosen 2638275SEric Cheng * fanout type (specified by srs_type). In poll mode, the poll thread which is 2648275SEric Cheng * also a pointer can pick up the packets and feed them to various 2658275SEric Cheng * soft_ring_list. 2668275SEric Cheng * 2678275SEric Cheng * The srs_type can either be protocol based or fanout based where fanout itelf 2688275SEric Cheng * can be various types 2698275SEric Cheng * 2708275SEric Cheng * The polling works by turning off interrupts as soon as a packets 2718275SEric Cheng * are queued on the soft ring set. Once the backlog is clear and poll 2728275SEric Cheng * thread return empty handed i.e. Rx ring doesn't have anything, the 2738275SEric Cheng * interrupt is turned back on. For this purpose we keep a separate 2748275SEric Cheng * srs_poll_pkt_cnt counter which tracks the packets queued between SRS 2758275SEric Cheng * and the soft rings as well. The counter is incremented when packets 2768275SEric Cheng * are queued and decremented when SRS processes them (in case it has 2778275SEric Cheng * no soft rings) or the soft ring process them. Its important that 2788275SEric Cheng * in case SRS has softrings, the decrement doesn't happen till the 2798275SEric Cheng * packet is processed by the soft rings since it takes very little time 2808275SEric Cheng * for SRS to queue packet from SRS to soft rings and it will keep 2818275SEric Cheng * bringing more packets in the system faster than soft rings can 2828275SEric Cheng * process them. 2838275SEric Cheng * 2848275SEric Cheng * Tx side notes: 2858275SEric Cheng * The srs structure acts as a serializer with a worker thread. The 2868275SEric Cheng * default behavior of srs though is to act as a pass-thru. The queues 2878275SEric Cheng * (srs_first, srs_last, srs_count) get used when Tx ring runs out of Tx 2888275SEric Cheng * descriptors or to enforce bandwidth limits. 2898275SEric Cheng * 2908275SEric Cheng * When multiple Tx rings are present, the SRS state will be set to 2918275SEric Cheng * SRS_FANOUT_OTH. Outgoing packets coming into mac_tx_srs_process() 2928275SEric Cheng * function will be fanned out to one of the Tx side soft rings based on 2938275SEric Cheng * a hint passed in mac_tx_srs_process(). Each soft ring, in turn, will 2948275SEric Cheng * be associated with a distinct h/w Tx ring. 2958275SEric Cheng */ 2968275SEric Cheng 2978275SEric Cheng struct mac_soft_ring_set_s { 2988275SEric Cheng /* 2998275SEric Cheng * Common elements, common to both Rx and Tx SRS type. 3008275SEric Cheng * The following block of fields are protected by srs_lock 3018275SEric Cheng */ 3028275SEric Cheng kmutex_t srs_lock; 3038275SEric Cheng uint32_t srs_type; 3048275SEric Cheng uint32_t srs_state; /* state flags */ 3058275SEric Cheng uint32_t srs_count; 3068275SEric Cheng mblk_t *srs_first; /* first mblk chain or NULL */ 3078275SEric Cheng mblk_t *srs_last; /* last mblk chain or NULL */ 3088275SEric Cheng kcondvar_t srs_async; /* cv for worker thread */ 3098275SEric Cheng kcondvar_t srs_cv; /* cv for poll thread */ 3108275SEric Cheng kcondvar_t srs_quiesce_done_cv; /* cv for removal */ 3118275SEric Cheng timeout_id_t srs_tid; /* timeout id for pending timeout */ 3128275SEric Cheng 3138275SEric Cheng /* 3148275SEric Cheng * List of soft rings & processing function. 3158275SEric Cheng * The following block is protected by Rx quiescence. 3168275SEric Cheng * i.e. they can be changed only after quiescing the SRS 3178275SEric Cheng * Protected by srs_lock. 3188275SEric Cheng */ 3198275SEric Cheng mac_soft_ring_t *srs_soft_ring_head; 3208275SEric Cheng mac_soft_ring_t *srs_soft_ring_tail; 3218275SEric Cheng int srs_soft_ring_count; 3228275SEric Cheng int srs_soft_ring_quiesced_count; 3238275SEric Cheng int srs_soft_ring_condemned_count; 3248275SEric Cheng mac_soft_ring_t **srs_tcp_soft_rings; 3258275SEric Cheng int srs_tcp_ring_count; 3268275SEric Cheng mac_soft_ring_t **srs_udp_soft_rings; 3278275SEric Cheng int srs_udp_ring_count; 328*11878SVenu.Iyer@Sun.COM mac_soft_ring_t **srs_oth_soft_rings; 329*11878SVenu.Iyer@Sun.COM int srs_oth_ring_count; 3308275SEric Cheng /* 331*11878SVenu.Iyer@Sun.COM * srs_tx_soft_rings is used by tx_srs in 3328275SEric Cheng * when operating in multi tx ring mode. 3338275SEric Cheng */ 334*11878SVenu.Iyer@Sun.COM mac_soft_ring_t **srs_tx_soft_rings; 335*11878SVenu.Iyer@Sun.COM int srs_tx_ring_count; 3368275SEric Cheng 3378275SEric Cheng /* 3388275SEric Cheng * Bandwidth control related members. 3398275SEric Cheng * They are common to both Rx- and Tx-side. 3408275SEric Cheng * Following protected by srs_lock 3418275SEric Cheng */ 3428275SEric Cheng mac_bw_ctl_t *srs_bw; 3438275SEric Cheng size_t srs_size; /* Size of packets queued in bytes */ 3448275SEric Cheng pri_t srs_pri; 3458275SEric Cheng 3468275SEric Cheng mac_soft_ring_set_t *srs_next; /* mac_srs_g_lock */ 3478275SEric Cheng mac_soft_ring_set_t *srs_prev; /* mac_srs_g_lock */ 3488275SEric Cheng 3498275SEric Cheng /* Attribute specific drain func (BW ctl vs non-BW ctl) */ 3508275SEric Cheng mac_srs_drain_proc_t srs_drain_func; /* Write once (WO) */ 3518275SEric Cheng 3528275SEric Cheng /* 3538275SEric Cheng * If the associated ring is exclusively used by a mac client, e.g., 3548275SEric Cheng * an aggregation, this fields is used to keep a reference to the 3558275SEric Cheng * MAC client's pseudo ring. 3568275SEric Cheng */ 3578275SEric Cheng mac_resource_handle_t srs_mrh; 3588275SEric Cheng /* 3598275SEric Cheng * The following blocks are write once (WO) and valid for the life 3608275SEric Cheng * of the SRS 3618275SEric Cheng */ 3628275SEric Cheng struct mac_client_impl_s *srs_mcip; /* back ptr to mac client */ 3638275SEric Cheng void *srs_flent; /* back ptr to flent */ 3648275SEric Cheng mac_ring_t *srs_ring; /* Ring Descriptor */ 3658275SEric Cheng 3668275SEric Cheng /* Teardown, disable control ops */ 3678275SEric Cheng kcondvar_t srs_client_cv; /* Client wait for the control op */ 3688275SEric Cheng 3698275SEric Cheng kthread_t *srs_worker; /* WO, worker thread */ 3708275SEric Cheng kthread_t *srs_poll_thr; /* WO, poll thread */ 3718275SEric Cheng 3728275SEric Cheng uint_t srs_ind; /* Round Robin indx for picking up SR */ 3738275SEric Cheng processorid_t srs_worker_cpuid; /* processor to bind to */ 3748275SEric Cheng processorid_t srs_worker_cpuid_save; /* saved cpuid during offline */ 3758275SEric Cheng processorid_t srs_poll_cpuid; /* processor to bind to */ 3768275SEric Cheng processorid_t srs_poll_cpuid_save; /* saved cpuid during offline */ 3778275SEric Cheng uint_t srs_fanout_state; 3788275SEric Cheng mac_cpus_t srs_cpu; 3798275SEric Cheng 3808275SEric Cheng mac_srs_rx_t srs_rx; 3818275SEric Cheng mac_srs_tx_t srs_tx; 382*11878SVenu.Iyer@Sun.COM kstat_t *srs_ksp; 3838275SEric Cheng }; 3848275SEric Cheng 3858275SEric Cheng /* 3868275SEric Cheng * type flags - combination allowed to process and drain the queue 3878275SEric Cheng */ 3888275SEric Cheng #define ST_RING_WORKER_ONLY 0x0001 /* Worker thread only */ 3898275SEric Cheng #define ST_RING_ANY 0x0002 /* Any thread can process the queue */ 3908275SEric Cheng #define ST_RING_TCP 0x0004 3918275SEric Cheng #define ST_RING_UDP 0x0008 3928275SEric Cheng #define ST_RING_OTH 0x0010 3938275SEric Cheng 3948275SEric Cheng #define ST_RING_BW_CTL 0x0020 3958275SEric Cheng #define ST_RING_TX 0x0040 3968275SEric Cheng 3978275SEric Cheng /* 3988275SEric Cheng * State flags. 3998275SEric Cheng */ 4008275SEric Cheng #define S_RING_PROC 0x0001 /* being processed */ 4018275SEric Cheng #define S_RING_BOUND 0x0002 /* Worker thread is bound to a cpu */ 4028275SEric Cheng #define S_RING_BLOCK 0x0004 /* No Tx descs */ 4038275SEric Cheng #define S_RING_TX_HIWAT 0x0008 /* Tx high watermark reached */ 4048275SEric Cheng 4058275SEric Cheng #define S_RING_WAKEUP_CLIENT 0x0010 /* flow ctrl, client wakeup needed */ 4068275SEric Cheng #define S_RING_BLANK 0x0020 /* Has been put into polling mode */ 4078275SEric Cheng #define S_RING_CLIENT_WAIT 0x0040 /* Client waiting for control op */ 4088275SEric Cheng 4098275SEric Cheng #define S_RING_CONDEMNED 0x0100 /* Being torn down */ 4108275SEric Cheng #define S_RING_CONDEMNED_DONE 0x0200 /* Being torn down */ 4118275SEric Cheng #define S_RING_QUIESCE 0x0400 /* No traffic flow, transient flag */ 4128275SEric Cheng #define S_RING_QUIESCE_DONE 0x0800 /* No traffic flow, transient flag */ 4138275SEric Cheng 4148275SEric Cheng #define S_RING_RESTART 0x1000 /* Go back to normal traffic flow */ 4158275SEric Cheng #define S_RING_ENQUEUED 0x2000 /* Pkts enqueued in Tx soft ring */ 4168275SEric Cheng 4178275SEric Cheng /* 4188275SEric Cheng * arguments for processors to bind to 4198275SEric Cheng */ 4208275SEric Cheng #define S_RING_BIND_NONE -1 4218275SEric Cheng 4228275SEric Cheng /* 4238275SEric Cheng * defines for srs_type - identifies a link or a sub-flow 4248275SEric Cheng * and other static characteristics of a SRS like a tx 4258275SEric Cheng * srs, tcp only srs, etc. 4268275SEric Cheng */ 4278275SEric Cheng #define SRST_LINK 0x00000001 4288275SEric Cheng #define SRST_FLOW 0x00000002 4298275SEric Cheng #define SRST_NO_SOFT_RINGS 0x00000004 4308275SEric Cheng #define SRST_TCP_ONLY 0x00000008 4318275SEric Cheng 4328275SEric Cheng #define SRST_FANOUT_PROTO 0x00000010 4338275SEric Cheng #define SRST_FANOUT_SRC_IP 0x00000020 4348275SEric Cheng #define SRST_FANOUT_OTH 0x00000040 4358275SEric Cheng #define SRST_DEFAULT_GRP 0x00000080 4368275SEric Cheng 4378275SEric Cheng #define SRST_TX 0x00000100 4388275SEric Cheng #define SRST_BW_CONTROL 0x00000200 4398275SEric Cheng #define SRST_DIRECT_POLL 0x00000400 4408275SEric Cheng 4418275SEric Cheng #define SRST_DLS_BYPASS 0x00001000 4428275SEric Cheng #define SRST_CLIENT_POLL_ENABLED 0x00002000 4438275SEric Cheng 4448275SEric Cheng /* 4458275SEric Cheng * soft ring set flags. These bits are dynamic in nature and get 4468275SEric Cheng * applied to srs_state. They reflect the state of SRS at any 4478275SEric Cheng * point of time 4488275SEric Cheng */ 4498275SEric Cheng #define SRS_BLANK 0x00000001 4508275SEric Cheng #define SRS_WORKER_BOUND 0x00000002 4518275SEric Cheng #define SRS_POLL_BOUND 0x00000004 4528275SEric Cheng #define SRS_POLLING_CAPAB 0x00000008 4538275SEric Cheng 4548275SEric Cheng #define SRS_PROC 0x00000010 4558275SEric Cheng #define SRS_GET_PKTS 0x00000020 4568275SEric Cheng #define SRS_POLLING 0x00000040 4578275SEric Cheng #define SRS_BW_ENFORCED 0x00000080 4588275SEric Cheng 4598275SEric Cheng #define SRS_WORKER 0x00000100 4608275SEric Cheng #define SRS_ENQUEUED 0x00000200 4618275SEric Cheng #define SRS_ANY_PROCESS 0x00000400 4628275SEric Cheng #define SRS_PROC_FAST 0x00000800 4638275SEric Cheng 4648275SEric Cheng #define SRS_POLL_PROC 0x00001000 4658275SEric Cheng #define SRS_TX_BLOCKED 0x00002000 /* out of Tx descs */ 4668275SEric Cheng #define SRS_TX_HIWAT 0x00004000 /* Tx count exceeds hiwat */ 4678275SEric Cheng #define SRS_TX_WAKEUP_CLIENT 0x00008000 /* Flow-ctl: wakeup client */ 4688275SEric Cheng 4698275SEric Cheng #define SRS_CLIENT_PROC 0x00010000 4708275SEric Cheng #define SRS_CLIENT_WAIT 0x00020000 4718275SEric Cheng #define SRS_QUIESCE 0x00040000 4728275SEric Cheng #define SRS_QUIESCE_DONE 0x00080000 4738275SEric Cheng 4748275SEric Cheng #define SRS_CONDEMNED 0x00100000 4758275SEric Cheng #define SRS_CONDEMNED_DONE 0x00200000 4768275SEric Cheng #define SRS_POLL_THR_QUIESCED 0x00400000 4778275SEric Cheng #define SRS_RESTART 0x00800000 4788275SEric Cheng 4798275SEric Cheng #define SRS_RESTART_DONE 0x01000000 4808275SEric Cheng #define SRS_POLL_THR_RESTART 0x02000000 4818275SEric Cheng #define SRS_IN_GLIST 0x04000000 4828275SEric Cheng #define SRS_POLL_THR_EXITED 0x08000000 4838275SEric Cheng 4848275SEric Cheng #define SRS_QUIESCE_PERM 0x10000000 4858275SEric Cheng #define SRS_LATENCY_OPT 0x20000000 4868833SVenu.Iyer@Sun.COM #define SRS_SOFTRING_QUEUE 0x40000000 4878275SEric Cheng 4888275SEric Cheng #define SRS_QUIESCED(srs) (srs->srs_state & SRS_QUIESCE_DONE) 4898275SEric Cheng 4908275SEric Cheng /* 4918275SEric Cheng * If the SRS_QUIESCE_PERM flag is set, the SRS worker thread will not be 4928275SEric Cheng * able to be restarted. 4938275SEric Cheng */ 4948275SEric Cheng #define SRS_QUIESCED_PERMANENT(srs) (srs->srs_state & SRS_QUIESCE_PERM) 4958275SEric Cheng 4968275SEric Cheng /* 4978275SEric Cheng * soft ring set (SRS) Tx modes 4988275SEric Cheng */ 4998275SEric Cheng typedef enum { 5008275SEric Cheng SRS_TX_DEFAULT = 0, 5018275SEric Cheng SRS_TX_SERIALIZE, 5028275SEric Cheng SRS_TX_FANOUT, 5038275SEric Cheng SRS_TX_BW, 504*11878SVenu.Iyer@Sun.COM SRS_TX_BW_FANOUT, 505*11878SVenu.Iyer@Sun.COM SRS_TX_AGGR, 506*11878SVenu.Iyer@Sun.COM SRS_TX_BW_AGGR 5078275SEric Cheng } mac_tx_srs_mode_t; 5088275SEric Cheng 5098275SEric Cheng /* 5108275SEric Cheng * SRS fanout states 5118275SEric Cheng */ 5128275SEric Cheng typedef enum { 5138275SEric Cheng SRS_FANOUT_UNINIT = 0, 5148275SEric Cheng SRS_FANOUT_INIT, 5158275SEric Cheng SRS_FANOUT_REINIT 5168275SEric Cheng } mac_srs_fanout_state_t; 5178275SEric Cheng 5188275SEric Cheng /* 5198275SEric Cheng * Structure for dls statistics 5208275SEric Cheng */ 5218275SEric Cheng struct dls_kstats { 5228275SEric Cheng kstat_named_t dlss_soft_ring_pkt_drop; 5238275SEric Cheng }; 5248275SEric Cheng 5258275SEric Cheng extern struct dls_kstats dls_kstat; 5268275SEric Cheng 5278275SEric Cheng #define DLS_BUMP_STAT(x, y) (dls_kstat.x.value.ui32 += y) 5288275SEric Cheng 5298275SEric Cheng /* Turn dynamic polling off */ 5308275SEric Cheng #define MAC_SRS_POLLING_OFF(mac_srs) { \ 5318275SEric Cheng ASSERT(MUTEX_HELD(&(mac_srs)->srs_lock)); \ 5328275SEric Cheng if (((mac_srs)->srs_state & (SRS_POLLING_CAPAB|SRS_POLLING)) == \ 5338275SEric Cheng (SRS_POLLING_CAPAB|SRS_POLLING)) { \ 5348275SEric Cheng (mac_srs)->srs_state &= ~SRS_POLLING; \ 5358275SEric Cheng (void) mac_hwring_enable_intr((mac_ring_handle_t) \ 5368275SEric Cheng (mac_srs)->srs_ring); \ 5378275SEric Cheng (mac_srs)->srs_rx.sr_poll_off++; \ 5388275SEric Cheng } \ 5398275SEric Cheng } 5408275SEric Cheng 5418275SEric Cheng #define MAC_COUNT_CHAIN(mac_srs, head, tail, cnt, sz) { \ 5428275SEric Cheng mblk_t *tmp; \ 5438275SEric Cheng boolean_t bw_ctl = B_FALSE; \ 5448275SEric Cheng \ 5458275SEric Cheng ASSERT((head) != NULL); \ 5468275SEric Cheng cnt = 0; \ 5478275SEric Cheng sz = 0; \ 5488275SEric Cheng if ((mac_srs)->srs_type & SRST_BW_CONTROL) \ 5498275SEric Cheng bw_ctl = B_TRUE; \ 5508275SEric Cheng tmp = tail = (head); \ 5518275SEric Cheng if ((head)->b_next == NULL) { \ 5528275SEric Cheng cnt = 1; \ 5538275SEric Cheng if (bw_ctl) \ 5548275SEric Cheng sz += msgdsize(head); \ 5558275SEric Cheng } else { \ 5568275SEric Cheng while (tmp != NULL) { \ 5578275SEric Cheng tail = tmp; \ 5588275SEric Cheng cnt++; \ 5598275SEric Cheng if (bw_ctl) \ 5608275SEric Cheng sz += msgdsize(tmp); \ 5618275SEric Cheng tmp = tmp->b_next; \ 5628275SEric Cheng } \ 5638275SEric Cheng } \ 5648275SEric Cheng } 5658275SEric Cheng 5668275SEric Cheng /* 5678275SEric Cheng * Decrement the cumulative packet count in SRS and its 5688275SEric Cheng * soft rings. If the srs_poll_pkt_cnt goes below lowat, then check 5698275SEric Cheng * if if the interface was left in a polling mode and no one 5708275SEric Cheng * is really processing the queue (to get the interface out 5718275SEric Cheng * of poll mode). If no one is processing the queue, then 5728275SEric Cheng * acquire the PROC and signal the poll thread to check the 5738275SEric Cheng * interface for packets and get the interface back to interrupt 5748275SEric Cheng * mode if nothing is found. 5758275SEric Cheng */ 5768275SEric Cheng #define MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt) { \ 5778275SEric Cheng mac_srs_rx_t *srs_rx = &(mac_srs)->srs_rx; \ 5788275SEric Cheng ASSERT(MUTEX_HELD(&(mac_srs)->srs_lock)); \ 5798275SEric Cheng \ 5808275SEric Cheng srs_rx->sr_poll_pkt_cnt -= cnt; \ 5818275SEric Cheng if ((srs_rx->sr_poll_pkt_cnt <= srs_rx->sr_poll_thres) && \ 5828275SEric Cheng (((mac_srs)->srs_state & \ 5838275SEric Cheng (SRS_POLLING|SRS_PROC|SRS_GET_PKTS)) == SRS_POLLING)) \ 5848275SEric Cheng { \ 5858275SEric Cheng (mac_srs)->srs_state |= (SRS_PROC|SRS_GET_PKTS); \ 5868275SEric Cheng cv_signal(&(mac_srs)->srs_cv); \ 5878275SEric Cheng srs_rx->sr_below_hiwat++; \ 5888275SEric Cheng } \ 5898275SEric Cheng } 5908275SEric Cheng 5918275SEric Cheng /* 5928275SEric Cheng * The following two macros are used to update the inbound packet and byte. 5938275SEric Cheng * count. The packet and byte count reflect the packets and bytes that are 5948275SEric Cheng * taken out of the SRS's queue, i.e. indicating they are being delivered. 5958275SEric Cheng * The srs_count and srs_size are updated in different locations as the 5968275SEric Cheng * srs_size is also used to take into account any bandwidth limits. The 5978275SEric Cheng * srs_size is updated only when a soft ring, if any, sends a packet up, 5988275SEric Cheng * as opposed to updating it when the SRS sends a packet to the SR, i.e. 5998275SEric Cheng * the srs_size reflects the packets in the SRS and SRs. These 6008275SEric Cheng * macros decrement the srs_size and srs_count and also increment the 6018275SEric Cheng * ipackets and ibytes stats resp. 6028275SEric Cheng * 6038275SEric Cheng * xxx-venu These are done under srs_lock, for now we still update 6048275SEric Cheng * mci_stat_ibytes/mci_stat_ipackets atomically, need to check if 6058275SEric Cheng * just updating them would be accurate enough. 6068275SEric Cheng * 6078275SEric Cheng * If we are updating these for a sub-flow SRS, then we need to also 6088275SEric Cheng * updated it's MAC client bandwidth info, if the MAC client is also 6098275SEric Cheng * bandwidth regulated. 6108275SEric Cheng */ 6118275SEric Cheng #define MAC_UPDATE_SRS_SIZE_LOCKED(srs, sz) { \ 6128275SEric Cheng if ((srs)->srs_type & SRST_BW_CONTROL) { \ 6138275SEric Cheng mutex_enter(&(srs)->srs_bw->mac_bw_lock); \ 6148275SEric Cheng (srs)->srs_bw->mac_bw_sz -= (sz); \ 6158275SEric Cheng (srs)->srs_bw->mac_bw_used += (sz); \ 6168275SEric Cheng mutex_exit(&(srs)->srs_bw->mac_bw_lock); \ 6178275SEric Cheng } \ 6188275SEric Cheng } 6198275SEric Cheng 6208275SEric Cheng #define MAC_TX_UPDATE_BW_INFO(srs, sz) { \ 6218275SEric Cheng (srs)->srs_bw->mac_bw_sz -= (sz); \ 6228275SEric Cheng (srs)->srs_bw->mac_bw_used += (sz); \ 6238275SEric Cheng } 6248275SEric Cheng 625*11878SVenu.Iyer@Sun.COM #define MAC_TX_SOFT_RINGS(mac_srs) ((mac_srs)->srs_tx_ring_count >= 1) 6268275SEric Cheng 6278275SEric Cheng /* Soft ring flags for teardown */ 6288275SEric Cheng #define SRS_POLL_THR_OWNER (SRS_PROC | SRS_POLLING | SRS_GET_PKTS) 6298275SEric Cheng #define SRS_PAUSE (SRS_CONDEMNED | SRS_QUIESCE) 6308275SEric Cheng #define S_RING_PAUSE (S_RING_CONDEMNED | S_RING_QUIESCE) 6318275SEric Cheng 6328275SEric Cheng /* Soft rings */ 6338275SEric Cheng extern void mac_soft_ring_init(void); 6348275SEric Cheng extern void mac_soft_ring_finish(void); 6358275SEric Cheng extern void mac_fanout_setup(mac_client_impl_t *, flow_entry_t *, 636*11878SVenu.Iyer@Sun.COM mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t, 637*11878SVenu.Iyer@Sun.COM cpupart_t *); 6388275SEric Cheng 6398275SEric Cheng extern void mac_soft_ring_worker_wakeup(mac_soft_ring_t *); 6408275SEric Cheng extern void mac_soft_ring_blank(void *, time_t, uint_t, int); 6418275SEric Cheng extern mblk_t *mac_soft_ring_poll(mac_soft_ring_t *, int); 6428275SEric Cheng extern void mac_soft_ring_destroy(mac_soft_ring_t *); 6438275SEric Cheng extern void mac_soft_ring_dls_bypass(void *, mac_direct_rx_t, void *); 6448275SEric Cheng 6458275SEric Cheng /* Rx SRS */ 6468275SEric Cheng extern mac_soft_ring_set_t *mac_srs_create(struct mac_client_impl_s *, 6478275SEric Cheng flow_entry_t *, uint32_t, mac_direct_rx_t, void *, mac_resource_handle_t, 6488275SEric Cheng mac_ring_t *); 6498275SEric Cheng extern void mac_srs_free(mac_soft_ring_set_t *); 6508275SEric Cheng extern void mac_srs_signal(mac_soft_ring_set_t *, uint_t); 6518275SEric Cheng extern cpu_t *mac_srs_bind(mac_soft_ring_set_t *, processorid_t); 652*11878SVenu.Iyer@Sun.COM extern void mac_rx_srs_retarget_intr(mac_soft_ring_set_t *, processorid_t); 653*11878SVenu.Iyer@Sun.COM extern void mac_tx_srs_retarget_intr(mac_soft_ring_set_t *); 6548275SEric Cheng 6558275SEric Cheng extern void mac_srs_change_upcall(void *, mac_direct_rx_t, void *); 6568275SEric Cheng extern void mac_srs_quiesce_initiate(mac_soft_ring_set_t *); 6578275SEric Cheng extern void mac_srs_client_poll_enable(struct mac_client_impl_s *, 6588275SEric Cheng mac_soft_ring_set_t *); 6598275SEric Cheng extern void mac_srs_client_poll_disable(struct mac_client_impl_s *, 6608275SEric Cheng mac_soft_ring_set_t *); 6618275SEric Cheng extern void mac_srs_client_poll_quiesce(struct mac_client_impl_s *, 6628275SEric Cheng mac_soft_ring_set_t *); 6638275SEric Cheng extern void mac_srs_client_poll_restart(struct mac_client_impl_s *, 6648275SEric Cheng mac_soft_ring_set_t *); 6658275SEric Cheng extern void mac_rx_srs_quiesce(mac_soft_ring_set_t *, uint_t); 6668275SEric Cheng extern void mac_rx_srs_restart(mac_soft_ring_set_t *); 6678275SEric Cheng extern void mac_rx_srs_subflow_process(void *, mac_resource_handle_t, mblk_t *, 6688275SEric Cheng boolean_t); 6698275SEric Cheng extern void mac_tx_srs_quiesce(mac_soft_ring_set_t *, uint_t); 6708275SEric Cheng 6718275SEric Cheng /* Tx SRS, Tx softring */ 6728275SEric Cheng extern void mac_tx_srs_wakeup(mac_soft_ring_set_t *, mac_ring_handle_t); 673*11878SVenu.Iyer@Sun.COM extern void mac_tx_srs_setup(struct mac_client_impl_s *, flow_entry_t *); 6748275SEric Cheng extern mac_tx_func_t mac_tx_get_func(uint32_t); 6758275SEric Cheng extern mblk_t *mac_tx_send(mac_client_handle_t, mac_ring_handle_t, mblk_t *, 6768275SEric Cheng mac_tx_stats_t *); 6778275SEric Cheng extern boolean_t mac_tx_srs_ring_present(mac_soft_ring_set_t *, mac_ring_t *); 678*11878SVenu.Iyer@Sun.COM extern mac_soft_ring_t *mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *, 679*11878SVenu.Iyer@Sun.COM mac_ring_t *); 6808275SEric Cheng extern void mac_tx_srs_add_ring(mac_soft_ring_set_t *, mac_ring_t *); 6818275SEric Cheng extern void mac_tx_srs_del_ring(mac_soft_ring_set_t *, mac_ring_t *); 6828275SEric Cheng extern mac_tx_cookie_t mac_tx_srs_no_desc(mac_soft_ring_set_t *, mblk_t *, 6838275SEric Cheng uint16_t, mblk_t **); 6848275SEric Cheng 6858275SEric Cheng /* Subflow specific stuff */ 6868275SEric Cheng extern int mac_srs_flow_create(struct mac_client_impl_s *, flow_entry_t *, 6878275SEric Cheng mac_resource_props_t *, int, int, mac_direct_rx_t); 6888275SEric Cheng extern void mac_srs_update_bwlimit(flow_entry_t *, mac_resource_props_t *); 6898275SEric Cheng extern void mac_srs_adjust_subflow_bwlimit(struct mac_client_impl_s *); 6908275SEric Cheng extern void mac_srs_update_drv(struct mac_client_impl_s *); 6918275SEric Cheng extern void mac_update_srs_priority(mac_soft_ring_set_t *, pri_t); 6928275SEric Cheng extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t); 6938275SEric Cheng 6948275SEric Cheng extern void mac_soft_ring_intr_enable(void *); 6959883SRajagopal.Kunhappan@Sun.COM extern boolean_t mac_soft_ring_intr_disable(void *); 696*11878SVenu.Iyer@Sun.COM extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, uint16_t, 6978275SEric Cheng pri_t, mac_client_impl_t *, mac_soft_ring_set_t *, 6988275SEric Cheng processorid_t, mac_direct_rx_t, void *, mac_resource_handle_t); 6998275SEric Cheng extern cpu_t *mac_soft_ring_bind(mac_soft_ring_t *, processorid_t); 7008275SEric Cheng extern void mac_soft_ring_unbind(mac_soft_ring_t *); 701*11878SVenu.Iyer@Sun.COM extern void mac_soft_ring_free(mac_soft_ring_t *); 7028275SEric Cheng extern void mac_soft_ring_signal(mac_soft_ring_t *, uint_t); 7038275SEric Cheng extern void mac_rx_soft_ring_process(mac_client_impl_t *, mac_soft_ring_t *, 7048275SEric Cheng mblk_t *, mblk_t *, int, size_t); 7058275SEric Cheng extern mac_tx_cookie_t mac_tx_soft_ring_process(mac_soft_ring_t *, 7068275SEric Cheng mblk_t *, uint16_t, mblk_t **); 7078275SEric Cheng extern void mac_srs_worker_quiesce(mac_soft_ring_set_t *); 7088275SEric Cheng extern void mac_srs_worker_restart(mac_soft_ring_set_t *); 7098275SEric Cheng extern void mac_rx_attach_flow_srs(mac_impl_t *, flow_entry_t *, 7108275SEric Cheng mac_soft_ring_set_t *, mac_ring_t *, mac_classify_type_t); 7118275SEric Cheng 7128275SEric Cheng extern void mac_rx_srs_drain_bw(mac_soft_ring_set_t *, uint_t); 7138275SEric Cheng extern void mac_rx_srs_drain(mac_soft_ring_set_t *, uint_t); 7148275SEric Cheng extern void mac_rx_srs_process(void *, mac_resource_handle_t, mblk_t *, 7158275SEric Cheng boolean_t); 7168275SEric Cheng extern void mac_srs_worker(mac_soft_ring_set_t *); 7178275SEric Cheng extern void mac_rx_srs_poll_ring(mac_soft_ring_set_t *); 7188275SEric Cheng extern void mac_tx_srs_drain(mac_soft_ring_set_t *, uint_t); 7198275SEric Cheng 7208275SEric Cheng extern void mac_tx_srs_restart(mac_soft_ring_set_t *); 7218275SEric Cheng extern void mac_rx_srs_remove(mac_soft_ring_set_t *); 7228275SEric Cheng 7238275SEric Cheng #ifdef __cplusplus 7248275SEric Cheng } 7258275SEric Cheng #endif 7268275SEric Cheng 7278275SEric Cheng #endif /* _SYS_MAC_SOFT_RING_H */ 728