/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2008 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at:
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When using or redistributing this file, you may do so under the
 * License only. No other modification of this header is permitted.
 *
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms of the CDDL.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "igb_sw.h"

static boolean_t igb_tx(igb_tx_ring_t *, mblk_t *);
static int igb_tx_copy(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t, boolean_t);
static int igb_tx_bind(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int igb_tx_fill_ring(igb_tx_ring_t *, link_list_t *, hcksum_context_t *);
static void igb_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *igb_get_free_list(igb_tx_ring_t *);

static void igb_get_hcksum_context(mblk_t *, hcksum_context_t *);
static boolean_t igb_check_hcksum_context(igb_tx_ring_t *, hcksum_context_t *);
static void igb_fill_hcksum_context(struct e1000_adv_tx_context_desc *,
    hcksum_context_t *);

#ifndef IGB_DEBUG
#pragma inline(igb_save_desc)
#pragma inline(igb_get_hcksum_context)
#pragma inline(igb_check_hcksum_context)
#pragma inline(igb_fill_hcksum_context)
#endif

/*
 * igb_m_tx
 *
 * The GLDv3 interface to call the driver's tx routine to transmit
 * the mblks.
 */
mblk_t *
igb_m_tx(void *arg, mblk_t *mp)
{
	igb_t *igb = (igb_t *)arg;
	mblk_t *next;
	igb_tx_ring_t *tx_ring;

	/*
	 * If the adapter is suspended, or it is not started, or the link
	 * is not up, the mblks are simply dropped.
	 */
	if (((igb->igb_state & IGB_SUSPENDED) != 0) ||
	    ((igb->igb_state & IGB_STARTED) == 0) ||
	    (igb->link_state != LINK_STATE_UP)) {
		/* Free the mblk chain */
		while (mp != NULL) {
			next = mp->b_next;
			mp->b_next = NULL;

			freemsg(mp);
			mp = next;
		}

		return (NULL);
	}

	/*
	 * Decide which tx ring is used to transmit the packets.
	 * This needs to be updated later to fit the new interface
	 * of the multiple rings support.
	 */
	tx_ring = &igb->tx_rings[0];

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!igb_tx(tx_ring, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	return (mp);
}
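
/*
 * Note (added for clarity, not in the original source): igb_m_tx() follows
 * the GLDv3 mc_tx contract.  The mblk chain is linked through b_next, one
 * packet per mblk (each packet may itself be a b_cont chain of fragments).
 * A sketch of the caller-visible behavior:
 *
 *	NULL returned	- every packet was consumed (sent, or dropped as
 *			  oversize)
 *	chain returned	- transmit resources ran short; the first unsent
 *			  packet and everything after it are handed back,
 *			  and the MAC layer holds off until the driver
 *			  signals (via mac_tx_update(), driven by the
 *			  reschedule flag in code outside this file) that
 *			  it may retry.
 */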

/*
 * igb_tx - Main transmit processing
 *
 * Called from igb_m_tx with an mblk ready to transmit. This
 * routine sets up the transmit descriptors and sends data to
 * the wire.
 *
 * One mblk can consist of several fragments; each fragment
 * will be processed with a different method based on its size.
 * Fragments with size less than the bcopy threshold
 * will be processed by using bcopy; otherwise, they will
 * be processed by using DMA binding.
 *
 * To process the mblk, a tx control block is obtained from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind an mblk fragment with DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put to the work list.
 */
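/*
 * Worked example (illustrative values, not from the original source):
 * consider an mblk of three b_cont fragments of 64, 80 and 4000 bytes,
 * with copy_thresh assumed to be 512:
 *
 *	frag 1 (64)   - copied into tcb1's tx buffer
 *	frag 2 (80)   - appended to the same buffer; the combined 144
 *			bytes are sent with a single tx descriptor
 *	frag 3 (4000) - DMA-bound through tcb2; one tx descriptor is
 *			generated per DMA cookie of the binding
 *
 * Both tcbs sit on the pending list until igb_tx_fill_ring() flushes
 * them to the hardware descriptor ring in one pass under tx_lock.
 */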
static boolean_t
igb_tx(igb_tx_ring_t *tx_ring, mblk_t *mp)
{
	igb_t *igb = tx_ring->igb;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp;
	tx_control_block_t *tcb;
	hcksum_context_t hcksum_context, *hcksum;
	link_list_t pending_list;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLK_LEN(nmp);
	}

	/*
	 * If the mblk size exceeds the max frame size,
	 * discard this mblk, and return B_TRUE
	 */
	if (mbsize > (igb->max_frame_size - ETHERFCSL)) {
		freemsg(mp);
		IGB_DEBUGLOG_0(igb, "igb_tx: packet oversize");
		return (B_TRUE);
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < tx_ring->recycle_thresh)
		tx_ring->tx_recycle(tx_ring);

	/*
	 * After the recycling, if the tbd_free is less than the
	 * overload_threshold, assert overload, return B_FALSE;
	 * and the transmit needs to be rescheduled.
	 */
	if (tx_ring->tbd_free < tx_ring->overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IGB_DEBUG_STAT(tx_ring->stat_overload);
		return (B_FALSE);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not put the data to the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	current_mp = mp;
	current_len = MBLK_LEN(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= tx_ring->copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLK_LEN(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending completion. We
		 * have to process this empty fragment in the tx_copy
		 * routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing is just completed, we can just skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= tx_ring->copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = igb_get_free_list(tx_ring);

			if (tcb == NULL) {
				IGB_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid using lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > tx_ring->copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > tx_ring->copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = igb_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done, eop);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > tx_ring->copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = igb_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	if (igb->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk that
		 * will be used to decide whether/how to fill the context
		 * descriptor.
		 */
		hcksum = &hcksum_context;
		igb_get_hcksum_context(mp, hcksum);
	} else {
		hcksum = NULL;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return failure.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = igb_tx_fill_ring(tx_ring, &pending_list, hcksum);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	mutex_exit(&tx_ring->tx_lock);

	return (B_TRUE);

tx_failure:
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (B_FALSE);
}

/*
 * igb_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
igb_tx_copy(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done, boolean_t eop)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * For the packet smaller than 64 bytes, we need to
		 * pad it to 60 bytes. The NIC hardware will add 4
		 * bytes of CRC.
		 */
		if (eop && (tx_buf->len < ETHERMIN)) {
			bzero(tx_buf->address + tx_buf->len,
			    ETHERMIN - tx_buf->len);
			tx_buf->len = ETHERMIN;
		}

		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}
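
/*
 * Note (added for clarity, not in the original source): the return values
 * of the two fragment processors drive the descriptor accounting loop in
 * igb_tx():
 *
 *	igb_tx_copy() - returns 0 while fragments are still being
 *			accumulated in the tx buffer, and 1 once copy_done
 *			closes the buffer out with a single descriptor
 *	igb_tx_bind() - returns one descriptor per DMA cookie of the
 *			binding, or -1 if ddi_dma_addr_bind_handle() fails
 *
 * A negative return aborts the packet through the tx_failure path.
 */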

/*
 * igb_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
igb_tx_bind(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * igb_get_hcksum_context
 *
 * Get the hcksum context information from the mblk
 */
static void
igb_get_hcksum_context(mblk_t *mp, hcksum_context_t *hcksum)
{
	uint32_t start;
	uint32_t flags;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;

	ASSERT(mp != NULL);

	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags);

	hcksum->hcksum_flags = flags;

	if (flags == 0)
		return;

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLK_LEN(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLK_LEN(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}
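
	/*
	 * Note (added for clarity, not in the original source): the walk
	 * above maintains two counters -- size is the total number of
	 * bytes scanned up to and including the current fragment, and len
	 * is the length of the current fragment alone.  The byte at
	 * absolute offset 'offset' therefore starts (size - len) bytes
	 * before the current fragment, i.e. at:
	 *
	 *	pos = mp->b_rptr + offset - (size - len)
	 *	    = mp->b_rptr + offset + len - size
	 *
	 * The same scheme is reused below to locate the L4 protocol field
	 * inside the IP or IPv6 header.
	 */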

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment, so we go through the fragments to parse
	 * the protocol type.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IGB_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return;
	}

	hcksum->mac_hdr_len = mac_hdr_len;
	hcksum->ip_hdr_len = start;
	hcksum->l4_proto = l4_proto;
}

/*
 * igb_check_hcksum_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
igb_check_hcksum_context(igb_tx_ring_t *tx_ring, hcksum_context_t *hcksum)
{
	hcksum_context_t *last;

	if (hcksum == NULL)
		return (B_FALSE);

	/*
	 * Compare the checksum data retrieved from the mblk with the
	 * stored checksum data of the last context descriptor. The data
	 * that need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 * If any one of the above items changes, a new context descriptor
	 * is needed.
	 */
	last = &tx_ring->hcksum_context;

	if (hcksum->hcksum_flags != 0) {
		if ((hcksum->hcksum_flags != last->hcksum_flags) ||
		    (hcksum->l4_proto != last->l4_proto) ||
		    (hcksum->mac_hdr_len != last->mac_hdr_len) ||
		    (hcksum->ip_hdr_len != last->ip_hdr_len)) {

			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * igb_fill_hcksum_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
igb_fill_hcksum_context(struct e1000_adv_tx_context_desc *ctx_tbd,
    hcksum_context_t *hcksum)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got
	 */
	ctx_tbd->vlan_macip_lens = hcksum->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= hcksum->mac_hdr_len <<
	    E1000_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	if (hcksum->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	if (hcksum->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (hcksum->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    E1000_ADVTXD_TUCMD_L4T_UDP;
			 * Because E1000_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IGB_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;
	ctx_tbd->mss_l4len_idx = 0;
}
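
/*
 * Worked example (added for clarity, not in the original source): for an
 * untagged TCP/IPv4 packet with a 14-byte MAC header and a 20-byte IP
 * header, igb_fill_hcksum_context() would produce:
 *
 *	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20
 *	type_tucmd_mlhl = E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT |
 *	    E1000_ADVTXD_TUCMD_IPV4 | E1000_ADVTXD_TUCMD_L4T_TCP
 *
 * assuming the MAC layer requested both HCK_IPV4_HDRCKSUM and
 * HCK_PARTIALCKSUM for the packet.
 */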

/*
 * igb_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
igb_tx_fill_ring(igb_tx_ring_t *tx_ring, link_list_t *pending_list,
    hcksum_context_t *hcksum)
{
	struct e1000_hw *hw = &tx_ring->igb->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union e1000_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;
	igb_t *igb = tx_ring->igb;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (hcksum != NULL) {
		hcksum_flags = hcksum->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = igb_check_hcksum_context(tx_ring, hcksum);
		if (load_context) {
			first_tcb = (tx_control_block_t *)
			    LIST_GET_HEAD(pending_list);
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			igb_fill_hcksum_context(
			    (struct e1000_adv_tx_context_desc *)tbd, hcksum);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->hcksum_context = *hcksum;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_RS |
			    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		if (first_tcb != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first tx control block.
			 * This also lets the recycle routines, which step
			 * through the ring by tcb->desc_num, advance past
			 * the context descriptor slot.
			 */
			first_tcb->desc_num++;
			first_tcb = NULL;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_IFCS;

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_IXSM << 8;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_TXSM << 8;
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP),
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race condition
	 * where the transmission of the tx descriptors completes and the
	 * tx recycling increases tbd_free before our decrement is applied.
	 */
	i = igb_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), index);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	return (desc_num);
}
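
/*
 * Worked example (added for clarity, not in the original source): suppose
 * tbd_tail is 10 when a packet needing a context descriptor and two data
 * descriptors is filled:
 *
 *	slot 10 - context descriptor
 *	slot 11 - first data descriptor (IFCS + checksum bits)
 *	slot 12 - last data descriptor (EOP | RS)
 *
 * The packet's tx control block is stored at work_list[10] with its
 * desc_num bumped to 3, so a recycle pass reclaims slots 10..12 as one
 * unit; igb_tx_fill_ring() returns 3, i.e. desc_total + 1.
 */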

/*
 * igb_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
igb_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * igb_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
igb_tx_recycle_legacy(igb_tx_ring_t *tx_ring)
{
	uint32_t index, last_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	desc_done = B_TRUE;
	while (desc_done && (tcb != NULL)) {

		/*
		 * Get the last tx descriptor of the tx control block.
		 * If the last tx descriptor is done, it is done with
		 * all the tx descriptors of the tx control block.
		 * Then the tx control block and all the corresponding
		 * tx descriptors can be recycled.
		 */
		last_index = NEXT_INDEX(index, tcb->desc_num - 1,
		    tx_ring->ring_size);

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    E1000_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Strip off the tx control block from the work list,
			 * and add it to the pending list.
			 */
			tx_ring->work_list[index] = NULL;
			LIST_PUSH_TAIL(&pending_list, &tcb->link);

			/*
			 * Count the total number of the tx descriptors
			 * recycled
			 */
			desc_num += tcb->desc_num;

			/*
			 * Advance the index of the tx descriptor ring
			 */
			index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);

			tcb = tx_ring->work_list[index];
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
igb_tx_recycle_head_wb(igb_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area. We still need
	 * to sync the head write-back value for the kernel.
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union e1000_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
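
	/*
	 * Note (added for clarity, not in the original source): head_wb is
	 * the ring index the hardware has advanced past, so the descriptors
	 * in [tbd_head, head_wb) are complete.  OFFSET(index, head_wb,
	 * ring_size) below measures that window; a tx control block may be
	 * reclaimed only if all tcb->desc_num of its descriptors fall
	 * inside it.
	 */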
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and resetting
 * other control fields.
 */
void
igb_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * igb_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine igb_put_free_list.
 */
static tx_control_block_t *
igb_get_free_list(igb_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (igb_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * igb_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between igb_get_free_list and igb_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
igb_put_free_list(igb_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}