/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at:
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When using or redistributing this file, you may do so under the
 * License only. No other modification of this header is permitted.
 *
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms of the CDDL.
 */

#include "igb_sw.h"

static boolean_t igb_tx(igb_tx_ring_t *, mblk_t *);
static int igb_tx_copy(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int igb_tx_bind(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int igb_tx_fill_ring(igb_tx_ring_t *, link_list_t *, hcksum_context_t *);
static void igb_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *igb_get_free_list(igb_tx_ring_t *);

static void igb_get_hcksum_context(mblk_t *, hcksum_context_t *);
static boolean_t igb_check_hcksum_context(igb_tx_ring_t *, hcksum_context_t *);
static void igb_fill_hcksum_context(struct e1000_adv_tx_context_desc *,
    hcksum_context_t *, uint32_t);

#ifndef IGB_DEBUG
#pragma inline(igb_save_desc)
#pragma inline(igb_get_hcksum_context)
#pragma inline(igb_check_hcksum_context)
#pragma inline(igb_fill_hcksum_context)
#endif

mblk_t *
igb_tx_ring_send(void *arg, mblk_t *mp)
{
	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg;

	ASSERT(tx_ring != NULL);

	return ((igb_tx(tx_ring, mp)) ? NULL : mp);
}
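
/*
 * A note on the send contract: igb_tx_ring_send() returns NULL when the
 * mblk has been consumed (queued for transmit, or dropped because the
 * packet is oversized), and returns the original mblk when transmit
 * resources are exhausted; in the latter case igb_tx() has already set
 * tx_ring->reschedule so the transmit can be retried later.
 */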

/*
 * igb_tx - Main transmit processing
 *
 * Called from igb_tx_ring_send with an mblk ready to transmit. This
 * routine sets up the transmit descriptors and sends data to
 * the wire.
 *
 * One mblk can consist of several fragments, and each fragment
 * is processed differently based on its size. Fragments smaller
 * than the bcopy threshold are processed with bcopy; otherwise,
 * they are processed with DMA binding.
 *
 * To process the mblk, a tx control block is obtained from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind an mblk fragment with DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer is transmitted with one
 * tx descriptor.
 *
 * A large fragment binds with only one tx control block's DMA
 * handle, but it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks are put
 * on the work list.
 */
static boolean_t
igb_tx(igb_tx_ring_t *tx_ring, mblk_t *mp)
{
	igb_t *igb = tx_ring->igb;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp;
	tx_control_block_t *tcb;
	hcksum_context_t hcksum_context, *hcksum;
	link_list_t pending_list;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLK_LEN(nmp);
	}

	/*
	 * If the mblk size exceeds the max frame size,
	 * discard this mblk, and return B_TRUE
	 */
	if (mbsize > (igb->max_frame_size - ETHERFCSL)) {
		freemsg(mp);
		IGB_DEBUGLOG_0(igb, "igb_tx: packet oversize");
		return (B_TRUE);
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < tx_ring->recycle_thresh)
		tx_ring->tx_recycle(tx_ring);

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload and return B_FALSE;
	 * the transmit then needs to be rescheduled.
	 */
	if (tx_ring->tbd_free < tx_ring->overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IGB_DEBUG_STAT(tx_ring->stat_overload);
		return (B_FALSE);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not yet put that data on the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	current_mp = mp;
	current_len = MBLK_LEN(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= tx_ring->copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
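	/*
	 * Worked example (the threshold value here is hypothetical):
	 * with a copy threshold of 512 bytes, an mblk chain of 64-, 64-
	 * and 1600-byte fragments is handled as follows. The two
	 * 64-byte fragments are bcopy'd into one tx control block's
	 * buffer and share a single data descriptor; the 1600-byte
	 * fragment gets its own tx control block, is DMA-bound, and
	 * consumes one data descriptor per DMA cookie.
	 */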
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0 : MBLK_LEN(next_mp);

		/*
		 * If the current fragment is empty but the next fragment
		 * will still be copied to the current tx buffer, we cannot
		 * skip this fragment here, because the copy processing is
		 * still pending completion; the empty fragment has to be
		 * handled in the tx_copy routine.
		 *
		 * If the copy processing is complete, or a DMA binding
		 * has just completed, the empty fragment can simply be
		 * skipped.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= tx_ring->copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = igb_get_free_list(tx_ring);

			if (tcb == NULL) {
				IGB_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid using lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > tx_ring->copy_thresh) ?
				    USE_DMA : USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > tx_ring->copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = igb_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > tx_ring->copy_thresh) ?
			    USE_DMA : USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = igb_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	if (igb->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk;
		 * it will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		hcksum = &hcksum_context;
		igb_get_hcksum_context(mp, hcksum);
	} else {
		hcksum = NULL;
	}
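
	/*
	 * Note: the "desc_total + 1" checks below conservatively reserve
	 * room for one context descriptor even when igb_tx_fill_ring()
	 * ends up not writing one; reserving the worst case keeps the
	 * free-descriptor check simple.
	 */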
	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of free tx descriptors is still not enough for
	 * transmit, then return failure.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = igb_tx_fill_ring(tx_ring, &pending_list, hcksum);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	mutex_exit(&tx_ring->tx_lock);

	return (B_TRUE);

tx_failure:
	/*
	 * Free the resources used by the tx control blocks. The mblk
	 * itself is preserved (tcb->mp is cleared first) so that the
	 * transmit can be retried later.
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (B_FALSE);
}
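
/*
 * A note on rescheduling (the notification path lives outside this
 * file): when igb_tx() fails with tx_ring->reschedule set, the mblk is
 * handed back to the caller through igb_tx_ring_send(). The caller is
 * expected to resubmit it later, presumably once the driver reports
 * that tx descriptors have been recycled.
 */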

/*
 * igb_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
igb_tx_copy(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}
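
/*
 * Note: while copy_done is B_FALSE, igb_tx_copy() returns 0 because the
 * accumulated tx buffer has not produced a descriptor yet; the single
 * data descriptor covering all fragments copied into the buffer is
 * counted only on the final, copy-completing call.
 */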

/*
 * igb_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
igb_tx_bind(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}
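
/*
 * Note: ncookies depends on how the fragment's virtual range maps to
 * physical memory, so a single large fragment (a jumbo-frame payload,
 * for example) may legitimately consume several tx descriptors, one
 * per DMA cookie.
 */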

/*
 * igb_get_hcksum_context
 *
 * Get the hcksum context information from the mblk
 */
static void
igb_get_hcksum_context(mblk_t *mp, hcksum_context_t *hcksum)
{
	uint32_t start;
	uint32_t flags;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;

	ASSERT(mp != NULL);

	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags);

	hcksum->hcksum_flags = flags;

	if (flags == 0)
		return;

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLK_LEN(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLK_LEN(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment, so we go through the fragments to parse
	 * the protocol type.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLK_LEN(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IGB_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return;
	}

	hcksum->mac_hdr_len = mac_hdr_len;
	hcksum->ip_hdr_len = start;
	hcksum->l4_proto = l4_proto;
}

/*
 * igb_check_hcksum_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
igb_check_hcksum_context(igb_tx_ring_t *tx_ring, hcksum_context_t *hcksum)
{
	hcksum_context_t *last;

	if (hcksum == NULL)
		return (B_FALSE);

	/*
	 * Compare the checksum data retrieved from the mblk with the
	 * stored checksum data of the last context descriptor. The items
	 * that need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 * If any one of them changes, a new context descriptor is needed.
	 */
	last = &tx_ring->hcksum_context;

	if (hcksum->hcksum_flags != 0) {
		if ((hcksum->hcksum_flags != last->hcksum_flags) ||
		    (hcksum->l4_proto != last->l4_proto) ||
		    (hcksum->mac_hdr_len != last->mac_hdr_len) ||
		    (hcksum->ip_hdr_len != last->ip_hdr_len)) {

			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * igb_fill_hcksum_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
igb_fill_hcksum_context(struct e1000_adv_tx_context_desc *ctx_tbd,
    hcksum_context_t *hcksum, uint32_t ring_index)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got
	 */
	ctx_tbd->vlan_macip_lens = hcksum->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= hcksum->mac_hdr_len <<
	    E1000_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	if (hcksum->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	if (hcksum->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (hcksum->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    E1000_ADVTXD_TUCMD_L4T_UDP;
			 * because E1000_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IGB_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;
	ctx_tbd->mss_l4len_idx = ring_index << 4;
}
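
/*
 * Field packing sketch for the advanced context descriptor above (the
 * exact bit layout is defined by the 82575/82576 hardware documents):
 * vlan_macip_lens carries the IP header length in its low bits with the
 * MAC header length shifted up by E1000_ADVTXD_MACLEN_SHIFT, and the
 * "ring_index << 4" in mss_l4len_idx appears to select the per-queue
 * context slot used when multiple tx rings are enabled.
 */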

/*
 * igb_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
igb_tx_fill_ring(igb_tx_ring_t *tx_ring, link_list_t *pending_list,
    hcksum_context_t *hcksum)
{
	struct e1000_hw *hw = &tx_ring->igb->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union e1000_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	uint32_t pay_len;
	int i;
	igb_t *igb = tx_ring->igb;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	pay_len = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (hcksum != NULL) {
		hcksum_flags = hcksum->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = igb_check_hcksum_context(tx_ring, hcksum);
		if (load_context) {
			first_tcb = (tx_control_block_t *)
			    LIST_GET_HEAD(pending_list);
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			igb_fill_hcksum_context(
			    (struct e1000_adv_tx_context_desc *)tbd, hcksum,
			    tx_ring->index);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->hcksum_context = *hcksum;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_RS |
			    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_DATA |
			    E1000_ADVTXD_DCMD_IFCS;

			tbd->read.olinfo_status = 0;

			pay_len += tcb->desc[i].length;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		if (first_tcb != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first tx control block.
			 */
			first_tcb->desc_num++;
			first_tcb = NULL;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are
	 * only valid in the first descriptor of the packet.
	 * The 82576 also requires the payload length setting even when
	 * TSO is not used.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_IFCS;
	if (hw->mac.type == e1000_82576) {
		first_tbd->read.olinfo_status =
		    (pay_len << E1000_ADVTXD_PAYLEN_SHIFT);
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_IXSM << 8;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_TXSM << 8;
		first_tbd->read.olinfo_status |= tx_ring->index << 4;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS;
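
	/*
	 * Cross-reference: the RS (Report Status) bit requested above is
	 * what makes the hardware write the Descriptor Done status back;
	 * igb_tx_recycle_legacy() later polls that bit on the last
	 * descriptor of each tx control block to decide when recycling
	 * is safe.
	 */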
	IGB_DEBUG_STAT(tx_ring->stat_pkt_cnt);

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race where the
	 * descriptors are transmitted and recycled (increasing tbd_free)
	 * before the decrement takes effect.
	 */
	i = igb_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), index);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	return (desc_num);
}

/*
 * igb_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
igb_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}
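
/*
 * The driver provides two recycling strategies, selected elsewhere via
 * the tx_ring->tx_recycle function pointer: igb_tx_recycle_legacy()
 * polls the Descriptor Done bit written back into the ring itself,
 * while igb_tx_recycle_head_wb() reads a single head pointer value that
 * the hardware writes to host memory, avoiding per-descriptor
 * write-backs.
 */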

/*
 * igb_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks are freed, and those
 * tx control blocks are returned to the free list.
 */
uint32_t
igb_tx_recycle_legacy(igb_tx_ring_t *tx_ring)
{
	uint32_t index, last_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	desc_done = B_TRUE;
	while (desc_done && (tcb != NULL)) {

		/*
		 * Get the last tx descriptor of the tx control block.
		 * If that descriptor is done, then all the tx descriptors
		 * of the tx control block are done, and the tx control
		 * block and all its corresponding tx descriptors can be
		 * recycled.
		 */
		last_index = NEXT_INDEX(index, tcb->desc_num - 1,
		    tx_ring->ring_size);

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    E1000_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Strip off the tx control block from the work list,
			 * and add it to the pending list.
			 */
			tx_ring->work_list[index] = NULL;
			LIST_PUSH_TAIL(&pending_list, &tcb->link);

			/*
			 * Count the total number of the tx descriptors
			 * recycled
			 */
			desc_num += tcb->desc_num;

			/*
			 * Advance the index of the tx descriptor ring
			 */
			index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);

			tcb = tx_ring->work_list[index];
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}
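
/*
 * With head write-back the hardware does not write status into the
 * descriptors at all; instead it stores its current head index in an
 * extra slot at the end of the descriptor DMA area. Recycling then
 * reduces to comparing the software head against that single value,
 * which is why the routine below syncs only a uint32_t.
 */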

/*
 * igb_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
igb_tx_recycle_head_wb(igb_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: in head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area; we still need
	 * to sync that head write-back value for the kernel.
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union e1000_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted; stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and resetting
 * other control fields.
 */
void
igb_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * igb_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine igb_put_free_list.
 */
static tx_control_block_t *
igb_get_free_list(igb_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (igb_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}
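
/*
 * A note on the free-list locking scheme: consumers take entries from
 * the head under tcb_head_lock, producers append at the tail under
 * tcb_tail_lock, and the atomic tcb_free counter keeps the two sides
 * apart; igb_atomic_reserve() fails before the head could ever pass
 * the tail, so a get and a put never race on the same slot.
 */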

/*
 * igb_put_free_list
 *
 * Put a list of used tx control blocks back on the free list
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between igb_get_free_list and igb_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
igb_put_free_list(igb_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}