15779Sxy150489 /* 25779Sxy150489 * CDDL HEADER START 35779Sxy150489 * 48571SChenlu.Chen@Sun.COM * Copyright(c) 2007-2009 Intel Corporation. All rights reserved. 55779Sxy150489 * The contents of this file are subject to the terms of the 65779Sxy150489 * Common Development and Distribution License (the "License"). 75779Sxy150489 * You may not use this file except in compliance with the License. 85779Sxy150489 * 98571SChenlu.Chen@Sun.COM * You can obtain a copy of the license at: 108571SChenlu.Chen@Sun.COM * http://www.opensolaris.org/os/licensing. 115779Sxy150489 * See the License for the specific language governing permissions 125779Sxy150489 * and limitations under the License. 135779Sxy150489 * 148571SChenlu.Chen@Sun.COM * When using or redistributing this file, you may do so under the 158571SChenlu.Chen@Sun.COM * License only. No other modification of this header is permitted. 168571SChenlu.Chen@Sun.COM * 175779Sxy150489 * If applicable, add the following below this CDDL HEADER, with the 185779Sxy150489 * fields enclosed by brackets "[]" replaced with your own identifying 195779Sxy150489 * information: Portions Copyright [yyyy] [name of copyright owner] 205779Sxy150489 * 215779Sxy150489 * CDDL HEADER END 225779Sxy150489 */ 235779Sxy150489 245779Sxy150489 /* 258571SChenlu.Chen@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26*9188SPaul.Guo@Sun.COM * Use is subject to license terms. 278275SEric Cheng */ 285779Sxy150489 295779Sxy150489 #include "igb_sw.h" 305779Sxy150489 315779Sxy150489 static boolean_t igb_tx(igb_tx_ring_t *, mblk_t *); 325779Sxy150489 static int igb_tx_copy(igb_tx_ring_t *, tx_control_block_t *, mblk_t *, 337072Sxy150489 uint32_t, boolean_t); 345779Sxy150489 static int igb_tx_bind(igb_tx_ring_t *, tx_control_block_t *, mblk_t *, 355779Sxy150489 uint32_t); 36*9188SPaul.Guo@Sun.COM static int igb_tx_fill_ring(igb_tx_ring_t *, link_list_t *, tx_context_t *, 37*9188SPaul.Guo@Sun.COM size_t); 385779Sxy150489 static void igb_save_desc(tx_control_block_t *, uint64_t, size_t); 395779Sxy150489 static tx_control_block_t *igb_get_free_list(igb_tx_ring_t *); 40*9188SPaul.Guo@Sun.COM static int igb_get_tx_context(mblk_t *, tx_context_t *); 41*9188SPaul.Guo@Sun.COM static boolean_t igb_check_tx_context(igb_tx_ring_t *, tx_context_t *); 42*9188SPaul.Guo@Sun.COM static void igb_fill_tx_context(struct e1000_adv_tx_context_desc *, 43*9188SPaul.Guo@Sun.COM tx_context_t *, uint32_t); 445779Sxy150489 455779Sxy150489 #ifndef IGB_DEBUG 465779Sxy150489 #pragma inline(igb_save_desc) 47*9188SPaul.Guo@Sun.COM #pragma inline(igb_get_tx_context) 48*9188SPaul.Guo@Sun.COM #pragma inline(igb_check_tx_context) 49*9188SPaul.Guo@Sun.COM #pragma inline(igb_fill_tx_context) 505779Sxy150489 #endif 515779Sxy150489 525779Sxy150489 mblk_t * 538275SEric Cheng igb_tx_ring_send(void *arg, mblk_t *mp) 545779Sxy150489 { 558275SEric Cheng igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg; 565779Sxy150489 578275SEric Cheng ASSERT(tx_ring != NULL); 585779Sxy150489 598275SEric Cheng return ((igb_tx(tx_ring, mp)) ? NULL : mp); 605779Sxy150489 } 615779Sxy150489 625779Sxy150489 /* 635779Sxy150489 * igb_tx - Main transmit processing 645779Sxy150489 * 655779Sxy150489 * Called from igb_m_tx with an mblk ready to transmit. this 665779Sxy150489 * routine sets up the transmit descriptors and sends data to 675779Sxy150489 * the wire. 685779Sxy150489 * 695779Sxy150489 * One mblk can consist of several fragments, each fragment 705779Sxy150489 * will be processed with different methods based on the size. 
715779Sxy150489 * For the fragments with size less than the bcopy threshold, 725779Sxy150489 * they will be processed by using bcopy; otherwise, they will 735779Sxy150489 * be processed by using DMA binding. 745779Sxy150489 * 755779Sxy150489 * To process the mblk, a tx control block is got from the 765779Sxy150489 * free list. One tx control block contains one tx buffer, which 775779Sxy150489 * is used to copy mblk fragments' data; and one tx DMA handle, 785779Sxy150489 * which is used to bind a mblk fragment with DMA resource. 795779Sxy150489 * 805779Sxy150489 * Several small mblk fragments can be copied into one tx control 815779Sxy150489 * block's buffer, and then the buffer will be transmitted with 825779Sxy150489 * one tx descriptor. 835779Sxy150489 * 845779Sxy150489 * A large fragment only binds with one tx control block's DMA 855779Sxy150489 * handle, and it can span several tx descriptors for transmitting. 865779Sxy150489 * 875779Sxy150489 * So to transmit a packet (mblk), several tx control blocks can 885779Sxy150489 * be used. After the processing, those tx control blocks will 895779Sxy150489 * be put to the work list. 905779Sxy150489 */ 915779Sxy150489 static boolean_t 925779Sxy150489 igb_tx(igb_tx_ring_t *tx_ring, mblk_t *mp) 935779Sxy150489 { 945779Sxy150489 igb_t *igb = tx_ring->igb; 955779Sxy150489 tx_type_t current_flag, next_flag; 965779Sxy150489 uint32_t current_len, next_len; 975779Sxy150489 uint32_t desc_total; 985779Sxy150489 size_t mbsize; 995779Sxy150489 int desc_num; 1005779Sxy150489 boolean_t copy_done, eop; 1015779Sxy150489 mblk_t *current_mp, *next_mp, *nmp; 1025779Sxy150489 tx_control_block_t *tcb; 103*9188SPaul.Guo@Sun.COM tx_context_t tx_context, *ctx; 1045779Sxy150489 link_list_t pending_list; 105*9188SPaul.Guo@Sun.COM mblk_t *new_mp; 106*9188SPaul.Guo@Sun.COM mblk_t *previous_mp; 107*9188SPaul.Guo@Sun.COM uint32_t hdr_frag_len; 108*9188SPaul.Guo@Sun.COM uint32_t hdr_len, len; 109*9188SPaul.Guo@Sun.COM uint32_t copy_thresh; 110*9188SPaul.Guo@Sun.COM 111*9188SPaul.Guo@Sun.COM copy_thresh = tx_ring->copy_thresh; 1125779Sxy150489 1135779Sxy150489 /* Get the mblk size */ 1145779Sxy150489 mbsize = 0; 1155779Sxy150489 for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) { 116*9188SPaul.Guo@Sun.COM mbsize += MBLKL(nmp); 1175779Sxy150489 } 1185779Sxy150489 119*9188SPaul.Guo@Sun.COM if (igb->tx_hcksum_enable) { 120*9188SPaul.Guo@Sun.COM ctx = &tx_context; 121*9188SPaul.Guo@Sun.COM /* 122*9188SPaul.Guo@Sun.COM * Retrieve offloading context information from the mblk 123*9188SPaul.Guo@Sun.COM * that will be used to decide whether/how to fill the 124*9188SPaul.Guo@Sun.COM * context descriptor. 
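		 *
		 * Note: if the context cannot be parsed (for example, LSO is
		 * requested without the required h/w checksum flags, or the
		 * ethertype is unrecognized), the packet is dropped below and
		 * B_TRUE is returned so that the mblk is not retransmitted.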
		 */
		if (igb_get_tx_context(mp, ctx) != TX_CXT_SUCCESS) {
			freemsg(mp);
			return (B_TRUE);
		}

		if ((ctx->lso_flag &&
		    (mbsize > (ctx->mac_hdr_len + IGB_LSO_MAXLEN))) ||
		    (!ctx->lso_flag &&
		    (mbsize > (igb->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IGB_DEBUGLOG_0(igb, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	} else {
		ctx = NULL;
		if (mbsize > (igb->max_frame_size - ETHERFCSL)) {
			freemsg(mp);
			IGB_DEBUGLOG_0(igb, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < tx_ring->recycle_thresh)
		tx_ring->tx_recycle(tx_ring);

	/*
	 * After the recycling, if the tbd_free is less than the
	 * overload_thresh, assert overload, return B_FALSE, and
	 * the transmit will need to be re-scheduled.
	 */
	if (tx_ring->tbd_free < tx_ring->overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IGB_DEBUG_STAT(tx_ring->stat_overload);
		return (B_FALSE);
	}

	/*
	 * The software must guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor - this is required
	 * by the hardware. The header is reallocated and refilled
	 * here if it is not physically contiguous in memory.
	 */
	if (ctx && ctx->lso_flag) {
		hdr_len = ctx->mac_hdr_len + ctx->ip_hdr_len +
		    ctx->l4_hdr_len;
		len = MBLKL(mp);
		current_mp = mp;
		previous_mp = NULL;
		while (len < hdr_len) {
			previous_mp = current_mp;
			current_mp = current_mp->b_cont;
			len += MBLKL(current_mp);
		}

		/*
		 * We only need to act when len is larger than copy_thresh;
		 * otherwise igb's tx copy mechanism will ensure that the
		 * headers are handled in one descriptor.
		 */
		if (len > copy_thresh) {
			if (len != hdr_len) {
				/*
				 * If the header and the payload are in
				 * different mblks, we simply force the
				 * header to be copied into a
				 * newly-allocated buffer.
197*9188SPaul.Guo@Sun.COM */ 198*9188SPaul.Guo@Sun.COM hdr_frag_len = hdr_len - 199*9188SPaul.Guo@Sun.COM (len - MBLKL(current_mp)); 200*9188SPaul.Guo@Sun.COM 201*9188SPaul.Guo@Sun.COM /* 202*9188SPaul.Guo@Sun.COM * There are two cases we will reallocate 203*9188SPaul.Guo@Sun.COM * a mblk for the last header fragment. 204*9188SPaul.Guo@Sun.COM * 1. the header is in multiple mblks and 205*9188SPaul.Guo@Sun.COM * the last fragment shares the same mblk 206*9188SPaul.Guo@Sun.COM * with the payload 207*9188SPaul.Guo@Sun.COM * 2. the header is in a single mblk shared 208*9188SPaul.Guo@Sun.COM * with the payload but the header crosses 209*9188SPaul.Guo@Sun.COM * a page. 210*9188SPaul.Guo@Sun.COM */ 211*9188SPaul.Guo@Sun.COM if ((current_mp != mp) || 212*9188SPaul.Guo@Sun.COM (P2NPHASE((uintptr_t)current_mp->b_rptr, 213*9188SPaul.Guo@Sun.COM igb->page_size) < hdr_len)) { 214*9188SPaul.Guo@Sun.COM /* 215*9188SPaul.Guo@Sun.COM * reallocate the mblk for the last 216*9188SPaul.Guo@Sun.COM * header fragment, expect it to be 217*9188SPaul.Guo@Sun.COM * copied into pre-allocated 218*9188SPaul.Guo@Sun.COM * page-aligned buffer 219*9188SPaul.Guo@Sun.COM */ 220*9188SPaul.Guo@Sun.COM new_mp = allocb(hdr_frag_len, NULL); 221*9188SPaul.Guo@Sun.COM if (!new_mp) { 222*9188SPaul.Guo@Sun.COM return (B_FALSE); 223*9188SPaul.Guo@Sun.COM } 224*9188SPaul.Guo@Sun.COM 225*9188SPaul.Guo@Sun.COM /* 226*9188SPaul.Guo@Sun.COM * Insert the new mblk 227*9188SPaul.Guo@Sun.COM */ 228*9188SPaul.Guo@Sun.COM bcopy(current_mp->b_rptr, 229*9188SPaul.Guo@Sun.COM new_mp->b_rptr, hdr_frag_len); 230*9188SPaul.Guo@Sun.COM new_mp->b_wptr = new_mp->b_rptr + 231*9188SPaul.Guo@Sun.COM hdr_frag_len; 232*9188SPaul.Guo@Sun.COM new_mp->b_cont = current_mp; 233*9188SPaul.Guo@Sun.COM if (previous_mp) 234*9188SPaul.Guo@Sun.COM previous_mp->b_cont = new_mp; 235*9188SPaul.Guo@Sun.COM else 236*9188SPaul.Guo@Sun.COM mp = new_mp; 237*9188SPaul.Guo@Sun.COM current_mp->b_rptr += hdr_frag_len; 238*9188SPaul.Guo@Sun.COM } 239*9188SPaul.Guo@Sun.COM } 240*9188SPaul.Guo@Sun.COM 241*9188SPaul.Guo@Sun.COM if (copy_thresh < hdr_len) 242*9188SPaul.Guo@Sun.COM copy_thresh = hdr_len; 243*9188SPaul.Guo@Sun.COM } 244*9188SPaul.Guo@Sun.COM } 245*9188SPaul.Guo@Sun.COM 246*9188SPaul.Guo@Sun.COM /* 2475779Sxy150489 * The pending_list is a linked list that is used to save 2485779Sxy150489 * the tx control blocks that have packet data processed 2495779Sxy150489 * but have not put the data to the tx descriptor ring. 2505779Sxy150489 * It is used to reduce the lock contention of the tx_lock. 2515779Sxy150489 */ 2525779Sxy150489 LINK_LIST_INIT(&pending_list); 2535779Sxy150489 desc_num = 0; 2545779Sxy150489 desc_total = 0; 2555779Sxy150489 2565779Sxy150489 current_mp = mp; 257*9188SPaul.Guo@Sun.COM current_len = MBLKL(current_mp); 2585779Sxy150489 /* 2595779Sxy150489 * Decide which method to use for the first fragment 2605779Sxy150489 */ 261*9188SPaul.Guo@Sun.COM current_flag = (current_len <= copy_thresh) ? 2625779Sxy150489 USE_COPY : USE_DMA; 2635779Sxy150489 /* 2645779Sxy150489 * If the mblk includes several contiguous small fragments, 2655779Sxy150489 * they may be copied into one buffer. This flag is used to 2665779Sxy150489 * indicate whether there are pending fragments that need to 2675779Sxy150489 * be copied to the current tx buffer. 2685779Sxy150489 * 2695779Sxy150489 * If this flag is B_TRUE, it indicates that a new tx control 2705779Sxy150489 * block is needed to process the next fragment using either 2715779Sxy150489 * copy or DMA binding. 
2725779Sxy150489 * 2735779Sxy150489 * Otherwise, it indicates that the next fragment will be 2745779Sxy150489 * copied to the current tx buffer that is maintained by the 2755779Sxy150489 * current tx control block. No new tx control block is needed. 2765779Sxy150489 */ 2775779Sxy150489 copy_done = B_TRUE; 2785779Sxy150489 while (current_mp) { 2795779Sxy150489 next_mp = current_mp->b_cont; 2805779Sxy150489 eop = (next_mp == NULL); /* Last fragment of the packet? */ 281*9188SPaul.Guo@Sun.COM next_len = eop ? 0: MBLKL(next_mp); 2825779Sxy150489 2835779Sxy150489 /* 2845779Sxy150489 * When the current fragment is an empty fragment, if 2855779Sxy150489 * the next fragment will still be copied to the current 2865779Sxy150489 * tx buffer, we cannot skip this fragment here. Because 2875779Sxy150489 * the copy processing is pending for completion. We have 2885779Sxy150489 * to process this empty fragment in the tx_copy routine. 2895779Sxy150489 * 2905779Sxy150489 * If the copy processing is completed or a DMA binding 2915779Sxy150489 * processing is just completed, we can just skip this 2925779Sxy150489 * empty fragment. 2935779Sxy150489 */ 2945779Sxy150489 if ((current_len == 0) && (copy_done)) { 2955779Sxy150489 current_mp = next_mp; 2965779Sxy150489 current_len = next_len; 297*9188SPaul.Guo@Sun.COM current_flag = (current_len <= copy_thresh) ? 2985779Sxy150489 USE_COPY : USE_DMA; 2995779Sxy150489 continue; 3005779Sxy150489 } 3015779Sxy150489 3025779Sxy150489 if (copy_done) { 3035779Sxy150489 /* 3045779Sxy150489 * Get a new tx control block from the free list 3055779Sxy150489 */ 3065779Sxy150489 tcb = igb_get_free_list(tx_ring); 3075779Sxy150489 3085779Sxy150489 if (tcb == NULL) { 3095779Sxy150489 IGB_DEBUG_STAT(tx_ring->stat_fail_no_tcb); 3105779Sxy150489 goto tx_failure; 3115779Sxy150489 } 3125779Sxy150489 3135779Sxy150489 /* 3145779Sxy150489 * Push the tx control block to the pending list 3155779Sxy150489 * to avoid using lock too early 3165779Sxy150489 */ 3175779Sxy150489 LIST_PUSH_TAIL(&pending_list, &tcb->link); 3185779Sxy150489 } 3195779Sxy150489 3205779Sxy150489 if (current_flag == USE_COPY) { 3215779Sxy150489 /* 3225779Sxy150489 * Check whether to use bcopy or DMA binding to process 3235779Sxy150489 * the next fragment, and if using bcopy, whether we 3245779Sxy150489 * need to continue copying the next fragment into the 3255779Sxy150489 * current tx buffer. 3265779Sxy150489 */ 3275779Sxy150489 ASSERT((tcb->tx_buf.len + current_len) <= 3285779Sxy150489 tcb->tx_buf.size); 3295779Sxy150489 3305779Sxy150489 if (eop) { 3315779Sxy150489 /* 3325779Sxy150489 * This is the last fragment of the packet, so 3335779Sxy150489 * the copy processing will be completed with 3345779Sxy150489 * this fragment. 3355779Sxy150489 */ 3365779Sxy150489 next_flag = USE_NONE; 3375779Sxy150489 copy_done = B_TRUE; 3385779Sxy150489 } else if ((tcb->tx_buf.len + current_len + next_len) > 3395779Sxy150489 tcb->tx_buf.size) { 3405779Sxy150489 /* 3415779Sxy150489 * If the next fragment is too large to be 3425779Sxy150489 * copied to the current tx buffer, we need 3435779Sxy150489 * to complete the current copy processing. 3445779Sxy150489 */ 345*9188SPaul.Guo@Sun.COM next_flag = (next_len > copy_thresh) ? 3465779Sxy150489 USE_DMA: USE_COPY; 3475779Sxy150489 copy_done = B_TRUE; 348*9188SPaul.Guo@Sun.COM } else if (next_len > copy_thresh) { 3495779Sxy150489 /* 3505779Sxy150489 * The next fragment needs to be processed with 3515779Sxy150489 * DMA binding. 
So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = igb_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = igb_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return failure.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = igb_tx_fill_ring(tx_ring, &pending_list, ctx, mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	mutex_exit(&tx_ring->tx_lock);

	return (B_TRUE);

tx_failure:
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (B_FALSE);
}

/*
 * igb_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
igb_tx_copy(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
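	 *
	 * In that case one tx descriptor is reserved for the buffer and
	 * this routine returns 1; otherwise it returns 0 because more
	 * fragments will still be copied into the same buffer.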
4955779Sxy150489 */ 4965779Sxy150489 if (copy_done) { 4975779Sxy150489 /* 4985779Sxy150489 * Sync the DMA buffer of the packet data 4995779Sxy150489 */ 5005779Sxy150489 DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV); 5015779Sxy150489 5025779Sxy150489 tcb->tx_type = USE_COPY; 5035779Sxy150489 5045779Sxy150489 /* 5055779Sxy150489 * Save the address and length to the private data structure 5065779Sxy150489 * of the tx control block, which will be used to fill the 5075779Sxy150489 * tx descriptor ring after all the fragments are processed. 5085779Sxy150489 */ 5095779Sxy150489 igb_save_desc(tcb, tx_buf->dma_address, tx_buf->len); 5105779Sxy150489 desc_num++; 5115779Sxy150489 } 5125779Sxy150489 5135779Sxy150489 return (desc_num); 5145779Sxy150489 } 5155779Sxy150489 5165779Sxy150489 /* 5175779Sxy150489 * igb_tx_bind 5185779Sxy150489 * 5195779Sxy150489 * Bind the mblk fragment with DMA 5205779Sxy150489 */ 5215779Sxy150489 static int 5225779Sxy150489 igb_tx_bind(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp, 5235779Sxy150489 uint32_t len) 5245779Sxy150489 { 5255779Sxy150489 int status, i; 5265779Sxy150489 ddi_dma_cookie_t dma_cookie; 5275779Sxy150489 uint_t ncookies; 5285779Sxy150489 int desc_num; 5295779Sxy150489 5305779Sxy150489 /* 5315779Sxy150489 * Use DMA binding to process the mblk fragment 5325779Sxy150489 */ 5335779Sxy150489 status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL, 5345779Sxy150489 (caddr_t)mp->b_rptr, len, 5355779Sxy150489 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 5365779Sxy150489 0, &dma_cookie, &ncookies); 5375779Sxy150489 5385779Sxy150489 if (status != DDI_DMA_MAPPED) { 5395779Sxy150489 IGB_DEBUG_STAT(tx_ring->stat_fail_dma_bind); 5405779Sxy150489 return (-1); 5415779Sxy150489 } 5425779Sxy150489 5435779Sxy150489 tcb->frag_num++; 5445779Sxy150489 tcb->tx_type = USE_DMA; 5455779Sxy150489 /* 5465779Sxy150489 * Each fragment can span several cookies. One cookie will have 5475779Sxy150489 * one tx descriptor to transmit. 5485779Sxy150489 */ 5495779Sxy150489 desc_num = 0; 5505779Sxy150489 for (i = ncookies; i > 0; i--) { 5515779Sxy150489 /* 5525779Sxy150489 * Save the address and length to the private data structure 5535779Sxy150489 * of the tx control block, which will be used to fill the 5545779Sxy150489 * tx descriptor ring after all the fragments are processed. 
		 */
		igb_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * igb_get_tx_context
 *
 * Get the tx context information from the mblk
 */
static int
igb_get_tx_context(mblk_t *mp, tx_context_t *ctx)
{
	uint32_t start;
	uint32_t flags;
	uint32_t lso_flag;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags);
	bzero(ctx, sizeof (tx_context_t));

	ctx->hcksum_flags = flags;

	if (flags == 0)
		return (TX_CXT_SUCCESS);

	lso_info_get(mp, &mss, &lso_flag);
	ctx->mss = mss;
	ctx->lso_flag = (lso_flag == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet is dropped here
	 * if the h/w checksum flags are not set.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IGB_DEBUGLOG_0(NULL, "igb_tx: h/w "
			    "checksum flags are not set for LSO");
			return (TX_CXT_E_LSO_CSUM);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
6265779Sxy150489 */ 627*9188SPaul.Guo@Sun.COM size = len = MBLKL(mp); 6285779Sxy150489 offset = offsetof(struct ether_header, ether_type); 6295779Sxy150489 while (size <= offset) { 6305779Sxy150489 mp = mp->b_cont; 6315779Sxy150489 ASSERT(mp != NULL); 632*9188SPaul.Guo@Sun.COM len = MBLKL(mp); 6335779Sxy150489 size += len; 6345779Sxy150489 } 6355779Sxy150489 pos = mp->b_rptr + offset + len - size; 6365779Sxy150489 6375779Sxy150489 etype = ntohs(*(ushort_t *)(uintptr_t)pos); 6385779Sxy150489 if (etype == ETHERTYPE_VLAN) { 6395779Sxy150489 /* 6405779Sxy150489 * Get the position of the ether_type in VLAN header 6415779Sxy150489 */ 6425779Sxy150489 offset = offsetof(struct ether_vlan_header, ether_type); 6435779Sxy150489 while (size <= offset) { 6445779Sxy150489 mp = mp->b_cont; 6455779Sxy150489 ASSERT(mp != NULL); 646*9188SPaul.Guo@Sun.COM len = MBLKL(mp); 6475779Sxy150489 size += len; 6485779Sxy150489 } 6495779Sxy150489 pos = mp->b_rptr + offset + len - size; 6505779Sxy150489 6515779Sxy150489 etype = ntohs(*(ushort_t *)(uintptr_t)pos); 6525779Sxy150489 mac_hdr_len = sizeof (struct ether_vlan_header); 6535779Sxy150489 } else { 6545779Sxy150489 mac_hdr_len = sizeof (struct ether_header); 6555779Sxy150489 } 6565779Sxy150489 6575779Sxy150489 /* 658*9188SPaul.Guo@Sun.COM * Here we assume the IP(V6) header is fully included in one 659*9188SPaul.Guo@Sun.COM * mblk fragment. 6605779Sxy150489 */ 6615779Sxy150489 switch (etype) { 6625779Sxy150489 case ETHERTYPE_IP: 663*9188SPaul.Guo@Sun.COM offset = mac_hdr_len; 6645779Sxy150489 while (size <= offset) { 6655779Sxy150489 mp = mp->b_cont; 6665779Sxy150489 ASSERT(mp != NULL); 667*9188SPaul.Guo@Sun.COM len = MBLKL(mp); 6685779Sxy150489 size += len; 6695779Sxy150489 } 6705779Sxy150489 pos = mp->b_rptr + offset + len - size; 6715779Sxy150489 672*9188SPaul.Guo@Sun.COM if (ctx->lso_flag) { 673*9188SPaul.Guo@Sun.COM *((uint16_t *)(uintptr_t)(pos + offsetof(ipha_t, 674*9188SPaul.Guo@Sun.COM ipha_length))) = 0; 675*9188SPaul.Guo@Sun.COM 676*9188SPaul.Guo@Sun.COM /* 677*9188SPaul.Guo@Sun.COM * To utilize igb LSO, here need to fill 678*9188SPaul.Guo@Sun.COM * the tcp checksum field of the packet with the 679*9188SPaul.Guo@Sun.COM * following pseudo-header checksum: 680*9188SPaul.Guo@Sun.COM * (ip_source_addr, ip_destination_addr, l4_proto) 681*9188SPaul.Guo@Sun.COM * and also need to fill the ip header checksum 682*9188SPaul.Guo@Sun.COM * with zero. Currently the tcp/ip stack has done 683*9188SPaul.Guo@Sun.COM * these. 
684*9188SPaul.Guo@Sun.COM */ 685*9188SPaul.Guo@Sun.COM } 686*9188SPaul.Guo@Sun.COM 687*9188SPaul.Guo@Sun.COM l4_proto = *(uint8_t *)(pos + offsetof(ipha_t, ipha_protocol)); 6885779Sxy150489 break; 6895779Sxy150489 case ETHERTYPE_IPV6: 6905779Sxy150489 offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len; 6915779Sxy150489 while (size <= offset) { 6925779Sxy150489 mp = mp->b_cont; 6935779Sxy150489 ASSERT(mp != NULL); 694*9188SPaul.Guo@Sun.COM len = MBLKL(mp); 6955779Sxy150489 size += len; 6965779Sxy150489 } 6975779Sxy150489 pos = mp->b_rptr + offset + len - size; 6985779Sxy150489 6995779Sxy150489 l4_proto = *(uint8_t *)pos; 7005779Sxy150489 break; 7015779Sxy150489 default: 7025779Sxy150489 /* Unrecoverable error */ 703*9188SPaul.Guo@Sun.COM IGB_DEBUGLOG_0(NULL, "Ethernet type field error with " 704*9188SPaul.Guo@Sun.COM "tx hcksum flag set"); 705*9188SPaul.Guo@Sun.COM return (TX_CXT_E_ETHER_TYPE); 7065779Sxy150489 } 7075779Sxy150489 708*9188SPaul.Guo@Sun.COM if (ctx->lso_flag) { 709*9188SPaul.Guo@Sun.COM offset = mac_hdr_len + start; 710*9188SPaul.Guo@Sun.COM while (size <= offset) { 711*9188SPaul.Guo@Sun.COM mp = mp->b_cont; 712*9188SPaul.Guo@Sun.COM ASSERT(mp != NULL); 713*9188SPaul.Guo@Sun.COM len = MBLKL(mp); 714*9188SPaul.Guo@Sun.COM size += len; 715*9188SPaul.Guo@Sun.COM } 716*9188SPaul.Guo@Sun.COM pos = mp->b_rptr + offset + len - size; 717*9188SPaul.Guo@Sun.COM 718*9188SPaul.Guo@Sun.COM l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos); 719*9188SPaul.Guo@Sun.COM } else { 720*9188SPaul.Guo@Sun.COM /* 721*9188SPaul.Guo@Sun.COM * l4 header length is only required for LSO 722*9188SPaul.Guo@Sun.COM */ 723*9188SPaul.Guo@Sun.COM l4_hdr_len = 0; 724*9188SPaul.Guo@Sun.COM } 725*9188SPaul.Guo@Sun.COM 726*9188SPaul.Guo@Sun.COM ctx->mac_hdr_len = mac_hdr_len; 727*9188SPaul.Guo@Sun.COM ctx->ip_hdr_len = start; 728*9188SPaul.Guo@Sun.COM ctx->l4_proto = l4_proto; 729*9188SPaul.Guo@Sun.COM ctx->l4_hdr_len = l4_hdr_len; 730*9188SPaul.Guo@Sun.COM 731*9188SPaul.Guo@Sun.COM return (TX_CXT_SUCCESS); 7325779Sxy150489 } 7335779Sxy150489 7345779Sxy150489 /* 735*9188SPaul.Guo@Sun.COM * igb_check_tx_context 7365779Sxy150489 * 7375779Sxy150489 * Check if a new context descriptor is needed 7385779Sxy150489 */ 7395779Sxy150489 static boolean_t 740*9188SPaul.Guo@Sun.COM igb_check_tx_context(igb_tx_ring_t *tx_ring, tx_context_t *ctx) 7415779Sxy150489 { 742*9188SPaul.Guo@Sun.COM tx_context_t *last; 7435779Sxy150489 744*9188SPaul.Guo@Sun.COM if (ctx == NULL) 7455779Sxy150489 return (B_FALSE); 7465779Sxy150489 7475779Sxy150489 /* 748*9188SPaul.Guo@Sun.COM * Compare the context data retrieved from the mblk and the 749*9188SPaul.Guo@Sun.COM * stored context data of the last context descriptor. The data 7505779Sxy150489 * need to be checked are: 7515779Sxy150489 * hcksum_flags 7525779Sxy150489 * l4_proto 753*9188SPaul.Guo@Sun.COM * mss (only check for LSO) 754*9188SPaul.Guo@Sun.COM * l4_hdr_len (only check for LSO) 755*9188SPaul.Guo@Sun.COM * ip_hdr_len 7565779Sxy150489 * mac_hdr_len 7575779Sxy150489 * Either one of the above data is changed, a new context descriptor 7585779Sxy150489 * will be needed. 
7595779Sxy150489 */ 760*9188SPaul.Guo@Sun.COM last = &tx_ring->tx_context; 7615779Sxy150489 762*9188SPaul.Guo@Sun.COM if (ctx->hcksum_flags != 0) { 763*9188SPaul.Guo@Sun.COM if ((ctx->hcksum_flags != last->hcksum_flags) || 764*9188SPaul.Guo@Sun.COM (ctx->l4_proto != last->l4_proto) || 765*9188SPaul.Guo@Sun.COM (ctx->lso_flag && ((ctx->mss != last->mss) || 766*9188SPaul.Guo@Sun.COM (ctx->l4_hdr_len != last->l4_hdr_len))) || 767*9188SPaul.Guo@Sun.COM (ctx->ip_hdr_len != last->ip_hdr_len) || 768*9188SPaul.Guo@Sun.COM (ctx->mac_hdr_len != last->mac_hdr_len)) { 7695779Sxy150489 return (B_TRUE); 7705779Sxy150489 } 7715779Sxy150489 } 7725779Sxy150489 7735779Sxy150489 return (B_FALSE); 7745779Sxy150489 } 7755779Sxy150489 7765779Sxy150489 /* 777*9188SPaul.Guo@Sun.COM * igb_fill_tx_context 7785779Sxy150489 * 7795779Sxy150489 * Fill the context descriptor with hardware checksum informations 7805779Sxy150489 */ 7815779Sxy150489 static void 782*9188SPaul.Guo@Sun.COM igb_fill_tx_context(struct e1000_adv_tx_context_desc *ctx_tbd, 783*9188SPaul.Guo@Sun.COM tx_context_t *ctx, uint32_t ring_index) 7845779Sxy150489 { 7855779Sxy150489 /* 7865779Sxy150489 * Fill the context descriptor with the checksum 7875779Sxy150489 * context information we've got 7885779Sxy150489 */ 789*9188SPaul.Guo@Sun.COM ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len; 790*9188SPaul.Guo@Sun.COM ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len << 7915779Sxy150489 E1000_ADVTXD_MACLEN_SHIFT; 7925779Sxy150489 7935779Sxy150489 ctx_tbd->type_tucmd_mlhl = 7945779Sxy150489 E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 7955779Sxy150489 796*9188SPaul.Guo@Sun.COM if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) 7975779Sxy150489 ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 7985779Sxy150489 799*9188SPaul.Guo@Sun.COM if (ctx->hcksum_flags & HCK_PARTIALCKSUM) { 800*9188SPaul.Guo@Sun.COM switch (ctx->l4_proto) { 8015779Sxy150489 case IPPROTO_TCP: 8025779Sxy150489 ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 8035779Sxy150489 break; 8045779Sxy150489 case IPPROTO_UDP: 8055779Sxy150489 /* 8065779Sxy150489 * We don't have to explicitly set: 8075779Sxy150489 * ctx_tbd->type_tucmd_mlhl |= 8085779Sxy150489 * E1000_ADVTXD_TUCMD_L4T_UDP; 8095779Sxy150489 * Because E1000_ADVTXD_TUCMD_L4T_UDP == 0b 8105779Sxy150489 */ 8115779Sxy150489 break; 8125779Sxy150489 default: 8135779Sxy150489 /* Unrecoverable error */ 8145779Sxy150489 IGB_DEBUGLOG_0(NULL, "L4 type error with tx hcksum"); 8155779Sxy150489 break; 8165779Sxy150489 } 8175779Sxy150489 } 8185779Sxy150489 8195779Sxy150489 ctx_tbd->seqnum_seed = 0; 8208275SEric Cheng ctx_tbd->mss_l4len_idx = ring_index << 4; 821*9188SPaul.Guo@Sun.COM if (ctx->lso_flag) { 822*9188SPaul.Guo@Sun.COM ctx_tbd->mss_l4len_idx |= 823*9188SPaul.Guo@Sun.COM (ctx->l4_hdr_len << E1000_ADVTXD_L4LEN_SHIFT) | 824*9188SPaul.Guo@Sun.COM (ctx->mss << E1000_ADVTXD_MSS_SHIFT); 825*9188SPaul.Guo@Sun.COM } 8265779Sxy150489 } 8275779Sxy150489 8285779Sxy150489 /* 8295779Sxy150489 * igb_tx_fill_ring 8305779Sxy150489 * 8315779Sxy150489 * Fill the tx descriptor ring with the data 8325779Sxy150489 */ 8335779Sxy150489 static int 8345779Sxy150489 igb_tx_fill_ring(igb_tx_ring_t *tx_ring, link_list_t *pending_list, 835*9188SPaul.Guo@Sun.COM tx_context_t *ctx, size_t mbsize) 8365779Sxy150489 { 8375779Sxy150489 struct e1000_hw *hw = &tx_ring->igb->hw; 8385779Sxy150489 boolean_t load_context; 8395779Sxy150489 uint32_t index, tcb_index, desc_num; 8405779Sxy150489 union e1000_adv_tx_desc *tbd, *first_tbd; 8415779Sxy150489 tx_control_block_t *tcb, *first_tcb; 
8425779Sxy150489 uint32_t hcksum_flags; 8435779Sxy150489 int i; 8446624Sgl147354 igb_t *igb = tx_ring->igb; 8455779Sxy150489 8465779Sxy150489 ASSERT(mutex_owned(&tx_ring->tx_lock)); 8475779Sxy150489 8485779Sxy150489 tbd = NULL; 8495779Sxy150489 first_tbd = NULL; 8505779Sxy150489 first_tcb = NULL; 8515779Sxy150489 desc_num = 0; 8525779Sxy150489 hcksum_flags = 0; 8535779Sxy150489 load_context = B_FALSE; 8545779Sxy150489 8555779Sxy150489 /* 8565779Sxy150489 * Get the index of the first tx descriptor that will be filled, 8575779Sxy150489 * and the index of the first work list item that will be attached 8585779Sxy150489 * with the first used tx control block in the pending list. 8595779Sxy150489 * Note: the two indexes are the same. 8605779Sxy150489 */ 8615779Sxy150489 index = tx_ring->tbd_tail; 8625779Sxy150489 tcb_index = tx_ring->tbd_tail; 8635779Sxy150489 864*9188SPaul.Guo@Sun.COM if (ctx != NULL) { 865*9188SPaul.Guo@Sun.COM hcksum_flags = ctx->hcksum_flags; 8665779Sxy150489 8675779Sxy150489 /* 8685779Sxy150489 * Check if a new context descriptor is needed for this packet 8695779Sxy150489 */ 870*9188SPaul.Guo@Sun.COM load_context = igb_check_tx_context(tx_ring, ctx); 8715779Sxy150489 if (load_context) { 8725779Sxy150489 first_tcb = (tx_control_block_t *) 8735779Sxy150489 LIST_GET_HEAD(pending_list); 8745779Sxy150489 tbd = &tx_ring->tbd_ring[index]; 8755779Sxy150489 8765779Sxy150489 /* 8775779Sxy150489 * Fill the context descriptor with the 8785779Sxy150489 * hardware checksum offload informations. 8795779Sxy150489 */ 880*9188SPaul.Guo@Sun.COM igb_fill_tx_context( 881*9188SPaul.Guo@Sun.COM (struct e1000_adv_tx_context_desc *)tbd, 882*9188SPaul.Guo@Sun.COM ctx, tx_ring->index); 8835779Sxy150489 8845779Sxy150489 index = NEXT_INDEX(index, 1, tx_ring->ring_size); 8855779Sxy150489 desc_num++; 8865779Sxy150489 8875779Sxy150489 /* 8885779Sxy150489 * Store the checksum context data if 8895779Sxy150489 * a new context descriptor is added 8905779Sxy150489 */ 891*9188SPaul.Guo@Sun.COM tx_ring->tx_context = *ctx; 8925779Sxy150489 } 8935779Sxy150489 } 8945779Sxy150489 8955779Sxy150489 first_tbd = &tx_ring->tbd_ring[index]; 8965779Sxy150489 8975779Sxy150489 /* 8985779Sxy150489 * Fill tx data descriptors with the data saved in the pending list. 8995779Sxy150489 * The tx control blocks in the pending list are added to the work list 9005779Sxy150489 * at the same time. 9015779Sxy150489 * 9025779Sxy150489 * The work list is strictly 1:1 corresponding to the descriptor ring. 9035779Sxy150489 * One item of the work list corresponds to one tx descriptor. Because 9045779Sxy150489 * one tx control block can span multiple tx descriptors, the tx 9055779Sxy150489 * control block will be added to the first work list item that 9065779Sxy150489 * corresponds to the first tx descriptor generated from that tx 9075779Sxy150489 * control block. 
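	 *
	 * If a context descriptor was set up above, it is charged to the
	 * first tx control block (first_tcb->desc_num is incremented
	 * below), so that recycling accounts for that extra descriptor.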
9085779Sxy150489 */ 9095779Sxy150489 tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); 9105779Sxy150489 while (tcb != NULL) { 9115779Sxy150489 9125779Sxy150489 for (i = 0; i < tcb->desc_num; i++) { 9135779Sxy150489 tbd = &tx_ring->tbd_ring[index]; 9145779Sxy150489 9155779Sxy150489 tbd->read.buffer_addr = tcb->desc[i].address; 9165779Sxy150489 tbd->read.cmd_type_len = tcb->desc[i].length; 9175779Sxy150489 9185779Sxy150489 tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_RS | 9198571SChenlu.Chen@Sun.COM E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_DATA | 9208571SChenlu.Chen@Sun.COM E1000_ADVTXD_DCMD_IFCS; 9215779Sxy150489 9225779Sxy150489 tbd->read.olinfo_status = 0; 9235779Sxy150489 9245779Sxy150489 index = NEXT_INDEX(index, 1, tx_ring->ring_size); 9255779Sxy150489 desc_num++; 9265779Sxy150489 } 9275779Sxy150489 9285779Sxy150489 if (first_tcb != NULL) { 9295779Sxy150489 /* 9305779Sxy150489 * Count the checksum context descriptor for 9315779Sxy150489 * the first tx control block. 9325779Sxy150489 */ 9335779Sxy150489 first_tcb->desc_num++; 9345779Sxy150489 first_tcb = NULL; 9355779Sxy150489 } 9365779Sxy150489 9375779Sxy150489 /* 9385779Sxy150489 * Add the tx control block to the work list 9395779Sxy150489 */ 9405779Sxy150489 ASSERT(tx_ring->work_list[tcb_index] == NULL); 9415779Sxy150489 tx_ring->work_list[tcb_index] = tcb; 9425779Sxy150489 9435779Sxy150489 tcb_index = index; 9445779Sxy150489 tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); 9455779Sxy150489 } 9465779Sxy150489 9475779Sxy150489 /* 9485779Sxy150489 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only 9495779Sxy150489 * valid in the first descriptor of the packet. 950*9188SPaul.Guo@Sun.COM * 82576 also requires the payload length setting even without LSO 9515779Sxy150489 */ 9525779Sxy150489 ASSERT(first_tbd != NULL); 9535779Sxy150489 first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_IFCS; 954*9188SPaul.Guo@Sun.COM if (ctx != NULL && ctx->lso_flag) { 955*9188SPaul.Guo@Sun.COM first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 956*9188SPaul.Guo@Sun.COM first_tbd->read.olinfo_status |= 957*9188SPaul.Guo@Sun.COM (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len 958*9188SPaul.Guo@Sun.COM - ctx->l4_hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT; 959*9188SPaul.Guo@Sun.COM } else { 960*9188SPaul.Guo@Sun.COM if (hw->mac.type == e1000_82576) { 961*9188SPaul.Guo@Sun.COM first_tbd->read.olinfo_status |= 962*9188SPaul.Guo@Sun.COM (mbsize << E1000_ADVTXD_PAYLEN_SHIFT); 963*9188SPaul.Guo@Sun.COM } 9648571SChenlu.Chen@Sun.COM } 9655779Sxy150489 9665779Sxy150489 /* Set hardware checksum bits */ 9675779Sxy150489 if (hcksum_flags != 0) { 9685779Sxy150489 if (hcksum_flags & HCK_IPV4_HDRCKSUM) 9695779Sxy150489 first_tbd->read.olinfo_status |= 9705779Sxy150489 E1000_TXD_POPTS_IXSM << 8; 9715779Sxy150489 if (hcksum_flags & HCK_PARTIALCKSUM) 9725779Sxy150489 first_tbd->read.olinfo_status |= 9735779Sxy150489 E1000_TXD_POPTS_TXSM << 8; 9748275SEric Cheng first_tbd->read.olinfo_status |= tx_ring->index << 4; 9755779Sxy150489 } 9765779Sxy150489 9775779Sxy150489 /* 9785779Sxy150489 * The last descriptor of packet needs End Of Packet (EOP), 9795779Sxy150489 * and Report Status (RS) bits set 9805779Sxy150489 */ 9815779Sxy150489 ASSERT(tbd != NULL); 9825779Sxy150489 tbd->read.cmd_type_len |= 9835779Sxy150489 E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS; 9845779Sxy150489 9858275SEric Cheng IGB_DEBUG_STAT(tx_ring->stat_pkt_cnt); 9868275SEric Cheng 9875779Sxy150489 /* 9885779Sxy150489 * Sync the DMA buffer of the tx descriptor ring 9895779Sxy150489 
*/ 9905779Sxy150489 DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV); 9915779Sxy150489 9925779Sxy150489 /* 9935779Sxy150489 * Update the number of the free tx descriptors. 9945779Sxy150489 * The mutual exclusion between the transmission and the recycling 9955779Sxy150489 * (for the tx descriptor ring and the work list) is implemented 9965779Sxy150489 * with the atomic operation on the number of the free tx descriptors. 9975779Sxy150489 * 9985779Sxy150489 * Note: we should always decrement the counter tbd_free before 9995779Sxy150489 * advancing the hardware TDT pointer to avoid the race condition - 10005779Sxy150489 * before the counter tbd_free is decremented, the transmit of the 10015779Sxy150489 * tx descriptors has done and the counter tbd_free is increased by 10025779Sxy150489 * the tx recycling. 10035779Sxy150489 */ 10045779Sxy150489 i = igb_atomic_reserve(&tx_ring->tbd_free, desc_num); 10055779Sxy150489 ASSERT(i >= 0); 10065779Sxy150489 10075779Sxy150489 tx_ring->tbd_tail = index; 10085779Sxy150489 10095779Sxy150489 /* 10105779Sxy150489 * Advance the hardware TDT pointer of the tx descriptor ring 10115779Sxy150489 */ 10125779Sxy150489 E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), index); 10135779Sxy150489 10146624Sgl147354 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) { 10156624Sgl147354 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 10166624Sgl147354 } 10176624Sgl147354 10185779Sxy150489 return (desc_num); 10195779Sxy150489 } 10205779Sxy150489 10215779Sxy150489 /* 10225779Sxy150489 * igb_save_desc 10235779Sxy150489 * 10245779Sxy150489 * Save the address/length pair to the private array 10255779Sxy150489 * of the tx control block. The address/length pairs 10265779Sxy150489 * will be filled into the tx descriptor ring later. 10275779Sxy150489 */ 10285779Sxy150489 static void 10295779Sxy150489 igb_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length) 10305779Sxy150489 { 10315779Sxy150489 sw_desc_t *desc; 10325779Sxy150489 10335779Sxy150489 desc = &tcb->desc[tcb->desc_num]; 10345779Sxy150489 desc->address = address; 10355779Sxy150489 desc->length = length; 10365779Sxy150489 10375779Sxy150489 tcb->desc_num++; 10385779Sxy150489 } 10395779Sxy150489 10405779Sxy150489 /* 10415779Sxy150489 * igb_tx_recycle_legacy 10425779Sxy150489 * 10435779Sxy150489 * Recycle the tx descriptors and tx control blocks. 10445779Sxy150489 * 10455779Sxy150489 * The work list is traversed to check if the corresponding 10465779Sxy150489 * tx descriptors have been transmitted. If so, the resources 10475779Sxy150489 * bound to the tx control blocks will be freed, and those 10485779Sxy150489 * tx control blocks will be returned to the free list. 10495779Sxy150489 */ 10505779Sxy150489 uint32_t 10515779Sxy150489 igb_tx_recycle_legacy(igb_tx_ring_t *tx_ring) 10525779Sxy150489 { 10535779Sxy150489 uint32_t index, last_index; 10545779Sxy150489 int desc_num; 10555779Sxy150489 boolean_t desc_done; 10565779Sxy150489 tx_control_block_t *tcb; 10575779Sxy150489 link_list_t pending_list; 10586624Sgl147354 igb_t *igb = tx_ring->igb; 10595779Sxy150489 10605779Sxy150489 /* 10615779Sxy150489 * The mutex_tryenter() is used to avoid unnecessary 10625779Sxy150489 * lock contention. 
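	 * If another thread already holds recycle_lock, this call simply
	 * returns 0 and lets that thread do the recycling.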
10635779Sxy150489 */ 10645779Sxy150489 if (mutex_tryenter(&tx_ring->recycle_lock) == 0) 10655779Sxy150489 return (0); 10665779Sxy150489 10675779Sxy150489 ASSERT(tx_ring->tbd_free <= tx_ring->ring_size); 10685779Sxy150489 10695779Sxy150489 if (tx_ring->tbd_free == tx_ring->ring_size) { 10705779Sxy150489 tx_ring->recycle_fail = 0; 10715779Sxy150489 tx_ring->stall_watchdog = 0; 10725779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 10735779Sxy150489 return (0); 10745779Sxy150489 } 10755779Sxy150489 10765779Sxy150489 /* 10775779Sxy150489 * Sync the DMA buffer of the tx descriptor ring 10785779Sxy150489 */ 10795779Sxy150489 DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL); 10805779Sxy150489 10816624Sgl147354 if (igb_check_dma_handle( 10826624Sgl147354 tx_ring->tbd_area.dma_handle) != DDI_FM_OK) { 10836624Sgl147354 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 10846624Sgl147354 } 10856624Sgl147354 10865779Sxy150489 LINK_LIST_INIT(&pending_list); 10875779Sxy150489 desc_num = 0; 10885779Sxy150489 index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */ 10895779Sxy150489 10905779Sxy150489 tcb = tx_ring->work_list[index]; 10915779Sxy150489 ASSERT(tcb != NULL); 10925779Sxy150489 10935779Sxy150489 desc_done = B_TRUE; 10945779Sxy150489 while (desc_done && (tcb != NULL)) { 10955779Sxy150489 10965779Sxy150489 /* 10975779Sxy150489 * Get the last tx descriptor of the tx control block. 10985779Sxy150489 * If the last tx descriptor is done, it is done with 10995779Sxy150489 * all the tx descriptors of the tx control block. 11005779Sxy150489 * Then the tx control block and all the corresponding 11015779Sxy150489 * tx descriptors can be recycled. 11025779Sxy150489 */ 11035779Sxy150489 last_index = NEXT_INDEX(index, tcb->desc_num - 1, 11045779Sxy150489 tx_ring->ring_size); 11055779Sxy150489 11065779Sxy150489 /* 11075779Sxy150489 * Check if the Descriptor Done bit is set 11085779Sxy150489 */ 11095779Sxy150489 desc_done = tx_ring->tbd_ring[last_index].wb.status & 11105779Sxy150489 E1000_TXD_STAT_DD; 11115779Sxy150489 if (desc_done) { 11125779Sxy150489 /* 11135779Sxy150489 * Strip off the tx control block from the work list, 11145779Sxy150489 * and add it to the pending list. 
11155779Sxy150489 */ 11165779Sxy150489 tx_ring->work_list[index] = NULL; 11175779Sxy150489 LIST_PUSH_TAIL(&pending_list, &tcb->link); 11185779Sxy150489 11195779Sxy150489 /* 11205779Sxy150489 * Count the total number of the tx descriptors recycled 11215779Sxy150489 */ 11225779Sxy150489 desc_num += tcb->desc_num; 11235779Sxy150489 11245779Sxy150489 /* 11255779Sxy150489 * Advance the index of the tx descriptor ring 11265779Sxy150489 */ 11275779Sxy150489 index = NEXT_INDEX(last_index, 1, tx_ring->ring_size); 11285779Sxy150489 11295779Sxy150489 tcb = tx_ring->work_list[index]; 11305779Sxy150489 } 11315779Sxy150489 } 11325779Sxy150489 11335779Sxy150489 /* 11345779Sxy150489 * If no tx descriptors are recycled, no need to do more processing 11355779Sxy150489 */ 11365779Sxy150489 if (desc_num == 0) { 11375779Sxy150489 tx_ring->recycle_fail++; 11385779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 11395779Sxy150489 return (0); 11405779Sxy150489 } 11415779Sxy150489 11425779Sxy150489 tx_ring->recycle_fail = 0; 11435779Sxy150489 tx_ring->stall_watchdog = 0; 11445779Sxy150489 11455779Sxy150489 /* 11465779Sxy150489 * Update the head index of the tx descriptor ring 11475779Sxy150489 */ 11485779Sxy150489 tx_ring->tbd_head = index; 11495779Sxy150489 11505779Sxy150489 /* 11515779Sxy150489 * Update the number of the free tx descriptors with atomic operations 11525779Sxy150489 */ 11535779Sxy150489 atomic_add_32(&tx_ring->tbd_free, desc_num); 11545779Sxy150489 11555779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 11565779Sxy150489 11575779Sxy150489 /* 11585779Sxy150489 * Free the resources used by the tx control blocks 11595779Sxy150489 * in the pending list 11605779Sxy150489 */ 11615779Sxy150489 tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); 11625779Sxy150489 while (tcb != NULL) { 11635779Sxy150489 /* 11645779Sxy150489 * Release the resources occupied by the tx control block 11655779Sxy150489 */ 11665779Sxy150489 igb_free_tcb(tcb); 11675779Sxy150489 11685779Sxy150489 tcb = (tx_control_block_t *) 11695779Sxy150489 LIST_GET_NEXT(&pending_list, &tcb->link); 11705779Sxy150489 } 11715779Sxy150489 11725779Sxy150489 /* 11735779Sxy150489 * Add the tx control blocks in the pending list to the free list. 11745779Sxy150489 */ 11755779Sxy150489 igb_put_free_list(tx_ring, &pending_list); 11765779Sxy150489 11775779Sxy150489 return (desc_num); 11785779Sxy150489 } 11795779Sxy150489 11805779Sxy150489 /* 11815779Sxy150489 * igb_tx_recycle_head_wb 11825779Sxy150489 * 11835779Sxy150489 * Check the head write-back, and recycle all the transmitted 11845779Sxy150489 * tx descriptors and tx control blocks. 11855779Sxy150489 */ 11865779Sxy150489 uint32_t 11875779Sxy150489 igb_tx_recycle_head_wb(igb_tx_ring_t *tx_ring) 11885779Sxy150489 { 11895779Sxy150489 uint32_t index; 11905779Sxy150489 uint32_t head_wb; 11915779Sxy150489 int desc_num; 11925779Sxy150489 tx_control_block_t *tcb; 11935779Sxy150489 link_list_t pending_list; 11946624Sgl147354 igb_t *igb = tx_ring->igb; 11955779Sxy150489 11965779Sxy150489 /* 11975779Sxy150489 * The mutex_tryenter() is used to avoid unnecessary 11985779Sxy150489 * lock contention. 
11995779Sxy150489 */ 12005779Sxy150489 if (mutex_tryenter(&tx_ring->recycle_lock) == 0) 12015779Sxy150489 return (0); 12025779Sxy150489 12035779Sxy150489 ASSERT(tx_ring->tbd_free <= tx_ring->ring_size); 12045779Sxy150489 12055779Sxy150489 if (tx_ring->tbd_free == tx_ring->ring_size) { 12065779Sxy150489 tx_ring->recycle_fail = 0; 12075779Sxy150489 tx_ring->stall_watchdog = 0; 12085779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 12095779Sxy150489 return (0); 12105779Sxy150489 } 12115779Sxy150489 12125779Sxy150489 /* 12135779Sxy150489 * Sync the DMA buffer of the tx descriptor ring 12145779Sxy150489 * 12155779Sxy150489 * Note: For head write-back mode, the tx descriptors will not 12165779Sxy150489 * be written back, but the head write-back value is stored at 12175779Sxy150489 * the last extra tbd at the end of the DMA area, we still need 12185779Sxy150489 * to sync the head write-back value for kernel. 12195779Sxy150489 * 12205779Sxy150489 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL); 12215779Sxy150489 */ 12225779Sxy150489 (void) ddi_dma_sync(tx_ring->tbd_area.dma_handle, 12235779Sxy150489 sizeof (union e1000_adv_tx_desc) * tx_ring->ring_size, 12245779Sxy150489 sizeof (uint32_t), 12255779Sxy150489 DDI_DMA_SYNC_FORKERNEL); 12265779Sxy150489 12276624Sgl147354 if (igb_check_dma_handle( 12286624Sgl147354 tx_ring->tbd_area.dma_handle) != DDI_FM_OK) { 12296624Sgl147354 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 12306624Sgl147354 } 12316624Sgl147354 12325779Sxy150489 LINK_LIST_INIT(&pending_list); 12335779Sxy150489 desc_num = 0; 12345779Sxy150489 index = tx_ring->tbd_head; /* Next index to clean */ 12355779Sxy150489 12365779Sxy150489 /* 12375779Sxy150489 * Get the value of head write-back 12385779Sxy150489 */ 12395779Sxy150489 head_wb = *tx_ring->tbd_head_wb; 12405779Sxy150489 while (index != head_wb) { 12415779Sxy150489 tcb = tx_ring->work_list[index]; 12425779Sxy150489 ASSERT(tcb != NULL); 12435779Sxy150489 12445779Sxy150489 if (OFFSET(index, head_wb, tx_ring->ring_size) < 12455779Sxy150489 tcb->desc_num) { 12465779Sxy150489 /* 12475779Sxy150489 * The current tx control block is not 12485779Sxy150489 * completely transmitted, stop recycling 12495779Sxy150489 */ 12505779Sxy150489 break; 12515779Sxy150489 } 12525779Sxy150489 12535779Sxy150489 /* 12545779Sxy150489 * Strip off the tx control block from the work list, 12555779Sxy150489 * and add it to the pending list. 
12565779Sxy150489 */ 12575779Sxy150489 tx_ring->work_list[index] = NULL; 12585779Sxy150489 LIST_PUSH_TAIL(&pending_list, &tcb->link); 12595779Sxy150489 12605779Sxy150489 /* 12615779Sxy150489 * Advance the index of the tx descriptor ring 12625779Sxy150489 */ 12635779Sxy150489 index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size); 12645779Sxy150489 12655779Sxy150489 /* 12665779Sxy150489 * Count the total number of the tx descriptors recycled 12675779Sxy150489 */ 12685779Sxy150489 desc_num += tcb->desc_num; 12695779Sxy150489 } 12705779Sxy150489 12715779Sxy150489 /* 12725779Sxy150489 * If no tx descriptors are recycled, no need to do more processing 12735779Sxy150489 */ 12745779Sxy150489 if (desc_num == 0) { 12755779Sxy150489 tx_ring->recycle_fail++; 12765779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 12775779Sxy150489 return (0); 12785779Sxy150489 } 12795779Sxy150489 12805779Sxy150489 tx_ring->recycle_fail = 0; 12815779Sxy150489 tx_ring->stall_watchdog = 0; 12825779Sxy150489 12835779Sxy150489 /* 12845779Sxy150489 * Update the head index of the tx descriptor ring 12855779Sxy150489 */ 12865779Sxy150489 tx_ring->tbd_head = index; 12875779Sxy150489 12885779Sxy150489 /* 12895779Sxy150489 * Update the number of the free tx descriptors with atomic operations 12905779Sxy150489 */ 12915779Sxy150489 atomic_add_32(&tx_ring->tbd_free, desc_num); 12925779Sxy150489 12935779Sxy150489 mutex_exit(&tx_ring->recycle_lock); 12945779Sxy150489 12955779Sxy150489 /* 12965779Sxy150489 * Free the resources used by the tx control blocks 12975779Sxy150489 * in the pending list 12985779Sxy150489 */ 12995779Sxy150489 tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); 13005779Sxy150489 while (tcb) { 13015779Sxy150489 /* 13025779Sxy150489 * Release the resources occupied by the tx control block 13035779Sxy150489 */ 13045779Sxy150489 igb_free_tcb(tcb); 13055779Sxy150489 13065779Sxy150489 tcb = (tx_control_block_t *) 13075779Sxy150489 LIST_GET_NEXT(&pending_list, &tcb->link); 13085779Sxy150489 } 13095779Sxy150489 13105779Sxy150489 /* 13115779Sxy150489 * Add the tx control blocks in the pending list to the free list. 13125779Sxy150489 */ 13135779Sxy150489 igb_put_free_list(tx_ring, &pending_list); 13145779Sxy150489 13155779Sxy150489 return (desc_num); 13165779Sxy150489 } 13175779Sxy150489 13185779Sxy150489 /* 13195779Sxy150489 * igb_free_tcb - free up the tx control block 13205779Sxy150489 * 13215779Sxy150489 * Free the resources of the tx control block, including 13225779Sxy150489 * unbind the previously bound DMA handle, and reset other 13235779Sxy150489 * control fields. 13245779Sxy150489 */ 13255779Sxy150489 void 13265779Sxy150489 igb_free_tcb(tx_control_block_t *tcb) 13275779Sxy150489 { 13285779Sxy150489 switch (tcb->tx_type) { 13295779Sxy150489 case USE_COPY: 13305779Sxy150489 /* 13315779Sxy150489 * Reset the buffer length that is used for copy 13325779Sxy150489 */ 13335779Sxy150489 tcb->tx_buf.len = 0; 13345779Sxy150489 break; 13355779Sxy150489 case USE_DMA: 13365779Sxy150489 /* 13375779Sxy150489 * Release the DMA resource that is used for 13385779Sxy150489 * DMA binding. 
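		 * The DMA handle itself is kept for reuse; only the
		 * binding that was set up in igb_tx_bind() is torn
		 * down here.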
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * igb_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine igb_put_free_list.
 */
static tx_control_block_t *
igb_get_free_list(igb_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control block
	 * in the free list.
	 */
	if (igb_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * igb_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between igb_get_free_list and igb_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
igb_put_free_list(igb_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control block
	 * in the free list.
This operation must be placed 14295779Sxy150489 * under the protection of the lock. 14305779Sxy150489 */ 14315779Sxy150489 atomic_add_32(&tx_ring->tcb_free, tcb_num); 14325779Sxy150489 14335779Sxy150489 mutex_exit(&tx_ring->tcb_tail_lock); 14345779Sxy150489 } 1435