/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * To transmit one mblk through one specified ring.
 *
 * One mblk can consist of several fragments; each fragment is
 * processed with a different method based on its size. Fragments
 * smaller than the bcopy threshold are processed with bcopy;
 * otherwise they are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind an mblk fragment with DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So, to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put on the work list.
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
    ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
    ixgbe_t *ixgbe = tx_ring->ixgbe;
    tx_type_t current_flag, next_flag;
    uint32_t current_len, next_len;
    uint32_t desc_total;
    size_t mbsize;
    int desc_num;
    boolean_t copy_done, eop;
    mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
    tx_control_block_t *tcb;
    ixgbe_tx_context_t tx_context, *ctx;
    link_list_t pending_list;
    uint32_t len, hdr_frag_len, hdr_len;
    uint32_t copy_thresh;
    mblk_t *hdr_new_mp = NULL;
    mblk_t *hdr_pre_mp = NULL;
    mblk_t *hdr_nmp = NULL;

    ASSERT(mp->b_next == NULL);

    if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
        (ixgbe->ixgbe_state & IXGBE_ERROR) ||
        !(ixgbe->ixgbe_state & IXGBE_STARTED)) {
        return (mp);
    }

    copy_thresh = ixgbe->tx_copy_thresh;

    /* Get the mblk size */
    mbsize = 0;
    for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
        mbsize += MBLKL(nmp);
    }

    if (ixgbe->tx_hcksum_enable) {
        /*
         * Retrieve checksum context information from the mblk
         * that will be used to decide whether/how to fill the
         * context descriptor.
         */
        ctx = &tx_context;
        if (ixgbe_get_context(mp, ctx) < 0) {
            freemsg(mp);
            return (NULL);
        }

        /*
         * If the mblk size exceeds the max size ixgbe could
         * process, then discard this mblk and return NULL.
         */
        if ((ctx->lso_flag &&
            ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
            (!ctx->lso_flag &&
            (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
            freemsg(mp);
            IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
            return (NULL);
        }
    } else {
        ctx = NULL;
    }

    /*
     * Check and recycle tx descriptors.
     * The recycle threshold here should be selected carefully.
     */
    if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
        tx_ring->tx_recycle(tx_ring);
    }

    /*
     * After the recycling, if the tbd_free is less than the
     * overload_threshold, assert overload and return mp;
     * the tx will need to be re-scheduled.
     */
    if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
        tx_ring->reschedule = B_TRUE;
        IXGBE_DEBUG_STAT(tx_ring->stat_overload);
        return (mp);
    }

    /*
     * The pending_list is a linked list that is used to save
     * the tx control blocks that have packet data processed
     * but have not yet put the data into the tx descriptor ring.
     * It is used to reduce the lock contention of the tx_lock.
     */
    LINK_LIST_INIT(&pending_list);
    desc_num = 0;
    desc_total = 0;

    /*
     * The software should guarantee the LSO packet header (MAC+IP+TCP)
     * to be within one descriptor. Here we reallocate and refill the
     * header if its physical memory is non-contiguous.
     */
    if ((ctx != NULL) && ctx->lso_flag) {
        /* find the last fragment of the header */
        len = MBLKL(mp);
        ASSERT(len > 0);
        hdr_nmp = mp;
        hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
        while (len < hdr_len) {
            hdr_pre_mp = hdr_nmp;
            hdr_nmp = hdr_nmp->b_cont;
            len += MBLKL(hdr_nmp);
        }
        /*
         * If the header and the payload are in different mblks,
         * we simply force the header to be copied into the
         * pre-allocated page-aligned buffer.
         */
        if (len == hdr_len)
            goto adjust_threshold;

        hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
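        /*
         * Illustrative example (not from the original code): with a
         * 54-byte header (14 MAC + 20 IP + 20 TCP) split as 40 bytes
         * in the first mblk and the remainder sharing a 1460-byte
         * mblk with the payload, the loop above ends with
         * len = 40 + 1460 = 1500, so
         * hdr_frag_len = 54 - (1500 - 1460) = 14 bytes of header
         * remaining in hdr_nmp.
         */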
        /*
         * There are two cases in which we need to reallocate an
         * mblk for the last header fragment:
         * 1. the header is in multiple mblks and the last fragment
         *    shares the same mblk with the payload
         * 2. the header is in a single mblk shared with the payload
         *    and the header is physically non-contiguous
         */
        if ((hdr_nmp != mp) ||
            (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
            < hdr_len)) {
            IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
            /*
             * reallocate the mblk for the last header fragment,
             * expect to bcopy into the pre-allocated page-aligned
             * buffer
             */
            hdr_new_mp = allocb(hdr_frag_len, NULL);
            if (!hdr_new_mp)
                return (mp);
            bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
                hdr_frag_len);
            /* link the new header fragment with the other parts */
            hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
            hdr_new_mp->b_cont = hdr_nmp;
            if (hdr_pre_mp)
                hdr_pre_mp->b_cont = hdr_new_mp;
            else
                mp = hdr_new_mp;
            hdr_nmp->b_rptr += hdr_frag_len;
        }
adjust_threshold:
        /*
         * adjust the bcopy threshold to guarantee that the
         * header uses the bcopy way
         */
        if (copy_thresh < hdr_len)
            copy_thresh = hdr_len;
    }

    current_mp = mp;
    current_len = MBLKL(current_mp);
    /*
     * Decide which method to use for the first fragment
     */
    current_flag = (current_len <= copy_thresh) ?
        USE_COPY : USE_DMA;
    /*
     * If the mblk includes several contiguous small fragments,
     * they may be copied into one buffer. This flag is used to
     * indicate whether there are pending fragments that need to
     * be copied to the current tx buffer.
     *
     * If this flag is B_TRUE, it indicates that a new tx control
     * block is needed to process the next fragment using either
     * copy or DMA binding.
     *
     * Otherwise, it indicates that the next fragment will be
     * copied to the current tx buffer that is maintained by the
     * current tx control block. No new tx control block is needed.
     */
    copy_done = B_TRUE;
    while (current_mp) {
        next_mp = current_mp->b_cont;
        eop = (next_mp == NULL); /* Last fragment of the packet? */
        next_len = eop ? 0: MBLKL(next_mp);

        /*
         * When the current fragment is an empty fragment, if
         * the next fragment will still be copied to the current
         * tx buffer, we cannot skip this fragment here, because
         * the copy processing is pending completion. We have
         * to process this empty fragment in the tx_copy routine.
         *
         * If the copy processing is completed or a DMA binding
         * processing is just completed, we can just skip this
         * empty fragment.
         */
        if ((current_len == 0) && (copy_done)) {
            current_mp = next_mp;
            current_len = next_len;
            current_flag = (current_len <= copy_thresh) ?
                USE_COPY : USE_DMA;
            continue;
        }

        if (copy_done) {
            /*
             * Get a new tx control block from the free list
             */
            tcb = ixgbe_get_free_list(tx_ring);

            if (tcb == NULL) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
                goto tx_failure;
            }

            /*
             * Push the tx control block to the pending list
             * to avoid taking the lock too early
             */
            LIST_PUSH_TAIL(&pending_list, &tcb->link);
        }

        if (current_flag == USE_COPY) {
            /*
             * Check whether to use bcopy or DMA binding to process
             * the next fragment, and if using bcopy, whether we
             * need to continue copying the next fragment into the
             * current tx buffer.
             */
            ASSERT((tcb->tx_buf.len + current_len) <=
                tcb->tx_buf.size);

            if (eop) {
                /*
                 * This is the last fragment of the packet, so
                 * the copy processing will be completed with
                 * this fragment.
                 */
                next_flag = USE_NONE;
                copy_done = B_TRUE;
            } else if ((tcb->tx_buf.len + current_len + next_len) >
                tcb->tx_buf.size) {
                /*
                 * If the next fragment is too large to be
                 * copied to the current tx buffer, we need
                 * to complete the current copy processing.
                 */
                next_flag = (next_len > copy_thresh) ?
                    USE_DMA: USE_COPY;
                copy_done = B_TRUE;
            } else if (next_len > copy_thresh) {
                /*
                 * The next fragment needs to be processed with
                 * DMA binding. So the copy processing will be
                 * completed with the current fragment.
                 */
                next_flag = USE_DMA;
                copy_done = B_TRUE;
            } else {
                /*
                 * Continue to copy the next fragment to the
                 * current tx buffer.
                 */
                next_flag = USE_COPY;
                copy_done = B_FALSE;
            }

            desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
                current_len, copy_done);
        } else {
            /*
             * Check whether to use bcopy or DMA binding to process
             * the next fragment.
             */
            next_flag = (next_len > copy_thresh) ?
                USE_DMA: USE_COPY;
            ASSERT(copy_done == B_TRUE);

            desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
                current_len);
        }

        if (desc_num > 0)
            desc_total += desc_num;
        else if (desc_num < 0)
            goto tx_failure;

        current_mp = next_mp;
        current_len = next_len;
        current_flag = next_flag;
    }

    /*
     * Attach the mblk to the last tx control block
     */
    ASSERT(tcb);
    ASSERT(tcb->mp == NULL);
    tcb->mp = mp;

    /*
     * The 82598/82599 chipset has a limitation that no more than
     * 32 tx descriptors can be transmitted out at one time.
     *
     * Here is a workaround for it: pull up the mblk, then send it
     * out with the bind way. By doing so, no more than MAX_COOKIE
     * (18) descriptors are needed.
     */
    if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
        IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

        /*
         * Discard the mblk and free the used resources
         */
        tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
        while (tcb) {
            tcb->mp = NULL;
            ixgbe_free_tcb(tcb);
            tcb = (tx_control_block_t *)
                LIST_GET_NEXT(&pending_list, &tcb->link);
        }

        /*
         * Return the tx control blocks in the pending list to
         * the free list.
         */
        ixgbe_put_free_list(tx_ring, &pending_list);

        /*
         * pull up the mblk and send it out with the bind way
         */
        if ((pull_mp = msgpullup(mp, -1)) == NULL) {
            tx_ring->reschedule = B_TRUE;

            /*
             * If a new mblk has been allocated for the last header
             * fragment of an LSO packet, we should restore the
             * modified mp.
             */
            if (hdr_new_mp) {
                hdr_new_mp->b_cont = NULL;
                freeb(hdr_new_mp);
                hdr_nmp->b_rptr -= hdr_frag_len;
                if (hdr_pre_mp)
                    hdr_pre_mp->b_cont = hdr_nmp;
                else
                    mp = hdr_nmp;
            }
            return (mp);
        }

        LINK_LIST_INIT(&pending_list);
        desc_total = 0;

        /*
         * If the packet is an LSO packet, we simply
         * transmit the header in one descriptor using the copy way
         */
        if ((ctx != NULL) && ctx->lso_flag) {
            hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
                ctx->l4_hdr_len;

            tcb = ixgbe_get_free_list(tx_ring);
            if (tcb == NULL) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
                goto tx_failure;
            }
            desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
                hdr_len, B_TRUE);
            LIST_PUSH_TAIL(&pending_list, &tcb->link);
            desc_total += desc_num;

            pull_mp->b_rptr += hdr_len;
        }

        tcb = ixgbe_get_free_list(tx_ring);
        if (tcb == NULL) {
            IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
            goto tx_failure;
        }
        if ((ctx != NULL) && ctx->lso_flag) {
            desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
                mbsize - hdr_len);
        } else {
            desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
                mbsize);
        }
        if (desc_num < 0) {
            goto tx_failure;
        }
        LIST_PUSH_TAIL(&pending_list, &tcb->link);

        desc_total += desc_num;
        tcb->mp = pull_mp;
    }

    /*
     * Before filling the tx descriptor ring with the data, we need to
     * ensure there are adequate free descriptors for transmit
     * (including one context descriptor).
     */
    if (tx_ring->tbd_free < (desc_total + 1)) {
        tx_ring->tx_recycle(tx_ring);
    }

    mutex_enter(&tx_ring->tx_lock);
    /*
     * If the number of free tx descriptors is not enough for transmit,
     * then return mp.
     *
     * Note: we must put this check under the mutex protection to
     * ensure the correctness when multiple threads access it in
     * parallel.
     */
    if (tx_ring->tbd_free < (desc_total + 1)) {
        IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
        mutex_exit(&tx_ring->tx_lock);
        goto tx_failure;
    }

    desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
        mbsize);

    ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

    tx_ring->stat_obytes += mbsize;
    tx_ring->stat_opackets++;

    mutex_exit(&tx_ring->tx_lock);

    /*
     * Now that the transmission succeeds, we need to free the original
     * mp if we used the pulled-up mblk for transmission.
     */
    if (pull_mp) {
        freemsg(mp);
    }

    return (NULL);

tx_failure:
    /*
     * If transmission fails, we need to free the pulled-up mblk.
     */
    if (pull_mp) {
        freemsg(pull_mp);
    }

    /*
     * If a new mblk has been allocated for the last header
     * fragment of an LSO packet, we should restore the
     * modified mp.
     */
    if (hdr_new_mp) {
        hdr_new_mp->b_cont = NULL;
        freeb(hdr_new_mp);
        hdr_nmp->b_rptr -= hdr_frag_len;
        if (hdr_pre_mp)
            hdr_pre_mp->b_cont = hdr_nmp;
        else
            mp = hdr_nmp;
    }
    /*
     * Discard the mblk and free the used resources
     */
    tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
    while (tcb) {
        tcb->mp = NULL;

        ixgbe_free_tcb(tcb);

        tcb = (tx_control_block_t *)
            LIST_GET_NEXT(&pending_list, &tcb->link);
    }

    /*
     * Return the tx control blocks in the pending list to the free list.
     */
    ixgbe_put_free_list(tx_ring, &pending_list);

    /* Transmit failed, do not drop the mblk, reschedule the transmit */
    tx_ring->reschedule = B_TRUE;

    return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
    dma_buffer_t *tx_buf;
    uint32_t desc_num;
    _NOTE(ARGUNUSED(tx_ring));

    tx_buf = &tcb->tx_buf;

    /*
     * Copy the packet data of the mblk fragment into the
     * pre-allocated tx buffer, which is maintained by the
     * tx control block.
     *
     * Several mblk fragments can be copied into one tx buffer.
     * The destination address of the current copied fragment in
     * the tx buffer is next to the end of the previous copied
     * fragment.
     */
    if (len > 0) {
        bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

        tx_buf->len += len;
        tcb->frag_num++;
    }

    desc_num = 0;

    /*
     * If it is the last fragment copied to the current tx buffer,
     * in other words, if there's no remaining fragment or the remaining
     * fragment requires a new tx control block to process, we need to
     * complete the current copy processing by syncing up the current
     * DMA buffer and saving the descriptor data.
     */
    if (copy_done) {
        /*
         * Sync the DMA buffer of the packet data
         */
        DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

        tcb->tx_type = USE_COPY;

        /*
         * Save the address and length to the private data structure
         * of the tx control block, which will be used to fill the
         * tx descriptor ring after all the fragments are processed.
         */
        ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
        desc_num++;
    }

    return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
    int status, i;
    ddi_dma_cookie_t dma_cookie;
    uint_t ncookies;
    int desc_num;

    /*
     * Use DMA binding to process the mblk fragment
     */
    status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
        (caddr_t)mp->b_rptr, len,
        DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
        0, &dma_cookie, &ncookies);

    if (status != DDI_DMA_MAPPED) {
        IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
        return (-1);
    }

    tcb->frag_num++;
    tcb->tx_type = USE_DMA;
    /*
     * Each fragment can span several cookies. One cookie will have
     * one tx descriptor to transmit.
     */
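    /*
     * Illustrative note (not from the original code): the cookie
     * count reflects how the buffer maps onto physically contiguous
     * ranges. For example, a fragment that crosses two page
     * boundaries may bind to three cookies and thus consume three
     * tx descriptors; the exact count depends on the DMA attributes
     * and the physical layout of the buffer.
     */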
    desc_num = 0;
    for (i = ncookies; i > 0; i--) {
        /*
         * Save the address and length to the private data structure
         * of the tx control block, which will be used to fill the
         * tx descriptor ring after all the fragments are processed.
         */
        ixgbe_save_desc(tcb,
            dma_cookie.dmac_laddress,
            dma_cookie.dmac_size);

        desc_num++;

        if (i > 1)
            ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
    }

    return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
    uint32_t start;
    uint32_t hckflags;
    uint32_t lsoflags;
    uint32_t mss;
    uint32_t len;
    uint32_t size;
    uint32_t offset;
    unsigned char *pos;
    ushort_t etype;
    uint32_t mac_hdr_len;
    uint32_t l4_proto;
    uint32_t l4_hdr_len;

    ASSERT(mp != NULL);

    mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
    bzero(ctx, sizeof (ixgbe_tx_context_t));

    if (hckflags == 0) {
        return (0);
    }

    ctx->hcksum_flags = hckflags;

    mac_lso_get(mp, &mss, &lsoflags);
    ctx->mss = mss;
    ctx->lso_flag = (lsoflags == HW_LSO);

    /*
     * LSO relies on tx h/w checksum, so here we will drop the packet
     * if the h/w checksum flags are not declared.
     */
    if (ctx->lso_flag) {
        if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
            (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
            IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
                "checksum flags are not specified when doing LSO");
            return (-1);
        }
    }

    etype = 0;
    mac_hdr_len = 0;
    l4_proto = 0;

    /*
     * First get the position of the ether_type/ether_tpid.
     * Here we don't assume the ether (VLAN) header is fully included
     * in one mblk fragment, so we go through the fragments to parse
     * the ether type.
     */
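    /*
     * Note on the parsing loops below: after each loop, "size" is the
     * total number of bytes walked (including the current mblk) and
     * "len" is the current mblk's length, so the field at packet
     * offset "offset" lives at mp->b_rptr + offset + len - size.
     * For example (illustrative), with offset = 12, a first mblk of
     * 8 bytes and a second of 20 bytes, the loop stops with size = 28
     * and len = 20, giving pos = b_rptr + 4 within the second mblk.
     */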
    size = len = MBLKL(mp);
    offset = offsetof(struct ether_header, ether_type);
    while (size <= offset) {
        mp = mp->b_cont;
        ASSERT(mp != NULL);
        len = MBLKL(mp);
        size += len;
    }
    pos = mp->b_rptr + offset + len - size;

    etype = ntohs(*(ushort_t *)(uintptr_t)pos);
    if (etype == ETHERTYPE_VLAN) {
        /*
         * Get the position of the ether_type in the VLAN header
         */
        offset = offsetof(struct ether_vlan_header, ether_type);
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        etype = ntohs(*(ushort_t *)(uintptr_t)pos);
        mac_hdr_len = sizeof (struct ether_vlan_header);
    } else {
        mac_hdr_len = sizeof (struct ether_header);
    }

    /*
     * Here we don't assume the IP(V6) header is fully included in
     * one mblk fragment.
     */
    switch (etype) {
    case ETHERTYPE_IP:
        if (ctx->lso_flag) {
            offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
            while (size <= offset) {
                mp = mp->b_cont;
                ASSERT(mp != NULL);
                len = MBLKL(mp);
                size += len;
            }
            pos = mp->b_rptr + offset + len - size;
            *((uint16_t *)(uintptr_t)(pos)) = 0;

            offset = offsetof(ipha_t, ipha_hdr_checksum) +
                mac_hdr_len;
            while (size <= offset) {
                mp = mp->b_cont;
                ASSERT(mp != NULL);
                len = MBLKL(mp);
                size += len;
            }
            pos = mp->b_rptr + offset + len - size;
            *((uint16_t *)(uintptr_t)(pos)) = 0;
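            /*
             * The two stores above clear the IP total-length and
             * IP header-checksum fields. For LSO the hardware
             * recomputes these per segment, so stale full-packet
             * values must not be left in the header (an assumption
             * based on the usual TSO requirements of these parts).
             */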
            /*
             * To perform ixgbe LSO, here we also need to fill
             * the tcp checksum field of the packet with the
             * following pseudo-header checksum:
             * (ip_source_addr, ip_destination_addr, l4_proto)
             * Currently the tcp/ip stack has done it.
             */
        }

        offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_proto = *(uint8_t *)pos;
        break;
    case ETHERTYPE_IPV6:
        offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_proto = *(uint8_t *)pos;
        break;
    default:
        /* Unrecoverable error */
        IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
        return (-2);
    }

    if (ctx->lso_flag) {
        offset = mac_hdr_len + start;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
    } else {
        /*
         * l4 header length is only required for LSO
         */
        l4_hdr_len = 0;
    }

    ctx->mac_hdr_len = mac_hdr_len;
    ctx->ip_hdr_len = start;
    ctx->l4_proto = l4_proto;
    ctx->l4_hdr_len = l4_hdr_len;

    return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
    ixgbe_tx_context_t *last;

    if (ctx == NULL)
        return (B_FALSE);

    /*
     * Compare the context data retrieved from the mblk and the
     * stored data of the last context descriptor. The data that
     * need to be checked are:
     *	hcksum_flags
     *	l4_proto
     *	mac_hdr_len
     *	ip_hdr_len
     *	lso_flag
     *	mss (only checked for LSO)
     *	l4_hdr_len (only checked for LSO)
     * If any one of the above changes, a new context descriptor
     * will be needed.
     */
    last = &tx_ring->tx_context;

    if ((ctx->hcksum_flags != last->hcksum_flags) ||
        (ctx->l4_proto != last->l4_proto) ||
        (ctx->mac_hdr_len != last->mac_hdr_len) ||
        (ctx->ip_hdr_len != last->ip_hdr_len) ||
        (ctx->lso_flag != last->lso_flag) ||
        (ctx->lso_flag && ((ctx->mss != last->mss) ||
        (ctx->l4_hdr_len != last->l4_hdr_len)))) {
        return (B_TRUE);
    }

    return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
    /*
     * Fill the context descriptor with the checksum
     * context information we've got.
     */
    ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
    ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
        IXGBE_ADVTXD_MACLEN_SHIFT;
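    /*
     * Layout note (per the 82598/82599 datasheets): vlan_macip_lens
     * packs IPLEN in its low bits and MACLEN above it at
     * IXGBE_ADVTXD_MACLEN_SHIFT; e.g. a 14-byte MAC header and a
     * 20-byte IP header yield (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20.
     */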

    ctx_tbd->type_tucmd_mlhl =
        IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

    if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
        ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

    if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
        switch (ctx->l4_proto) {
        case IPPROTO_TCP:
            ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
            break;
        case IPPROTO_UDP:
            /*
             * We don't have to explicitly set:
             *	ctx_tbd->type_tucmd_mlhl |=
             *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
             * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
             */
            break;
        default:
            /* Unrecoverable error */
            IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
            break;
        }
    }

    ctx_tbd->seqnum_seed = 0;

    if (ctx->lso_flag) {
        ctx_tbd->mss_l4len_idx =
            (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
            (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
    } else {
        ctx_tbd->mss_l4len_idx = 0;
    }
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
    struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
    boolean_t load_context;
    uint32_t index, tcb_index, desc_num;
    union ixgbe_adv_tx_desc *tbd, *first_tbd;
    tx_control_block_t *tcb, *first_tcb;
    uint32_t hcksum_flags;
    int i;

    ASSERT(mutex_owned(&tx_ring->tx_lock));

    tbd = NULL;
    first_tbd = NULL;
    first_tcb = NULL;
    desc_num = 0;
    hcksum_flags = 0;
    load_context = B_FALSE;

    /*
     * Get the index of the first tx descriptor that will be filled,
     * and the index of the first work list item that will be attached
     * with the first used tx control block in the pending list.
     * Note: the two indexes are the same.
     */
    index = tx_ring->tbd_tail;
    tcb_index = tx_ring->tbd_tail;

    if (ctx != NULL) {
        hcksum_flags = ctx->hcksum_flags;

        /*
         * Check if a new context descriptor is needed for this packet
         */
        load_context = ixgbe_check_context(tx_ring, ctx);

        if (load_context) {
            tbd = &tx_ring->tbd_ring[index];

            /*
             * Fill the context descriptor with the
             * hardware checksum offload information.
             */
            ixgbe_fill_context(
                (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

            index = NEXT_INDEX(index, 1, tx_ring->ring_size);
            desc_num++;

            /*
             * Store the checksum context data if
             * a new context descriptor is added
             */
            tx_ring->tx_context = *ctx;
        }
    }

    first_tbd = &tx_ring->tbd_ring[index];

    /*
     * Fill tx data descriptors with the data saved in the pending list.
     * The tx control blocks in the pending list are added to the work list
     * at the same time.
     *
     * The work list is strictly 1:1 corresponding to the descriptor ring.
     * One item of the work list corresponds to one tx descriptor. Because
     * one tx control block can span multiple tx descriptors, the tx
     * control block will be added to the first work list item that
     * corresponds to the first tx descriptor generated from that tx
     * control block.
     */
    tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    first_tcb = tcb;
    while (tcb != NULL) {

        for (i = 0; i < tcb->desc_num; i++) {
            tbd = &tx_ring->tbd_ring[index];

            tbd->read.buffer_addr = tcb->desc[i].address;
            tbd->read.cmd_type_len = tcb->desc[i].length;

            tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
                | IXGBE_ADVTXD_DTYP_DATA;

            tbd->read.olinfo_status = 0;

            index = NEXT_INDEX(index, 1, tx_ring->ring_size);
            desc_num++;
        }

        /*
         * Add the tx control block to the work list
         */
        ASSERT(tx_ring->work_list[tcb_index] == NULL);
        tx_ring->work_list[tcb_index] = tcb;

        tcb_index = index;
        tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    }

    if (load_context) {
        /*
         * Count the context descriptor for
         * the first tx control block.
         */
        first_tcb->desc_num++;
    }
    first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

    /*
     * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
     * valid in the first descriptor of the packet.
     * Set paylen in first_tbd for all chip types:
     * the 82599 requires the packet length in the paylen field, with or
     * without LSO, while the 82598 will ignore it in non-LSO mode.
     */
    ASSERT(first_tbd != NULL);
    first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

    switch (hw->mac.type) {
    case ixgbe_mac_82599EB:
        if (ctx != NULL && ctx->lso_flag) {
            first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
            first_tbd->read.olinfo_status |=
                (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
        } else {
            first_tbd->read.olinfo_status |=
                (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
        }
        break;
    case ixgbe_mac_82598EB:
        if (ctx != NULL && ctx->lso_flag) {
            first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
            first_tbd->read.olinfo_status |=
                (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
        }
        break;
    default:
        break;
    }

    /* Set hardware checksum bits */
    if (hcksum_flags != 0) {
        if (hcksum_flags & HCK_IPV4_HDRCKSUM)
            first_tbd->read.olinfo_status |=
                IXGBE_ADVTXD_POPTS_IXSM;
        if (hcksum_flags & HCK_PARTIALCKSUM)
            first_tbd->read.olinfo_status |=
                IXGBE_ADVTXD_POPTS_TXSM;
    }

    /*
     * The last descriptor of the packet needs the End Of Packet (EOP)
     * and Report Status (RS) bits set
     */
    ASSERT(tbd != NULL);
    tbd->read.cmd_type_len |=
        IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

    /*
     * Sync the DMA buffer of the tx descriptor ring
     */
    DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

    /*
     * Update the number of the free tx descriptors.
     * The mutual exclusion between the transmission and the recycling
     * (for the tx descriptor ring and the work list) is implemented
     * with the atomic operation on the number of the free tx descriptors.
     *
     * Note: we should always decrement the counter tbd_free before
     * advancing the hardware TDT pointer to avoid the race condition -
     * otherwise, between the TDT advance and the decrement, the hardware
     * could complete the transmit and the counter tbd_free could already
     * have been increased by the tx recycling.
     */
    i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
    ASSERT(i >= 0);

    tx_ring->tbd_tail = index;

    /*
     * Advance the hardware TDT pointer of the tx descriptor ring
     */
    IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

    if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
        DDI_FM_OK) {
        ddi_fm_service_impact(tx_ring->ixgbe->dip,
            DDI_SERVICE_DEGRADED);
        atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
    }

    return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
    sw_desc_t *desc;

    desc = &tcb->desc[tcb->desc_num];
    desc->address = address;
    desc->length = length;

    tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
    uint32_t index, last_index, prev_index;
    int desc_num;
    boolean_t desc_done;
    tx_control_block_t *tcb;
    link_list_t pending_list;
    ixgbe_t *ixgbe = tx_ring->ixgbe;

    mutex_enter(&tx_ring->recycle_lock);

    ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

    if (tx_ring->tbd_free == tx_ring->ring_size) {
        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;
        if (tx_ring->reschedule) {
            tx_ring->reschedule = B_FALSE;
            mac_tx_ring_update(ixgbe->mac_hdl,
                tx_ring->ring_handle);
        }
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    /*
     * Sync the DMA buffer of the tx descriptor ring
     */
    DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

    if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
        mutex_exit(&tx_ring->recycle_lock);
        ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
        atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
        return (0);
    }

    LINK_LIST_INIT(&pending_list);
    desc_num = 0;
    index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */

    tcb = tx_ring->work_list[index];
    ASSERT(tcb != NULL);

    while (tcb != NULL) {
        /*
         * Get the last tx descriptor of this packet.
         * If the last tx descriptor is done, then
         * we can recycle all descriptors of a packet
         * which usually includes several tx control blocks.
         * For the 82599, LSO descriptors cannot be recycled
         * unless the whole packet's transmission is done.
         * That's why packet-level recycling is used here.
         * For the 82598, there's no such limit.
         */
        last_index = tcb->last_index;
        /*
         * MAX_TX_RING_SIZE is used to judge whether
         * the index is a valid value or not.
         */
        if (last_index == MAX_TX_RING_SIZE)
            break;

        /*
         * Check if the Descriptor Done bit is set
         */
        desc_done = tx_ring->tbd_ring[last_index].wb.status &
            IXGBE_TXD_STAT_DD;
        if (desc_done) {
            /*
             * Recycle all descriptors of the packet
             */
            while (tcb != NULL) {
                /*
                 * Strip off the tx control block from
                 * the work list, and add it to the
                 * pending list.
                 */
                tx_ring->work_list[index] = NULL;
                LIST_PUSH_TAIL(&pending_list, &tcb->link);

                /*
                 * Count the total number of the tx
                 * descriptors recycled
                 */
                desc_num += tcb->desc_num;

                index = NEXT_INDEX(index, tcb->desc_num,
                    tx_ring->ring_size);

                tcb = tx_ring->work_list[index];

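                /*
                 * If the descriptor just before the new
                 * index was the packet's last descriptor,
                 * every tx control block of this packet
                 * has been collected; stop the inner loop
                 * and examine the next packet.
                 */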
                prev_index = PREV_INDEX(index, 1,
                    tx_ring->ring_size);
                if (prev_index == last_index)
                    break;
            }
        } else {
            break;
        }
    }

    /*
     * If no tx descriptors are recycled, no need to do more processing
     */
    if (desc_num == 0) {
        tx_ring->recycle_fail++;
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    tx_ring->recycle_fail = 0;
    tx_ring->stall_watchdog = 0;

    /*
     * Update the head index of the tx descriptor ring
     */
    tx_ring->tbd_head = index;

    /*
     * Update the number of the free tx descriptors with atomic operations
     */
    atomic_add_32(&tx_ring->tbd_free, desc_num);

    if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
        (tx_ring->reschedule)) {
        tx_ring->reschedule = B_FALSE;
        mac_tx_ring_update(ixgbe->mac_hdl,
            tx_ring->ring_handle);
    }
    mutex_exit(&tx_ring->recycle_lock);

    /*
     * Free the resources used by the tx control blocks
     * in the pending list
     */
    tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
    while (tcb != NULL) {
        /*
         * Release the resources occupied by the tx control block
         */
        ixgbe_free_tcb(tcb);

        tcb = (tx_control_block_t *)
            LIST_GET_NEXT(&pending_list, &tcb->link);
    }

    /*
     * Add the tx control blocks in the pending list to the free list.
     */
    ixgbe_put_free_list(tx_ring, &pending_list);

    return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		/*
		 * Drop the recycle lock before returning; otherwise it
		 * would be leaked on this error path.
		 */
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}
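/*
 * Illustration only (assumptions, not driver code): the partial
 * ddi_dma_sync() above skips the descriptor ring itself and syncs
 * just the 32-bit head value the hardware writes one descriptor-slot
 * past the end of the ring.  The hypothetical sketch below shows how
 * tbd_head_wb is presumed to point into the DMA area, and the
 * wrap-around distance OFFSET() is assumed to compute; the real
 * definitions in the driver may differ.
 */
#if 0	/* example only, never compiled */
#define	EX_OFFSET(start, stop, size)	\
	(((start) <= (stop)) ? ((stop) - (start)) :	\
	    ((size) - (start) + (stop)))

static void
ex_setup_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	/* The write-back word lives right after the last real tbd */
	tx_ring->tbd_head_wb = (uint32_t *)((uintptr_t)
	    tx_ring->tbd_area.address +
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size);
}
#endif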
/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}
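/*
 * Illustration only (hypothetical helper, not driver code): a freed
 * tcb keeps its preallocated copy buffer and its tx DMA handle, so
 * reuse on the next packet is cheap.  A minimal sketch of re-binding
 * the already-unbound handle to a new fragment, assuming standard DDI
 * semantics; the flags and error handling here are illustrative.
 */
#if 0	/* example only, never compiled */
static int
ex_rebind_tcb(tx_control_block_t *tcb, caddr_t addr, size_t len)
{
	ddi_dma_cookie_t cookie;
	uint_t ccount;

	/*
	 * Re-bind the tx DMA handle to a new fragment; no handle
	 * allocation is needed per packet.
	 */
	if (ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    addr, len, DDI_DMA_WRITE | DDI_DMA_STREAMING,
	    DDI_DMA_DONTWAIT, NULL, &cookie, &ccount) != DDI_DMA_MAPPED)
		return (-1);

	tcb->tx_type = USE_DMA;
	return (0);
}
#endif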
/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control
 * blocks in the free list is used to keep this routine mutually
 * exclusive with the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}
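/*
 * Illustration only (assumption, not driver code):
 * ixgbe_atomic_reserve() is presumed to decrement the counter only
 * when enough resources remain, along the lines of the hypothetical
 * compare-and-swap loop below, so concurrent getters can never
 * oversubscribe the free list.
 */
#if 0	/* example only, never compiled */
static int
ex_atomic_reserve(uint32_t *count_p, uint32_t n)
{
	uint32_t oldval, newval;

	/* ATOMICALLY */
	do {
		oldval = *count_p;
		if (oldval < n)
			return (-1);	/* nothing left to reserve */
		newval = oldval - n;
	} while (atomic_cas_32(count_p, oldval, newval) != oldval);

	return (newval);
}
#endif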
/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual
 * exclusion between ixgbe_get_free_list and ixgbe_put_free_list is
 * implemented with the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}
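/*
 * Illustration only (hypothetical caller, not driver code): the two
 * ends of the free list cooperate without ever nesting locks.  The
 * tx path pops one tcb at a time under tcb_head_lock; the recycle
 * path pushes a whole pending list back under tcb_tail_lock; the
 * atomic counter tcb_free is the only state shared between them.
 */
#if 0	/* example only, never compiled */
static void
ex_free_list_round_trip(ixgbe_tx_ring_t *tx_ring)
{
	link_list_t pending_list;
	tx_control_block_t *tcb;

	/* Consumer end: fails softly when the free list is empty */
	tcb = ixgbe_get_free_list(tx_ring);
	if (tcb == NULL)
		return;

	/* ... use the tcb to copy or bind a packet fragment ... */

	/* Producer end: give the tcb back after recycling */
	LINK_LIST_INIT(&pending_list);
	LIST_PUSH_TAIL(&pending_list, &tcb->link);
	ixgbe_put_free_list(tx_ring, &pending_list);
}
#endif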