xref: /onnv-gate/usr/src/uts/common/io/igb/igb_rx.c (revision 11367:f4bcaf08946c)
15779Sxy150489 /*
25779Sxy150489  * CDDL HEADER START
35779Sxy150489  *
48571SChenlu.Chen@Sun.COM  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
55779Sxy150489  * The contents of this file are subject to the terms of the
65779Sxy150489  * Common Development and Distribution License (the "License").
75779Sxy150489  * You may not use this file except in compliance with the License.
85779Sxy150489  *
98571SChenlu.Chen@Sun.COM  * You can obtain a copy of the license at:
108571SChenlu.Chen@Sun.COM  *	http://www.opensolaris.org/os/licensing.
115779Sxy150489  * See the License for the specific language governing permissions
125779Sxy150489  * and limitations under the License.
135779Sxy150489  *
148571SChenlu.Chen@Sun.COM  * When using or redistributing this file, you may do so under the
158571SChenlu.Chen@Sun.COM  * License only. No other modification of this header is permitted.
168571SChenlu.Chen@Sun.COM  *
175779Sxy150489  * If applicable, add the following below this CDDL HEADER, with the
185779Sxy150489  * fields enclosed by brackets "[]" replaced with your own identifying
195779Sxy150489  * information: Portions Copyright [yyyy] [name of copyright owner]
205779Sxy150489  *
215779Sxy150489  * CDDL HEADER END
225779Sxy150489  */
235779Sxy150489 
245779Sxy150489 /*
258571SChenlu.Chen@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
268571SChenlu.Chen@Sun.COM  * Use is subject to license terms of the CDDL.
278275SEric Cheng  */
285779Sxy150489 
295779Sxy150489 #include "igb_sw.h"
305779Sxy150489 
315779Sxy150489 /* function prototypes */
325779Sxy150489 static mblk_t *igb_rx_bind(igb_rx_ring_t *, uint32_t, uint32_t);
335779Sxy150489 static mblk_t *igb_rx_copy(igb_rx_ring_t *, uint32_t, uint32_t);
345779Sxy150489 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);
355779Sxy150489 
365779Sxy150489 #ifndef IGB_DEBUG
375779Sxy150489 #pragma inline(igb_rx_assoc_hcksum)
385779Sxy150489 #endif
395779Sxy150489 
405779Sxy150489 
/*
 * igb_rx_recycle - the call-back function to reclaim rx buffer
 *
 * This function is called when an mblk is freed by the user through
 * a freeb call (only for mblks constructed through desballoc).
 * It returns the freed buffer to the ring's free list.
 */
void
igb_rx_recycle(caddr_t arg)
{
	igb_rx_ring_t *rx_ring;
	rx_control_block_t *recycle_rcb;
	uint32_t free_index;

	/*
	 * arg is the rx control block itself; it was registered as the
	 * free_rtn argument when the mblk was built with desballoc.
	 */
	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
	rx_ring = recycle_rcb->rx_ring;

	/* Already on the free list: guard against a double recycle */
	if (recycle_rcb->state == RCB_FREE)
		return;

	recycle_rcb->state = RCB_FREE;

	ASSERT(recycle_rcb->mp == NULL);

	/*
	 * Using the recycled data buffer to generate a new mblk.
	 * desballoc may fail and leave mp NULL; that is tolerated --
	 * igb_rx_bind retries desballoc when it picks this rcb up.
	 */
	recycle_rcb->mp = desballoc((unsigned char *)
	    recycle_rcb->rx_buf.address,
	    recycle_rcb->rx_buf.size,
	    0, &recycle_rcb->free_rtn);

	/*
	 * Put the recycled rx control block into the free list.
	 * recycle_lock protects the tail index and the slot write.
	 */
	mutex_enter(&rx_ring->recycle_lock);

	free_index = rx_ring->rcb_tail;
	ASSERT(rx_ring->free_list[free_index] == NULL);

	rx_ring->free_list[free_index] = recycle_rcb;
	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);

	mutex_exit(&rx_ring->recycle_lock);

	/*
	 * The atomic operation on the number of the available rx control
	 * blocks in the free list is used to make the recycling mutually
	 * exclusive with the receiving; the increment must happen only
	 * after the rcb is published on the free list above.
	 */
	atomic_inc_32(&rx_ring->rcb_free);
	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
}
945779Sxy150489 
955779Sxy150489 /*
965779Sxy150489  * igb_rx_copy - Use copy to process the received packet
975779Sxy150489  *
985779Sxy150489  * This function will use bcopy to process the packet
995779Sxy150489  * and send the copied packet upstream
1005779Sxy150489  */
1015779Sxy150489 static mblk_t *
1025779Sxy150489 igb_rx_copy(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
1035779Sxy150489 {
1045779Sxy150489 	rx_control_block_t *current_rcb;
1055779Sxy150489 	mblk_t *mp;
1066624Sgl147354 	igb_t *igb = rx_ring->igb;
1075779Sxy150489 
1085779Sxy150489 	current_rcb = rx_ring->work_list[index];
1095779Sxy150489 
1105779Sxy150489 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
1115779Sxy150489 
1126624Sgl147354 	if (igb_check_dma_handle(
1136624Sgl147354 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
1146624Sgl147354 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
115*11367SJason.Xu@Sun.COM 		atomic_or_32(&igb->igb_state, IGB_ERROR);
116*11367SJason.Xu@Sun.COM 		return (NULL);
1176624Sgl147354 	}
1186624Sgl147354 
1195779Sxy150489 	/*
1205779Sxy150489 	 * Allocate buffer to receive this packet
1215779Sxy150489 	 */
1225779Sxy150489 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
1235779Sxy150489 	if (mp == NULL) {
1245779Sxy150489 		igb_log(rx_ring->igb, "igb_rx_copy: allocate buffer failed");
1255779Sxy150489 		return (NULL);
1265779Sxy150489 	}
1275779Sxy150489 
1285779Sxy150489 	/*
1295779Sxy150489 	 * Copy the data received into the new cluster
1305779Sxy150489 	 */
1315779Sxy150489 	mp->b_rptr += IPHDR_ALIGN_ROOM;
1325779Sxy150489 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
1335779Sxy150489 	mp->b_wptr = mp->b_rptr + pkt_len;
1345779Sxy150489 
1355779Sxy150489 	return (mp);
1365779Sxy150489 }
1375779Sxy150489 
/*
 * igb_rx_bind - Use existing DMA buffer to build mblk for receiving
 *
 * This function will use the pre-bound DMA buffer to receive the packet
 * and build an mblk that will be sent upstream.  Returns NULL when the
 * caller should fall back to igb_rx_copy.
 */
static mblk_t *
igb_rx_bind(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	rx_control_block_t *free_rcb;
	uint32_t free_index;
	mblk_t *mp;
	igb_t *igb = rx_ring->igb;

	/*
	 * If the free list is empty, we cannot proceed to send
	 * the current DMA buffer upstream. We'll have to return
	 * and use bcopy to process the packet.
	 * igb_atomic_reserve decrements rcb_free only if one is
	 * available; every failure path below must give it back.
	 */
	if (igb_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
		return (NULL);

	current_rcb = rx_ring->work_list[index];
	/*
	 * If the mp of the rx control block is NULL, try to do
	 * desballoc again (a previous desballoc for this rcb may
	 * have failed, e.g. in igb_rx_recycle).
	 */
	if (current_rcb->mp == NULL) {
		current_rcb->mp = desballoc((unsigned char *)
		    current_rcb->rx_buf.address,
		    current_rcb->rx_buf.size,
		    0, &current_rcb->free_rtn);
		/*
		 * If building an mblk from the current DMA buffer
		 * fails, we have to return the reservation and use
		 * bcopy to process the packet.
		 */
		if (current_rcb->mp == NULL) {
			atomic_inc_32(&rx_ring->rcb_free);
			return (NULL);
		}
	}
	/*
	 * Sync up the data received
	 */
	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
		/* Fault: report, mark errored, and undo the reservation */
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		atomic_inc_32(&rx_ring->rcb_free);
		return (NULL);
	}

	/* Hand the mblk upstream; the rcb no longer owns it */
	mp = current_rcb->mp;
	current_rcb->mp = NULL;
	current_rcb->state = RCB_SENDUP;

	mp->b_wptr = mp->b_rptr + pkt_len;
	mp->b_next = mp->b_cont = NULL;

	/*
	 * Strip off one free rx control block from the free list
	 * (the slot was reserved by igb_atomic_reserve above).
	 */
	free_index = rx_ring->rcb_head;
	free_rcb = rx_ring->free_list[free_index];
	ASSERT(free_rcb != NULL);
	rx_ring->free_list[free_index] = NULL;
	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);

	/*
	 * Put the rx control block to the work list, replacing the
	 * one whose buffer was just loaned upstream.
	 */
	rx_ring->work_list[index] = free_rcb;

	return (mp);
}
2175779Sxy150489 
2185779Sxy150489 /*
2195779Sxy150489  * igb_rx_assoc_hcksum
2205779Sxy150489  *
2215779Sxy150489  * Check the rx hardware checksum status and associate the hcksum flags
2225779Sxy150489  */
2235779Sxy150489 static void
2245779Sxy150489 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
2255779Sxy150489 {
2265779Sxy150489 	uint32_t hcksum_flags = 0;
2275779Sxy150489 
2285779Sxy150489 	/* Ignore Checksum Indication */
2295779Sxy150489 	if (status_error & E1000_RXD_STAT_IXSM)
2305779Sxy150489 		return;
2315779Sxy150489 
2325779Sxy150489 	/*
2335779Sxy150489 	 * Check TCP/UDP checksum
2345779Sxy150489 	 */
2355779Sxy150489 	if (((status_error & E1000_RXD_STAT_TCPCS) ||
2365779Sxy150489 	    (status_error & E1000_RXD_STAT_UDPCS)) &&
2375779Sxy150489 	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
2385779Sxy150489 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
2395779Sxy150489 
2405779Sxy150489 	/*
2415779Sxy150489 	 * Check IP Checksum
2425779Sxy150489 	 */
2435779Sxy150489 	if ((status_error & E1000_RXD_STAT_IPCS) &&
2445779Sxy150489 	    !(status_error & E1000_RXDEXT_STATERR_IPE))
2455779Sxy150489 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
2465779Sxy150489 
2475779Sxy150489 	if (hcksum_flags != 0) {
2485779Sxy150489 		(void) hcksum_assoc(mp,
2495779Sxy150489 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
2505779Sxy150489 	}
2515779Sxy150489 }
2525779Sxy150489 
2538275SEric Cheng mblk_t *
2548275SEric Cheng igb_rx_ring_poll(void *arg, int bytes)
2558275SEric Cheng {
2568275SEric Cheng 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
2578275SEric Cheng 	mblk_t *mp = NULL;
2588275SEric Cheng 
2598275SEric Cheng 	ASSERT(bytes >= 0);
2608275SEric Cheng 
261*11367SJason.Xu@Sun.COM 	if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
262*11367SJason.Xu@Sun.COM 	    !(rx_ring->igb->igb_state & IGB_STARTED))
263*11367SJason.Xu@Sun.COM 		return (NULL);
2648275SEric Cheng 
2658275SEric Cheng 	mutex_enter(&rx_ring->rx_lock);
2668275SEric Cheng 	mp = igb_rx(rx_ring, bytes);
2678275SEric Cheng 	mutex_exit(&rx_ring->rx_lock);
2688275SEric Cheng 
2698275SEric Cheng 	return (mp);
2708275SEric Cheng }
2718275SEric Cheng 
/*
 * igb_rx - Receive the data of one ring
 *
 * This function goes through the h/w descriptors of one specified rx
 * ring, receives the data if the descriptor status shows the data is
 * ready.  It returns a chain of mblks containing the received data, to
 * be passed up to mac_rx().  poll_bytes is a byte budget, or
 * IGB_NO_POLL when called from interrupt context.
 */
mblk_t *
igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
{
	union e1000_adv_rx_desc *current_rbd;
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	mblk_t *mblk_head;
	mblk_t **mblk_tail;	/* tail pointer for O(1) chain append */
	uint32_t rx_next;
	uint32_t rx_tail;
	uint32_t pkt_len;
	uint32_t status_error;
	uint32_t pkt_num;
	uint32_t total_bytes;
	igb_t *igb = rx_ring->igb;

	mblk_head = NULL;
	mblk_tail = &mblk_head;

	/* Don't touch the hardware once a fault has been flagged */
	if (igb->igb_state & IGB_ERROR)
		return (NULL);

	/*
	 * Sync the receive descriptors before
	 * accepting the packets
	 */
	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (NULL);
	}

	/*
	 * Get the start point of rx bd ring which should be examined
	 * during this cycle.
	 */
	rx_next = rx_ring->rbd_next;

	current_rbd = &rx_ring->rbd_ring[rx_next];
	pkt_num = 0;
	total_bytes = 0;
	status_error = current_rbd->wb.upper.status_error;
	/* DD (descriptor done) set means hardware has written this slot */
	while (status_error & E1000_RXD_STAT_DD) {
		/*
		 * If hardware has found the errors, but the error
		 * is hardware checksum error, here does not discard the
		 * packet, and let upper layer compute the checksum;
		 * Otherwise discard the packet.  A missing EOP means a
		 * multi-descriptor frame, which is also discarded.
		 */
		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
		    !(status_error & E1000_RXD_STAT_EOP)) {
			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
			goto rx_discard;
		}

		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
		    (status_error & E1000_RXDEXT_STATERR_IPE));

		pkt_len = current_rbd->wb.upper.length;

		/*
		 * In poll mode, stop (without consuming this descriptor)
		 * once the packet would exceed the byte budget.
		 */
		if ((poll_bytes != IGB_NO_POLL) &&
		    ((pkt_len + total_bytes) > poll_bytes))
			break;

		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
		total_bytes += pkt_len;

		mp = NULL;
		/*
		 * For packets with length more than the copy threshold,
		 * we'll first try to use the existing DMA buffer to build
		 * a mblk and send the mblk upstream.
		 *
		 * If the first method fails, or the packet length is less
		 * than the copy threshold, we'll allocate a new mblk and
		 * copy the packet data to the mblk.
		 */
		if (pkt_len > rx_ring->copy_thresh)
			mp = igb_rx_bind(rx_ring, rx_next, pkt_len);

		if (mp == NULL)
			mp = igb_rx_copy(rx_ring, rx_next, pkt_len);

		if (mp != NULL) {
			/*
			 * Check h/w checksum offload status
			 */
			if (igb->rx_hcksum_enable)
				igb_rx_assoc_hcksum(mp, status_error);

			/* Append to the chain via the tail pointer */
			*mblk_tail = mp;
			mblk_tail = &mp->b_next;
		}

rx_discard:
		/*
		 * Reset rx descriptor read bits so the hardware can
		 * reuse this slot (the rcb here may have been swapped
		 * by igb_rx_bind).
		 */
		current_rcb = rx_ring->work_list[rx_next];
		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
		current_rbd->read.hdr_addr = 0;

		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);

		/*
		 * The receive function is in interrupt context, so here
		 * limit_per_intr is used to avoid doing receiving too long
		 * per interrupt.
		 */
		if (++pkt_num > rx_ring->limit_per_intr) {
			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
			break;
		}

		current_rbd = &rx_ring->rbd_ring[rx_next];
		status_error = current_rbd->wb.upper.status_error;
	}

	/* Flush the descriptor updates back to the device */
	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);

	rx_ring->rbd_next = rx_next;

	/*
	 * Update the h/w tail accordingly (RDT points at the last
	 * descriptor the software has prepared).
	 */
	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);

	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
	}

	return (mblk_head);
}
420