xref: /onnv-gate/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision 6621:c38d03b60f12)
1*6621Sbt150084 /*
2*6621Sbt150084  * CDDL HEADER START
3*6621Sbt150084  *
4*6621Sbt150084  * Copyright(c) 2007-2008 Intel Corporation. All rights reserved.
5*6621Sbt150084  * The contents of this file are subject to the terms of the
6*6621Sbt150084  * Common Development and Distribution License (the "License").
7*6621Sbt150084  * You may not use this file except in compliance with the License.
8*6621Sbt150084  *
9*6621Sbt150084  * You can obtain a copy of the license at:
10*6621Sbt150084  *      http://www.opensolaris.org/os/licensing.
11*6621Sbt150084  * See the License for the specific language governing permissions
12*6621Sbt150084  * and limitations under the License.
13*6621Sbt150084  *
14*6621Sbt150084  * When using or redistributing this file, you may do so under the
15*6621Sbt150084  * License only. No other modification of this header is permitted.
16*6621Sbt150084  *
17*6621Sbt150084  * If applicable, add the following below this CDDL HEADER, with the
18*6621Sbt150084  * fields enclosed by brackets "[]" replaced with your own identifying
19*6621Sbt150084  * information: Portions Copyright [yyyy] [name of copyright owner]
20*6621Sbt150084  *
21*6621Sbt150084  * CDDL HEADER END
22*6621Sbt150084  */
23*6621Sbt150084 
24*6621Sbt150084 /*
25*6621Sbt150084  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
26*6621Sbt150084  * Use is subject to license terms of the CDDL.
27*6621Sbt150084  */
28*6621Sbt150084 
29*6621Sbt150084 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30*6621Sbt150084 
31*6621Sbt150084 #include "ixgbe_sw.h"
32*6621Sbt150084 
33*6621Sbt150084 /* function prototypes */
34*6621Sbt150084 static mblk_t *ixgbe_rx_bind(ixgbe_rx_ring_t *, uint32_t, uint32_t);
35*6621Sbt150084 static mblk_t *ixgbe_rx_copy(ixgbe_rx_ring_t *, uint32_t, uint32_t);
36*6621Sbt150084 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
37*6621Sbt150084 
38*6621Sbt150084 #ifndef IXGBE_DEBUG
39*6621Sbt150084 #pragma inline(ixgbe_rx_assoc_hcksum)
40*6621Sbt150084 #endif
41*6621Sbt150084 
/*
 * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
 *
 * This function is called when an mp is freed by the user thru
 * freeb call (Only for mp constructed through desballoc call).
 * It returns back the freed buffer to the free list.
 */
void
ixgbe_rx_recycle(caddr_t arg)
{
	ixgbe_rx_ring_t *rx_ring;
	rx_control_block_t *recycle_rcb;
	uint32_t free_index;

	/*
	 * 'arg' was registered as the free_rtn argument of the mblk
	 * built by desballoc, and is really the rx control block that
	 * owns the DMA buffer being returned.
	 */
	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
	rx_ring = recycle_rcb->rx_ring;

	/*
	 * Already on the free list -- nothing to do.
	 * NOTE(review): this state test and the RCB_FREE store below are
	 * done without holding recycle_lock; presumably each rcb can only
	 * be freed from one context at a time -- confirm against the
	 * teardown path.
	 */
	if (recycle_rcb->state == RCB_FREE)
		return;

	recycle_rcb->state = RCB_FREE;

	/* The sent-up mblk must have been disassociated in ixgbe_rx_bind() */
	ASSERT(recycle_rcb->mp == NULL);

	/*
	 * Using the recycled data buffer to generate a new mblk
	 */
	recycle_rcb->mp = desballoc((unsigned char *)
	    (recycle_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
	    (recycle_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
	    0, &recycle_rcb->free_rtn);
	/*
	 * The mblk starts IPHDR_ALIGN_ROOM bytes before the DMA address,
	 * so advance rptr/wptr past that pad to keep the IP header
	 * aligned.  If desballoc failed, mp stays NULL and
	 * ixgbe_rx_bind() will retry the allocation on next use.
	 */
	if (recycle_rcb->mp != NULL) {
		recycle_rcb->mp->b_rptr += IPHDR_ALIGN_ROOM;
		recycle_rcb->mp->b_wptr += IPHDR_ALIGN_ROOM;
	}

	/*
	 * Put the recycled rx control block into free list
	 */
	mutex_enter(&rx_ring->recycle_lock);

	free_index = rx_ring->rcb_tail;
	ASSERT(rx_ring->free_list[free_index] == NULL);

	rx_ring->free_list[free_index] = recycle_rcb;
	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);

	mutex_exit(&rx_ring->recycle_lock);

	/*
	 * The atomic operation on the number of the available rx control
	 * blocks in the free list is used to make the recycling mutual
	 * exclusive with the receiving.
	 */
	atomic_inc_32(&rx_ring->rcb_free);
	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
}
99*6621Sbt150084 
100*6621Sbt150084 /*
101*6621Sbt150084  * ixgbe_rx_copy - Use copy to process the received packet.
102*6621Sbt150084  *
103*6621Sbt150084  * This function will use bcopy to process the packet
104*6621Sbt150084  * and send the copied packet upstream.
105*6621Sbt150084  */
106*6621Sbt150084 static mblk_t *
107*6621Sbt150084 ixgbe_rx_copy(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
108*6621Sbt150084 {
109*6621Sbt150084 	rx_control_block_t *current_rcb;
110*6621Sbt150084 	mblk_t *mp;
111*6621Sbt150084 
112*6621Sbt150084 	current_rcb = rx_ring->work_list[index];
113*6621Sbt150084 
114*6621Sbt150084 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
115*6621Sbt150084 
116*6621Sbt150084 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
117*6621Sbt150084 	    DDI_FM_OK) {
118*6621Sbt150084 		ddi_fm_service_impact(rx_ring->ixgbe->dip,
119*6621Sbt150084 		    DDI_SERVICE_DEGRADED);
120*6621Sbt150084 	}
121*6621Sbt150084 
122*6621Sbt150084 	/*
123*6621Sbt150084 	 * Allocate buffer to receive this packet
124*6621Sbt150084 	 */
125*6621Sbt150084 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
126*6621Sbt150084 	if (mp == NULL) {
127*6621Sbt150084 		ixgbe_log(rx_ring->ixgbe,
128*6621Sbt150084 		    "ixgbe_rx_copy: allocate buffer failed");
129*6621Sbt150084 		return (NULL);
130*6621Sbt150084 	}
131*6621Sbt150084 
132*6621Sbt150084 	/*
133*6621Sbt150084 	 * Copy the data received into the new cluster
134*6621Sbt150084 	 */
135*6621Sbt150084 	mp->b_rptr += IPHDR_ALIGN_ROOM;
136*6621Sbt150084 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
137*6621Sbt150084 	mp->b_wptr = mp->b_rptr + pkt_len;
138*6621Sbt150084 
139*6621Sbt150084 	return (mp);
140*6621Sbt150084 }
141*6621Sbt150084 
/*
 * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
 *
 * This function will use pre-bound DMA buffer to receive the packet
 * and build mblk that will be sent upstream.  A replacement rcb is
 * taken from the free list to refill the work list slot.  Returns
 * NULL when the caller should fall back to ixgbe_rx_copy().
 */
static mblk_t *
ixgbe_rx_bind(ixgbe_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	rx_control_block_t *free_rcb;
	uint32_t free_index;
	mblk_t *mp;

	/*
	 * If the free list is empty, we cannot proceed to send
	 * the current DMA buffer upstream. We'll have to return
	 * and use bcopy to process the packet.
	 */
	if (ixgbe_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
		return (NULL);

	current_rcb = rx_ring->work_list[index];
	/*
	 * If the mp of the rx control block is NULL, try to do
	 * desballoc again (the attempt in ixgbe_rx_recycle() may
	 * have failed).
	 */
	if (current_rcb->mp == NULL) {
		current_rcb->mp = desballoc((unsigned char *)
		    (current_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
		    (current_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
		    0, &current_rcb->free_rtn);
		/*
		 * If it is failed to built a mblk using the current
		 * DMA buffer, we have to return and use bcopy to
		 * process the packet.  Roll back the reservation
		 * taken above before returning.
		 */
		if (current_rcb->mp == NULL) {
			atomic_inc_32(&rx_ring->rcb_free);
			return (NULL);
		}
	}
	/*
	 * Sync up the data received
	 */
	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(rx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	/*
	 * Hand the mblk over; RCB_SENDUP marks the buffer as owned by
	 * the stack until freeb() triggers ixgbe_rx_recycle().
	 */
	mp = current_rcb->mp;
	current_rcb->mp = NULL;
	current_rcb->state = RCB_SENDUP;

	mp->b_wptr = mp->b_rptr + pkt_len;
	mp->b_next = mp->b_cont = NULL;

	/*
	 * Strip off one free rx control block from the free list
	 * (guaranteed non-empty by the successful reserve above).
	 */
	free_index = rx_ring->rcb_head;
	free_rcb = rx_ring->free_list[free_index];
	ASSERT(free_rcb != NULL);
	rx_ring->free_list[free_index] = NULL;
	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);

	/*
	 * Put the rx control block to the work list
	 */
	rx_ring->work_list[index] = free_rcb;

	return (mp);
}
218*6621Sbt150084 
219*6621Sbt150084 /*
220*6621Sbt150084  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
221*6621Sbt150084  * the hcksum flags.
222*6621Sbt150084  */
223*6621Sbt150084 static void
224*6621Sbt150084 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
225*6621Sbt150084 {
226*6621Sbt150084 	uint32_t hcksum_flags = 0;
227*6621Sbt150084 
228*6621Sbt150084 	/*
229*6621Sbt150084 	 * Check TCP/UDP checksum
230*6621Sbt150084 	 */
231*6621Sbt150084 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
232*6621Sbt150084 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
233*6621Sbt150084 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
234*6621Sbt150084 
235*6621Sbt150084 	/*
236*6621Sbt150084 	 * Check IP Checksum
237*6621Sbt150084 	 */
238*6621Sbt150084 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
239*6621Sbt150084 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
240*6621Sbt150084 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
241*6621Sbt150084 
242*6621Sbt150084 	if (hcksum_flags != 0) {
243*6621Sbt150084 		(void) hcksum_assoc(mp,
244*6621Sbt150084 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
245*6621Sbt150084 	}
246*6621Sbt150084 }
247*6621Sbt150084 
/*
 * ixgbe_rx - Receive the data of one ring.
 *
 * This function goes through h/w descriptors in one specified rx ring,
 * receives the data if the descriptor status shows the data is ready.
 * It returns a chain of mblks containing the received data, to be
 * passed up to mac_rx().
 */
mblk_t *
ixgbe_rx(ixgbe_rx_ring_t *rx_ring)
{
	union ixgbe_adv_rx_desc *current_rbd;
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	mblk_t *mblk_head;	/* head of the chain returned to mac_rx() */
	mblk_t **mblk_tail;	/* where the next mblk gets linked in */
	uint32_t rx_next;
	uint32_t rx_tail;
	uint32_t pkt_len;
	uint32_t status_error;
	uint32_t pkt_num;
	ixgbe_t *ixgbe = rx_ring->ixgbe;
	struct ixgbe_hw *hw = &ixgbe->hw;

	mblk_head = NULL;
	mblk_tail = &mblk_head;

	/*
	 * Sync the receive descriptors before accepting the packets
	 */
	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(rx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	/*
	 * Get the start point of rx bd ring which should be examined
	 * during this cycle.
	 */
	rx_next = rx_ring->rbd_next;

	current_rbd = &rx_ring->rbd_ring[rx_next];
	pkt_num = 0;
	status_error = current_rbd->wb.upper.status_error;
	/* The DD (descriptor done) bit means hardware has filled this slot */
	while (status_error & IXGBE_RXD_STAT_DD) {
		/*
		 * If adapter has found errors, but the error
		 * is hardware checksum error, this does not discard the
		 * packet: let upper layer compute the checksum;
		 * Otherwise discard the packet.  A descriptor without
		 * EOP means a multi-descriptor packet, which this path
		 * also discards.
		 */
		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
		    !(status_error & IXGBE_RXD_STAT_EOP)) {
			IXGBE_DEBUG_STAT(rx_ring->stat_frame_error);
			goto rx_discard;
		}

		IXGBE_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
		    (status_error & IXGBE_RXDADV_ERR_TCPE) ||
		    (status_error & IXGBE_RXDADV_ERR_IPE));

		pkt_len = current_rbd->wb.upper.length;
		mp = NULL;
		/*
		 * For packets with length more than the copy threshold,
		 * we'll first try to use the existing DMA buffer to build
		 * an mblk and send the mblk upstream.
		 *
		 * If the first method fails, or the packet length is less
		 * than the copy threshold, we'll allocate a new mblk and
		 * copy the packet data to the new mblk.
		 */
		if (pkt_len > rx_ring->copy_thresh)
			mp = ixgbe_rx_bind(rx_ring, rx_next, pkt_len);

		if (mp == NULL)
			mp = ixgbe_rx_copy(rx_ring, rx_next, pkt_len);

		if (mp != NULL) {
			/*
			 * Check h/w checksum offload status
			 */
			if (ixgbe->rx_hcksum_enable)
				ixgbe_rx_assoc_hcksum(mp, status_error);

			/* Append to the chain via the tail pointer */
			*mblk_tail = mp;
			mblk_tail = &mp->b_next;
		}

rx_discard:
		/*
		 * Reset rx descriptor read bits so the hardware can
		 * reuse this slot (the work list entry may have been
		 * swapped by ixgbe_rx_bind()).
		 */
		current_rcb = rx_ring->work_list[rx_next];
		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
		current_rbd->read.hdr_addr = 0;

		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);

		/*
		 * The receive function is in interrupt context, so here
		 * limit_per_intr is used to avoid doing receiving too long
		 * per interrupt.
		 */
		if (++pkt_num > rx_ring->limit_per_intr) {
			IXGBE_DEBUG_STAT(rx_ring->stat_exceed_pkt);
			break;
		}

		current_rbd = &rx_ring->rbd_ring[rx_next];
		status_error = current_rbd->wb.upper.status_error;
	}

	/* Flush the rewritten descriptors back to the device */
	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);

	rx_ring->rbd_next = rx_next;

	/*
	 * Update the h/w tail accordingly.  The tail is set to the
	 * descriptor just before rbd_next; NOTE(review): presumably so
	 * the hardware never catches up to the software index -- confirm
	 * against the 82598 datasheet's RDT semantics.
	 */
	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);

	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail);

	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(rx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	return (mblk_head);
}
381